fix: lower QUALITY_THRESHOLD 0.5→0.3, make prompt scoring more generous

2026-05-04 10:35:08 +00:00
parent 0d8aee63be
commit e5b77ad72d
2 changed files with 4 additions and 2 deletions
@@ -31,7 +31,7 @@ class Settings(BaseSettings):
    # Processing
    chunk_size: int = Field(800, env="CHUNK_SIZE")                  # tokens per chunk
    chunk_overlap: int = Field(100, env="CHUNK_OVERLAP")
-    quality_threshold: float = Field(0.5, env="QUALITY_THRESHOLD")  # 0-1, chunks below discarded
+    quality_threshold: float = Field(0.3, env="QUALITY_THRESHOLD")  # 0-1, chunks below discarded

    # App
    log_level: str = Field("INFO", env="LOG_LEVEL")
@@ -227,6 +227,8 @@ class ContentProcessor:
        import anthropic
        prompt = (
            f'Rate 0-10 how relevant this text is to the topic "{topic}". '
+            f'Be generous — if the text is tangentially related, score 4+. '
+            f'Only score below 3 if completely unrelated. '
            f'Reply with only a number.\n\nText:\n{chunk[:500]}'
        )
        try:
@@ -251,7 +253,7 @@ class ContentProcessor:
                normalized = min(1.0, score / 10.0)
                logger.debug("Claude relevance score", raw=score, normalized=round(normalized, 2))
                return normalized
-            return 0.6
+            return 0.5
        except Exception as e:
            logger.warning("Claude scoring failed, falling back to Ollama", error=str(e))
            return await self._score_with_ollama(chunk, topic)