diff --git a/src/config.py b/src/config.py index 81cf9f9..8363b87 100644 --- a/src/config.py +++ b/src/config.py @@ -31,7 +31,7 @@ class Settings(BaseSettings): # Processing chunk_size: int = Field(800, env="CHUNK_SIZE") # tokens per chunk chunk_overlap: int = Field(100, env="CHUNK_OVERLAP") - quality_threshold: float = Field(0.5, env="QUALITY_THRESHOLD") # 0-1, chunks below discarded + quality_threshold: float = Field(0.3, env="QUALITY_THRESHOLD") # 0-1, chunks below discarded # App log_level: str = Field("INFO", env="LOG_LEVEL") diff --git a/src/processor/processor.py b/src/processor/processor.py index bc4a526..7cb9125 100644 --- a/src/processor/processor.py +++ b/src/processor/processor.py @@ -227,6 +227,8 @@ class ContentProcessor: import anthropic prompt = ( f'Rate 0-10 how relevant this text is to the topic "{topic}". ' + f'Be generous — if the text is tangentially related, score 4+. ' + f'Only score below 3 if completely unrelated. ' f'Reply with only a number.\n\nText:\n{chunk[:500]}' ) try: @@ -251,7 +253,7 @@ class ContentProcessor: normalized = min(1.0, score / 10.0) logger.debug("Claude relevance score", raw=score, normalized=round(normalized, 2)) return normalized - return 0.6 + return 0.5 except Exception as e: logger.warning("Claude scoring failed, falling back to Ollama", error=str(e)) return await self._score_with_ollama(chunk, topic)