From e5b77ad72d2c02dca1c284c5d43d6c6420ae25a7 Mon Sep 17 00:00:00 2001 From: ChemaVX Date: Mon, 4 May 2026 10:35:08 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20QUALITY=5FTHRESHOLD=200.5=E2=86=920.3,?= =?UTF-8?q?=20prompt=20scoring=20m=C3=A1s=20generoso?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config.py | 2 +- src/processor/processor.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/config.py b/src/config.py index 81cf9f9..8363b87 100644 --- a/src/config.py +++ b/src/config.py @@ -31,7 +31,7 @@ class Settings(BaseSettings): # Processing chunk_size: int = Field(800, env="CHUNK_SIZE") # tokens per chunk chunk_overlap: int = Field(100, env="CHUNK_OVERLAP") - quality_threshold: float = Field(0.5, env="QUALITY_THRESHOLD") # 0-1, chunks below discarded + quality_threshold: float = Field(0.3, env="QUALITY_THRESHOLD") # 0-1, chunks below discarded # App log_level: str = Field("INFO", env="LOG_LEVEL") diff --git a/src/processor/processor.py b/src/processor/processor.py index bc4a526..7cb9125 100644 --- a/src/processor/processor.py +++ b/src/processor/processor.py @@ -227,6 +227,8 @@ class ContentProcessor: import anthropic prompt = ( f'Rate 0-10 how relevant this text is to the topic "{topic}". ' + f'Be generous — if the text is tangentially related, score 4+. ' + f'Only score below 3 if completely unrelated. ' f'Reply with only a number.\n\nText:\n{chunk[:500]}' ) try: @@ -251,7 +253,7 @@ class ContentProcessor: normalized = min(1.0, score / 10.0) logger.debug("Claude relevance score", raw=score, normalized=round(normalized, 2)) return normalized - return 0.6 + return 0.5 except Exception as e: logger.warning("Claude scoring failed, falling back to Ollama", error=str(e)) return await self._score_with_ollama(chunk, topic)