From d0e55ddb50c53e9181093deea64835bf9a2a941f Mon Sep 17 00:00:00 2001
From: ChemaVX
Date: Wed, 29 Apr 2026 08:04:12 +0000
Subject: [PATCH] feat: Claude Haiku for relevance scoring, fallback to Ollama

processor.py: split _score_quality into _score_with_claude and
_score_with_ollama; if ANTHROPIC_API_KEY is set, use Claude Haiku
(claude-haiku-4-5) with max_tokens=10 for fast, accurate 0-10 relevance
scoring; falls back to Ollama on any error

requirements.txt: add anthropic>=0.40.0

k8s: ANTHROPIC_API_KEY added to researchowl-secrets and mounted in
deployment; QUALITY_THRESHOLD restored to 0.4 (Claude scoring is accurate
enough to use the threshold)

Co-Authored-By: Claude Sonnet 4.6
---
 requirements.txt           |  3 +++
 src/processor/processor.py | 47 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 7ceab3e..329db96 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,6 +23,9 @@ tiktoken==0.7.0
 numpy==1.26.4
 scikit-learn==1.5.1
 
+# Claude API (scoring)
+anthropic>=0.40.0
+
 # Utilities
 pydantic==2.8.0
 pydantic-settings==2.4.0
diff --git a/src/processor/processor.py b/src/processor/processor.py
index 041449f..294ac0f 100644
--- a/src/processor/processor.py
+++ b/src/processor/processor.py
@@ -216,7 +216,46 @@ class ContentProcessor:
         return stored
 
     async def _score_quality(self, chunk: str, topic: str) -> float:
-        """Score 0-1 how relevant chunk is to topic. Single axis — avoids off-topic content."""
+        """Score 0-1 relevance to topic. Uses Claude Haiku if API key set, else Ollama."""
+        if settings.anthropic_api_key:
+            return await self._score_with_claude(chunk, topic)
+        return await self._score_with_ollama(chunk, topic)
+
+    async def _score_with_claude(self, chunk: str, topic: str) -> float:
+        """Score 0-1 relevance of chunk to topic using Claude.
+
+        Returns 0.6 when the reply contains no number; on any error
+        (including a missing SDK) delegates to _score_with_ollama.
+        """
+        prompt = (
+            f'Rate 0-10 how relevant this text is to the topic "{topic}". '
+            f'Reply with only a number.\n\nText:\n{chunk[:500]}'
+        )
+        try:
+            # Imported inside the try so a missing SDK also falls back to Ollama.
+            import anthropic
+
+            # Context manager closes the per-call HTTP connection pool.
+            async with anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key) as client:
+                msg = await client.messages.create(
+                    model=settings.claude_model,
+                    max_tokens=10,
+                    messages=[{"role": "user", "content": prompt}],
+                )
+            response = msg.content[0].text.strip()
+            numbers = re.findall(r'\b(\d+(?:\.\d+)?)\b', response)
+            if numbers:
+                score = float(numbers[0])
+                normalized = min(1.0, score / 10.0)
+                logger.debug("Claude relevance score", raw=score, normalized=round(normalized, 2))
+                return normalized
+            logger.debug("No number in Claude relevance response", response=response[:80])
+            return 0.6
+        except Exception as e:
+            logger.warning("Claude scoring failed, falling back to Ollama", error=str(e))
+            return await self._score_with_ollama(chunk, topic)
+
+    async def _score_with_ollama(self, chunk: str, topic: str) -> float:
         prompt = (
             f'Score 0-10: how relevant is this text to the topic "{topic}"?\n'
             f"0 = completely unrelated, 10 = directly and specifically about this topic.\n\n"
@@ -229,12 +268,12 @@ class ContentProcessor:
             if numbers:
                 score = float(numbers[0])
                 normalized = min(1.0, score / 10.0)
-                logger.debug("Relevance score", raw=score, normalized=round(normalized, 2))
+                logger.debug("Ollama relevance score", raw=score, normalized=round(normalized, 2))
                 return normalized
-            logger.debug("No number in relevance response", response=response[:80])
+            logger.debug("No number in Ollama relevance response", response=response[:80])
             return 0.6
         except Exception as e:
-            logger.warning("Relevance scoring failed", error=str(e))
+            logger.warning("Ollama relevance scoring failed", error=str(e))
             return 0.6
 
     async def rag_query(self, session_id: int, query: str, top_k: int = 20) -> str: