From d0e55ddb50c53e9181093deea64835bf9a2a941f Mon Sep 17 00:00:00 2001
From: ChemaVX <jmivanez@gmail.com>
Date: Wed, 29 Apr 2026 08:04:12 +0000
Subject: [PATCH] feat: Claude Haiku for relevance scoring, fallback to Ollama

processor.py: split _score_quality into _score_with_claude and
  _score_with_ollama; if ANTHROPIC_API_KEY is set, use Claude Haiku
  (model read from settings.claude_model; claude-haiku-4-5) with
  max_tokens=10 for fast, accurate 0-10 relevance scoring; falls back
  to Ollama if the Claude call raises, and returns the default score
  0.6 if the reply contains no number

requirements.txt: add anthropic>=0.40.0

k8s (manifests not included in this patch): ANTHROPIC_API_KEY added to
  researchowl-secrets and mounted in deployment; QUALITY_THRESHOLD
  restored to 0.4 (Claude scoring is accurate enough to use the
  threshold)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 requirements.txt           |  3 +++
 src/processor/processor.py | 38 ++++++++++++++++++++++++++++++++++----
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 7ceab3e..329db96 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,6 +23,9 @@ tiktoken==0.7.0
 numpy==1.26.4
 scikit-learn==1.5.1
 
+# Claude API (scoring)
+anthropic>=0.40.0
+
 # Utilities
 pydantic==2.8.0
 pydantic-settings==2.4.0
diff --git a/src/processor/processor.py b/src/processor/processor.py
index 041449f..294ac0f 100644
--- a/src/processor/processor.py
+++ b/src/processor/processor.py
@@ -216,7 +216,37 @@ class ContentProcessor:
         return stored
 
     async def _score_quality(self, chunk: str, topic: str) -> float:
-        """Score 0-1 how relevant chunk is to topic. Single axis — avoids off-topic content."""
+        """Score 0-1 relevance to topic. Uses Claude Haiku if API key set, else Ollama."""
+        if settings.anthropic_api_key:
+            return await self._score_with_claude(chunk, topic)
+        return await self._score_with_ollama(chunk, topic)
+
+    async def _score_with_claude(self, chunk: str, topic: str) -> float:
+        import anthropic
+        prompt = (
+            f'Rate 0-10 how relevant this text is to the topic "{topic}". '
+            f'Reply with only a number.\n\nText:\n{chunk[:500]}'
+        )
+        try:
+            client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key)
+            msg = await client.messages.create(
+                model=settings.claude_model,
+                max_tokens=10,
+                messages=[{"role": "user", "content": prompt}]
+            )
+            response = msg.content[0].text.strip()
+            numbers = re.findall(r'\b(\d+(?:\.\d+)?)\b', response)
+            if numbers:
+                score = float(numbers[0])
+                normalized = min(1.0, score / 10.0)
+                logger.debug("Claude relevance score", raw=score, normalized=round(normalized, 2))
+                return normalized
+            return 0.6
+        except Exception as e:
+            logger.warning("Claude scoring failed, falling back to Ollama", error=str(e))
+            return await self._score_with_ollama(chunk, topic)
+
+    async def _score_with_ollama(self, chunk: str, topic: str) -> float:
         prompt = (
             f'Score 0-10: how relevant is this text to the topic "{topic}"?\n'
             f"0 = completely unrelated, 10 = directly and specifically about this topic.\n\n"
@@ -229,12 +259,12 @@ class ContentProcessor:
             if numbers:
                 score = float(numbers[0])
                 normalized = min(1.0, score / 10.0)
-                logger.debug("Relevance score", raw=score, normalized=round(normalized, 2))
+                logger.debug("Ollama relevance score", raw=score, normalized=round(normalized, 2))
                 return normalized
-            logger.debug("No number in relevance response", response=response[:80])
+            logger.debug("No number in Ollama relevance response", response=response[:80])
             return 0.6
         except Exception as e:
-            logger.warning("Relevance scoring failed", error=str(e))
+            logger.warning("Ollama relevance scoring failed", error=str(e))
             return 0.6
 
     async def rag_query(self, session_id: int, query: str, top_k: int = 20) -> str: