diff --git a/requirements.txt b/requirements.txt
index 7ceab3e..329db96 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,6 +23,9 @@ tiktoken==0.7.0
 numpy==1.26.4
 scikit-learn==1.5.1
 
+# Claude API (scoring)
+anthropic>=0.40.0
+
 # Utilities
 pydantic==2.8.0
 pydantic-settings==2.4.0
diff --git a/src/processor/processor.py b/src/processor/processor.py
index 041449f..294ac0f 100644
--- a/src/processor/processor.py
+++ b/src/processor/processor.py
@@ -216,7 +216,55 @@ class ContentProcessor:
         return stored
 
     async def _score_quality(self, chunk: str, topic: str) -> float:
-        """Score 0-1 how relevant chunk is to topic. Single axis — avoids off-topic content."""
+        """Score 0-1 how relevant chunk is to topic.
+
+        Uses Claude (``settings.claude_model``) when ``settings.anthropic_api_key``
+        is set, otherwise Ollama.
+        """
+        if settings.anthropic_api_key:
+            return await self._score_with_claude(chunk, topic)
+        return await self._score_with_ollama(chunk, topic)
+
+    async def _score_with_claude(self, chunk: str, topic: str) -> float:
+        """Score relevance via the Anthropic Messages API, normalized to 0-1.
+
+        Falls back to Ollama on any failure (SDK not installed, API error,
+        empty response). Returns 0.6 when the reply contains no number,
+        matching the Ollama scorer.
+        """
+        prompt = (
+            f'Rate 0-10 how relevant this text is to the topic "{topic}". '
+            f'Reply with only a number.\n\nText:\n{chunk[:500]}'
+        )
+        try:
+            # Imported inside the try so a missing SDK degrades to Ollama
+            # instead of raising ImportError out of the scorer.
+            import anthropic
+
+            # Context manager closes the HTTP connection pool after the call;
+            # a bare per-call client would leave it open until garbage collection.
+            async with anthropic.AsyncAnthropic(
+                api_key=settings.anthropic_api_key
+            ) as client:
+                msg = await client.messages.create(
+                    model=settings.claude_model,
+                    max_tokens=10,
+                    messages=[{"role": "user", "content": prompt}],
+                )
+            response = msg.content[0].text.strip()
+            numbers = re.findall(r'\b(\d+(?:\.\d+)?)\b', response)
+            if numbers:
+                score = float(numbers[0])
+                normalized = min(1.0, score / 10.0)
+                logger.debug("Claude relevance score", raw=score, normalized=round(normalized, 2))
+                return normalized
+            logger.debug("No number in Claude relevance response", response=response[:80])
+            return 0.6
+        except Exception as e:
+            logger.warning("Claude scoring failed, falling back to Ollama", error=str(e))
+            return await self._score_with_ollama(chunk, topic)
+
+    async def _score_with_ollama(self, chunk: str, topic: str) -> float:
         prompt = (
             f'Score 0-10: how relevant is this text to the topic "{topic}"?\n'
             f"0 = completely unrelated, 10 = directly and specifically about this topic.\n\n"
@@ -229,12 +277,12 @@ class ContentProcessor:
             if numbers:
                 score = float(numbers[0])
                 normalized = min(1.0, score / 10.0)
-                logger.debug("Relevance score", raw=score, normalized=round(normalized, 2))
+                logger.debug("Ollama relevance score", raw=score, normalized=round(normalized, 2))
                 return normalized
-            logger.debug("No number in relevance response", response=response[:80])
+            logger.debug("No number in Ollama relevance response", response=response[:80])
             return 0.6
         except Exception as e:
-            logger.warning("Relevance scoring failed", error=str(e))
+            logger.warning("Ollama relevance scoring failed", error=str(e))
             return 0.6
 
     async def rag_query(self, session_id: int, query: str, top_k: int = 20) -> str: