feat: Claude Haiku for relevance scoring, fallback to Ollama
Build & Deploy ResearchOwl / build-and-push (push) Successful in 45s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 45s
processor.py: split _score_quality into _score_with_claude and _score_with_ollama; if ANTHROPIC_API_KEY is set, use Claude Haiku (claude-haiku-4-5) with max_tokens=10 for fast, accurate 0-10 relevance scoring; falls back to Ollama on any error. requirements.txt: add anthropic>=0.40.0. k8s: ANTHROPIC_API_KEY added to researchowl-secrets and mounted in deployment; QUALITY_THRESHOLD restored to 0.4 (Claude scoring is accurate enough to use the threshold). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -23,6 +23,9 @@ tiktoken==0.7.0
|
|||||||
numpy==1.26.4
|
numpy==1.26.4
|
||||||
scikit-learn==1.5.1
|
scikit-learn==1.5.1
|
||||||
|
|
||||||
|
# Claude API (scoring)
|
||||||
|
anthropic>=0.40.0
|
||||||
|
|
||||||
# Utilities
|
# Utilities
|
||||||
pydantic==2.8.0
|
pydantic==2.8.0
|
||||||
pydantic-settings==2.4.0
|
pydantic-settings==2.4.0
|
||||||
|
|||||||
@@ -216,7 +216,37 @@ class ContentProcessor:
|
|||||||
return stored
|
return stored
|
||||||
|
|
||||||
async def _score_quality(self, chunk: str, topic: str) -> float:
    """Return a 0-1 relevance score for ``chunk`` with respect to ``topic``.

    Delegates to the Claude scorer when ``settings.anthropic_api_key`` is
    truthy, and to the Ollama scorer otherwise.
    """
    scorer = (
        self._score_with_claude
        if settings.anthropic_api_key
        else self._score_with_ollama
    )
    return await scorer(chunk, topic)
|
||||||
|
|
||||||
|
async def _score_with_claude(self, chunk: str, topic: str) -> float:
    """Score how relevant ``chunk`` is to ``topic`` using the Claude API.

    The first 500 characters of ``chunk`` are sent to the model named by
    ``settings.claude_model`` with a request for a bare 0-10 rating. The
    first number in the reply is divided by 10 and capped at 1.0.

    Returns:
        The normalized score; 0.6 when the reply contains no number; or
        the result of ``_score_with_ollama`` when anything raises,
        including the ``anthropic`` package failing to import.
    """
    prompt = (
        f'Rate 0-10 how relevant this text is to the topic "{topic}". '
        f'Reply with only a number.\n\nText:\n{chunk[:500]}'
    )
    try:
        # Imported inside the try so that a missing or broken package
        # degrades to the Ollama scorer instead of raising to the caller.
        import anthropic

        # The context manager closes the per-call HTTP client; without it
        # every scored chunk would leave a connection pool behind.
        async with anthropic.AsyncAnthropic(
            api_key=settings.anthropic_api_key
        ) as client:
            msg = await client.messages.create(
                model=settings.claude_model,
                max_tokens=10,  # the reply is expected to be a single number
                messages=[{"role": "user", "content": prompt}],
            )

        response = msg.content[0].text.strip()
        numbers = re.findall(r'\b(\d+(?:\.\d+)?)\b', response)
        if numbers:
            score = float(numbers[0])
            normalized = min(1.0, score / 10.0)
            logger.debug("Claude relevance score", raw=score, normalized=round(normalized, 2))
            return normalized

        # Log the unparseable reply, matching the Ollama scorer, so the
        # neutral default below can be traced back to its cause.
        logger.debug("No number in Claude relevance response", response=response[:80])
        return 0.6
    except Exception as e:
        logger.warning("Claude scoring failed, falling back to Ollama", error=str(e))
        return await self._score_with_ollama(chunk, topic)
|
||||||
|
|
||||||
|
async def _score_with_ollama(self, chunk: str, topic: str) -> float:
|
||||||
prompt = (
|
prompt = (
|
||||||
f'Score 0-10: how relevant is this text to the topic "{topic}"?\n'
|
f'Score 0-10: how relevant is this text to the topic "{topic}"?\n'
|
||||||
f"0 = completely unrelated, 10 = directly and specifically about this topic.\n\n"
|
f"0 = completely unrelated, 10 = directly and specifically about this topic.\n\n"
|
||||||
@@ -229,12 +259,12 @@ class ContentProcessor:
|
|||||||
if numbers:
|
if numbers:
|
||||||
score = float(numbers[0])
|
score = float(numbers[0])
|
||||||
normalized = min(1.0, score / 10.0)
|
normalized = min(1.0, score / 10.0)
|
||||||
logger.debug("Relevance score", raw=score, normalized=round(normalized, 2))
|
logger.debug("Ollama relevance score", raw=score, normalized=round(normalized, 2))
|
||||||
return normalized
|
return normalized
|
||||||
logger.debug("No number in relevance response", response=response[:80])
|
logger.debug("No number in Ollama relevance response", response=response[:80])
|
||||||
return 0.6
|
return 0.6
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Relevance scoring failed", error=str(e))
|
logger.warning("Ollama relevance scoring failed", error=str(e))
|
||||||
return 0.6
|
return 0.6
|
||||||
|
|
||||||
async def rag_query(self, session_id: int, query: str, top_k: int = 20) -> str:
|
async def rag_query(self, session_id: int, query: str, top_k: int = 20) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user