feat: fase 1 — top_k 30→80, pool 100→300, sin truncado, max_tokens 16000

2026-05-04 10:23:19 +00:00
parent b5518ac95a
commit 0d8aee63be
2 changed files with 3 additions and 8 deletions
@@ -156,7 +156,7 @@ class OutputGenerator:
        # RAG: get most relevant context for this output type
        query = self._get_rag_query(output_type, topic)
-        context = await self.processor.rag_query(session_id, query, top_k=30)
+        context = await self.processor.rag_query(session_id, query, top_k=80)
        if not context:
            # Fallback: use raw top chunks
@@ -166,11 +166,6 @@ class OutputGenerator:
        if not context:
            raise ValueError("No processed content available. Run /process first.")
-        # Truncate context to avoid Ollama context limits
-        context_words = context.split()
-        if len(context_words) > 6000:
-            context = " ".join(context_words[:6000]) + "\n\n[... additional material truncated ...]"
        backend = "Claude Haiku" if settings.anthropic_api_key else "Ollama"
        if progress_callback:
            await progress_callback(f"✍️ Generando {output_type} con {backend}... (2-5 min)")
@@ -201,7 +196,7 @@ class OutputGenerator:
    async def _generate_with_claude(self, prompt: str, system: str, output_type: OutputType,
                                     session_id: int | None = None) -> str:
        import anthropic
-        max_tokens = 4096 if output_type == OutputType.THREAD else 8192
+        max_tokens = 4096 if output_type == OutputType.THREAD else 16000
        try:
            client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key)
            msg = await client.messages.create(
@@ -285,7 +285,7 @@ class ContentProcessor:
        query_embedding = await self.ollama.embed(query)
        # Get top quality chunks
-        chunks = await self.db.get_top_chunks(session_id, limit=100)
+        chunks = await self.db.get_top_chunks(session_id, limit=300)
        if query_embedding and chunks:
            # Rank by embedding similarity