async def cmd_publish(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle ``/publish``: push the latest session's best output to Ghost as a draft.

    Looks up the most recent research session for the calling chat, picks one
    of its generated outputs (blog variants first, then reports, podcasts and
    threads; otherwise the last output in the list) and creates a Ghost draft
    from it. Progress and failures are reported back to the chat.

    Args:
        update: Incoming Telegram update carrying the command.
        ctx: Handler context (unused).
    """
    if not is_authorized(update.effective_user.id):
        return

    # effective_message also covers edited commands, for which update.message
    # is None and every reply below (including the error reply) would raise.
    message = update.effective_message
    chat_id = update.effective_chat.id
    db_conn = await get_db()
    db = ResearchDB(db_conn)

    try:
        from src.generator.generator import GhostPublisher, _extract_title

        ghost = GhostPublisher()
        if not ghost.is_configured():
            await message.reply_text(
                "❌ Ghost no configurado. Asegúrate de que `GHOST_URL` y `GHOST_API_KEY` están definidos.",
                parse_mode=ParseMode.MARKDOWN
            )
            return

        cursor = await db_conn.execute(
            "SELECT * FROM research_sessions WHERE telegram_chat_id = ? ORDER BY created_at DESC LIMIT 1",
            (chat_id,)
        )
        row = await cursor.fetchone()
        if not row:
            await message.reply_text("No hay sesiones. Usa /research primero.")
            return

        session = dict(row)
        outputs = await db.get_outputs(session["id"])
        if not outputs:
            await message.reply_text(
                "No hay outputs generados. Usa `/generate blog|report` primero.",
                parse_mode=ParseMode.MARKDOWN
            )
            return

        priority = ["blog_extended", "blog", "report_extended", "report",
                    "podcast_extended", "podcast", "thread"]
        # Keep the first output seen for each type, then take the
        # highest-priority type that exists; fall back to the last output.
        first_by_type = {}
        for output in outputs:
            first_by_type.setdefault(output["output_type"], output)
        chosen = next(
            (first_by_type[ptype] for ptype in priority if ptype in first_by_type),
            outputs[-1],
        )

        msg = await message.reply_text("📤 Publicando en Ghost como borrador…")

        title = _extract_title(chosen["content"]) or session["topic"]
        result = await ghost.publish_draft(title, chosen["content"])
        post = result["posts"][0]
        admin_url = f"{ghost.url}/ghost/#/editor/post/{post['id']}"

        # The title is shown inside a Markdown code span; a backtick in it would
        # make Telegram reject the message and report a failure even though the
        # draft was created. Only the displayed text is altered.
        shown_title = title.replace("`", "'")

        await msg.edit_text(
            f"✅ *Publicado en Ghost como borrador*\n\n"
            f"📝 Título: `{shown_title}`\n"
            f"🔗 Editar: {admin_url}",
            parse_mode=ParseMode.MARKDOWN
        )

    except Exception as e:
        # Top-level handler boundary: log and tell the user instead of crashing.
        logger.error("Publish to Ghost failed", error=str(e))
        await message.reply_text(f"❌ Error publicando en Ghost: {str(e)[:200]}")
    finally:
        await db_conn.close()
Field(0.15, env="COST_ALERT_THRESHOLD") diff --git a/src/generator/generator.py b/src/generator/generator.py index 5c3236a..e41c772 100644 --- a/src/generator/generator.py +++ b/src/generator/generator.py @@ -2,6 +2,12 @@ ResearchOwl Generators Produces structured outputs from processed research using Claude or Ollama """ +import base64 +import hashlib +import hmac +import json +import time + import structlog from src.config import settings @@ -196,6 +202,92 @@ Material disponible (resumen): """ +# ─── Ghost CMS ──────────────────────────────────────────────────────────────── + +def _b64url(data: bytes | str) -> str: + if isinstance(data, str): + data = data.encode() + return base64.urlsafe_b64encode(data).rstrip(b"=").decode() + + +def _extract_title(content: str) -> str: + """Return first H1 heading from markdown, skipping the ResearchOwl header block.""" + in_header = False + for line in content.splitlines(): + stripped = line.strip() + if stripped == "---": + in_header = not in_header + continue + if in_header: + continue + if stripped.startswith("# ") and not stripped.startswith("## "): + return stripped[2:].strip() + return "" + + +def _strip_researchowl_header(content: str) -> str: + """Remove the ---...--- metadata block that ResearchOwl prepends to outputs.""" + lines = content.splitlines(keepends=True) + dashes_seen = 0 + for i, line in enumerate(lines): + if line.strip() == "---": + dashes_seen += 1 + if dashes_seen == 2: + return "".join(lines[i + 1:]).lstrip("\n") + return content + + +class GhostPublisher: + def __init__(self): + self.url = (settings.ghost_url or "").rstrip("/") + self.api_key = settings.ghost_api_key or "" + + def is_configured(self) -> bool: + return bool(self.url and self.api_key) + + def _make_token(self) -> str: + key_id, secret = self.api_key.split(":", 1) + now = int(time.time()) + header = _b64url(json.dumps({"alg": "HS256", "typ": "JWT", "kid": key_id})) + payload = _b64url(json.dumps({"iat": now, "exp": now + 300, "aud": 
"/admin/"})) + signing = f"{header}.{payload}" + sig = _b64url( + hmac.new(bytes.fromhex(secret), signing.encode(), hashlib.sha256).digest() + ) + return f"{signing}.{sig}" + + async def publish_draft(self, title: str, markdown_content: str, + tags: list[str] | None = None) -> dict: + import aiohttp as _aio + import markdown as _md + clean = _strip_researchowl_header(markdown_content) + html = _md.markdown(clean, extensions=["extra"]) + token = self._make_token() + body = { + "posts": [{ + "title": title, + "html": html, + "status": "draft", + "tags": [{"name": t} for t in (tags or ["investigacion"])], + }] + } + async with _aio.ClientSession() as sess: + async with sess.post( + f"{self.url}/ghost/api/admin/posts/", + json=body, + headers={ + "Authorization": f"Ghost {token}", + "Accept-Version": "v5.0", + }, + ) as resp: + if resp.status not in (200, 201): + text = await resp.text() + raise ValueError(f"Ghost API {resp.status}: {text[:300]}") + return await resp.json() + + +# ─── Output generation ──────────────────────────────────────────────────────── + class OutputGenerator: def __init__(self, db: ResearchDB, ollama: OllamaClient, processor: ContentProcessor): self.db = db @@ -250,8 +342,26 @@ class OutputGenerator: # Save to DB await self.db.save_output(session_id, output_type, full_output) + # Auto-publish to Ghost for blog outputs + ghost_notice = "" + if output_type in (OutputType.BLOG, OutputType.BLOG_EXTENDED): + ghost = GhostPublisher() + if ghost.is_configured(): + try: + title = _extract_title(full_output) or topic + result = await ghost.publish_draft(title, full_output) + post = result["posts"][0] + ghost_notice = ( + f"\n\n---\n" + f"📤 *Borrador publicado en Ghost*\n" + f"Editar: {ghost.url}/ghost/#/editor/post/{post['id']}" + ) + logger.info("Auto-published blog to Ghost", post_id=post["id"]) + except Exception as e: + logger.warning("Auto-publish to Ghost failed", error=str(e)) + logger.info("Output generated", type=output_type, 
length=len(full_output)) - return full_output + return full_output + ghost_notice async def _generate(self, prompt: str, system: str, output_type: OutputType, session_id: int | None = None) -> str: @@ -403,9 +513,28 @@ class OutputGenerator: full_output = header + "\n\n" + full_content await self.db.save_output(session_id, output_type, full_output) + + # Auto-publish to Ghost for extended blog outputs + ghost_notice = "" + if output_type == OutputType.BLOG_EXTENDED: + ghost = GhostPublisher() + if ghost.is_configured(): + try: + title = _extract_title(full_output) or topic + result = await ghost.publish_draft(title, full_output) + post = result["posts"][0] + ghost_notice = ( + f"\n\n---\n" + f"📤 *Borrador publicado en Ghost*\n" + f"Editar: {ghost.url}/ghost/#/editor/post/{post['id']}" + ) + logger.info("Auto-published extended blog to Ghost", post_id=post["id"]) + except Exception as e: + logger.warning("Auto-publish to Ghost failed (extended)", error=str(e)) + logger.info("Extended output generated", type=output_type, sections=len(sections), length=len(full_output)) - return full_output + return full_output + ghost_notice async def _generate_raw(self, prompt: str, session_id: int | None = None) -> str: