feat: Ghost CMS integration — auto-publish blog + /publish command
Build & Deploy ResearchOwl / build-and-push (push) Successful in 6s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ChemaVX
2026-05-08 10:26:22 +00:00
parent 94d209dd8a
commit 83eb2359be
3 changed files with 208 additions and 2 deletions
+73
View File
@@ -153,6 +153,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
"`/sources` — List all sources found\n" "`/sources` — List all sources found\n"
"`/outputs` — List generated outputs\n" "`/outputs` — List generated outputs\n"
"`/export` — Exportar último output como PDF\n" "`/export` — Exportar último output como PDF\n"
"`/publish` — Publicar último output en Ghost como borrador\n"
"`/compare <tema1> vs <tema2>` — Análisis comparativo\n" "`/compare <tema1> vs <tema2>` — Análisis comparativo\n"
"`/costs` — Show API usage costs\n" "`/costs` — Show API usage costs\n"
"`/watch <topic> [h]` — Schedule periodic research\n" "`/watch <topic> [h]` — Schedule periodic research\n"
@@ -931,6 +932,77 @@ async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
await db_conn.close() await db_conn.close()
async def cmd_publish(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle ``/publish``: send the chat's latest output to Ghost as a draft.

    Looks up the most recent research session of the chat, picks one of its
    generated outputs (blog types first, see ``priority`` below) and creates
    a draft post through ``GhostPublisher``. The user gets either a link to
    the Ghost editor or a short error message. Unauthorized users are
    ignored silently.

    Args:
        update: Incoming Telegram update carrying the command message.
        ctx: Handler context (not used by this command).
    """
    if not is_authorized(update.effective_user.id):
        return

    chat_id = update.effective_chat.id
    db_conn = await get_db()
    db = ResearchDB(db_conn)

    try:
        from src.generator.generator import GhostPublisher, _extract_title

        ghost = GhostPublisher()
        if not ghost.is_configured():
            await update.message.reply_text(
                "❌ Ghost no configurado. Asegúrate de que `GHOST_URL` y `GHOST_API_KEY` están definidos.",
                parse_mode=ParseMode.MARKDOWN
            )
            return

        cursor = await db_conn.execute(
            "SELECT * FROM research_sessions WHERE telegram_chat_id = ? ORDER BY created_at DESC LIMIT 1",
            (chat_id,)
        )
        row = await cursor.fetchone()
        if not row:
            await update.message.reply_text("No hay sesiones. Usa /research primero.")
            return

        session = dict(row)
        outputs = await db.get_outputs(session["id"])
        if not outputs:
            await update.message.reply_text(
                "No hay outputs generados. Usa `/generate blog|report` primero.",
                parse_mode=ParseMode.MARKDOWN
            )
            return

        # Output types in order of preference; the first output matching the
        # earliest type wins, otherwise fall back to the last output listed.
        priority = ["blog_extended", "blog", "report_extended", "report",
                    "podcast_extended", "podcast", "thread"]
        chosen = next(
            (o for ptype in priority for o in outputs if o["output_type"] == ptype),
            outputs[-1],
        )

        msg = await update.message.reply_text("📤 Publicando en Ghost como borrador…")

        title = _extract_title(chosen["content"]) or session["topic"]
        result = await ghost.publish_draft(title, chosen["content"])
        post = result["posts"][0]
        admin_url = f"{ghost.url}/ghost/#/editor/post/{post['id']}"

        # The title is shown inside a Markdown code span. A backtick in it
        # would close the span early and can make Telegram reject the whole
        # message, which would be reported as a publish failure even though
        # the draft already exists. Only the displayed copy is altered.
        display_title = title.replace("`", "'")

        await msg.edit_text(
            f"✅ *Publicado en Ghost como borrador*\n\n"
            f"📝 Título: `{display_title}`\n"
            f"🔗 Editar: {admin_url}",
            parse_mode=ParseMode.MARKDOWN
        )
    except Exception as e:
        # Top-level handler boundary: log and tell the user, never re-raise.
        logger.error("Publish to Ghost failed", error=str(e))
        await update.message.reply_text(f"❌ Error publicando en Ghost: {str(e)[:200]}")
    finally:
        await db_conn.close()
async def cmd_compare(update: Update, ctx: ContextTypes.DEFAULT_TYPE): async def cmd_compare(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
if not is_authorized(update.effective_user.id): if not is_authorized(update.effective_user.id):
return return
@@ -1097,6 +1169,7 @@ def create_bot() -> Application:
app.add_handler(CommandHandler("process", cmd_process)) app.add_handler(CommandHandler("process", cmd_process))
app.add_handler(CommandHandler("cancel", cmd_cancel)) app.add_handler(CommandHandler("cancel", cmd_cancel))
app.add_handler(CommandHandler("purge", cmd_purge)) app.add_handler(CommandHandler("purge", cmd_purge))
app.add_handler(CommandHandler("publish", cmd_publish))
app.add_handler(CommandHandler("compare", cmd_compare)) app.add_handler(CommandHandler("compare", cmd_compare))
return app return app
+4
View File
@@ -33,6 +33,10 @@ class Settings(BaseSettings):
chunk_overlap: int = Field(100, env="CHUNK_OVERLAP") chunk_overlap: int = Field(100, env="CHUNK_OVERLAP")
quality_threshold: float = Field(0.3, env="QUALITY_THRESHOLD") # 0-1, chunks below discarded quality_threshold: float = Field(0.3, env="QUALITY_THRESHOLD") # 0-1, chunks below discarded
# Ghost CMS
ghost_url: Optional[str] = Field(None, env="GHOST_URL")
ghost_api_key: Optional[str] = Field(None, env="GHOST_API_KEY")
# Alerts # Alerts
cost_alert_threshold: float = Field(0.15, env="COST_ALERT_THRESHOLD") cost_alert_threshold: float = Field(0.15, env="COST_ALERT_THRESHOLD")
+131 -2
View File
@@ -2,6 +2,12 @@
ResearchOwl Generators ResearchOwl Generators
Produces structured outputs from processed research using Claude or Ollama Produces structured outputs from processed research using Claude or Ollama
""" """
import base64
import hashlib
import hmac
import json
import time
import structlog import structlog
from src.config import settings from src.config import settings
@@ -196,6 +202,92 @@ Material disponible (resumen):
""" """
# ─── Ghost CMS ────────────────────────────────────────────────────────────────
def _b64url(data: bytes | str) -> str:
if isinstance(data, str):
data = data.encode()
return base64.urlsafe_b64encode(data).rstrip(b"=").decode()
def _extract_title(content: str) -> str:
    """Return the first H1 heading of a markdown document.

    Lines between ``---`` delimiter lines (the ResearchOwl header block) are
    skipped, and so are lines inside fenced code blocks, where a leading
    ``# `` is a comment rather than a heading.

    Returns:
        The heading text without the leading ``# ``, or ``""`` when the
        document has no H1.
    """
    in_header = False
    fence_char = ""  # "`" or "~" while inside a fenced code block, else ""
    for line in content.splitlines():
        stripped = line.strip()

        if fence_char:
            # Only a line made solely of the fence character closes the block.
            if len(stripped) >= 3 and set(stripped) == {fence_char}:
                fence_char = ""
            continue

        if stripped == "---":
            in_header = not in_header
            continue
        if in_header:
            continue

        if stripped.startswith(("```", "~~~")):
            fence_char = stripped[0]
            continue

        # "## " can never match here: its second character is "#", not " ".
        if stripped.startswith("# "):
            return stripped[2:].strip()
    return ""
def _strip_researchowl_header(content: str) -> str:
    """Drop everything up to and including the second ``---`` line.

    That span is the metadata block ResearchOwl prepends to outputs. Leading
    newlines of the remaining text are removed. Content with fewer than two
    ``---`` lines is returned unchanged.
    """
    rows = content.splitlines(keepends=True)
    delimiter_rows = [idx for idx, row in enumerate(rows) if row.strip() == "---"]
    if len(delimiter_rows) < 2:
        return content
    body_start = delimiter_rows[1] + 1
    return "".join(rows[body_start:]).lstrip("\n")
class GhostPublisher:
def __init__(self):
self.url = (settings.ghost_url or "").rstrip("/")
self.api_key = settings.ghost_api_key or ""
def is_configured(self) -> bool:
return bool(self.url and self.api_key)
def _make_token(self) -> str:
key_id, secret = self.api_key.split(":", 1)
now = int(time.time())
header = _b64url(json.dumps({"alg": "HS256", "typ": "JWT", "kid": key_id}))
payload = _b64url(json.dumps({"iat": now, "exp": now + 300, "aud": "/admin/"}))
signing = f"{header}.{payload}"
sig = _b64url(
hmac.new(bytes.fromhex(secret), signing.encode(), hashlib.sha256).digest()
)
return f"{signing}.{sig}"
async def publish_draft(self, title: str, markdown_content: str,
tags: list[str] | None = None) -> dict:
import aiohttp as _aio
import markdown as _md
clean = _strip_researchowl_header(markdown_content)
html = _md.markdown(clean, extensions=["extra"])
token = self._make_token()
body = {
"posts": [{
"title": title,
"html": html,
"status": "draft",
"tags": [{"name": t} for t in (tags or ["investigacion"])],
}]
}
async with _aio.ClientSession() as sess:
async with sess.post(
f"{self.url}/ghost/api/admin/posts/",
json=body,
headers={
"Authorization": f"Ghost {token}",
"Accept-Version": "v5.0",
},
) as resp:
if resp.status not in (200, 201):
text = await resp.text()
raise ValueError(f"Ghost API {resp.status}: {text[:300]}")
return await resp.json()
# ─── Output generation ────────────────────────────────────────────────────────
class OutputGenerator: class OutputGenerator:
def __init__(self, db: ResearchDB, ollama: OllamaClient, processor: ContentProcessor): def __init__(self, db: ResearchDB, ollama: OllamaClient, processor: ContentProcessor):
self.db = db self.db = db
@@ -250,8 +342,26 @@ class OutputGenerator:
# Save to DB # Save to DB
await self.db.save_output(session_id, output_type, full_output) await self.db.save_output(session_id, output_type, full_output)
# Auto-publish to Ghost for blog outputs
ghost_notice = ""
if output_type in (OutputType.BLOG, OutputType.BLOG_EXTENDED):
ghost = GhostPublisher()
if ghost.is_configured():
try:
title = _extract_title(full_output) or topic
result = await ghost.publish_draft(title, full_output)
post = result["posts"][0]
ghost_notice = (
f"\n\n---\n"
f"📤 *Borrador publicado en Ghost*\n"
f"Editar: {ghost.url}/ghost/#/editor/post/{post['id']}"
)
logger.info("Auto-published blog to Ghost", post_id=post["id"])
except Exception as e:
logger.warning("Auto-publish to Ghost failed", error=str(e))
logger.info("Output generated", type=output_type, length=len(full_output)) logger.info("Output generated", type=output_type, length=len(full_output))
return full_output return full_output + ghost_notice
async def _generate(self, prompt: str, system: str, output_type: OutputType, async def _generate(self, prompt: str, system: str, output_type: OutputType,
session_id: int | None = None) -> str: session_id: int | None = None) -> str:
@@ -403,9 +513,28 @@ class OutputGenerator:
full_output = header + "\n\n" + full_content full_output = header + "\n\n" + full_content
await self.db.save_output(session_id, output_type, full_output) await self.db.save_output(session_id, output_type, full_output)
# Auto-publish to Ghost for extended blog outputs
ghost_notice = ""
if output_type == OutputType.BLOG_EXTENDED:
ghost = GhostPublisher()
if ghost.is_configured():
try:
title = _extract_title(full_output) or topic
result = await ghost.publish_draft(title, full_output)
post = result["posts"][0]
ghost_notice = (
f"\n\n---\n"
f"📤 *Borrador publicado en Ghost*\n"
f"Editar: {ghost.url}/ghost/#/editor/post/{post['id']}"
)
logger.info("Auto-published extended blog to Ghost", post_id=post["id"])
except Exception as e:
logger.warning("Auto-publish to Ghost failed (extended)", error=str(e))
logger.info("Extended output generated", type=output_type, logger.info("Extended output generated", type=output_type,
sections=len(sections), length=len(full_output)) sections=len(sections), length=len(full_output))
return full_output return full_output + ghost_notice
async def _generate_raw(self, prompt: str, async def _generate_raw(self, prompt: str,
session_id: int | None = None) -> str: session_id: int | None = None) -> str: