feat: fase 3 — export PDF con reportlab + /export command
Build & Deploy ResearchOwl / build-and-push (push) Successful in 1m2s

This commit is contained in:
ChemaVX
2026-05-04 12:57:21 +00:00
parent c33bb5337d
commit 4c7f5b521b
5 changed files with 198 additions and 0 deletions
+1
View File
@@ -6,6 +6,7 @@ WORKDIR /app
RUN apt-get update && apt-get install -y \
gcc g++ \
libxml2-dev libxslt-dev \
libfreetype6-dev \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
+4
View File
@@ -26,6 +26,10 @@ scikit-learn==1.5.1
# Claude API (scoring)
anthropic>=0.40.0
# PDF export
markdown==3.7
reportlab==4.2.5
# Utilities
pydantic==2.8.0
pydantic-settings==2.4.0
+79
View File
@@ -146,6 +146,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
" Extended: podcast_extended|blog_extended|report_extended\n"
"`/sources` — List all sources found\n"
"`/outputs` — List generated outputs\n"
"`/export` — Exportar último output como PDF\n"
"`/costs` — Show API usage costs\n"
"`/watch <topic> [h]` — Schedule periodic research\n"
"`/unwatch <topic>` — Remove a watch\n"
@@ -760,6 +761,83 @@ async def _on_startup(app: Application) -> None:
await _start_scheduler(app)
async def cmd_export(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle `/export`: send one output of the chat's latest session as a PDF.

    The output is picked by type, preferring extended formats (report, blog,
    podcast) over the short ones; when no known type matches, the first output
    returned by the database is used. The PDF is rendered by `generate_pdf`
    and sent back as a document.

    Args:
        update: Incoming Telegram update carrying the `/export` command.
        ctx: Handler context (unused).
    """
    if not is_authorized(update.effective_user.id):
        return

    import io
    import re

    chat_id = update.effective_chat.id
    db_conn = await get_db()
    db = ResearchDB(db_conn)
    try:
        session = await db.get_latest_session(chat_id)
        if not session:
            await update.message.reply_text("No hay sesiones de investigación.")
            return

        session_id = session["id"]
        topic = session["topic"]
        outputs = await db.get_outputs(session_id)
        if not outputs:
            await update.message.reply_text(
                "No hay outputs generados. Usa `/generate <tipo>` primero.",
                parse_mode=ParseMode.MARKDOWN
            )
            return

        priority = [
            "report_extended", "blog_extended", "podcast_extended",
            "report", "blog", "podcast", "thread",
        ]
        # Index the first output of each type, then take the best-ranked type.
        first_by_type = {}
        for candidate in outputs:
            first_by_type.setdefault(candidate["output_type"], candidate)
        chosen = next(
            (first_by_type[ptype] for ptype in priority if ptype in first_by_type),
            outputs[0],
        )

        # The topic is free text. Telegram's legacy Markdown rejects the whole
        # message on an unbalanced `_`, `*`, `[` or backtick, so neutralise
        # them before interpolating. Escapes are not honoured inside a code
        # span, hence backticks are swapped out there instead of escaped.
        topic_in_code = topic.replace("`", "'")
        # Truncate before escaping so the caption stays under Telegram's
        # 1024-character limit and no escape sequence is cut in half.
        topic_escaped = re.sub(r"([_*`\[])", r"\\\1", topic[:400])

        msg = await update.message.reply_text(
            f"📄 Generando PDF para `{topic_in_code}`…",
            parse_mode=ParseMode.MARKDOWN
        )

        try:
            from src.generator.generator import generate_pdf
            pdf_bytes = generate_pdf(chosen["content"], title=topic)
        except ImportError:
            await msg.edit_text(
                "❌ reportlab no está instalado. Ejecuta: `pip install reportlab`",
                parse_mode=ParseMode.MARKDOWN
            )
            return
        except Exception as e:
            # Sent as plain text on purpose: exception text is arbitrary.
            await msg.edit_text(f"❌ Error generando PDF: {str(e)[:200]}")
            return

        # Keep the historical space/slash mapping, then collapse anything else
        # that is not a word character, dot or dash (quotes, colons, newlines…).
        safe_topic = topic[:40].replace(" ", "_").replace("/", "-")
        safe_topic = re.sub(r"[^\w.\-]+", "_", safe_topic) or "export"
        filename = f"researchowl_{safe_topic}_{chosen['output_type']}.pdf"

        await update.message.reply_document(
            document=io.BytesIO(pdf_bytes),
            filename=filename,
            caption=f"📄 *{chosen['output_type'].upper()}* — {topic_escaped}\nExportado por ResearchOwl 🦉",
            parse_mode=ParseMode.MARKDOWN
        )

        # Removing the progress message is cosmetic; never fail the export on it.
        try:
            await msg.delete()
        except Exception:
            pass

    except Exception as e:
        logger.error("Export failed", error=str(e))
        await update.message.reply_text(f"❌ Export failed: {str(e)[:200]}")
    finally:
        await db_conn.close()
async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
if not is_authorized(update.effective_user.id):
return
@@ -820,6 +898,7 @@ def create_bot() -> Application:
app.add_handler(CommandHandler("generate", cmd_generate))
app.add_handler(CommandHandler("sources", cmd_sources))
app.add_handler(CommandHandler("outputs", cmd_outputs))
app.add_handler(CommandHandler("export", cmd_export))
app.add_handler(CommandHandler("costs", cmd_costs))
app.add_handler(CommandHandler("watch", cmd_watch))
app.add_handler(CommandHandler("unwatch", cmd_unwatch))
+8
View File
@@ -151,6 +151,14 @@ class ResearchDB:
row = await cursor.fetchone()
return dict(row) if row else None
async def get_latest_session(self, chat_id: int) -> Optional[dict]:
    """Fetch the newest research session recorded for a Telegram chat.

    Args:
        chat_id: Value matched against the `telegram_chat_id` column.

    Returns:
        The row with the greatest `created_at` converted to a dict, or
        None when the query yields no (truthy) row.
    """
    query = "SELECT * FROM research_sessions WHERE telegram_chat_id = ? ORDER BY created_at DESC LIMIT 1"
    result = await self.db.execute(query, (chat_id,))
    latest = await result.fetchone()
    if not latest:
        return None
    return dict(latest)
async def get_active_session(self, chat_id: int) -> Optional[dict]:
cursor = await self.db.execute(
"""SELECT * FROM research_sessions
+106
View File
@@ -445,3 +445,109 @@ Iterations: {session.get('iterations', 0)}
Total words researched: {session.get('total_words', 0):,}
---
"""
def generate_pdf(content: str, title: str = "ResearchOwl Output") -> bytes:
    """Render Markdown-like text into an A4 PDF and return its bytes.

    Only a line-oriented subset of Markdown is understood: ATX headings
    (levels 4-6 share the level-3 style), `-`/`*`/`+` bullets, numbered items,
    horizontal rules, fenced code blocks, and inline bold / italic / code.
    Anything else is emitted as a plain paragraph.

    Args:
        content: Source text; None is treated as empty.
        title: Stored as the PDF document title.

    Returns:
        The PDF file contents.

    Raises:
        ImportError: If reportlab cannot be imported.
    """
    try:
        import io
        import re

        from reportlab.lib import colors
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib.units import cm
        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable
    except ImportError as exc:
        raise ImportError("reportlab is required for PDF export — pip install reportlab") from exc

    buf = io.BytesIO()
    doc = SimpleDocTemplate(
        buf,
        pagesize=A4,
        rightMargin=2 * cm,
        leftMargin=2 * cm,
        topMargin=2.5 * cm,
        bottomMargin=2 * cm,
        title=title,
    )

    base = getSampleStyleSheet()
    normal = ParagraphStyle("RO_Normal", parent=base["Normal"],
                            fontSize=10, leading=14, spaceAfter=4)
    h1 = ParagraphStyle("RO_H1", parent=base["Heading1"],
                        fontSize=18, spaceBefore=12, spaceAfter=6,
                        textColor=colors.HexColor("#1a1a2e"))
    h2 = ParagraphStyle("RO_H2", parent=base["Heading2"],
                        fontSize=14, spaceBefore=10, spaceAfter=4,
                        textColor=colors.HexColor("#16213e"))
    h3 = ParagraphStyle("RO_H3", parent=base["Heading3"],
                        fontSize=12, spaceBefore=8, spaceAfter=4)
    code_style = ParagraphStyle("RO_Code", parent=base["Code"],
                                fontSize=9, leading=12, fontName="Courier",
                                backColor=colors.HexColor("#f4f4f4"), spaceAfter=4)
    bullet_style = ParagraphStyle("RO_Bullet", parent=normal,
                                  leftIndent=20, bulletIndent=10, spaceAfter=2)
    heading_styles = {1: h1, 2: h2, 3: h3}

    # Patterns are compiled once because they are applied to every line.
    heading_re = re.compile(r'^(#{1,6}) (.*)$')
    rule_re = re.compile(r'^[-*_]{3,}$')
    bullet_re = re.compile(r'^[-*+] ')
    numbered_re = re.compile(r'^\d+\. ')
    code_span_re = re.compile(r'`(.+?)`')
    placeholder_re = re.compile(r'\x00(\d+)\x00')
    # Delimiters must hug non-space text, and underscore delimiters must not
    # sit inside a word, so `snake_case_name` and `2 * 3 * 4` stay untouched.
    emphasis_rules = (
        (re.compile(r'\*\*(?=\S)(.+?)(?<=\S)\*\*'), r'<b>\1</b>'),
        (re.compile(r'(?<!\w)__(?=\S)(.+?)(?<=\S)__(?!\w)'), r'<b>\1</b>'),
        (re.compile(r'(?<!\*)\*(?=\S)(.+?)(?<=\S)\*(?!\*)'), r'<i>\1</i>'),
        (re.compile(r'(?<!\w)_(?=\S)(.+?)(?<=\S)_(?!\w)'), r'<i>\1</i>'),
    )

    def escape_xml(text: str) -> str:
        """Escape the characters that are markup in reportlab paragraphs."""
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def md_to_para(text: str) -> str:
        """Convert inline Markdown to reportlab paragraph markup."""
        code_spans = []

        def stash(match):
            code_spans.append(match.group(1))
            return f"\x00{len(code_spans) - 1}\x00"

        # NUL is reserved for the placeholders below, so drop any from input.
        text = escape_xml(text.replace("\x00", ""))
        # Park code spans first so emphasis rules never rewrite their content.
        text = code_span_re.sub(stash, text)
        for pattern, replacement in emphasis_rules:
            text = pattern.sub(replacement, text)
        return placeholder_re.sub(
            lambda m: '<font name="Courier">' + code_spans[int(m.group(1))] + '</font>',
            text,
        )

    story = []

    def add_paragraph(raw: str, style, markup=None, bullet=None) -> None:
        """Append a paragraph, degrading to escaped plain text on bad markup."""
        rich = md_to_para(raw) if markup is None else markup
        for candidate in (rich, escape_xml(raw)):
            try:
                story.append(Paragraph(candidate, style, bulletText=bullet))
                return
            except Exception:
                # Paragraph parses its markup on construction; overlapping
                # emphasis can produce mis-nested tags. Retry as plain text,
                # and as a last resort skip the line rather than abort.
                continue

    def flush_code(code_lines) -> None:
        """Emit buffered fenced-code lines as a single code paragraph."""
        rendered = []
        for code_line in code_lines:
            expanded = code_line.expandtabs(4)
            stripped = expanded.lstrip(" ")
            # Paragraph collapses ordinary spaces; non-breaking spaces keep
            # the leading indentation of the code visible.
            indent = "\u00a0" * (len(expanded) - len(stripped))
            rendered.append(indent + escape_xml(stripped))
        add_paragraph("\n".join(code_lines), code_style, markup="<br/>".join(rendered))

    in_code = False
    code_buf = []

    for line in (content or "").split("\n"):
        line = line.rstrip("\r")  # tolerate CRLF input

        if line.startswith("```"):
            if in_code:
                flush_code(code_buf)
            code_buf = []
            in_code = not in_code
            continue

        if in_code:
            code_buf.append(line)
            continue

        if rule_re.match(line.strip()):
            story.append(HRFlowable(width="100%", thickness=0.5,
                                    color=colors.grey, spaceAfter=6))
            continue

        heading = heading_re.match(line)
        if heading:
            level = min(len(heading.group(1)), 3)
            add_paragraph(heading.group(2), heading_styles[level])
        elif bullet_re.match(line):
            add_paragraph(line[2:], bullet_style, bullet="•")
        elif numbered_re.match(line):
            # The number stays in the text, so no separate bullet is drawn.
            add_paragraph(line, bullet_style)
        elif line.strip() == "":
            story.append(Spacer(1, 6))
        else:
            add_paragraph(line, normal)

    # An unterminated fence must not swallow the code collected so far.
    if in_code and code_buf:
        flush_code(code_buf)

    doc.build(story)
    return buf.getvalue()