feat: fase 3 — export PDF con reportlab + /export command
Build & Deploy ResearchOwl / build-and-push (push) Successful in 1m2s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 1m2s
This commit is contained in:
@@ -6,6 +6,7 @@ WORKDIR /app
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc g++ \
|
||||
libxml2-dev libxslt-dev \
|
||||
libfreetype6-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
|
||||
@@ -26,6 +26,10 @@ scikit-learn==1.5.1
|
||||
# Claude API (scoring)
|
||||
anthropic>=0.40.0
|
||||
|
||||
# PDF export
|
||||
markdown==3.7
|
||||
reportlab==4.2.5
|
||||
|
||||
# Utilities
|
||||
pydantic==2.8.0
|
||||
pydantic-settings==2.4.0
|
||||
|
||||
@@ -146,6 +146,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||
" Extended: podcast_extended|blog_extended|report_extended\n"
|
||||
"`/sources` — List all sources found\n"
|
||||
"`/outputs` — List generated outputs\n"
|
||||
"`/export` — Exportar último output como PDF\n"
|
||||
"`/costs` — Show API usage costs\n"
|
||||
"`/watch <topic> [h]` — Schedule periodic research\n"
|
||||
"`/unwatch <topic>` — Remove a watch\n"
|
||||
@@ -760,6 +761,83 @@ async def _on_startup(app: Application) -> None:
|
||||
await _start_scheduler(app)
|
||||
|
||||
|
||||
async def cmd_export(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle ``/export``: send an output of the chat's latest session as a PDF.

    Looks up the latest research session for the chat, picks one of its
    generated outputs (extended formats first, then the short ones, falling
    back to the first output returned by the database), renders it with
    ``generate_pdf`` and replies with the resulting document.

    Unauthorized users are ignored silently. Every failure is reported back
    to the chat; the database connection is always closed.
    """
    import io
    import re

    from telegram.helpers import escape_markdown

    if not is_authorized(update.effective_user.id):
        return

    chat_id = update.effective_chat.id
    db_conn = await get_db()
    db = ResearchDB(db_conn)

    try:
        session = await db.get_latest_session(chat_id)
        if not session:
            await update.message.reply_text("No hay sesiones de investigación.")
            return

        session_id = session["id"]
        topic = session["topic"]

        outputs = await db.get_outputs(session_id)
        if not outputs:
            await update.message.reply_text(
                "No hay outputs generados. Usa `/generate <tipo>` primero.",
                parse_mode=ParseMode.MARKDOWN
            )
            return

        # Preferred output types, most complete first.
        priority = [
            "report_extended", "blog_extended", "podcast_extended",
            "report", "blog", "podcast", "thread",
        ]
        # First output matching the highest-priority type; otherwise the
        # first output whatever its type.
        chosen = next(
            (o for ptype in priority for o in outputs if o["output_type"] == ptype),
            outputs[0],
        )

        # The topic is user-supplied text. Inside a legacy-Markdown code span
        # nothing can be escaped, so a backtick in the topic would end the
        # span early and make Telegram reject the message.
        topic_in_code = topic.replace("`", "'")
        msg = await update.message.reply_text(
            f"📄 Generando PDF para `{topic_in_code}`…",
            parse_mode=ParseMode.MARKDOWN
        )

        try:
            from src.generator.generator import generate_pdf
            pdf_bytes = generate_pdf(chosen["content"], title=topic)
        except ImportError:
            await msg.edit_text(
                "❌ reportlab no está instalado. Ejecuta: `pip install reportlab`",
                parse_mode=ParseMode.MARKDOWN
            )
            return
        except Exception as e:
            logger.error("PDF generation failed", error=str(e))
            # Sent without parse_mode on purpose: exception text is arbitrary.
            await msg.edit_text(f"❌ Error generando PDF: {str(e)[:200]}")
            return

        safe_topic = topic[:40].replace(" ", "_").replace("/", "-")
        # Drop characters that are invalid in file names on common platforms,
        # and never produce an empty name component.
        safe_topic = re.sub(r'[\\:*?"<>|\x00-\x1f]', "", safe_topic) or "export"
        filename = f"researchowl_{safe_topic}_{chosen['output_type']}.pdf"

        # Outside of an entity the Markdown control characters in the topic
        # must be escaped, otherwise an unbalanced `_` or `*` makes the whole
        # reply fail with a parse error.
        caption_topic = escape_markdown(topic, version=1)
        await update.message.reply_document(
            document=io.BytesIO(pdf_bytes),
            filename=filename,
            caption=f"📄 *{chosen['output_type'].upper()}* — {caption_topic}\nExportado por ResearchOwl 🦉",
            parse_mode=ParseMode.MARKDOWN
        )
        try:
            # Best effort: the progress message is only cosmetic.
            await msg.delete()
        except Exception:
            pass

    except Exception as e:
        logger.error("Export failed", error=str(e))
        await update.message.reply_text(f"❌ Export failed: {str(e)[:200]}")
    finally:
        await db_conn.close()
|
||||
|
||||
|
||||
async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||
if not is_authorized(update.effective_user.id):
|
||||
return
|
||||
@@ -820,6 +898,7 @@ def create_bot() -> Application:
|
||||
app.add_handler(CommandHandler("generate", cmd_generate))
|
||||
app.add_handler(CommandHandler("sources", cmd_sources))
|
||||
app.add_handler(CommandHandler("outputs", cmd_outputs))
|
||||
app.add_handler(CommandHandler("export", cmd_export))
|
||||
app.add_handler(CommandHandler("costs", cmd_costs))
|
||||
app.add_handler(CommandHandler("watch", cmd_watch))
|
||||
app.add_handler(CommandHandler("unwatch", cmd_unwatch))
|
||||
|
||||
@@ -151,6 +151,14 @@ class ResearchDB:
|
||||
row = await cursor.fetchone()
|
||||
return dict(row) if row else None
|
||||
|
||||
async def get_latest_session(self, chat_id: int) -> Optional[dict]:
    """Return the newest research session stored for ``chat_id``.

    Sessions are ordered by ``created_at`` descending and only the first
    row is read. The row is returned as a plain ``dict``; ``None`` is
    returned when the query yields no row.
    """
    query = "SELECT * FROM research_sessions WHERE telegram_chat_id = ? ORDER BY created_at DESC LIMIT 1"
    result = await self.db.execute(query, (chat_id,))
    latest = await result.fetchone()
    if not latest:
        return None
    return dict(latest)
|
||||
|
||||
async def get_active_session(self, chat_id: int) -> Optional[dict]:
|
||||
cursor = await self.db.execute(
|
||||
"""SELECT * FROM research_sessions
|
||||
|
||||
@@ -445,3 +445,109 @@ Iterations: {session.get('iterations', 0)}
|
||||
Total words researched: {session.get('total_words', 0):,}
|
||||
---
|
||||
"""
|
||||
|
||||
|
||||
def generate_pdf(content: str, title: str = "ResearchOwl Output") -> bytes:
|
||||
try:
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import cm
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable
|
||||
from reportlab.lib.enums import TA_LEFT
|
||||
from reportlab.lib import colors
|
||||
import io
|
||||
import re
|
||||
except ImportError:
|
||||
raise ImportError("reportlab is required for PDF export — pip install reportlab")
|
||||
|
||||
buf = io.BytesIO()
|
||||
doc = SimpleDocTemplate(
|
||||
buf,
|
||||
pagesize=A4,
|
||||
rightMargin=2 * cm,
|
||||
leftMargin=2 * cm,
|
||||
topMargin=2.5 * cm,
|
||||
bottomMargin=2 * cm,
|
||||
title=title,
|
||||
)
|
||||
|
||||
base = getSampleStyleSheet()
|
||||
normal = ParagraphStyle("RO_Normal", parent=base["Normal"],
|
||||
fontSize=10, leading=14, spaceAfter=4)
|
||||
h1 = ParagraphStyle("RO_H1", parent=base["Heading1"],
|
||||
fontSize=18, spaceBefore=12, spaceAfter=6,
|
||||
textColor=colors.HexColor("#1a1a2e"))
|
||||
h2 = ParagraphStyle("RO_H2", parent=base["Heading2"],
|
||||
fontSize=14, spaceBefore=10, spaceAfter=4,
|
||||
textColor=colors.HexColor("#16213e"))
|
||||
h3 = ParagraphStyle("RO_H3", parent=base["Heading3"],
|
||||
fontSize=12, spaceBefore=8, spaceAfter=4)
|
||||
code_style = ParagraphStyle("RO_Code", parent=base["Code"],
|
||||
fontSize=9, leading=12, fontName="Courier",
|
||||
backColor=colors.HexColor("#f4f4f4"), spaceAfter=4)
|
||||
bullet_style = ParagraphStyle("RO_Bullet", parent=normal,
|
||||
leftIndent=20, bulletIndent=10, spaceAfter=2)
|
||||
|
||||
def md_to_para(text: str) -> str:
|
||||
text = text.replace("&", "&").replace("<", "<").replace(">", ">")
|
||||
text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
|
||||
text = re.sub(r'__(.+?)__', r'<b>\1</b>', text)
|
||||
text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
|
||||
text = re.sub(r'_(.+?)_', r'<i>\1</i>', text)
|
||||
text = re.sub(r'`(.+?)`', r'<font name="Courier">\1</font>', text)
|
||||
return text
|
||||
|
||||
story = []
|
||||
lines = content.split("\n")
|
||||
in_code = False
|
||||
code_buf = []
|
||||
|
||||
for line in lines:
|
||||
if line.startswith("```"):
|
||||
if not in_code:
|
||||
in_code = True
|
||||
code_buf = []
|
||||
else:
|
||||
in_code = False
|
||||
try:
|
||||
story.append(Paragraph(
|
||||
"<br/>".join(l.replace("&", "&").replace("<", "<").replace(">", ">")
|
||||
for l in code_buf),
|
||||
code_style
|
||||
))
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
|
||||
if in_code:
|
||||
code_buf.append(line)
|
||||
continue
|
||||
|
||||
if re.match(r'^[-*_]{3,}$', line.strip()):
|
||||
story.append(HRFlowable(width="100%", thickness=0.5,
|
||||
color=colors.grey, spaceAfter=6))
|
||||
continue
|
||||
|
||||
try:
|
||||
if line.startswith("### "):
|
||||
story.append(Paragraph(md_to_para(line[4:]), h3))
|
||||
elif line.startswith("## "):
|
||||
story.append(Paragraph(md_to_para(line[3:]), h2))
|
||||
elif line.startswith("# "):
|
||||
story.append(Paragraph(md_to_para(line[2:]), h1))
|
||||
elif re.match(r'^[-*+] ', line):
|
||||
story.append(Paragraph("• " + md_to_para(line[2:]), bullet_style))
|
||||
elif re.match(r'^\d+\. ', line):
|
||||
story.append(Paragraph(md_to_para(line), bullet_style))
|
||||
elif line.strip() == "":
|
||||
story.append(Spacer(1, 6))
|
||||
else:
|
||||
story.append(Paragraph(md_to_para(line), normal))
|
||||
except Exception:
|
||||
try:
|
||||
story.append(Paragraph(line[:300], normal))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
doc.build(story)
|
||||
return buf.getvalue()
|
||||
|
||||
Reference in New Issue
Block a user