feat: fase 3 — export PDF con reportlab + /export command
Build & Deploy ResearchOwl / build-and-push (push) Successful in 1m2s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 1m2s
This commit is contained in:
@@ -6,6 +6,7 @@ WORKDIR /app
|
|||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
gcc g++ \
|
gcc g++ \
|
||||||
libxml2-dev libxslt-dev \
|
libxml2-dev libxslt-dev \
|
||||||
|
libfreetype6-dev \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
|
|||||||
@@ -26,6 +26,10 @@ scikit-learn==1.5.1
|
|||||||
# Claude API (scoring)
|
# Claude API (scoring)
|
||||||
anthropic>=0.40.0
|
anthropic>=0.40.0
|
||||||
|
|
||||||
|
# PDF export
|
||||||
|
markdown==3.7
|
||||||
|
reportlab==4.2.5
|
||||||
|
|
||||||
# Utilities
|
# Utilities
|
||||||
pydantic==2.8.0
|
pydantic==2.8.0
|
||||||
pydantic-settings==2.4.0
|
pydantic-settings==2.4.0
|
||||||
|
|||||||
@@ -146,6 +146,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
|||||||
" Extended: podcast_extended|blog_extended|report_extended\n"
|
" Extended: podcast_extended|blog_extended|report_extended\n"
|
||||||
"`/sources` — List all sources found\n"
|
"`/sources` — List all sources found\n"
|
||||||
"`/outputs` — List generated outputs\n"
|
"`/outputs` — List generated outputs\n"
|
||||||
|
"`/export` — Exportar último output como PDF\n"
|
||||||
"`/costs` — Show API usage costs\n"
|
"`/costs` — Show API usage costs\n"
|
||||||
"`/watch <topic> [h]` — Schedule periodic research\n"
|
"`/watch <topic> [h]` — Schedule periodic research\n"
|
||||||
"`/unwatch <topic>` — Remove a watch\n"
|
"`/unwatch <topic>` — Remove a watch\n"
|
||||||
@@ -760,6 +761,83 @@ async def _on_startup(app: Application) -> None:
|
|||||||
await _start_scheduler(app)
|
await _start_scheduler(app)
|
||||||
|
|
||||||
|
|
||||||
|
async def cmd_export(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle ``/export``: send one output of the latest session as a PDF.

    Looks up the most recent research session for the calling chat, picks one
    of its generated outputs (extended formats first, then the short ones,
    otherwise the first output returned by the database), renders it with
    ``generate_pdf`` and replies with the resulting document.

    Args:
        update: Incoming Telegram update carrying the ``/export`` command.
        ctx: Handler context (unused).

    Returns:
        None. Every outcome is reported to the user through Telegram messages.
    """
    import io
    import re

    from telegram.helpers import escape_markdown

    if not is_authorized(update.effective_user.id):
        return

    chat_id = update.effective_chat.id
    db_conn = await get_db()
    db = ResearchDB(db_conn)

    try:
        session = await db.get_latest_session(chat_id)
        if not session:
            await update.message.reply_text("No hay sesiones de investigación.")
            return

        session_id = session["id"]
        topic = session["topic"]

        outputs = await db.get_outputs(session_id)
        if not outputs:
            await update.message.reply_text(
                "No hay outputs generados. Usa `/generate <tipo>` primero.",
                parse_mode=ParseMode.MARKDOWN,
            )
            return

        priority = [
            "report_extended", "blog_extended", "podcast_extended",
            "report", "blog", "podcast", "thread",
        ]
        # Index the first output of each type, then take the highest-priority
        # type that exists; unknown types fall back to the first output.
        first_by_type = {}
        for output in outputs:
            first_by_type.setdefault(output["output_type"], output)
        chosen = next(
            (first_by_type[ptype] for ptype in priority if ptype in first_by_type),
            outputs[0],
        )

        # Backslash escapes are not honoured inside a legacy-Markdown code
        # span, so a backtick in the topic would end the span early and make
        # Telegram reject the message. Swap it for an apostrophe instead.
        topic_in_code = topic.replace("`", "'")
        msg = await update.message.reply_text(
            f"📄 Generando PDF para `{topic_in_code}`…",
            parse_mode=ParseMode.MARKDOWN,
        )

        try:
            from src.generator.generator import generate_pdf
            # NOTE: generate_pdf is a plain synchronous call, so the event
            # loop is busy until the PDF has been built.
            pdf_bytes = generate_pdf(chosen["content"], title=topic)
        except ImportError:
            await msg.edit_text("❌ reportlab no está instalado. Ejecuta: `pip install reportlab`")
            return
        except Exception as e:
            logger.error("PDF generation failed", error=str(e))
            await msg.edit_text(f"❌ Error generando PDF: {str(e)[:200]}")
            return

        # Keep the original space/slash mapping, then neutralise anything else
        # that is awkward in a filename (backslashes, colons, quotes, newlines).
        safe_topic = topic[:40].replace(" ", "_").replace("/", "-")
        safe_topic = re.sub(r"[^\w.-]", "_", safe_topic) or "export"
        filename = f"researchowl_{safe_topic}_{chosen['output_type']}.pdf"

        # Outside an entity, legacy Markdown needs `_`, `*`, '`' and `[`
        # escaped; a topic such as "machine_learning" would otherwise open an
        # italic entity that never closes. The slice keeps the caption well
        # under Telegram's caption length limit even after escaping.
        caption_topic = escape_markdown(topic[:400])
        await update.message.reply_document(
            document=io.BytesIO(pdf_bytes),
            filename=filename,
            caption=f"📄 *{chosen['output_type'].upper()}* — {caption_topic}\nExportado por ResearchOwl 🦉",
            parse_mode=ParseMode.MARKDOWN,
        )
        try:
            await msg.delete()
        except Exception:
            # Best effort: the document is already delivered, so a leftover
            # progress message is not worth failing the command for.
            pass

    except Exception as e:
        logger.error("Export failed", error=str(e))
        await update.message.reply_text(f"❌ Export failed: {str(e)[:200]}")
    finally:
        await db_conn.close()
|
||||||
|
|
||||||
|
|
||||||
async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||||
if not is_authorized(update.effective_user.id):
|
if not is_authorized(update.effective_user.id):
|
||||||
return
|
return
|
||||||
@@ -820,6 +898,7 @@ def create_bot() -> Application:
|
|||||||
app.add_handler(CommandHandler("generate", cmd_generate))
|
app.add_handler(CommandHandler("generate", cmd_generate))
|
||||||
app.add_handler(CommandHandler("sources", cmd_sources))
|
app.add_handler(CommandHandler("sources", cmd_sources))
|
||||||
app.add_handler(CommandHandler("outputs", cmd_outputs))
|
app.add_handler(CommandHandler("outputs", cmd_outputs))
|
||||||
|
app.add_handler(CommandHandler("export", cmd_export))
|
||||||
app.add_handler(CommandHandler("costs", cmd_costs))
|
app.add_handler(CommandHandler("costs", cmd_costs))
|
||||||
app.add_handler(CommandHandler("watch", cmd_watch))
|
app.add_handler(CommandHandler("watch", cmd_watch))
|
||||||
app.add_handler(CommandHandler("unwatch", cmd_unwatch))
|
app.add_handler(CommandHandler("unwatch", cmd_unwatch))
|
||||||
|
|||||||
@@ -151,6 +151,14 @@ class ResearchDB:
|
|||||||
row = await cursor.fetchone()
|
row = await cursor.fetchone()
|
||||||
return dict(row) if row else None
|
return dict(row) if row else None
|
||||||
|
|
||||||
|
async def get_latest_session(self, chat_id: int) -> Optional[dict]:
    """Fetch the newest research session recorded for a Telegram chat.

    Args:
        chat_id: Value matched against the ``telegram_chat_id`` column.

    Returns:
        The row with the greatest ``created_at`` as a plain ``dict``, or
        ``None`` when the query yields no row.
    """
    query = "SELECT * FROM research_sessions WHERE telegram_chat_id = ? ORDER BY created_at DESC LIMIT 1"
    result = await self.db.execute(query, (chat_id,))
    latest = await result.fetchone()
    if not latest:
        return None
    return dict(latest)
|
||||||
|
|
||||||
async def get_active_session(self, chat_id: int) -> Optional[dict]:
|
async def get_active_session(self, chat_id: int) -> Optional[dict]:
|
||||||
cursor = await self.db.execute(
|
cursor = await self.db.execute(
|
||||||
"""SELECT * FROM research_sessions
|
"""SELECT * FROM research_sessions
|
||||||
|
|||||||
@@ -445,3 +445,109 @@ Iterations: {session.get('iterations', 0)}
|
|||||||
Total words researched: {session.get('total_words', 0):,}
|
Total words researched: {session.get('total_words', 0):,}
|
||||||
---
|
---
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def generate_pdf(content: str, title: str = "ResearchOwl Output") -> bytes:
    """Render a Markdown-like document to an A4 PDF and return its bytes.

    Only a line-oriented subset of Markdown is understood: ``#`` headings
    (levels above three reuse the level-three style), ``-``/``*``/``+``
    bullets, numbered items, horizontal rules, fenced code blocks, and the
    inline forms ``**bold**``, ``__bold__``, ``*italic*``, ``_italic_`` and
    `` `code` ``. Everything else becomes a normal paragraph.

    Args:
        content: Source text. ``None`` or an empty string is treated as an
            empty document.
        title: Title passed to the PDF document template.

    Returns:
        The generated PDF as raw bytes.

    Raises:
        ImportError: If reportlab is not installed.
    """
    import io
    import logging
    import re
    from xml.sax.saxutils import escape

    try:
        from reportlab.lib import colors
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
        from reportlab.lib.units import cm
        from reportlab.platypus import HRFlowable, Paragraph, SimpleDocTemplate, Spacer
    except ImportError as exc:
        raise ImportError("reportlab is required for PDF export — pip install reportlab") from exc

    log = logging.getLogger(__name__)

    buf = io.BytesIO()
    doc = SimpleDocTemplate(
        buf,
        pagesize=A4,
        rightMargin=2 * cm,
        leftMargin=2 * cm,
        topMargin=2.5 * cm,
        bottomMargin=2 * cm,
        title=title,
    )

    base = getSampleStyleSheet()
    normal = ParagraphStyle("RO_Normal", parent=base["Normal"],
                            fontSize=10, leading=14, spaceAfter=4)
    h1 = ParagraphStyle("RO_H1", parent=base["Heading1"],
                        fontSize=18, spaceBefore=12, spaceAfter=6,
                        textColor=colors.HexColor("#1a1a2e"))
    h2 = ParagraphStyle("RO_H2", parent=base["Heading2"],
                        fontSize=14, spaceBefore=10, spaceAfter=4,
                        textColor=colors.HexColor("#16213e"))
    h3 = ParagraphStyle("RO_H3", parent=base["Heading3"],
                        fontSize=12, spaceBefore=8, spaceAfter=4)
    code_style = ParagraphStyle("RO_Code", parent=base["Code"],
                                fontSize=9, leading=12, fontName="Courier",
                                backColor=colors.HexColor("#f4f4f4"), spaceAfter=4)
    bullet_style = ParagraphStyle("RO_Bullet", parent=normal,
                                  leftIndent=20, bulletIndent=10, spaceAfter=2)
    heading_styles = (h1, h2, h3)

    # Patterns are compiled once here because they run for every input line.
    code_span_re = re.compile(r"(`[^`]+`)")
    inline_rules = (
        (re.compile(r"\*\*(.+?)\*\*"), r"<b>\1</b>"),
        # Underscore emphasis must sit on word boundaries, otherwise
        # identifiers such as snake_case_name or __init__ get restyled.
        (re.compile(r"(?<!\w)__(.+?)__(?!\w)"), r"<b>\1</b>"),
        (re.compile(r"\*(?=\S)(.+?)(?<=\S)\*"), r"<i>\1</i>"),
        (re.compile(r"(?<!\w)_(?=\S)(.+?)(?<=\S)_(?!\w)"), r"<i>\1</i>"),
    )
    heading_re = re.compile(r"^(#{1,6})\s+(.*)$")
    rule_re = re.compile(r"^[-*_]{3,}$")
    bullet_re = re.compile(r"^[-*+] ")
    numbered_re = re.compile(r"^\d+\. ")

    def md_to_para(text: str) -> str:
        """Convert one line of inline Markdown to reportlab paragraph markup."""
        pieces = []
        # Splitting on a capturing group puts the `code` spans at odd indices,
        # which keeps emphasis rules from firing inside code.
        for idx, chunk in enumerate(code_span_re.split(text)):
            if idx % 2:
                pieces.append(f'<font name="Courier">{escape(chunk[1:-1])}</font>')
                continue
            # Escape &, < and > first so only the tags added below are markup.
            chunk = escape(chunk)
            for pattern, replacement in inline_rules:
                chunk = pattern.sub(replacement, chunk)
            pieces.append(chunk)
        return "".join(pieces)

    story = []

    def add_text(raw: str, style, prefix: str = "") -> None:
        """Append *raw* as a paragraph, degrading to literal text on bad markup."""
        try:
            story.append(Paragraph(prefix + md_to_para(raw), style))
            return
        except Exception:
            # Overlapping emphasis markers can yield mis-nested tags that the
            # paragraph parser rejects; fall through to the literal rendering.
            pass
        try:
            story.append(Paragraph(prefix + escape(raw), style))
        except Exception:
            # Deliberate best effort: one bad line must not abort the export.
            log.warning("Skipping line that could not be rendered: %r", raw[:80])

    def flush_code(code_lines) -> None:
        """Append the buffered code lines as a single code-styled paragraph."""
        if not code_lines:
            return
        markup = "<br/>".join(escape(code_line) for code_line in code_lines)
        try:
            story.append(Paragraph(markup, code_style))
        except Exception:
            log.warning("Skipping code block that could not be rendered")

    in_code = False
    code_buf = []

    for line in (content or "").split("\n"):
        if line.startswith("```"):
            if in_code:
                flush_code(code_buf)
            in_code = not in_code
            code_buf = []
            continue

        if in_code:
            code_buf.append(line)
            continue

        if rule_re.match(line.strip()):
            story.append(HRFlowable(width="100%", thickness=0.5,
                                    color=colors.grey, spaceAfter=6))
            continue

        if line.strip() == "":
            story.append(Spacer(1, 6))
            continue

        heading = heading_re.match(line)
        if heading:
            level = min(len(heading.group(1)), len(heading_styles))
            add_text(heading.group(2), heading_styles[level - 1])
        elif bullet_re.match(line):
            add_text(line[2:], bullet_style, prefix="• ")
        elif numbered_re.match(line):
            add_text(line, bullet_style)
        else:
            add_text(line, normal)

    # A fence that is never closed would otherwise lose all buffered code.
    if in_code:
        flush_code(code_buf)

    doc.build(story)
    return buf.getvalue()
|
||||||
|
|||||||
Reference in New Issue
Block a user