From 4c7f5b521b4ec4c1ae454f91388df44726bf12b8 Mon Sep 17 00:00:00 2001
From: ChemaVX
Date: Mon, 4 May 2026 12:57:21 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20fase=203=20=E2=80=94=20export=20PDF=20c?=
 =?UTF-8?q?on=20reportlab=20+=20/export=20command?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile                 |   1 +
 requirements.txt           |   4 ++
 src/bot/bot.py             |  99 ++++++++++++++++++++++++++
 src/db/database.py         |  20 ++++++
 src/generator/generator.py | 140 +++++++++++++++++++++++++++++++++++++
 5 files changed, 264 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index af116ec..83360a8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,7 @@ WORKDIR /app
 RUN apt-get update && apt-get install -y \
     gcc g++ \
     libxml2-dev libxslt-dev \
+    libfreetype6-dev \
     && rm -rf /var/lib/apt/lists/*
 
 COPY requirements.txt .
diff --git a/requirements.txt b/requirements.txt
index 329db96..e2d8d91 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,6 +26,10 @@ scikit-learn==1.5.1
 # Claude API (scoring)
 anthropic>=0.40.0
 
+# PDF export
+markdown==3.7
+reportlab==4.2.5
+
 # Utilities
 pydantic==2.8.0
 pydantic-settings==2.4.0
diff --git a/src/bot/bot.py b/src/bot/bot.py
index 94aaa27..1258cb8 100644
--- a/src/bot/bot.py
+++ b/src/bot/bot.py
@@ -146,6 +146,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
         " Extended: podcast_extended|blog_extended|report_extended\n"
         "`/sources` — List all sources found\n"
         "`/outputs` — List generated outputs\n"
+        "`/export` — Exportar último output como PDF\n"
         "`/costs` — Show API usage costs\n"
         "`/watch [h]` — Schedule periodic research\n"
         "`/unwatch ` — Remove a watch\n"
@@ -760,6 +761,103 @@ async def _on_startup(app: Application) -> None:
     await _start_scheduler(app)
 
 
+async def cmd_export(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
+    """Handle /export: send the latest session's best output as a PDF.
+
+    Looks up the most recent research session for the chat, picks one of its
+    generated outputs (extended formats preferred) and replies with a PDF
+    rendered by generate_pdf. Unauthorized users are ignored silently.
+    """
+    import asyncio
+    import html
+    import io
+    import re
+
+    if not is_authorized(update.effective_user.id):
+        return
+
+    message = update.effective_message
+    chat_id = update.effective_chat.id
+    db_conn = await get_db()
+    db = ResearchDB(db_conn)
+
+    try:
+        session = await db.get_latest_session(chat_id)
+        if not session:
+            await message.reply_text("No hay sesiones de investigación.")
+            return
+
+        topic = session["topic"]
+        outputs = await db.get_outputs(session["id"])
+        if not outputs:
+            await message.reply_text(
+                "No hay outputs generados. Usa `/generate ` primero.",
+                parse_mode=ParseMode.MARKDOWN
+            )
+            return
+
+        # First match in priority order wins; unknown types fall back to
+        # whatever the database returned first.
+        priority = [
+            "report_extended", "blog_extended", "podcast_extended",
+            "report", "blog", "podcast", "thread",
+        ]
+        by_type = {}
+        for o in outputs:
+            by_type.setdefault(o["output_type"], o)
+        chosen = next(
+            (by_type[t] for t in priority if t in by_type), outputs[0]
+        )
+
+        # HTML parse mode plus html.escape keeps topics containing Markdown
+        # metacharacters (_ * ` [) from making Telegram reject the message.
+        shown_topic = html.escape(topic[:200])
+        msg = await message.reply_text(
+            f"📄 Generando PDF para <code>{shown_topic}</code>…",
+            parse_mode=ParseMode.HTML
+        )
+
+        try:
+            from src.generator.generator import generate_pdf
+            # Rendering is synchronous and CPU-bound; run it in a worker
+            # thread so the bot's event loop keeps serving other updates.
+            pdf_bytes = await asyncio.to_thread(
+                generate_pdf, chosen["content"], title=topic
+            )
+        except ImportError:
+            await msg.edit_text("❌ reportlab no está instalado. Ejecuta: `pip install reportlab`")
+            return
+        except Exception as e:
+            logger.error("PDF generation failed", error=str(e))
+            await msg.edit_text(f"❌ Error generando PDF: {str(e)[:200]}")
+            return
+
+        # Keep only filename-safe characters; fall back if nothing is left.
+        safe_topic = re.sub(r"[^\w.-]+", "_", topic[:40]).strip("_") or "export"
+        filename = f"researchowl_{safe_topic}_{chosen['output_type']}.pdf"
+
+        await message.reply_document(
+            document=io.BytesIO(pdf_bytes),
+            filename=filename,
+            caption=(
+                f"📄 <b>{html.escape(chosen['output_type'].upper())}</b> — "
+                f"{shown_topic}\nExportado por ResearchOwl 🦉"
+            ),
+            parse_mode=ParseMode.HTML
+        )
+        try:
+            await msg.delete()
+        except Exception:
+            # Best effort: the status message may already be gone.
+            pass
+
+    except Exception as e:
+        logger.error("Export failed", error=str(e))
+        await message.reply_text(f"❌ Export failed: {str(e)[:200]}")
+    finally:
+        await db_conn.close()
+
+
 async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
     if not is_authorized(update.effective_user.id):
         return
@@ -820,6 +918,7 @@ def create_bot() -> Application:
     app.add_handler(CommandHandler("generate", cmd_generate))
     app.add_handler(CommandHandler("sources", cmd_sources))
     app.add_handler(CommandHandler("outputs", cmd_outputs))
+    app.add_handler(CommandHandler("export", cmd_export))
     app.add_handler(CommandHandler("costs", cmd_costs))
     app.add_handler(CommandHandler("watch", cmd_watch))
     app.add_handler(CommandHandler("unwatch", cmd_unwatch))
diff --git a/src/db/database.py b/src/db/database.py
index fb01534..cd4beaa 100644
--- a/src/db/database.py
+++ b/src/db/database.py
@@ -151,6 +151,26 @@ class ResearchDB:
         row = await cursor.fetchone()
         return dict(row) if row else None
 
+    async def get_latest_session(self, chat_id: int) -> Optional[dict]:
+        """Return the most recently created session for *chat_id*, or None.
+
+        Args:
+            chat_id: Telegram chat id matched against telegram_chat_id.
+
+        Returns:
+            The newest research_sessions row as a dict, or None when the
+            chat has no sessions.
+        """
+        cursor = await self.db.execute(
+            """SELECT * FROM research_sessions
+               WHERE telegram_chat_id = ?
+               ORDER BY created_at DESC
+               LIMIT 1""",
+            (chat_id,),
+        )
+        row = await cursor.fetchone()
+        return dict(row) if row else None
+
     async def get_active_session(self, chat_id: int) -> Optional[dict]:
         cursor = await self.db.execute(
             """SELECT * FROM research_sessions
diff --git a/src/generator/generator.py b/src/generator/generator.py
index 8317ee3..a516fd2 100644
--- a/src/generator/generator.py
+++ b/src/generator/generator.py
@@ -445,3 +445,143 @@ Iterations: {session.get('iterations', 0)}
 Total words researched: {session.get('total_words', 0):,}
 ---
 """
+
+
+def generate_pdf(content: str, title: str = "ResearchOwl Output") -> bytes:
+    """Render Markdown-like text to a PDF and return the file as bytes.
+
+    Only a small Markdown subset is handled: ``#``/``##``/``###`` headings,
+    bullet and numbered list items, fenced code blocks, horizontal rules and
+    inline bold / italic / code spans. Any other line becomes a paragraph.
+
+    Args:
+        content: Markdown-like source text.
+        title: Document title passed to the PDF template.
+
+    Returns:
+        The generated PDF as raw bytes.
+
+    Raises:
+        ImportError: If reportlab is not installed.
+    """
+    import html
+    import io
+    import re
+
+    try:
+        from reportlab.lib import colors
+        from reportlab.lib.pagesizes import A4
+        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+        from reportlab.lib.units import cm
+        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable
+    except ImportError as exc:
+        raise ImportError(
+            "reportlab is required for PDF export — pip install reportlab"
+        ) from exc
+
+    buf = io.BytesIO()
+    doc = SimpleDocTemplate(
+        buf,
+        pagesize=A4,
+        rightMargin=2 * cm,
+        leftMargin=2 * cm,
+        topMargin=2.5 * cm,
+        bottomMargin=2 * cm,
+        title=title,
+    )
+
+    base = getSampleStyleSheet()
+    normal = ParagraphStyle("RO_Normal", parent=base["Normal"],
+                            fontSize=10, leading=14, spaceAfter=4)
+    h1 = ParagraphStyle("RO_H1", parent=base["Heading1"],
+                        fontSize=18, spaceBefore=12, spaceAfter=6,
+                        textColor=colors.HexColor("#1a1a2e"))
+    h2 = ParagraphStyle("RO_H2", parent=base["Heading2"],
+                        fontSize=14, spaceBefore=10, spaceAfter=4,
+                        textColor=colors.HexColor("#16213e"))
+    h3 = ParagraphStyle("RO_H3", parent=base["Heading3"],
+                        fontSize=12, spaceBefore=8, spaceAfter=4)
+    code_style = ParagraphStyle("RO_Code", parent=base["Code"],
+                                fontSize=9, leading=12, fontName="Courier",
+                                backColor=colors.HexColor("#f4f4f4"), spaceAfter=4)
+    bullet_style = ParagraphStyle("RO_Bullet", parent=normal,
+                                  leftIndent=20, bulletIndent=10, spaceAfter=2)
+
+    def escape(text: str) -> str:
+        # Paragraph text is parsed as XML-like markup, so literal &, < and >
+        # have to become entities before any tags are added.
+        return html.escape(text, quote=False)
+
+    def emphasize(text: str) -> str:
+        # Bold first so "**x**" is not consumed by the single-marker rules.
+        text = re.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", text)
+        text = re.sub(r"(?<!\w)__(.+?)__(?!\w)", r"<b>\1</b>", text)
+        text = re.sub(r"\*(.+?)\*", r"<i>\1</i>", text)
+        # Word-boundary guards keep snake_case identifiers and URLs intact.
+        text = re.sub(r"(?<!\w)_(.+?)_(?!\w)", r"<i>\1</i>", text)
+        return text
+
+    def md_to_para(text: str) -> str:
+        # Split out `code` spans (odd indexes) so emphasis rules skip them.
+        parts = re.split(r"`([^`]+)`", escape(text))
+        return "".join(
+            f'<font face="Courier">{part}</font>' if i % 2 else emphasize(part)
+            for i, part in enumerate(parts)
+        )
+
+    story = []
+
+    def flush_code(code_lines) -> None:
+        # <br/> keeps the line structure; plain newlines would be collapsed.
+        story.append(Paragraph("<br/>".join(map(escape, code_lines)), code_style))
+
+    in_code = False
+    code_buf = []
+
+    for line in content.split("\n"):
+        if line.startswith("```"):
+            if in_code:
+                flush_code(code_buf)
+            code_buf = []
+            in_code = not in_code
+            continue
+
+        if in_code:
+            code_buf.append(line)
+            continue
+
+        if re.match(r"^[-*_]{3,}$", line.strip()):
+            story.append(HRFlowable(width="100%", thickness=0.5,
+                                    color=colors.grey, spaceAfter=6))
+            continue
+
+        if not line.strip():
+            story.append(Spacer(1, 6))
+            continue
+
+        if line.startswith("### "):
+            markup, style = md_to_para(line[4:]), h3
+        elif line.startswith("## "):
+            markup, style = md_to_para(line[3:]), h2
+        elif line.startswith("# "):
+            markup, style = md_to_para(line[2:]), h1
+        elif re.match(r"^[-*+] ", line):
+            markup, style = "• " + md_to_para(line[2:]), bullet_style
+        elif re.match(r"^\d+\. ", line):
+            markup, style = md_to_para(line), bullet_style
+        else:
+            markup, style = md_to_para(line), normal
+
+        try:
+            story.append(Paragraph(markup, style))
+        except Exception:
+            # Overlapping markers can yield mis-nested tags that reportlab
+            # rejects; fall back to the escaped plain text so nothing is lost.
+            story.append(Paragraph(escape(line), normal))
+
+    if in_code:
+        # Unterminated fence: keep what was collected instead of dropping it.
+        flush_code(code_buf)
+
+    doc.build(story)
+    return buf.getvalue()