fix: eliminar títulos h1 duplicados en export PDF

2026-05-04 13:12:32 +00:00
parent e0a42f0b91
commit 6aaa85a1f8
1 changed files with 24 additions and 0 deletions
@@ -447,7 +447,31 @@ Total words researched: {session.get('total_words', 0):,}
 """


+def _remove_duplicate_headings(text: str) -> str:
+    """Drop each H1 heading that merely repeats the H2 heading just above it.
+
+    An H1 line (``# Title``) is removed when the closest preceding non-blank
+    line that was kept is an H2 (``## Title``) carrying the same text. The
+    comparison is caseless and ignores surrounding whitespace. Every other
+    line is returned unchanged and in its original order.
+
+    Args:
+        text: Newline-separated, Markdown-style content.
+
+    Returns:
+        The content with the duplicated H1 lines removed.
+    """
+    kept = []
+    for raw_line in text.split('\n'):
+        line = raw_line.rstrip()
+        # A line starting with '# ' cannot also start with '## ', so this
+        # single test is enough to recognise an H1.
+        if line.startswith('# '):
+            # casefold() rather than lower(): full Unicode caseless matching,
+            # so e.g. 'Straße' and 'STRASSE' count as the same heading.
+            h1_text = line[2:].strip().casefold()
+            # Only the nearest non-blank kept line can be the duplicate.
+            previous = next((s for s in map(str.strip, reversed(kept)) if s), '')
+            if previous.startswith('## ') and previous[3:].strip().casefold() == h1_text:
+                continue
+        kept.append(raw_line)
+    return '\n'.join(kept)
+
+
 def generate_pdf(content: str, title: str = "ResearchOwl Output") -> bytes:
+    content = _remove_duplicate_headings(content)
    try:
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle