diff --git a/src/generator/generator.py b/src/generator/generator.py index a516fd2..4978067 100644 --- a/src/generator/generator.py +++ b/src/generator/generator.py @@ -447,7 +447,31 @@ Total words researched: {session.get('total_words', 0):,} """ +def _remove_duplicate_headings(text: str) -> str: + lines = text.split('\n') + result = [] + i = 0 + while i < len(lines): + line = lines[i].rstrip() + if line.startswith('# ') and not line.startswith('## '): + h1_text = line[2:].strip().lower() + prev_heading = '' + for prev in reversed(result): + prev_stripped = prev.strip() + if prev_stripped: + if prev_stripped.startswith('## '): + prev_heading = prev_stripped[3:].strip().lower() + break + if prev_heading and prev_heading == h1_text: + i += 1 + continue + result.append(lines[i]) + i += 1 + return '\n'.join(result) + + def generate_pdf(content: str, title: str = "ResearchOwl Output") -> bytes: + content = _remove_duplicate_headings(content) try: from reportlab.lib.pagesizes import A4 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle