diff --git a/src/processor/processor.py b/src/processor/processor.py
index b84cde8..7fd02a1 100644
--- a/src/processor/processor.py
+++ b/src/processor/processor.py
@@ -211,33 +211,26 @@ class ContentProcessor:
         return stored
 
     async def _score_quality(self, chunk: str, topic: str) -> float:
-        """
-        Ask Ollama to score relevance and quality of a chunk.
-        Returns 0.0-1.0
-        """
-        prompt = f"""Rate this text chunk on a scale of 0-10 for:
-1. Relevance to topic: "{topic}"
-2. Information density (facts, data, insights)
-3. Credibility (not speculation, not clickbait)
-
-Text:
-{chunk[:500]}
-
-Respond with ONLY a single number 0-10. No explanation."""
-
+        """Return Ollama's 0-10 relevance rating of chunk vs. topic, normalized to 0.0-1.0 (0.6 on failure)."""
+        prompt = (  # single-axis prompt; embeds only the first 500 characters of chunk
+            f'Score 0-10: how relevant is this text to the topic "{topic}"?\n'
+            f"0 = completely unrelated, 10 = directly and specifically about this topic.\n\n"
+            f"Text:\n{chunk[:500]}\n\n"
+            f"Reply with ONLY a single integer 0-10. No explanation."
+        )
         try:
             response = await self.ollama.generate(prompt)
             numbers = re.findall(r'\b(\d+(?:\.\d+)?)\b', response)
             if numbers:
                 score = float(numbers[0])
                 normalized = min(1.0, score / 10.0)
-                logger.debug("Quality score", raw=score, normalized=round(normalized, 2))
+                logger.debug("Relevance score", raw=score, normalized=round(normalized, 2))
                 return normalized
-            logger.debug("No number in quality response", response=response[:80])
-            return 0.6  # above threshold so chunk is kept
+            logger.debug("No number in relevance response", response=response[:80])
+            return 0.6  # fallback when no number is found; formerly noted as above the keep threshold — TODO confirm
         except Exception as e:
-            logger.warning("Quality scoring failed", error=str(e))
-            return 0.6  # above threshold so chunk is kept on Ollama error
+            logger.warning("Relevance scoring failed", error=str(e))
+            return 0.6  # same fallback when anything in the try block raises
 
     async def rag_query(self, session_id: int, query: str, top_k: int = 20) -> str:
         """
diff --git a/src/scraper/exhaustive.py b/src/scraper/exhaustive.py
index f07b5cf..c5b6744 100644
--- a/src/scraper/exhaustive.py
+++ b/src/scraper/exhaustive.py
@@ -90,6 +90,14 @@ class ExhaustiveScraper:
     Keeps expanding until saturation or limits hit.
     """
 
+    # Common English and Spanish stopwords to ignore when extracting topic keywords
+    _STOPWORDS = {
+        'the','a','an','and','or','of','in','on','at','to','for','is','are','was',
+        'were','be','been','have','has','had','do','does','did','about','with','from',
+        'el','la','los','las','de','del','en','un','una','y','o','que','se','por',
+        'con','para','sobre','como','pero','más',
+    }
+
     def __init__(self, db: ResearchDB, session_id: int, topic: str,
                  progress_callback=None):
         self.db = db
@@ -100,6 +108,28 @@ class ExhaustiveScraper:
         self.total_sources = 0
         self._stop = False
         self._http: Optional[aiohttp.ClientSession] = None
+        # Pre-compute topic keywords for child-URL relevance filtering
+        self._keywords = [
+            w for w in re.findall(r'\b\w{3,}\b', topic.lower())
+            if w not in self._STOPWORDS
+        ]
+
+    def _url_is_relevant(self, url: str, title: str = "") -> bool:
+        """Return True if the URL path or title mentions a topic keyword.
+
+        The path is percent-decoded before matching, so keywords with
+        non-ASCII letters (e.g. "computación") still match URLs that
+        encode them as ``%C3%B3``. Matching is a case-insensitive
+        substring test. When no keywords were extracted from the topic,
+        every URL is accepted.
+        """
+        # Function-scope import: the module's import block is not touched here.
+        from urllib.parse import unquote
+
+        if not self._keywords:
+            return True
+        text = (unquote(urlparse(url).path) + " " + (title or "")).lower()
+        return any(kw in text for kw in self._keywords)
 
     async def stop(self):
         self._stop = True
@@ -300,13 +320,16 @@ class ExhaustiveScraper:
                     content, title = await self._extract_youtube(url)
                 elif source_type == "wikipedia":
                     content, title, new_urls = await self._extract_wikipedia(url)
+                    added = 0  # number of child links actually queued
                     for new_url in (new_urls or []):
-                        await self.db.add_source(
-                            self.session_id, new_url, "wikipedia",
-                            depth=source["depth"] + 1
-                        )
+                        if self._url_is_relevant(new_url):  # no title passed: only the URL path is checked
+                            await self.db.add_source(
+                                self.session_id, new_url, "wikipedia",
+                                depth=source["depth"] + 1
+                            )
+                            added += 1
                     await self._mark_scraped(source_id, content, title, url)
-                    return len(new_urls or [])
+                    return added  # queued count, not the number of links found
                 elif source_type == "reddit":
                     content, title = await self._extract_reddit(url)
                     # Small delay between Reddit requests to avoid rate limiting
@@ -315,14 +338,17 @@ class ExhaustiveScraper:
                     content, title = await self._extract_pdf(url)
                 else:
                     content, title, new_urls = await self._extract_web(url, source["depth"])
+                    added = 0  # number of child links actually queued
                     for new_url in (new_urls or []):
-                        await self.db.add_source(
-                            self.session_id, new_url,
-                            detect_source_type(new_url),
-                            depth=source["depth"] + 1
-                        )
+                        if self._url_is_relevant(new_url):  # no title passed: only the URL path is checked
+                            await self.db.add_source(
+                                self.session_id, new_url,
+                                detect_source_type(new_url),
+                                depth=source["depth"] + 1
+                            )
+                            added += 1
                     await self._mark_scraped(source_id, content, title, url)
-                    return len(new_urls or [])
+                    return added  # queued count, not the number of links found
 
                 await self._mark_scraped(source_id, content, title, url)
                 return 0