Odysseus v1.0

2026-05-31 23:58:26 +09:00
commit e5c99a5eee
421 changed files with 271349 additions and 0 deletions
--- a/src/deep_research.py
+++ b/src/deep_research.py
@@ -0,0 +1,820 @@
+# src/deep_research.py
+"""
+IterResearch-style deep research engine.
+
+Implements an iterative Think→Search→Extract→Synthesize loop where the LLM
+drives every decision: what to search, what's relevant, what's missing, and
+when to stop.  Inspired by Alibaba's IterResearch approach.
+"""
+import asyncio
+import json
+import logging
+import re
+import time
+from typing import Callable, Dict, List, Optional, Set
+
+from src.research_utils import strip_thinking, is_low_quality
+
+from src.goal_based_extractor import EXTRACTOR_PROMPT
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Prompts
+# ---------------------------------------------------------------------------
+# Planning prompt. Filled with str.format(question=...) in _create_plan; the
+# doubled braces in the example are format escapes that render as literal
+# JSON braces. The reply is parsed for "sub_questions", "key_topics" and
+# "success_criteria".
+RESEARCH_PLAN_PROMPT = """\
+You are a research strategist. Before searching, analyze this question and create a research plan.
+
+**Question:** {question}
+
+Break this question down:
+1. What are the key sub-topics that need to be covered for a comprehensive answer?
+2. What specific data points, facts, or perspectives should we look for?
+3. What would a complete, high-quality answer include?
+
+Return a JSON object with:
+- "sub_questions": Array of 3-6 specific sub-questions to investigate
+- "key_topics": Array of key topics/angles to cover
+- "success_criteria": One sentence describing what a complete answer looks like
+
+Example:
+{{
+  "sub_questions": ["What is the cost of living in X?", "How is the healthcare system?"],
+  "key_topics": ["economy", "healthcare", "safety", "culture"],
+  "success_criteria": "A balanced comparison covering cost, quality of life, and practical considerations."
+}}
+"""
+
+# Query-generation prompt. _generate_queries fills question, research_plan,
+# report, round_num, num_queries and round_instruction, then parses the reply
+# as a JSON array of query strings.
+QUERY_GEN_PROMPT = """\
+You are a research assistant planning web searches.
+
+**Original question:** {question}
+
+**Research plan:**
+{research_plan}
+
+**What we know so far:**
+{report}
+
+**Round:** {round_num}
+
+Generate {num_queries} focused search queries that will help answer the question.
+{round_instruction}
+
+Return ONLY a JSON array of query strings, nothing else.
+Example: ["query one", "query two", "query three"]
+"""
+
+# Synthesis prompt. _synthesize fills question, report (the current evolving
+# report or a first-round placeholder) and new_findings (the formatted
+# findings window). The trailing backslashes join the instruction lines into
+# one paragraph inside the string.
+SYNTHESIZE_PROMPT = """\
+You are updating an evolving research report.
+
+**Original question:** {question}
+
+**Current report:**
+{report}
+
+**New findings from this round:**
+{new_findings}
+
+Integrate the new findings into the existing report. Produce an updated, well-organized \
+report that answers the original question as completely as possible given all evidence so far. \
+Remove redundancy, resolve contradictions, and maintain logical flow. \
+Keep source URLs as inline citations where relevant.
+
+Write only the updated report — no preamble or meta-commentary.
+"""
+
+# Stop-decision prompt. _should_stop fills question, report and round_num and
+# treats a reply that starts with "YES" (after stripping leading markup) as
+# the signal to stop.
+STOP_PROMPT = """\
+You are deciding whether a research report is comprehensive enough.
+
+**Original question:** {question}
+
+**Current report:**
+{report}
+
+**Rounds completed:** {round_num}
+
+Based on the report so far, do we have enough information to answer the question \
+comprehensively?  Consider:
+- Are the key aspects of the question addressed?
+- Are there obvious gaps or unanswered sub-questions?
+- Is the evidence sufficient and from multiple sources?
+
+Reply with ONLY "YES" or "NO" followed by a brief one-sentence reason.
+Example: "YES — The report covers all major aspects with evidence from multiple sources."
+Example: "NO — We still lack information about the economic impact."
+"""
+
+# Final-report prompt. _final_report fills question and report (the evolving
+# report) and appends the matching CATEGORY_PROMPTS entry, if any, after it.
+# NOTE(review): the prompt asks for 1500+ words while _final_report only
+# requests an expansion below 400 words — confirm the thresholds are intended.
+FINAL_REPORT_PROMPT = """\
+Write a **long, detailed, comprehensive** research report answering this question:
+
+**Question:** {question}
+
+**All collected evidence and analysis:**
+{report}
+
+Requirements:
+- Write at MINIMUM 1500 words — this should be a thorough, magazine-quality article
+- Use clear ## headings and ### subheadings to organize into logical sections
+- Each section should have multiple detailed paragraphs, not just bullet points
+- Synthesize and analyze the information — explain WHY things matter, draw comparisons, provide context
+- Include specific data points, numbers, and statistics from the evidence
+- Include source URLs as inline citations [like this](url)
+- Note where sources agree and where they disagree
+- Add a brief executive summary at the top
+- End with a clear conclusion that directly answers the question
+- Write in an engaging, informative style — not dry or robotic
+"""
+
+# Per-category format overrides appended to FINAL_REPORT_PROMPT. The keys are
+# also the label set offered to the LLM in _classify_category, so adding a
+# key here makes it a valid auto-detected category.
+CATEGORY_PROMPTS = {
+    "product": """IMPORTANT FORMAT OVERRIDE — this is a PRODUCT research report:
+- Structure as a RANKED LIST of products/options (best first)
+- For EACH product include: name as ### heading, approximate price, 2-3 sentence summary, **Pros:** bullet list, **Cons:** bullet list, **Where to buy:** URLs as links
+- Start with a quick-compare markdown table of top picks (columns: Name, Price, Best For, Rating)
+- End with a ## Verdict section picking Best Overall and Best Value
+- Still include source citations inline""",
+
+    "comparison": """IMPORTANT FORMAT OVERRIDE — this is a COMPARISON report:
+- Create a ## Comparison Table as a markdown table comparing ALL options across key criteria (rows = criteria, columns = options)
+- Use checkmarks, ratings, or short values in cells
+- Write a ## section per option with its strengths, weaknesses, and ideal use case
+- End with ## Best For verdicts (e.g., "**Best for small teams:** Option A because...")
+- Include a ## Shared Considerations section for things that apply to all options""",
+
+    "howto": """IMPORTANT FORMAT OVERRIDE — this is a HOW-TO guide:
+- Start with ## Quick Guide — a super concise numbered list (one line per step, no details, just the action). Example: 1. Install X  2. Run Y  3. Configure Z
+- Then ## Prerequisites listing what's needed before starting
+- Then the detailed steps: ## Step 1: ..., ## Step 2: ...
+- Each step should have a clear heading and detailed instructions
+- Use blockquotes (> ) for tips and warnings: > **Tip:** ... or > **Warning:** ...
+- End with ## Common Mistakes section
+- Add estimated time and difficulty level near the top""",
+
+    "factcheck": """IMPORTANT FORMAT OVERRIDE — this is a FACT-CHECK report:
+- Start with ## The Claim restating what's being checked
+- Create ## Evidence For and ## Evidence Against sections
+- Each piece of evidence should be a ### with source name, what it found, and how strong the evidence is
+- Include a ## Verdict section with one of: **Supported**, **Mixed Evidence**, or **Unsupported**
+- End with ## Nuance & Caveats for important context and limitations
+- Be balanced and cite sources for every claim""",
+}
+
+# ---------------------------------------------------------------------------
+# DeepResearcher
+# ---------------------------------------------------------------------------
+class DeepResearcher:
+    """
+    Iterative research engine following the IterResearch pattern.
+
+    Each round: LLM generates queries → web search through the configured
+    provider chain → LLM extracts from the fetched pages → LLM synthesizes
+    into evolving report → LLM decides continue/stop.
+    """
+
+    def __init__(
+        self,
+        llm_endpoint: str,
+        llm_model: str,
+        llm_headers: Optional[Dict] = None,
+        max_rounds: int = 8,
+        max_time: int = 300,
+        max_urls_per_round: int = 3,
+        max_content_chars: int = 15000,
+        max_report_tokens: int = 8192,
+        min_rounds: int = 2,
+        max_empty_rounds: int = 2,
+        synthesis_window: int = 10,
+        progress_callback: Optional[Callable] = None,
+        search_provider: Optional[str] = None,
+        category: Optional[str] = None,
+    ):
+        self.llm_endpoint = llm_endpoint
+        self.llm_model = llm_model
+        self.llm_headers = llm_headers
+        self.search_provider_override = search_provider
+        self.category = category
+        self.max_rounds = max_rounds
+        self.max_time = max_time
+        self.max_urls_per_round = max_urls_per_round
+        self.max_content_chars = max_content_chars
+        self.max_report_tokens = max_report_tokens
+        self.min_rounds = min_rounds
+        self.max_empty_rounds = max_empty_rounds
+        self.synthesis_window = synthesis_window
+        self._progress = progress_callback
+        self._cancelled = False
+        self._start_time: float = 0
+        self.queries_used: Set[str] = set()
+        self.urls_fetched: Set[str] = set()
+        self.round_count: int = 0
+        # Track which search providers actually returned results during the
+        # run, in arrival order — surfaced in the visual report so users can
+        # see whether searxng / brave / tavily etc. carried the work.
+        self.providers_used: List[str] = []
+        self.findings: List[Dict] = []
+        self.evolving_report: str = ""
+        self.research_plan: str = ""
+
+    def cancel(self) -> None:
+        """Request cooperative cancellation of the research loop.
+
+        Only sets a flag; the running coroutine notices it the next time it
+        checks ``self._cancelled`` and is not interrupted immediately.
+        """
+        self._cancelled = True
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    async def research(
+        self,
+        question: str,
+        prior_report: str = "",
+        prior_findings: Optional[List[Dict]] = None,
+        prior_urls: Optional[Set[str]] = None,
+    ) -> str:
+        """Run iterative research and return a final report.
+
+        Args:
+            question: The research question.
+            prior_report: Previous report to continue from (for follow-up research).
+            prior_findings: Previous findings to build on.
+            prior_urls: URLs already visited (won't be re-fetched).
+        """
+        self._start_time = time.time()
+        findings: List[Dict] = list(prior_findings) if prior_findings else []
+        report = prior_report or ""
+
+        # PLAN: Analyze the question and create a research strategy
+        if not prior_report:
+            self._emit(phase="planning")
+            self.research_plan = await self._create_plan(question)
+            logger.info(f"Research plan: {self.research_plan[:200]}")
+        else:
+            # Continuation — plan around the follow-up
+            self._emit(phase="planning")
+            self.research_plan = await self._create_plan(question)
+            logger.info(f"Continuation plan: {self.research_plan[:200]}")
+        if not self.category and not prior_report:
+            self.category = await self._classify_category(question)
+            if self.category:
+                logger.info(f"Auto-detected category: {self.category}")
+
+        if prior_urls:
+            self.urls_fetched.update(prior_urls)
+        self.findings = findings  # expose for handler
+        consecutive_empty_rounds = 0
+
+        for round_num in range(1, self.max_rounds + 1):
+            self.round_count = round_num
+            if self._cancelled:
+                logger.info(f"Research cancelled after {round_num - 1} rounds")
+                break
+            if self._time_exceeded():
+                logger.info(f"Time limit reached after {round_num - 1} rounds")
+                break
+
+            logger.info(f"=== Research Round {round_num} ===")
+            self._emit(phase="searching", round=round_num, total_sources=len(self.urls_fetched))
+
+            # THINK: generate queries
+            queries = await self._generate_queries(question, report, round_num)
+            if not queries:
+                logger.warning(f"Round {round_num}: no queries generated, stopping")
+                break
+
+            self._emit(phase="searching", round=round_num, queries=len(queries),
+                       query_preview=queries[0] if queries else "",
+                       total_sources=len(self.urls_fetched))
+
+            # SEARCH + EXTRACT
+            round_findings = await self._search_and_extract(queries, question)
+            if round_findings:
+                findings.extend(round_findings)
+                consecutive_empty_rounds = 0
+                logger.info(f"Round {round_num}: extracted {len(round_findings)} findings")
+                self._emit(phase="reading", round=round_num,
+                           new_sources=len(round_findings),
+                           total_sources=len(self.urls_fetched),
+                           total_findings=len(findings))
+            else:
+                consecutive_empty_rounds += 1
+                logger.info(f"Round {round_num}: no new findings ({consecutive_empty_rounds} consecutive empty)")
+                if consecutive_empty_rounds >= self.max_empty_rounds:
+                    logger.warning(f"Search appears to be down — {self.max_empty_rounds} consecutive rounds with no results")
+                    err_detail = getattr(self, '_last_search_error', 'unknown error')
+                    self._emit(phase="error", message=f"Search engine unavailable: {err_detail}")
+                    if not findings:
+                        return (
+                            f"**Search unavailable** — Web search failed after "
+                            f"{round_num} rounds. Error: {err_detail}\n\n"
+                            "Please check your search provider settings and ensure the service is running."
+                        )
+                    break
+
+            # SYNTHESIZE
+            if findings:
+                self._emit(phase="analyzing", round=round_num,
+                           total_sources=len(self.urls_fetched),
+                           total_findings=len(findings))
+                report = await self._synthesize(question, findings, report)
+
+            # DECIDE
+            if round_num >= self.min_rounds:
+                should_stop = await self._should_stop(question, report, round_num)
+                if should_stop:
+                    logger.info(f"LLM decided to stop after round {round_num}")
+                    break
+
+        # FINAL REPORT
+        self._emit(phase="writing", total_sources=len(self.urls_fetched),
+                   total_findings=len(findings))
+        if not report:
+            return "No information could be gathered for this question."
+
+        self.evolving_report = report  # preserve pre-synthesis report
+        final = await self._final_report(question, report)
+        elapsed = time.time() - self._start_time
+        logger.info(
+            f"Research complete: {self.round_count} rounds, "
+            f"{len(findings)} findings, {len(self.urls_fetched)} URLs, "
+            f"{elapsed:.1f}s"
+        )
+        return final
+
+    # ------------------------------------------------------------------
+    # LLM helper
+    # ------------------------------------------------------------------
+    async def _llm(self, messages: List[Dict], temperature: float = 0.3,
+                   max_tokens: int = 4096, timeout: int = 60) -> str:
+        """Call the LLM asynchronously and strip thinking tags."""
+        from src.llm_core import llm_call_async
+        response = await llm_call_async(
+            url=self.llm_endpoint,
+            model=self.llm_model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            headers=self.llm_headers,
+            timeout=timeout,
+        )
+        return strip_thinking(response)
+
+    # ------------------------------------------------------------------
+    # PLAN: create research strategy
+    # ------------------------------------------------------------------
+    async def _create_plan(self, question: str) -> str:
+        """LLM analyzes the question and creates a research plan."""
+        prompt = RESEARCH_PLAN_PROMPT.format(question=question)
+        try:
+            response = await self._llm(
+                [{"role": "user", "content": prompt}],
+                temperature=0.3,
+                max_tokens=1024,
+                timeout=30,
+            )
+            # Try to parse as JSON for structured plan
+            parsed = self._parse_json_object(response)
+            if parsed:
+                parts = []
+                if parsed.get("sub_questions"):
+                    parts.append("Sub-questions: " + "; ".join(parsed["sub_questions"]))
+                if parsed.get("key_topics"):
+                    parts.append("Key topics: " + ", ".join(parsed["key_topics"]))
+                if parsed.get("success_criteria"):
+                    parts.append("Success: " + parsed["success_criteria"])
+                return "\n".join(parts) if parts else response
+            return response
+        except Exception as e:
+            logger.warning(f"Research planning failed: {e}")
+            self._emit(phase="warning", message="Planning step failed, proceeding with direct search")
+            return ""
+
+    async def _classify_category(self, question: str) -> Optional[str]:
+        """Fast LLM call to classify the research question into a category."""
+        valid = ", ".join(CATEGORY_PROMPTS.keys())
+        prompt = (
+            f"Classify this research question into exactly ONE category.\n"
+            f"Categories: {valid}\n"
+            f"If none fit well, respond with: general\n\n"
+            f"Question: {question}\n\n"
+            f"Respond with ONLY the category name, nothing else."
+        )
+        try:
+            result = await self._llm(
+                [{"role": "user", "content": prompt}],
+                temperature=0, max_tokens=20, timeout=15,
+            )
+            cat = (result or "").strip().lower()
+            # Clean one-word answer first.
+            first = cat.split()[0].strip(".,\"'*:") if cat.split() else ""
+            if first in CATEGORY_PROMPTS:
+                return first
+            # Weak local models often wrap the label in preamble ("the category
+            # is product") — scan the whole reply for any known category word
+            # before giving up (which would default to the generic format).
+            for c in CATEGORY_PROMPTS:
+                if c in cat:
+                    return c
+            return None
+        except Exception as e:
+            logger.warning(f"Category classification failed: {e}")
+            return None
+
+    # ------------------------------------------------------------------
+    # THINK: generate search queries
+    # ------------------------------------------------------------------
+    async def _generate_queries(self, question: str, report: str,
+                                round_num: int) -> List[str]:
+        if round_num == 1:
+            num_queries = 4
+            round_instruction = (
+                "This is the first round — generate broad, diverse queries "
+                "that explore the key facets of the question."
+            )
+        else:
+            num_queries = 3
+            round_instruction = (
+                "We already have partial findings.  Generate targeted follow-up "
+                "queries to fill gaps, verify claims, or explore specific aspects "
+                "that the report doesn't yet cover well."
+            )
+
+        prompt = QUERY_GEN_PROMPT.format(
+            question=question,
+            research_plan=self.research_plan or "(No plan — search broadly.)",
+            report=report or "(No findings yet.)",
+            round_num=round_num,
+            num_queries=num_queries,
+            round_instruction=round_instruction,
+        )
+
+        try:
+            response = await self._llm(
+                [{"role": "user", "content": prompt}],
+                temperature=0.5,
+                max_tokens=4096,
+            )
+            queries = self._parse_json_array(response)
+            # Deduplicate
+            new_queries = [q for q in queries if q not in self.queries_used]
+            self.queries_used.update(new_queries)
+            logger.info(f"Round {round_num} queries: {new_queries}")
+            return new_queries
+        except Exception as e:
+            logger.error(f"Query generation failed: {e}")
+            self._emit(phase="warning", message=f"Query generation failed: {e}")
+            return []
+
+    # ------------------------------------------------------------------
+    # SEARCH + EXTRACT
+    # ------------------------------------------------------------------
+    async def _search_and_extract(self, queries: List[str],
+                                  question: str) -> List[Dict]:
+        """Search each query and extract relevant info from top results."""
+        all_findings: List[Dict] = []
+
+        # Search all queries in parallel
+        search_tasks = [self._search(q) for q in queries]
+        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
+
+        # Collect URLs to fetch from all search results
+        urls_to_fetch = []
+        for result in search_results:
+            if isinstance(result, Exception):
+                logger.warning(f"Search error: {result}")
+                continue
+            if not result:
+                continue
+            for r in result:
+                url = r.get("url", "")
+                if url and url not in self.urls_fetched:
+                    urls_to_fetch.append(r)
+                    self.urls_fetched.add(url)
+                if len(urls_to_fetch) >= self.max_urls_per_round * len(queries):
+                    break
+
+        if self._cancelled or self._time_exceeded():
+            return all_findings
+
+        # Fetch and extract all URLs concurrently
+        extract_tasks = [
+            self._fetch_and_extract(r["url"], question, r.get("title", ""))
+            for r in urls_to_fetch
+        ]
+        results_gathered = await asyncio.gather(*extract_tasks, return_exceptions=True)
+
+        for result in results_gathered:
+            if isinstance(result, Exception):
+                logger.warning(f"Extraction error: {result}")
+                continue
+            if result:
+                all_findings.append(result)
+
+        return all_findings
+
+    async def _search(self, query: str) -> List[Dict]:
+        """Run a search query using the configured research search provider."""
+        try:
+            from src.search.providers import _get_search_settings
+            from src.search.core import _call_provider, _build_provider_chain
+
+            settings = _get_search_settings()
+            provider = (self.search_provider_override or "").strip()
+            if not provider:
+                provider = (settings.get("research_search_provider") or "").strip()
+            if not provider:
+                provider = settings.get("search_provider", "searxng")
+
+            if provider == "disabled":
+                logger.info("Search is disabled for research")
+                return []
+
+            # Try primary provider, then fallbacks
+            for prov in _build_provider_chain(provider):
+                try:
+                    results = await asyncio.to_thread(_call_provider, prov, query, 10)
+                    if results:
+                        logger.info(f"Research search: {prov} returned {len(results)} results")
+                        if prov not in self.providers_used:
+                            self.providers_used.append(prov)
+                        return results
+                except Exception as e:
+                    logger.warning(f"Research search: {prov} failed: {e}")
+                    self._last_search_error = f"{prov}: {e}"
+            return []
+        except Exception as e:
+            logger.error(f"Search failed for '{query}': {e}")
+            self._last_search_error = str(e)
+            return []
+
+    async def _fetch_and_extract(self, url: str, question: str,
+                                 title: str) -> Optional[Dict]:
+        """Fetch a URL's content and use LLM to extract relevant info."""
+        display = title or url
+        self._emit(phase="reading", url=url, title=display,
+                   total_sources=len(self.urls_fetched))
+        try:
+            from src.search import fetch_webpage_content
+            page = await asyncio.to_thread(fetch_webpage_content, url, 10)
+        except Exception as e:
+            logger.warning(f"Failed to fetch {url}: {e}")
+            return None
+
+        if not page.get("success") or not page.get("content"):
+            return None
+
+        content = page["content"]
+        # Truncate to avoid blowing up context, preferring paragraph boundary
+        if len(content) > self.max_content_chars:
+            truncated = content[:self.max_content_chars]
+            last_para = truncated.rfind('\n\n')
+            if last_para > self.max_content_chars * 0.8:
+                content = truncated[:last_para]
+            else:
+                content = truncated
+
+        prompt = EXTRACTOR_PROMPT.format(webpage_content=content, goal=question)
+
+        try:
+            response = await self._llm(
+                [{"role": "user", "content": prompt}],
+                temperature=0.2,
+                max_tokens=2048,
+                timeout=45,
+            )
+            parsed = self._parse_json_object(response)
+            if parsed:
+                parsed["url"] = url
+                parsed["title"] = title or page.get("title", "")
+                parsed["og_image"] = page.get("og_image", "")
+                # Skip findings where the LLM says the page is useless
+                if is_low_quality(parsed.get("summary", "")):
+                    logger.info(f"Skipping low-quality extraction from {url}")
+                    return None
+                return parsed
+            # If JSON parsing fails, treat entire response as evidence
+            return {
+                "url": url,
+                "title": title or page.get("title", ""),
+                "og_image": page.get("og_image", ""),
+                "rational": "LLM extraction (raw)",
+                "evidence": response[:3000],
+                "summary": response[:500],
+            }
+        except Exception as e:
+            logger.warning(f"LLM extraction failed for {url}: {e}")
+            return None
+
+    # ------------------------------------------------------------------
+    # SYNTHESIZE
+    # ------------------------------------------------------------------
+    async def _synthesize(self, question: str, findings: List[Dict],
+                          current_report: str) -> str:
+        """LLM synthesizes all findings into an updated report."""
+        # Format findings for the prompt
+        window = findings[-self.synthesis_window:]
+        if len(findings) > self.synthesis_window:
+            logger.info(f"Synthesis using last {self.synthesis_window} of {len(findings)} findings")
+        findings_text = self._format_findings(window)
+
+        prompt = SYNTHESIZE_PROMPT.format(
+            question=question,
+            report=current_report or "(First round — no report yet.)",
+            new_findings=findings_text,
+        )
+
+        try:
+            return await self._llm(
+                [{"role": "user", "content": prompt}],
+                temperature=0.3,
+                max_tokens=self.max_report_tokens,
+                timeout=60,
+            )
+        except Exception as e:
+            logger.error(f"Synthesis failed: {e}")
+            self._emit(phase="warning", message="Synthesis failed, keeping previous report")
+            return current_report  # keep the old report on failure
+
+    # ------------------------------------------------------------------
+    # DECIDE
+    # ------------------------------------------------------------------
+    async def _should_stop(self, question: str, report: str,
+                           round_num: int) -> bool:
+        """Let the LLM decide whether the report is comprehensive enough."""
+        prompt = STOP_PROMPT.format(
+            question=question,
+            report=report,
+            round_num=round_num,
+        )
+
+        try:
+            response = await self._llm(
+                [{"role": "user", "content": prompt}],
+                temperature=0.1,
+                max_tokens=128,
+            )
+            # Reasoning models prepend a <think>...</think> block — strip it
+            # before checking for YES/NO, otherwise the answer always looks
+            # like it starts with "<THINK>" and the engine never stops.
+            clean = strip_thinking(response).strip()
+            # Tolerate "**YES**", "Yes.", quotes, etc.
+            answer = re.sub(r'^[\s*_`"\'>#\-]+', '', clean).upper()
+            should_stop = answer.startswith("YES")
+            logger.info(f"Stop decision (round {round_num}): {clean[:120]}")
+            return should_stop
+        except Exception as e:
+            logger.warning(f"Stop decision failed: {e}")
+            return False  # continue on error
+
+    # ------------------------------------------------------------------
+    # FINAL REPORT
+    # ------------------------------------------------------------------
+    async def _final_report(self, question: str, report: str) -> str:
+        """LLM writes a polished final report, retrying if too short."""
+        prompt = FINAL_REPORT_PROMPT.format(
+            question=question,
+            report=report,
+        )
+        cat_extra = CATEGORY_PROMPTS.get(self.category or "", "")
+        if cat_extra:
+            prompt += "\n\n" + cat_extra
+
+        try:
+            result = await self._llm(
+                [{"role": "user", "content": prompt}],
+                temperature=0.3,
+                max_tokens=self.max_report_tokens,
+                timeout=180,
+            )
+
+            # If report is too short, ask the LLM to expand it
+            if len(result.split()) < 400:
+                logger.info(f"Final report too short ({len(result.split())} words), requesting expansion")
+                self._emit(phase="writing", message="Expanding report...")
+                expanded = await self._llm(
+                    [
+                        {"role": "user", "content": prompt},
+                        {"role": "assistant", "content": result},
+                        {"role": "user", "content":
+                            "This report is too brief. Please expand it significantly:\n"
+                            "- Add detailed paragraphs for each section (not just bullet points)\n"
+                            "- Include specific data, numbers, and comparisons from the evidence\n"
+                            "- Explain context and significance — don't just list facts\n"
+                            "- Use ## headings and ### subheadings\n"
+                            "- Target at least 1000 words\n"
+                            "Write the full expanded report now."
+                        },
+                    ],
+                    temperature=0.4,
+                    max_tokens=self.max_report_tokens,
+                    timeout=180,
+                )
+                if len(expanded.split()) > len(result.split()):
+                    return expanded
+
+            return result
+        except Exception as e:
+            logger.error(f"Final report generation failed: {e}")
+            return report  # return the evolving report as-is
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+    def _emit(self, **kwargs):
+        """Send a progress event via the callback, if one is registered."""
+        if self._progress:
+            try:
+                self._progress(kwargs)
+            except Exception:
+                pass
+
+    def _time_exceeded(self) -> bool:
+        return (time.time() - self._start_time) > self.max_time
+
+    # _strip_think_tags removed — use research_utils.strip_thinking()
+
+    @staticmethod
+    def _strip_code_block(text: str) -> str:
+        """Strip markdown code-block fences (```json ... ```) if present."""
+        text = text.strip()
+        if text.startswith("```"):
+            text = re.sub(r'^```(?:json)?\s*', '', text)
+            text = re.sub(r'\s*```$', '', text)
+        return text.strip()
+
+    def _parse_json_array(self, text: str) -> List[str]:
+        """Extract a JSON array of strings from LLM output."""
+        text = self._strip_code_block(text)
+        try:
+            parsed = json.loads(text)
+            if isinstance(parsed, list):
+                return [str(item) for item in parsed]
+        except json.JSONDecodeError:
+            pass
+
+        # Greedy match to capture the full outermost array
+        match = re.search(r'\[[\s\S]*\]', text)
+        if match:
+            try:
+                parsed = json.loads(match.group())
+                if isinstance(parsed, list):
+                    return [str(item) for item in parsed]
+            except json.JSONDecodeError:
+                pass
+
+        # Handle truncated arrays — e.g. '["query one", "query two", "query thr'
+        # Try to find the start of an array and repair it
+        arr_start = text.find('[')
+        if arr_start != -1:
+            fragment = text[arr_start:]
+            # Find the last complete quoted string
+            complete_items = re.findall(r'"([^"]*)"', fragment)
+            if complete_items:
+                logger.info(f"Repaired truncated JSON array: recovered {len(complete_items)} items")
+                return complete_items
+
+        logger.warning(f"Could not parse JSON array from: {text[:200]}")
+        return []
+
+    def _parse_json_object(self, text: str) -> Optional[Dict]:
+        """Extract a JSON object from LLM output."""
+        text = self._strip_code_block(text)
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError:
+            pass
+
+        # Greedy match to capture the full outermost object
+        match = re.search(r'\{[\s\S]*\}', text)
+        if match:
+            try:
+                return json.loads(match.group())
+            except json.JSONDecodeError:
+                pass
+
+        return None
+
+    def _format_findings(self, findings: List[Dict]) -> str:
+        """Format findings list into readable text for synthesis prompt."""
+        parts = []
+        for i, f in enumerate(findings, 1):
+            url = f.get("url", "unknown")
+            title = f.get("title", "")
+            summary = f.get("summary", "")
+            evidence = f.get("evidence", "")
+            # Use summary if available, fall back to truncated evidence
+            content = summary if summary else (evidence[:1000] if evidence else "(no content)")
+            parts.append(f"**Finding {i}** — [{title}]({url})\n{content}")
+        return "\n\n".join(parts)
+
+    def get_stats(self) -> Dict:
+        """Return research statistics."""
+        elapsed = time.time() - self._start_time if self._start_time else 0
+        stats = {
+            "Duration": f"{elapsed:.1f}s",
+            "Rounds": self.round_count,
+            "Queries": len(self.queries_used),
+            "URLs": len(self.urls_fetched),
+            "Model": self.llm_model,
+        }
+        if self.providers_used:
+            stats["Search"] = ", ".join(self.providers_used)
+        if self.category:
+            stats["Category"] = self.category.capitalize()
+        return stats