feat(bot): 5-phase strategy upgrade — edge neto, families, GNews priority, regimes

Phase 1 — Edge neto real (paper.py, bayesian.py, risk/manager.py, db.py):
- Trade records now store edge_gross, edge_net, prior_prob, final_prob, mid_price, spread_estimate, commission, family_key
- edge_net = edge_gross - SPREAD_ESTIMATE(0.02) - COMMISSION_RATE(0.02)
  NOTE: both constants are heuristics, not exact Polymarket exchange costs
- Execution gate changed from edge_gross > MIN_EDGE to edge_net > regime_min_edge

Phase 2 — Market families (polymarket.py):
- market_family_key(market) groups related markets: texas-republican-2026, fed-april-2026, openai-2026, etc.
- At most 1 trade per family per cycle; occupied_families propagated via main.py
- Family key logged on every TRADE and SKIP line

Phase 3 — GNews priority (news.py, bayesian.py, main.py):
- NewsClient.get_freshness() returns 1.0/0.75/0.40/0.10 by cache age
- gnews_priority(market, news) = uncertainty × volume_score × freshness
- Politics markets sorted by priority DESC before eval so best markets get the 5-query/cycle GNews budget first

Phase 4 — Regime min-edge by category/horizon (bayesian.py):
- politics >60d → 0.12, 30-60d → 0.10, <30d → 0.08
- tech / crypto/finance → 0.10
- All thresholds applied to edge_net (not edge_gross)

Phase 5 — Observability (bayesian.py, main.py):
- Structured skip labels: SKIP_UNSUPPORTED, SKIP_NO_SIGNALS, SKIP_PRIOR_EXTREME, SKIP_FAMILY, SKIP_GNEWS_PRIORITY, SKIP_EDGE_NET
- TRADE lines now include family_key, edge_gross, edge_net, regime_min, days
- schema.sql: 8 new cols on trades, 7 new cols on signals (via ALTER TABLE … ADD COLUMN IF NOT EXISTS)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 15:34:46 +00:00
parent a0cbdc0256
commit 63d9f637ff
8 changed files with 620 additions and 141 deletions
@@ -33,13 +33,21 @@ class Database:
            await conn.execute("""
                INSERT INTO trades (
                    id, market_id, question, direction, size_usdc,
-                    entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper
-                ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12)
+                    entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
+                    edge_gross, edge_net, prior_prob, final_prob,
+                    mid_price, spread_estimate, commission, family_key
+                ) VALUES (
+                    $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
+                    $13,$14,$15,$16,$17,$18,$19,$20
+                )
                ON CONFLICT (id) DO NOTHING
            """,
                trade.id, trade.market_id, trade.question, trade.direction,
                trade.size_usdc, trade.entry_price, trade.shares, trade.fee_usdc,
                trade.net_cost, trade.timestamp, trade.reasoning, trade.paper,
+                # Phase 1 fields
+                trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
+                trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
            )

    async def save_daily_metrics(self, metrics: dict) -> None:
@@ -69,6 +77,18 @@ class Database:
            )
            return {r["market_id"]: float(r["total"]) for r in rows}

+    async def get_open_families(self) -> set[str]:
+        """Return the distinct, non-empty family_key values stored in trades.
+
+        Used to rebuild occupied_families from DB state so the
+        family-deduplication logic survives pod restarts.
+
+        NOTE(review): the query has no open/closed filter, so it returns every
+        family that has ever been traded, not only families with a currently
+        open position.  Confirm whether that is intended; if not, the query
+        needs a condition that identifies open positions.
+        """
+        async with self._pool.acquire() as conn:
+            rows = await conn.fetch(
+                "SELECT DISTINCT family_key FROM trades WHERE family_key IS NOT NULL"
+            )
+            # Also drop empty strings (Trade.family_key defaults to "").
+            return {r["family_key"] for r in rows if r["family_key"]}
+
    async def get_recent_trades(self, limit: int = 100) -> list[dict]:
        async with self._pool.acquire() as conn:
            rows = await conn.fetch(
@@ -155,6 +155,32 @@ class NewsClient:
    async def close(self) -> None:
        await self._client.aclose()

+    def get_freshness(self, question: str) -> float:
+        """
+        Score how valuable a new GNews query for ``question`` would be.
+
+        The result is one of four fixed values, chosen from the age of the
+        cache entry stored under the lower-cased query built from
+        ``question``:
+
+          1.00 — no API key configured, or no cache entry for the query
+          0.75 — cache entry older than 6 h
+          0.40 — cache entry older than 2 h (up to 6 h)
+          0.10 — cache entry at most 2 h old
+
+        The first element of a cache entry is read as a time.monotonic()
+        timestamp (presumably set when the entry is stored — confirm against
+        the code that fills ``self._cache``).
+        """
+        if not self._api_key:
+            # No key: never penalise the priority score.
+            return 1.0
+
+        cache_key = self._build_query(question).lower()
+        entry = self._cache.get(cache_key)
+        if entry is None:
+            return 1.0
+
+        age_seconds = time.monotonic() - entry[0]
+        # Thresholds are checked oldest-first; the first one exceeded wins.
+        for min_age_hours, score in ((6, 0.75), (2, 0.40)):
+            if age_seconds > min_age_hours * 3600:
+                return score
+        return 0.10
+
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
@@ -5,6 +5,7 @@ Docs: https://docs.polymarket.com
 import asyncio
 import logging
 import os
+import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone, timedelta
 from typing import Optional
@@ -15,6 +16,158 @@ log = logging.getLogger(__name__)
 POLYMARKET_API = "https://clob.polymarket.com"
 GAMMA_API = "https://gamma-api.polymarket.com"

+# ─────────────────────────────────────────────────────────────────────────────
+# Phase 2 — Market family classification helpers
+# Used by market_family_key() below.
+# ─────────────────────────────────────────────────────────────────────────────
+
+_YEAR_RE = re.compile(r"\b(202\d|203\d)\b")
+# NOTE: matched case-insensitively, so "may" and "march" also match when they
+# appear as ordinary words rather than month names.
+_MONTH_RE = re.compile(
+    r"\b(january|february|march|april|may|june|july|august|"
+    r"september|october|november|december)\b",
+    re.IGNORECASE,
+)
+_FED_TRIGGER_RE = re.compile(
+    r"\b(federal reserve|interest rate|bps|basis point|fed\s+(rate|meeting|decision))",
+    re.IGNORECASE,
+)
+_US_STATE_RE = re.compile(
+    r"\b(Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|"
+    r"Delaware|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|"
+    r"Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|"
+    r"Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|"
+    r"New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|"
+    r"Ohio|Oklahoma|Oregon|Pennsylvania|Rhode\s+Island|South\s+Carolina|"
+    r"South\s+Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Washington|"
+    r"West\s+Virginia|Wisconsin|Wyoming)\b",
+    re.IGNORECASE,
+)
+_PARTY_RE = re.compile(r"\b(Republican|Democrats?|Democratic|GOP)\b", re.IGNORECASE)
+_ELECTION_TYPE_RE = re.compile(
+    r"\b(presidential|president|mayoral|mayor|gubernatorial|governor|"
+    r"senate|congress(?:ional)?|primary|election)\b",
+    re.IGNORECASE,
+)
+
+# Ordered list of (pattern, place_slug) for named places; the first pattern
+# that matches wins.  Mostly non-US cities/countries, plus Los Angeles.
+# market_family_key() consults this list only after its US state + party rule
+# has failed to match.
+_NAMED_PLACES: list[tuple[re.Pattern, str]] = [
+    (re.compile(r"\bColomb", re.IGNORECASE),            "colombia"),
+    (re.compile(r"\bSeoul\b", re.IGNORECASE),           "seoul"),
+    (re.compile(r"\bBusan\b", re.IGNORECASE),           "busan"),
+    (re.compile(r"\bGyeonggi\b", re.IGNORECASE),        "gyeonggi"),
+    (re.compile(r"\bChungcheong", re.IGNORECASE),       "chungcheong"),
+    (re.compile(r"\bSouth\s+Korean?\b", re.IGNORECASE), "south-korea"),
+    (re.compile(r"\bLos\s+Angeles\b", re.IGNORECASE),   "los-angeles"),
+    (re.compile(r"\bCuba\b", re.IGNORECASE),            "cuba"),
+    (re.compile(r"\bLebanon\b", re.IGNORECASE),         "lebanon"),
+    (re.compile(r"\bIsrael\b", re.IGNORECASE),          "israel"),
+    (re.compile(r"\bUkraine\b", re.IGNORECASE),         "ukraine"),
+    (re.compile(r"\bRussia\b", re.IGNORECASE),          "russia"),
+]
+
+# Ordered list of (pattern, company_slug) for tech/company markets.
+_NAMED_COMPANIES: list[tuple[re.Pattern, str]] = [
+    (re.compile(r"\bopenai\b", re.IGNORECASE),     "openai"),
+    (re.compile(r"\banthropic\b", re.IGNORECASE),  "anthropic"),
+    (re.compile(r"\bnvidia\b", re.IGNORECASE),     "nvidia"),
+    (re.compile(r"\bapple\b", re.IGNORECASE),      "apple"),
+    (re.compile(r"\bmicrosoft\b", re.IGNORECASE),  "microsoft"),
+    (re.compile(r"\bgoogle\b", re.IGNORECASE),     "google"),
+    (re.compile(r"\btesla\b", re.IGNORECASE),      "tesla"),
+    # \bmeta\b does NOT match MetaMask (no word boundary mid-compound-word)
+    (re.compile(r"\bmeta\b", re.IGNORECASE),       "meta"),
+]
+
+
+def _end_month(market: "Market") -> str:
+    """Format ``market.end_date`` as 'YYYY-MM'; '' when it is empty or unparseable."""
+    end_date = market.end_date
+    if end_date:
+        try:
+            # fromisoformat() needs an explicit offset in place of a trailing "Z".
+            parsed = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
+            return parsed.strftime("%Y-%m")
+        except (ValueError, TypeError):
+            pass
+    return ""
+
+
+def market_family_key(market: "Market") -> str:
+    """
+    Derive a slug that groups related markets into one "family".
+
+    The slug is built from the question text (and, where needed, the
+    market's end_date and category).  Rules are tried in order and the
+    first one that matches decides the result:
+
+      1. Fed / interest-rate wording    →  fed-{month}-{year}  (or fed-{year})
+      2. US state + party both present  →  {state}-{party}-{year}
+      3. Entry in _NAMED_PLACES         →  {place}-{event_type}-{year}
+      4. Entry in _NAMED_COMPANIES      →  {company}-{year}
+      5. Fallback                       →  {category}-{end_YYYY-MM}
+
+    {year} is the first 202x/203x number in the question; when there is
+    none it is the end_date year, or "unknown" if that cannot be parsed.
+
+    Examples:
+      "Will Ken Paxton win the 2026 Texas Republican Primary"
+          → texas-republican-2026
+      "Will the Fed decrease rates by 25 bps after April 2026 meeting"
+          → fed-april-2026
+      "Will OpenAI IPO by December 31 2026?"
+          → openai-2026
+    """
+    question = market.question
+
+    # Year: question text first, then end_date, then the literal "unknown".
+    year_match = _YEAR_RE.search(question)
+    if year_match is not None:
+        year = year_match.group(1)
+    else:
+        year = _end_month(market)[:4] or "unknown"
+
+    # Rule 1 — Fed / interest-rate decision (month is optional).
+    if _FED_TRIGGER_RE.search(question):
+        month_match = _MONTH_RE.search(question)
+        month_part = f"{month_match.group(1).lower()}-" if month_match else ""
+        return f"fed-{month_part}{year}"
+
+    # Rule 2 — US state together with a party name.
+    state_match = _US_STATE_RE.search(question)
+    party_match = _PARTY_RE.search(question)
+    if state_match and party_match:
+        state_slug = re.sub(r"\s+", "-", state_match.group(1).lower())
+        # Any "democrat*" spelling maps to "democrat"; Republican and GOP
+        # both map to "republican".
+        is_democrat = "democrat" in party_match.group(1).lower()
+        party_slug = "democrat" if is_democrat else "republican"
+        return f"{state_slug}-{party_slug}-{year}"
+
+    # Rule 3 — first named place whose pattern matches.
+    place_slug = next(
+        (slug for pattern, slug in _NAMED_PLACES if pattern.search(question)),
+        None,
+    )
+    if place_slug is not None:
+        etype_match = _ELECTION_TYPE_RE.search(question)
+        if etype_match is None:
+            event_type = "event"
+        else:
+            found = etype_match.group(1).lower()
+            # Collapse noun forms onto their adjective form.
+            synonyms = {
+                "president": "presidential",
+                "mayor":     "mayoral",
+                "governor":  "gubernatorial",
+            }
+            event_type = synonyms.get(found, found)
+        return f"{place_slug}-{event_type}-{year}"
+
+    # Rule 4 — first named company whose pattern matches.
+    company_slug = next(
+        (slug for pattern, slug in _NAMED_COMPANIES if pattern.search(question)),
+        None,
+    )
+    if company_slug is not None:
+        return f"{company_slug}-{year}"
+
+    # Rule 5 — category plus end month, or plus year when no month is known.
+    suffix = _end_month(market) or year
+    category = market.category or "misc"
+    return f"{category}-{suffix}"
+

@dataclass
 class Market:
@@ -55,3 +55,46 @@ CREATE INDEX IF NOT EXISTS idx_trades_timestamp ON trades(timestamp DESC);
 CREATE INDEX IF NOT EXISTS idx_trades_market ON trades(market_id);
 CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics_daily(timestamp DESC);
 CREATE INDEX IF NOT EXISTS idx_signals_timestamp ON signals(timestamp DESC);
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- Phase 1 migrations: edge neto real
+--
+-- spread_estimate and commission are HEURISTICS, not exact Polymarket exchange
+-- costs.  spread_estimate ≈ estimated half-spread for medium-liquidity markets.
+-- commission = COMMISSION_RATE (0.02) * size_usdc — a flat 2% cost assumption,
+-- not a fee quoted by the exchange.
+-- edge_net = edge_gross - spread_estimate - commission/size_usdc
+--          = edge_gross - 0.02 - 0.02   (always 0.04 deduction at current rates)
+--
+-- These are stored per-trade so we can audit whether the model's cost assumptions
+-- were met in practice once markets resolve.
+-- ─────────────────────────────────────────────────────────────────────────────
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_gross      DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_net        DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS prior_prob      DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS final_prob      DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS mid_price       DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS spread_estimate DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS commission      DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS family_key      TEXT;
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- Phase 2 / Phase 5 migrations: market families + observability
+--
+-- Signals table extended so each evaluated market carries its audit trail:
+--   skip_reason  — why the market was not traded ("edge_net", "family",
+--                  "gnews_priority", "regime", "prior_extreme", etc.)
+--   passed_gross — True if edge_gross alone met regime_min_edge
+--   passed_net   — True if edge_net met regime_min_edge (the actual gate)
+--   family_key   — market family slug (e.g. "texas-republican-2026")
+--   regime_min_edge — threshold that applied to this market/category
+-- ─────────────────────────────────────────────────────────────────────────────
+ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_gross      DOUBLE PRECISION;
+ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_net        DOUBLE PRECISION;
+ALTER TABLE signals ADD COLUMN IF NOT EXISTS family_key      TEXT;
+ALTER TABLE signals ADD COLUMN IF NOT EXISTS regime_min_edge DOUBLE PRECISION;
+ALTER TABLE signals ADD COLUMN IF NOT EXISTS skip_reason     TEXT;
+ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_gross    BOOLEAN;
+ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_net      BOOLEAN;
+
+-- Indexes on signals.market_id and trades.family_key.
+CREATE INDEX IF NOT EXISTS idx_signals_market ON signals(market_id);
+CREATE INDEX IF NOT EXISTS idx_trades_family  ON trades(family_key);
@@ -6,7 +6,7 @@ All trades are logged to PostgreSQL for metrics analysis.
 """
 import logging
 import uuid
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime, UTC
 from typing import Optional

@@ -15,7 +15,10 @@ from bot.data.db import Database

 log = logging.getLogger(__name__)

-POLYMARKET_FEE = 0.02  # 2% fee on each trade
+# Polymarket taker fee used for paper simulation.
+# Also stored as commission in each Trade for audit purposes.
+# NOTE: this is a heuristic — see COMMISSION_RATE in bayesian.py for context.
+POLYMARKET_FEE = 0.02  # 2%


@dataclass
@@ -32,11 +35,27 @@ class Trade:
    timestamp: datetime
    reasoning: str
    paper: bool = True
+    # ── Phase 1: edge neto audit fields ──────────────────────────────────────
+    # edge_gross: raw model edge before any cost deductions
+    # edge_net:   edge_gross - spread_estimate - commission/size_usdc
+    # Both are heuristic estimates — see schema.sql comment for details.
+    edge_gross: float = 0.0
+    edge_net: float = 0.0
+    prior_prob: float = 0.0      # market.yes_price clamped, before Bayesian update
+    final_prob: float = 0.0      # estimated probability after all signals
+    # mid_price: order-book midpoint when available; falls back to market.yes_price
+    mid_price: float = 0.0
+    spread_estimate: float = 0.02
+    commission: float = 0.0      # = POLYMARKET_FEE * size_usdc
+    # ── Phase 2: market family ────────────────────────────────────────────────
+    family_key: str = ""

    def __str__(self) -> str:
        return (
            f"[PAPER] {self.direction} {self.shares:.1f} shares @ {self.entry_price:.3f} "
-            f"= ${self.net_cost:.2f} (fee ${self.fee_usdc:.2f}) | {self.question[:40]}"
+            f"= ${self.net_cost:.2f} (fee ${self.fee_usdc:.2f}) "
+            f"edge_net={self.edge_net:+.3f} family={self.family_key} "
+            f"| {self.question[:40]}"
        )


@@ -102,6 +121,10 @@ class PaperExecutor:
        net_cost = order.size_usdc + fee
        shares = order.size_usdc / entry_price

+        # commission mirrors the heuristic COMMISSION_RATE applied in bayesian.py
+        # when computing edge_net.  Stored for audit: confirms cost assumption held.
+        commission = order.size_usdc * POLYMARKET_FEE  # = fee_usdc at current rate
+
        trade = Trade(
            id=str(uuid.uuid4()),
            market_id=order.market_id,
@@ -115,6 +138,16 @@ class PaperExecutor:
            timestamp=datetime.now(UTC),
            reasoning=order.reasoning,
            paper=True,
+            # Phase 1 audit fields
+            edge_gross=order.edge_gross,
+            edge_net=order.edge_net,
+            prior_prob=order.prior_prob,
+            final_prob=order.final_prob,
+            mid_price=order.mid_price,
+            spread_estimate=order.spread_estimate,
+            commission=commission,
+            # Phase 2 family
+            family_key=order.family_key,
        )

        # Update paper portfolio
@@ -1,17 +1,16 @@
 """
 Polymarket Trading Bot — Main Entry Point
-# ci-test: 2026-04-14
+# ci-test: 2026-04-16
 """
 import asyncio
 import logging
 import os
-from contextlib import asynccontextmanager
 from datetime import datetime, timezone

-from bot.data.polymarket import PolymarketClient
+from bot.data.polymarket import PolymarketClient, market_family_key
 from bot.data.external import ExternalDataClient
 from bot.data.news import NewsClient
-from bot.strategy.bayesian import BayesianStrategy
+from bot.strategy.bayesian import BayesianStrategy, gnews_priority
 from bot.risk.manager import RiskManager
 from bot.executor.paper import PaperExecutor
 from bot.metrics.tracker import MetricsTracker
@@ -34,65 +33,100 @@ async def run_trading_loop(
    risk: RiskManager,
    executor: PaperExecutor,
    metrics: MetricsTracker,
+    db: Database,
 ) -> None:
    """Main trading loop — runs every 60 seconds."""
    log.info("Trading loop started. PAPER_MODE=%s", PAPER_MODE)

    while True:
        try:
-            # 1. Fetch active crypto/finance markets
+            # 1. Fetch active markets (90-day window)
            markets = await poly.get_active_markets()
            log.info("Found %d active markets", len(markets))

-            # Sort: politics markets first (soonest-resolving → highest GNews priority),
-            # then all others.  This ensures the 5-query-per-cycle cap hits the most
-            # time-sensitive political markets before the budget runs out.
-            def _sort_key(m):
-                is_pol = m.category == "politics"
-                try:
-                    dt = datetime.fromisoformat(m.end_date.replace("Z", "+00:00"))
-                except Exception:
-                    dt = datetime(9999, 12, 31, tzinfo=timezone.utc)
-                return (0 if is_pol else 1, dt)
-
-            markets = sorted(markets, key=_sort_key)
-            for _m in markets:
-                log.info("  [market] %s | ends: %s | yes_price: %.3f",
-                         _m.question, _m.end_date, _m.yes_price)
-
            # 2. Get external signals
            ext_data = await external.get_all_signals()

-            # Reset per-cycle GNews counter so the limit applies fresh each cycle
+            # 3. Build occupied_families from the current open portfolio positions.
+            #    This prevents re-entering a family where we already hold a position.
+            #    We also pull from DB to survive pod restarts.
+            portfolio = executor.get_portfolio()
+            occupied_families: set[str] = set()
+            for market_id in portfolio.positions:
+                mkt = next((m for m in markets if m.id == market_id), None)
+                if mkt:
+                    occupied_families.add(market_family_key(mkt))
+            # Also seed from DB in case a family was traded in a prior cycle
+            # that isn't reflected in the current markets list
+            db_families = await db.get_open_families()
+            occupied_families |= db_families
+            if occupied_families:
+                log.info("Occupied families (from portfolio): %s", sorted(occupied_families))
+
+            # 4. Sort markets.
+            #    Politics: sort by gnews_priority DESC (highest-value markets get
+            #              GNews budget first — Phase 3).
+            #    Others:   sort by end_date ASC (soonest-resolving first).
+            def _sort_key(m):
+                try:
+                    dt = datetime.fromisoformat(m.end_date.replace("Z", "+00:00"))
+                except Exception:
+                    dt = datetime(9999, 12, 31, tzinfo=timezone.utc)
+                if m.category == "politics":
+                    priority = gnews_priority(m, strategy._news) if strategy._news else 0.0
+                    # Bucket 0 = politics, sort by priority DESC (negate for asc sort)
+                    return (0, -priority, dt)
+                return (1, 0.0, dt)
+
+            markets = sorted(markets, key=_sort_key)
+
+            for _m in markets:
+                log.info(
+                    "  [market] %-55s | cat=%-12s | family=%-28s | ends=%s | yes=%.3f",
+                    _m.question[:55], _m.category, market_family_key(_m),
+                    _m.end_date[:10] if _m.end_date else "?", _m.yes_price,
+                )
+
+            # Reset per-cycle GNews counter
            strategy.reset_cycle()

+            # 5. Evaluate each market
+            cycle_trades = 0
            for market in markets:
-                # 3. Estimate true probability
-                signal = await strategy.evaluate(market, ext_data)
+                # evaluate() returns None for all skips — reasons are logged internally
+                signal = await strategy.evaluate(market, ext_data, occupied_families)
                if signal is None:
                    continue

                log.info(
-                    "Signal: market=%s poly_price=%.3f our_estimate=%.3f confidence=%.2f",
+                    "Signal generated: market=%-50s | edge_gross=%+.3f | edge_net=%+.3f | "
+                    "regime_min=%.2f | family=%s | conf=%.2f",
                    market.question[:50],
-                    signal.polymarket_price,
-                    signal.estimated_prob,
+                    signal.edge_gross,
+                    signal.edge_net,
+                    signal.regime_min_edge,
+                    signal.family_key,
                    signal.confidence,
                )

-                # 4. Risk check + position sizing
-                order = risk.size_order(signal, executor.get_portfolio())
+                # 6. Risk check + position sizing
+                order = risk.size_order(signal, portfolio)
                if order is None:
                    log.debug("Risk manager rejected order for %s", market.id)
                    continue

-                # 5. Execute (paper or real)
+                # 7. Execute (paper)
                trade = await executor.execute(order)
                if trade:
                    await metrics.record_trade(trade)
                    log.info("Trade executed: %s", trade)
+                    # Block this family for the rest of the cycle (Phase 2)
+                    occupied_families.add(signal.family_key)
+                    cycle_trades += 1

-            # 6. Update daily metrics
+            log.info("Cycle complete — trades this cycle: %d", cycle_trades)
+
+            # 8. Update daily metrics
            await metrics.update_daily_summary()

        except Exception as e:
@@ -123,7 +157,6 @@ async def main() -> None:
    metrics = MetricsTracker(db=db)

    if executor is None:
-        # Import real executor only when explicitly needed
        from bot.executor.real import RealExecutor  # noqa
        executor = RealExecutor(db=db)

@@ -131,7 +164,7 @@ async def main() -> None:
        await executor.initialize()

    try:
-        await run_trading_loop(poly, external, strategy, risk, executor, metrics)
+        await run_trading_loop(poly, external, strategy, risk, executor, metrics, db)
    finally:
        await db.disconnect()
        await news.close()
@@ -45,6 +45,17 @@ class Order:
    signal_edge: float
    signal_confidence: float
    reasoning: str
+    # Phase 1 — edge neto audit fields (passed through from TradingSignal)
+    edge_gross: float = 0.0
+    edge_net: float = 0.0
+    prior_prob: float = 0.0
+    final_prob: float = 0.0
+    mid_price: float = 0.0
+    spread_estimate: float = 0.02
+    # Phase 2 — market family
+    family_key: str = ""
+    # Phase 4 — regime threshold applied
+    regime_min_edge: float = 0.10


 class RiskManager:
@@ -125,4 +136,15 @@ class RiskManager:
            signal_edge=signal.edge,
            signal_confidence=signal.confidence,
            reasoning=signal.reasoning,
+            # Phase 1 — pass audit fields through to executor
+            edge_gross=signal.edge_gross,
+            edge_net=signal.edge_net,
+            prior_prob=signal.prior_prob,
+            final_prob=signal.final_prob,
+            mid_price=signal.mid_price,
+            spread_estimate=signal.spread_estimate,
+            # Phase 2 — family
+            family_key=signal.family_key,
+            # Phase 4 — regime
+            regime_min_edge=signal.regime_min_edge,
        )
@@ -12,20 +12,39 @@ Polymarket might reflect in a slow-moving order book.
 """
 import logging
 import math
-from dataclasses import dataclass
-from typing import Optional
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Optional, TYPE_CHECKING

-from bot.data.polymarket import Market
+from bot.data.polymarket import Market, market_family_key
 from bot.data.external import ExternalSignals
-from bot.data.news import NewsClient
+
+if TYPE_CHECKING:
+    from bot.data.news import NewsClient

 log = logging.getLogger(__name__)

-# Minimum edge required to place a trade.
-# With an informed prior (poly price), 10% means our signals strongly disagree
-# with the market — much higher bar than before, but necessary to avoid noise.
-MIN_EDGE = 0.10  # 10% edge minimum
-MIN_CONFIDENCE = 0.55  # Minimum confidence in our estimate
+# ─────────────────────────────────────────────────────────────────────────────
+# Cost constants (Phase 1 — heuristics, not exact Polymarket exchange costs)
+# ─────────────────────────────────────────────────────────────────────────────
+# spread_estimate: approximate half-spread for medium-liquidity Polymarket
+#   markets.  Real spread varies by market and time; 0.02 is a conservative
+#   starting estimate.  Replace with live order-book data when available.
+SPREAD_ESTIMATE: float = 0.02
+
+# commission_rate: Polymarket taker fee approximation.  Current Polymarket fee
+#   is 0% on CLOB but was 2% historically; keeping 2% as a conservative buffer
+#   against future fee changes and exchange rate effects.
+COMMISSION_RATE: float = 0.02
+
+# Combined cost floor deducted from edge_gross to get edge_net.
+# edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
+TOTAL_COST_RATE: float = SPREAD_ESTIMATE + COMMISSION_RATE  # 0.04
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Other strategy constants
+# ─────────────────────────────────────────────────────────────────────────────
+MIN_CONFIDENCE = 0.55   # Minimum confidence to generate a signal

 # Log-odds weight applied to the GNews sentiment score (range ±1.0).
 # A weight of 1.5 means a fully negative/positive signal shifts log-odds by ±1.5,
@@ -37,17 +56,103 @@ NEWS_LOGODDS_WEIGHT = 1.5
 MAX_NEWS_QUERIES_PER_CYCLE = 5


+# ─────────────────────────────────────────────────────────────────────────────
+# Phase 4 — Regime-based minimum edge (uses edge_NET, not edge_gross)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _regime_min_edge(category: str, days_to_resolution: int) -> float:
+    """
+    Return the minimum edge_net required to execute a trade.
+
+    Politics markets use a horizon-dependent threshold: the further away the
+    resolution date, the more edge is demanded.  Every other category uses a
+    flat threshold.
+
+    category              | days_to_resolution | min_edge_net
+    ──────────────────────┼────────────────────┼─────────────
+    politics              | > 60 d             | 0.12
+    politics              | 30–60 d (inclusive)| 0.10
+    politics              | < 30 d             | 0.08
+    tech / crypto/finance | any                | 0.10
+    other / unknown       | any                | 0.10
+
+    Args:
+        category: market category; only the exact string "politics" gets the
+            horizon-dependent treatment.
+        days_to_resolution: whole days until the market resolves.
+
+    Returns:
+        The threshold to compare edge_net against.
+    """
+    if category == "politics":
+        if days_to_resolution > 60:
+            return 0.12
+        # Inclusive lower bound: exactly 30 days belongs to the 30–60 d band.
+        # 30 is also the "unknown end date" default of _days_to_resolution(),
+        # which is meant to be treated as medium-term, not short-term.
+        if days_to_resolution >= 30:
+            return 0.10
+        return 0.08
+    return 0.10  # tech, crypto/finance, events, default
+
+
+def _days_to_resolution(end_date: str) -> int:
+    """
+    Return whole days from now (UTC) until ``end_date``, never below 0.
+
+    ``end_date`` is parsed as an ISO-8601 string; a trailing "Z" is accepted
+    and a timestamp without an offset is taken to be UTC.  An empty or
+    unparseable value yields 30.
+    """
+    fallback_days = 30  # unknown date: treat as medium-term
+    if not end_date:
+        return fallback_days
+    try:
+        resolves_at = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
+        if resolves_at.tzinfo is None:
+            resolves_at = resolves_at.replace(tzinfo=timezone.utc)
+        remaining = resolves_at - datetime.now(timezone.utc)
+    except (ValueError, TypeError):
+        return fallback_days
+    # timedelta.days is negative for dates in the past; clamp those to 0.
+    return remaining.days if remaining.days > 0 else 0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Phase 3 — GNews priority scoring
+# ─────────────────────────────────────────────────────────────────────────────
+
+def gnews_priority(market: Market, news: "NewsClient") -> float:
+    """
+    Score a market for GNews query priority (higher = more valuable to query).
+
+    Formula:  priority = uncertainty × volume_score × freshness
+
+      uncertainty  = 1 - |prior - 0.5| × 2, where prior is market.yes_price
+                     clamped to [0.05, 0.95]; 1.0 at a 50% price, falling to
+                     0.1 at either clamp limit
+      volume_score = market.volume_24h / 10_000, capped at 1.0
+      freshness    = news.get_freshness(market.question)
+
+    Only these three factors enter the score; nothing else about the market
+    (such as its family) is looked at here.
+    """
+    capped_price = min(0.95, market.yes_price)
+    prior = max(0.05, capped_price)
+    distance_from_even = abs(prior - 0.5)
+    uncertainty = 1.0 - distance_from_even * 2
+    volume_score = min(market.volume_24h / 10_000, 1.0)
+    freshness = news.get_freshness(market.question)
+    return uncertainty * volume_score * freshness
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Signal and strategy classes
+# ─────────────────────────────────────────────────────────────────────────────
+
@dataclass
 class TradingSignal:
    market_id: str
    question: str
-    polymarket_price: float    # Current market price for YES (0-1)
-    estimated_prob: float      # Our Bayesian estimate (0-1)
-    edge: float                # estimated_prob - polymarket_price
-    confidence: float          # How confident we are (0-1)
-    direction: str             # "BUY_YES" | "BUY_NO"
-    reasoning: str             # Human-readable explanation for logging
-    sources: list[str]         # Data sources used
+    polymarket_price: float     # Current market price for YES (0-1)
+    estimated_prob: float       # Our Bayesian estimate (0-1)
+    edge: float                 # Kept for backward compat — equals edge_gross
+    confidence: float           # How confident we are (0-1)
+    direction: str              # "BUY_YES" | "BUY_NO"
+    reasoning: str              # Human-readable explanation for logging
+    sources: list[str]          # Data sources used
+    # ── Phase 1: edge neto ───────────────────────────────────────────────────
+    edge_gross: float = 0.0         # |estimated_prob - polymarket_price|
+    edge_net: float = 0.0           # edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
+    prior_prob: float = 0.0         # market.yes_price clamped to [0.05, 0.95]
+    final_prob: float = 0.0         # estimated_prob (explicit alias)
+    # mid_price: (bid+ask)/2 from order book when available; falls back to
+    # market.yes_price.  Order-book fetching is a future enhancement — using
+    # yes_price here is conservative (already the ask side).
+    mid_price: float = 0.0
+    spread_estimate: float = SPREAD_ESTIMATE
+    # ── Phase 2: market families ─────────────────────────────────────────────
+    family_key: str = ""
+    # ── Phase 4: regime ──────────────────────────────────────────────────────
+    regime_min_edge: float = 0.10


 class BayesianStrategy:
@@ -59,34 +164,59 @@ class BayesianStrategy:
    - BTC/ETH price momentum
    - Fear & Greed index
    - Market cap trend / BTC dominance
-    We only bet when our signals move the estimate far enough from the prior
-    to justify the fee + slippage cost (MIN_EDGE).
+    - GNews sentiment (politics only, capped at MAX_NEWS_QUERIES_PER_CYCLE)
+
+    Execution gate (Phase 1 + 4):
+    - Compute edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
+    - Only trade when edge_net >= regime_min_edge(category, days_to_resolution)
+
+    Family deduplication (Phase 2):
+    - At most 1 open position per market family per cycle.
+    - Caller passes occupied_families; this method skips and logs SKIP_FAMILY.
+
+    GNews prioritisation (Phase 3):
+    - Caller pre-sorts politics markets by gnews_priority() (desc) so the
+      highest-value markets consume the GNews budget first.
+    - Within evaluate(), the per-cycle cap is enforced.
    """

-    def __init__(self, news: Optional[NewsClient] = None) -> None:
+    def __init__(self, news: Optional["NewsClient"] = None) -> None:
        self._signal_count = 0
-        self._news = news  # Optional; degrades gracefully when None or key missing
+        self._news = news
        self._news_queries_this_cycle = 0

    def reset_cycle(self) -> None:
-        """Call once at the start of each trading cycle to reset the per-cycle GNews counter."""
+        """Call once at the start of each trading cycle to reset per-cycle counters."""
        self._news_queries_this_cycle = 0

    async def evaluate(
        self,
        market: Market,
        ext: ExternalSignals,
+        occupied_families: set[str],
    ) -> Optional[TradingSignal]:
        """
-        Evaluate a market and return a signal if edge exists.
-        Returns None if no actionable opportunity.
+        Evaluate a market and return a TradingSignal if actionable.
+
+        Returns None with a structured log line in all skip cases.
+        Skip reasons (Phase 5 observability):
+          SKIP_UNSUPPORTED  — category not supported
+          SKIP_NO_SIGNALS   — external data unavailable
+          SKIP_PRIOR_EXTREME — prior < 0.08 or > 0.92
+          SKIP_FAMILY        — family already has an open/pending position
+          SKIP_EDGE_NET      — edge_gross/edge_net < regime_min_edge, or
+                               confidence < MIN_CONFIDENCE (logged under the same label)
        """
        question_lower = market.question.lower()
-        category = market.category  # set by PolymarketClient
+        category = market.category

-        # Classify what kind of market this is
-        is_price_above = any(w in question_lower for w in ["above", "over", "exceed", "higher", "atleast", "reach"])
-        is_price_below = any(w in question_lower for w in ["below", "under", "less than", "lower", "drop"])
+        # ── Classify market type ─────────────────────────────────────────────
+        is_price_above = any(w in question_lower for w in [
+            "above", "over", "exceed", "higher", "atleast", "reach",
+        ])
+        is_price_below = any(w in question_lower for w in [
+            "below", "under", "less than", "lower", "drop",
+        ])

        is_btc = "btc" in question_lower or "bitcoin" in question_lower
        is_eth = "eth" in question_lower or "ethereum" in question_lower
@@ -100,7 +230,9 @@ class BayesianStrategy:
            w in question_lower for w in ["crypto", "market cap", "total market", "altcoin", "defi"]
        )
        is_macro = any(
-            w in question_lower for w in ["nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff"]
+            w in question_lower for w in [
+                "nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff",
+            ]
        )
        is_politics = category == "politics"
        is_tech = category == "tech"
@@ -112,45 +244,52 @@ class BayesianStrategy:
        )
        if not is_any_supported:
            log.info(
-                "SKIP  %-50s | reason=unsupported category=%r",
+                "SKIP_UNSUPPORTED  %-50s | cat=%r",
                market.question[:50], category,
            )
            return None

        if not ext.valid:
            log.info(
-                "SKIP  %-50s | reason=no external signals",
+                "SKIP_NO_SIGNALS   %-50s | reason=external data unavailable",
                market.question[:50],
            )
-            return None  # Can't reason without external data
+            return None

-        # --- Bayesian probability estimation ---
-        # Prior = Polymarket consensus price, clamped away from extremes.
-        # The market already aggregates information from many traders;
-        # our signals update from that informed baseline, not from 0.5.
+        # ── Phase 1: prior + prior-extreme filter ────────────────────────────
        prior = max(0.05, min(0.95, market.yes_price))

-        # Skip markets where the crowd has already reached near-certainty.
-        # Below 0.08 or above 0.92 there is not enough room for our signals
-        # to generate MIN_EDGE — any trade would be fighting near-certain consensus.
        if market.yes_price < 0.08:
            log.info(
-                "SKIP  %-50s | cat=%-12s | prior=%.3f | reason=prior too low, market already certain",
+                "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior<0.08",
                market.question[:50], category, market.yes_price,
            )
            return None
        if market.yes_price > 0.92:
            log.info(
-                "SKIP  %-50s | cat=%-12s | prior=%.3f | reason=prior too high, market already certain",
+                "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior>0.92",
                market.question[:50], category, market.yes_price,
            )
            return None

+        # ── Phase 2: family deduplication ────────────────────────────────────
+        family = market_family_key(market)
+        if family in occupied_families:
+            log.info(
+                "SKIP_FAMILY        %-50s | cat=%-12s | family=%s",
+                market.question[:50], category, family,
+            )
+            return None
+
+        # ── Phase 4: regime min-edge ─────────────────────────────────────────
+        days = _days_to_resolution(market.end_date)
+        regime_min = _regime_min_edge(category, days)
+
+        # ── Bayesian probability estimation ──────────────────────────────────
        sources: list[str] = [f"Prior=poly({prior:.3f})"]
        adjustments: list[float] = []

-        # Signal 1: Price momentum (asset-specific or total market cap as proxy)
-        # For politics/tech/events use BTC as a broad sentiment proxy.
+        # Signal 1: price momentum (asset-specific or BTC as sentiment proxy)
        if is_btc:
            momentum = ext.btc_change_24h
            asset_label = "BTC"
@@ -158,17 +297,14 @@ class BayesianStrategy:
            momentum = ext.eth_change_24h
            asset_label = "ETH"
        elif is_politics or is_tech or is_events:
-            # BTC as risk-sentiment proxy for non-crypto categories
            momentum = ext.btc_change_24h
            asset_label = "BTC(sentiment)"
        else:
-            # Altcoins and general crypto: use total market cap change as proxy
            momentum = ext.total_market_cap_change
            asset_label = "total mktcap"

        if abs(momentum) > 2:
-            momentum_adj = math.tanh(momentum / 20) * 0.15  # Max ±15%
-            # For non-directional markets (politics/events/tech), momentum is weaker signal
+            momentum_adj = math.tanh(momentum / 20) * 0.15
            if is_politics or is_tech or is_events:
                momentum_adj *= 0.5
            adjustments.append(momentum_adj if is_price_above else -momentum_adj)
@@ -185,26 +321,19 @@ class BayesianStrategy:
        else:
            fg_adj = (fg - 50) / 50 * 0.04
            sources.append(f"Fear&Greed: {fg} (neutral)")
-
        adjustments.append(fg_adj if is_price_above else -fg_adj)

        # Signal 3: BTC dominance — hurts altcoins when high
        if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
-            dom_adj = -0.03 if is_price_above else 0.03
-            adjustments.append(dom_adj)
+            adjustments.append(-0.03 if is_price_above else 0.03)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)")
        elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45:
-            dom_adj = 0.03 if is_price_above else -0.03
-            adjustments.append(dom_adj)
+            adjustments.append(0.03 if is_price_above else -0.03)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")

-        # Signal 4: GNews sentiment — politics markets only.
-        # BTC/F&G already cover crypto and macro; GNews budget is too tight to
-        # waste on tech/events.  Cap at MAX_NEWS_QUERIES_PER_CYCLE per cycle so
-        # we prioritise the soonest-resolving markets (caller sorts by end_date).
-        # Applied as a direct log-odds shift — stronger signal than macro proxies.
-        # Weight NEWS_LOGODDS_WEIGHT=1.5 means a ±1.0 sentiment score shifts
-        # log-odds by ±1.5 (e.g. 50% prior → ~82% / ~18%).
+        # Signal 4: GNews sentiment (politics only, budget-gated)
+        # Phase 3: caller has pre-sorted markets by gnews_priority() so the
+        # highest-value markets reach this block first.
        news_log_adj = 0.0
        if is_politics and self._news is not None:
            if self._news_queries_this_cycle < MAX_NEWS_QUERIES_PER_CYCLE:
@@ -214,72 +343,81 @@ class BayesianStrategy:
                    news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT
                    sources.append(f"GNews: {sentiment:+.2f}")
            else:
-                log.debug(
-                    "GNews cycle limit (%d) reached — skipping news for %r",
-                    MAX_NEWS_QUERIES_PER_CYCLE, market.question[:50],
+                log.info(
+                    "SKIP_GNEWS_PRIORITY %-50s | reason=cycle budget %d reached",
+                    market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE,
                )

-        # Macro/politics/tech/events: cap confidence lower to reflect weaker signal quality
-        if is_macro or is_politics or is_tech or is_events:
-            confidence_cap = 0.65
-        else:
-            confidence_cap = 0.90
+        # Confidence cap: macro/politics/tech signals are weaker proxies
+        confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90

-        # Compute posterior using log-odds updating.
-        # total_adj (BTC/F&G/dominance) is amplified ×2 because those are weak proxies.
-        # news_log_adj is applied at face value — it IS a direct log-odds signal.
+        # Posterior via log-odds updating
        log_odds_prior = math.log(prior / (1 - prior))
        total_adj = sum(adjustments)
        estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj)
        estimated_prob = max(0.05, min(0.95, estimated_prob))

-        # Compute edge
-        edge = estimated_prob - market.yes_price
-        direction = "BUY_YES" if edge > 0 else "BUY_NO"
-        abs_edge = abs(edge)
+        # ── Phase 1: edge_gross and edge_net ─────────────────────────────────
+        raw_edge = estimated_prob - market.yes_price
+        direction = "BUY_YES" if raw_edge > 0 else "BUY_NO"
+        edge_gross = abs(raw_edge)
+        # NOTE: commission is modelled as a constant fraction of size_usdc (COMMISSION_RATE).
+        edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
+        # mid_price falls back to yes_price; live order-book data is a future enhancement
+        mid_price = market.yes_price

        # Confidence based on signal agreement
        agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0))
        confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5)
-        # News signal available → boost confidence by 0.10 (news corroborates macro signals)
        if news_log_adj != 0.0:
            confidence = min(confidence_cap, confidence + 0.10)

-        # Log evaluation result for every market
-        action = "TRADE" if (abs_edge >= MIN_EDGE and confidence >= MIN_CONFIDENCE) else "SKIP"
-        skip_reason = ""
-        if action == "SKIP":
-            reasons = []
-            if abs_edge < MIN_EDGE:
-                reasons.append(f"edge={abs_edge:.3f}<{MIN_EDGE}")
+        # ── Phase 5: structured audit log ────────────────────────────────────
+        passed_gross = edge_gross >= regime_min
+        passed_net = edge_net >= regime_min
+        can_trade = passed_net and confidence >= MIN_CONFIDENCE
+
+        if not can_trade:
+            skip_parts: list[str] = []
+            if not passed_gross:
+                skip_parts.append(f"edge_gross={edge_gross:.3f}<{regime_min:.2f}(regime)")
+            elif not passed_net:
+                skip_parts.append(
+                    f"edge_net={edge_net:.3f}<{regime_min:.2f}(regime) "
+                    f"[gross={edge_gross:.3f} pass]"
+                )
            if confidence < MIN_CONFIDENCE:
-                reasons.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}")
-            skip_reason = " | reason=" + ",".join(reasons)
-
-        log.info(
-            "%-5s %-50s | cat=%-12s | prior=%.3f | est=%.3f | edge=%+.3f | conf=%.2f | dir=%-8s | signals=%s%s",
-            action,
-            market.question[:50],
-            category,
-            prior,
-            estimated_prob,
-            edge,
-            confidence,
-            direction,
-            ", ".join(sources[1:]) or "none",
-            skip_reason,
-        )
-
-        # Filter: only trade if edge and confidence thresholds met
-        if abs_edge < MIN_EDGE or confidence < MIN_CONFIDENCE:
+                skip_parts.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}")
+            log.info(
+                "SKIP_EDGE_NET      %-50s | cat=%-12s | family=%-28s | "
+                "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
+                "regime=%.2f | days=%d | conf=%.2f | signals=%s | %s",
+                market.question[:50], category, family,
+                prior, estimated_prob, edge_gross, edge_net,
+                regime_min, days, confidence,
+                ", ".join(sources[1:]) or "none",
+                " | ".join(skip_parts),
+            )
            return None

        reasoning = (
            f"Prior=poly({prior:.3f}) → estimate={estimated_prob:.3f} | "
            f"Poly price={market.yes_price:.3f} | "
-            f"Edge={edge:+.3f} | "
+            f"edge_gross={edge_gross:+.3f} | edge_net={edge_net:+.3f} | "
+            f"regime_min={regime_min:.2f} | days={days} | "
+            f"family={family} | "
            f"Direction={direction} | "
-            f"Signals: {', '.join(sources[1:])}"  # skip the prior label already shown
+            f"Signals: {', '.join(sources[1:])}"
+        )
+
+        log.info(
+            "TRADE              %-50s | cat=%-12s | family=%-28s | "
+            "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
+            "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | signals=%s",
+            market.question[:50], category, family,
+            prior, estimated_prob, edge_gross, edge_net,
+            regime_min, days, confidence, direction,
+            ", ".join(sources[1:]) or "none",
        )

        self._signal_count += 1
@@ -288,11 +426,22 @@ class BayesianStrategy:
            question=market.question,
            polymarket_price=market.yes_price,
            estimated_prob=estimated_prob,
-            edge=abs_edge,
+            edge=edge_gross,             # backward compat — same as edge_gross
            confidence=confidence,
            direction=direction,
            reasoning=reasoning,
            sources=sources,
+            # Phase 1 new fields
+            edge_gross=edge_gross,
+            edge_net=edge_net,
+            prior_prob=prior,
+            final_prob=estimated_prob,
+            mid_price=mid_price,
+            spread_estimate=SPREAD_ESTIMATE,
+            # Phase 2 new fields
+            family_key=family,
+            # Phase 4 new fields
+            regime_min_edge=regime_min,
        )