feat(bot): 5-phase strategy upgrade — edge neto, families, GNews priority, regimes

Phase 1 — Edge neto real (paper.py, bayesian.py, risk/manager.py, db.py):
- Trade records now store edge_gross, edge_net, prior_prob, final_prob,
  mid_price, spread_estimate, commission, family_key
- edge_net = edge_gross - SPREAD_ESTIMATE(0.02) - COMMISSION_RATE(0.02)
  NOTE: both constants are heuristics, not exact Polymarket exchange costs
- Execution gate changed from edge_gross > MIN_EDGE to edge_net > regime_min_edge

Phase 2 — Market families (polymarket.py):
- market_family_key(market) groups related markets:
  texas-republican-2026, fed-april-2026, openai-2026, etc.
- At most 1 trade per family per cycle; occupied_families propagated via main.py
- Family key logged on every TRADE and SKIP line

Phase 3 — GNews priority (news.py, bayesian.py, main.py):
- NewsClient.get_freshness() returns 1.0/0.75/0.40/0.10 by cache age
- gnews_priority(market, news) = uncertainty × volume_score × freshness
- Politics markets sorted by priority DESC before eval so best markets
  get the 5-query/cycle GNews budget first

Phase 4 — Regime min-edge by category/horizon (bayesian.py):
- politics >60d → 0.12, 31-60d → 0.10, ≤30d → 0.08
- tech / crypto/finance → 0.10
- All thresholds applied to edge_net (not edge_gross)

Phase 5 — Observability (bayesian.py, main.py):
- Structured skip labels: SKIP_UNSUPPORTED, SKIP_NO_SIGNALS, SKIP_PRIOR_EXTREME,
  SKIP_FAMILY, SKIP_GNEWS_PRIORITY, SKIP_EDGE_NET
- TRADE lines now include family_key, edge_gross, edge_net, regime_min, days
- schema.sql: 8 new cols on trades, 7 new cols on signals
  (via ALTER TABLE ... ADD COLUMN IF NOT EXISTS)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 15:34:46 +00:00
parent a0cbdc0256
commit 63d9f637ff
8 changed files with 620 additions and 141 deletions
@@ -33,13 +33,21 @@ class Database:
            await conn.execute("""
                INSERT INTO trades (
                    id, market_id, question, direction, size_usdc,
-                    entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper
+                    entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
-                ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12)
+                    edge_gross, edge_net, prior_prob, final_prob,
                    mid_price, spread_estimate, commission, family_key
                ) VALUES (
                    $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
                    $13,$14,$15,$16,$17,$18,$19,$20
                )
                ON CONFLICT (id) DO NOTHING
            """,
                trade.id, trade.market_id, trade.question, trade.direction,
                trade.size_usdc, trade.entry_price, trade.shares, trade.fee_usdc,
                trade.net_cost, trade.timestamp, trade.reasoning, trade.paper,
                # Phase 1 fields
                trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
                trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
            )
    async def save_daily_metrics(self, metrics: dict) -> None:
@@ -69,6 +77,18 @@ class Database:
            )
            return {r["market_id"]: float(r["total"]) for r in rows}
    async def get_open_families(self) -> set[str]:
        """Collect the distinct, non-empty family_key values stored on trades.

        The result is meant to seed occupied_families from persisted state so
        that family de-duplication keeps working after a pod restart.

        NOTE(review): the query has no open/closed filter, so every family
        that was ever traded is returned — confirm whether it should be
        limited to positions that are still open.

        Returns:
            Set of family slugs; NULL and empty-string keys are left out.
        """
        query = "SELECT DISTINCT family_key FROM trades WHERE family_key IS NOT NULL"
        async with self._pool.acquire() as conn:
            records = await conn.fetch(query)
            families: set[str] = set()
            for record in records:
                key = record["family_key"]
                # The SQL already drops NULLs; this also drops "" keys.
                if key:
                    families.add(key)
            return families
    async def get_recent_trades(self, limit: int = 100) -> list[dict]:
        async with self._pool.acquire() as conn:
            rows = await conn.fetch(
@@ -155,6 +155,32 @@ class NewsClient:
    async def close(self) -> None:
        await self._client.aclose()
    def get_freshness(self, question: str) -> float:
        """
        Score how worthwhile a new GNews query for *question* would be.

        The score is one of four fixed values, chosen from the age of the
        cache entry stored under the lower-cased query built from *question*:
          1.00 — no API key configured, or no cache entry for the query
          0.75 — cache entry older than 6 h
          0.40 — cache entry older than 2 h (up to 6 h)
          0.10 — cache entry at most 2 h old

        Without an API key the query would be skipped anyway, so 1.0 is
        returned to avoid penalising the priority score.
        """
        if not self._api_key:
            return 1.0
        cache_key = self._build_query(question).lower()
        entry = self._cache.get(cache_key)
        if entry is None:
            return 1.0
        # entry[0] is subtracted from time.monotonic(), i.e. it is treated as
        # the monotonic-clock time at which the entry was stored.
        elapsed = time.monotonic() - entry[0]
        # (minimum age in seconds, score), checked from oldest to newest.
        for min_age, score in ((6 * 3600, 0.75), (2 * 3600, 0.40)):
            if elapsed > min_age:
                return score
        return 0.10
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
@@ -5,6 +5,7 @@ Docs: https://docs.polymarket.com
 import asyncio
 import logging
 import os
 import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone, timedelta
 from typing import Optional
@@ -15,6 +16,158 @@ log = logging.getLogger(__name__)
 POLYMARKET_API = "https://clob.polymarket.com"
 GAMMA_API = "https://gamma-api.polymarket.com"
 # ─────────────────────────────────────────────────────────────────────────────
 # Phase 2 — Market family classification helpers
 # Used by market_family_key() below.
 # ─────────────────────────────────────────────────────────────────────────────
 _YEAR_RE = re.compile(r"\b(202\d|203\d)\b")
 _MONTH_RE = re.compile(
    r"\b(january|february|march|april|may|june|july|august|"
    r"september|october|november|december)\b",
    re.IGNORECASE,
 )
 _FED_TRIGGER_RE = re.compile(
    r"\b(federal reserve|interest rate|bps|basis point|fed\s+(rate|meeting|decision))",
    re.IGNORECASE,
 )
 _US_STATE_RE = re.compile(
    r"\b(Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|"
    r"Delaware|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|"
    r"Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|"
    r"Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|"
    r"New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|"
    r"Ohio|Oklahoma|Oregon|Pennsylvania|Rhode\s+Island|South\s+Carolina|"
    r"South\s+Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Washington|"
    r"West\s+Virginia|Wisconsin|Wyoming)\b",
    re.IGNORECASE,
 )
 _PARTY_RE = re.compile(r"\b(Republican|Democrats?|Democratic|GOP)\b", re.IGNORECASE)
 _ELECTION_TYPE_RE = re.compile(
    r"\b(presidential|president|mayoral|mayor|gubernatorial|governor|"
    r"senate|congress(?:ional)?|primary|election)\b",
    re.IGNORECASE,
 )
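 # Ordered list of (pattern, place_slug) for named places handled outside the
 # US state + party rule: mostly non-US cities/countries, plus US cities such
 # as Los Angeles.  market_family_key() consults this list only when the
 # US state + party rule did not match; entries are tried in list order and
 # the first hit wins.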
 _NAMED_PLACES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bColomb", re.IGNORECASE),            "colombia"),
    (re.compile(r"\bSeoul\b", re.IGNORECASE),           "seoul"),
    (re.compile(r"\bBusan\b", re.IGNORECASE),           "busan"),
    (re.compile(r"\bGyeonggi\b", re.IGNORECASE),        "gyeonggi"),
    (re.compile(r"\bChungcheong", re.IGNORECASE),       "chungcheong"),
    (re.compile(r"\bSouth\s+Korean?\b", re.IGNORECASE), "south-korea"),
    (re.compile(r"\bLos\s+Angeles\b", re.IGNORECASE),   "los-angeles"),
    (re.compile(r"\bCuba\b", re.IGNORECASE),            "cuba"),
    (re.compile(r"\bLebanon\b", re.IGNORECASE),         "lebanon"),
    (re.compile(r"\bIsrael\b", re.IGNORECASE),          "israel"),
    (re.compile(r"\bUkraine\b", re.IGNORECASE),         "ukraine"),
    (re.compile(r"\bRussia\b", re.IGNORECASE),          "russia"),
 ]
 # Ordered list of (pattern, company_slug) for tech/company markets.
 _NAMED_COMPANIES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bopenai\b", re.IGNORECASE),     "openai"),
    (re.compile(r"\banthropic\b", re.IGNORECASE),  "anthropic"),
    (re.compile(r"\bnvidia\b", re.IGNORECASE),     "nvidia"),
    (re.compile(r"\bapple\b", re.IGNORECASE),      "apple"),
    (re.compile(r"\bmicrosoft\b", re.IGNORECASE),  "microsoft"),
    (re.compile(r"\bgoogle\b", re.IGNORECASE),     "google"),
    (re.compile(r"\btesla\b", re.IGNORECASE),      "tesla"),
    # \bmeta\b does NOT match MetaMask (no word boundary mid-compound-word)
    (re.compile(r"\bmeta\b", re.IGNORECASE),       "meta"),
 ]
 def _end_month(market: "Market") -> str:
    """Return market end_date formatted as YYYY-MM, or '' if unparseable."""
    raw = market.end_date
    if not raw:
        return ""
    try:
        dt = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        return dt.strftime("%Y-%m")
    except (ValueError, TypeError):
        return ""
 def market_family_key(market: "Market") -> str:
    """
    Derive the family slug that groups markets hinging on the same event.

    Markets of one family share an underlying event (same election, same
    Fed meeting decision, same company).  The bot allows at most one open
    position per family per cycle to avoid correlated exposure.

    Rules are tried in order; the first one that applies decides the slug:
      1. Question matches the Fed / interest-rate trigger
             → fed-{month}-{year}, or fed-{year} if no month name is found
      2. Question names a US state and a party
             → {state}-{party}-{year}  (party: "democrat" or "republican")
      3. Question matches an entry of _NAMED_PLACES
             → {place}-{event_type}-{year}  (event_type "event" if none found)
      4. Question matches an entry of _NAMED_COMPANIES
             → {company}-{year}
      5. Otherwise
             → {category}-{end_YYYY-MM}, or {category}-{year} when end_date
               cannot be parsed; an empty category becomes "misc"

    {year} is the first 202x/203x year in the question text, else the year
    of market.end_date, else "unknown".

    Examples:
      "Will Ken Paxton win the 2026 Texas Republican Primary"
          → texas-republican-2026
      "Will the Fed decrease rates by 25 bps after April 2026 meeting"
          → fed-april-2026
      "Will OpenAI IPO by December 31 2026?"
          → openai-2026
    """
    question = market.question

    # Year: question text first, end_date second, "unknown" last.
    year_match = _YEAR_RE.search(question)
    if year_match is not None:
        year = year_match.group(1)
    else:
        year = _end_month(market)[:4] or "unknown"   # "2026-06" → "2026"

    # Rule 1 — Fed / interest-rate meeting.
    if _FED_TRIGGER_RE.search(question):
        month_match = _MONTH_RE.search(question)
        suffix = f"{month_match.group(1).lower()}-{year}" if month_match else year
        return f"fed-{suffix}"

    # Rule 2 — US state + party (primary, senate, governor, etc.).
    state_match = _US_STATE_RE.search(question)
    party_match = _PARTY_RE.search(question)
    if state_match and party_match:
        state_slug = re.sub(r"\s+", "-", state_match.group(1).lower())
        # Any "democrat*" spelling maps to "democrat"; the remaining
        # alternatives of _PARTY_RE (Republican, GOP) map to "republican".
        is_democrat = "democrat" in party_match.group(1).lower()
        party_slug = "democrat" if is_democrat else "republican"
        return f"{state_slug}-{party_slug}-{year}"

    # Rule 3 — named place; office nouns are normalised to their adjective.
    etype_synonyms = {
        "president": "presidential",
        "mayor":     "mayoral",
        "governor":  "gubernatorial",
    }
    for pattern, slug in _NAMED_PLACES:
        if not pattern.search(question):
            continue
        etype_match = _ELECTION_TYPE_RE.search(question)
        if etype_match is None:
            event_type = "event"
        else:
            found = etype_match.group(1).lower()
            event_type = etype_synonyms.get(found, found)
        return f"{slug}-{event_type}-{year}"

    # Rule 4 — named tech company (first entry in list order wins).
    company_slug = next(
        (slug for pattern, slug in _NAMED_COMPANIES if pattern.search(question)),
        None,
    )
    if company_slug is not None:
        return f"{company_slug}-{year}"

    # Rule 5 — fallback: category plus end_date month (or the year).
    end_month = _end_month(market)
    category = market.category or "misc"
    return f"{category}-{end_month or year}"
@dataclass
 class Market:
@@ -55,3 +55,46 @@ CREATE INDEX IF NOT EXISTS idx_trades_timestamp ON trades(timestamp DESC);
 CREATE INDEX IF NOT EXISTS idx_trades_market ON trades(market_id);
 CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics_daily(timestamp DESC);
 CREATE INDEX IF NOT EXISTS idx_signals_timestamp ON signals(timestamp DESC);
 -- ─────────────────────────────────────────────────────────────────────────────
 -- Phase 1 migrations: edge neto real
 --
 -- spread_estimate and commission are HEURISTICS, not exact Polymarket exchange
 -- costs.  spread_estimate ≈ estimated half-spread for medium-liquidity markets.
 -- commission = COMMISSION_RATE (0.02) * size_usdc — mirrors Polymarket taker fee.
 -- edge_net = edge_gross - spread_estimate - commission/size_usdc
 --          = edge_gross - 0.02 - 0.02   (always 0.04 deduction at current rates)
 --
 -- These are stored per-trade so we can audit whether the model's cost assumptions
 -- were met in practice once markets resolve.
 -- ─────────────────────────────────────────────────────────────────────────────
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_gross      DOUBLE PRECISION;
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_net        DOUBLE PRECISION;
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS prior_prob      DOUBLE PRECISION;
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS final_prob      DOUBLE PRECISION;
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS mid_price       DOUBLE PRECISION;
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS spread_estimate DOUBLE PRECISION;
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS commission      DOUBLE PRECISION;
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS family_key      TEXT;
 -- ─────────────────────────────────────────────────────────────────────────────
 -- Phase 2 / Phase 5 migrations: market families + observability
 --
 -- Signals table extended so each evaluated market carries its audit trail:
 --   edge_gross   — raw model edge before any cost deductions
 --   edge_net     — edge_gross minus the spread and commission heuristics
 --   skip_reason  — why the market was not traded ("edge_net", "family",
 --                  "gnews_priority", "regime", "prior_extreme", etc.)
 --   passed_gross — True if edge_gross alone met regime_min_edge
 --   passed_net   — True if edge_net met regime_min_edge (the actual gate)
 --   family_key   — market family slug (e.g. "texas-republican-2026")
 --   regime_min_edge — threshold that applied to this market/category
 -- ─────────────────────────────────────────────────────────────────────────────
 ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_gross      DOUBLE PRECISION;
 ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_net        DOUBLE PRECISION;
 ALTER TABLE signals ADD COLUMN IF NOT EXISTS family_key      TEXT;
 ALTER TABLE signals ADD COLUMN IF NOT EXISTS regime_min_edge DOUBLE PRECISION;
 ALTER TABLE signals ADD COLUMN IF NOT EXISTS skip_reason     TEXT;
 ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_gross    BOOLEAN;
 ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_net      BOOLEAN;
 CREATE INDEX IF NOT EXISTS idx_signals_market ON signals(market_id);
 CREATE INDEX IF NOT EXISTS idx_trades_family  ON trades(family_key);
@@ -6,7 +6,7 @@ All trades are logged to PostgreSQL for metrics analysis.
 """
 import logging
 import uuid
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime, UTC
 from typing import Optional
@@ -15,7 +15,10 @@ from bot.data.db import Database
 log = logging.getLogger(__name__)
-POLYMARKET_FEE = 0.02  # 2% fee on each trade
+# Polymarket taker fee used for paper simulation.
 # Also stored as commission in each Trade for audit purposes.
 # NOTE: this is a heuristic — see COMMISSION_RATE in bayesian.py for context.
 POLYMARKET_FEE = 0.02  # 2%
@dataclass
@@ -32,11 +35,27 @@ class Trade:
    timestamp: datetime
    reasoning: str
    paper: bool = True
    # ── Phase 1: edge neto audit fields ──────────────────────────────────────
    # edge_gross: raw model edge before any cost deductions
    # edge_net:   edge_gross - spread_estimate - commission/size_usdc
    # Both are heuristic estimates — see schema.sql comment for details.
    edge_gross: float = 0.0
    edge_net: float = 0.0
    prior_prob: float = 0.0      # market.yes_price clamped, before Bayesian update
    final_prob: float = 0.0      # estimated probability after all signals
    # mid_price: order-book midpoint when available; falls back to market.yes_price
    mid_price: float = 0.0
    spread_estimate: float = 0.02
    commission: float = 0.0      # = POLYMARKET_FEE * size_usdc
    # ── Phase 2: market family ────────────────────────────────────────────────
    family_key: str = ""
    def __str__(self) -> str:
        return (
            f"[PAPER] {self.direction} {self.shares:.1f} shares @ {self.entry_price:.3f} "
-            f"= ${self.net_cost:.2f} (fee ${self.fee_usdc:.2f}) | {self.question[:40]}"
+            f"= ${self.net_cost:.2f} (fee ${self.fee_usdc:.2f}) "
            f"edge_net={self.edge_net:+.3f} family={self.family_key} "
            f"| {self.question[:40]}"
        )
@@ -102,6 +121,10 @@ class PaperExecutor:
        net_cost = order.size_usdc + fee
        shares = order.size_usdc / entry_price
        # commission mirrors the heuristic COMMISSION_RATE applied in bayesian.py
        # when computing edge_net.  Stored for audit: confirms cost assumption held.
        commission = order.size_usdc * POLYMARKET_FEE  # = fee_usdc at current rate
        trade = Trade(
            id=str(uuid.uuid4()),
            market_id=order.market_id,
@@ -115,6 +138,16 @@ class PaperExecutor:
            timestamp=datetime.now(UTC),
            reasoning=order.reasoning,
            paper=True,
            # Phase 1 audit fields
            edge_gross=order.edge_gross,
            edge_net=order.edge_net,
            prior_prob=order.prior_prob,
            final_prob=order.final_prob,
            mid_price=order.mid_price,
            spread_estimate=order.spread_estimate,
            commission=commission,
            # Phase 2 family
            family_key=order.family_key,
        )
        # Update paper portfolio
@@ -1,17 +1,16 @@
 """
 Polymarket Trading Bot — Main Entry Point
-# ci-test: 2026-04-14
+# ci-test: 2026-04-16
 """
 import asyncio
 import logging
 import os
 from contextlib import asynccontextmanager
 from datetime import datetime, timezone
-from bot.data.polymarket import PolymarketClient
+from bot.data.polymarket import PolymarketClient, market_family_key
 from bot.data.external import ExternalDataClient
 from bot.data.news import NewsClient
-from bot.strategy.bayesian import BayesianStrategy
+from bot.strategy.bayesian import BayesianStrategy, gnews_priority
 from bot.risk.manager import RiskManager
 from bot.executor.paper import PaperExecutor
 from bot.metrics.tracker import MetricsTracker
@@ -34,65 +33,100 @@ async def run_trading_loop(
    risk: RiskManager,
    executor: PaperExecutor,
    metrics: MetricsTracker,
    db: Database,
 ) -> None:
    """Main trading loop — runs every 60 seconds."""
    log.info("Trading loop started. PAPER_MODE=%s", PAPER_MODE)
    while True:
        try:
-            # 1. Fetch active crypto/finance markets
+            # 1. Fetch active markets (90-day window)
            markets = await poly.get_active_markets()
            log.info("Found %d active markets", len(markets))
            # Sort: politics markets first (soonest-resolving → highest GNews priority),
            # then all others.  This ensures the 5-query-per-cycle cap hits the most
            # time-sensitive political markets before the budget runs out.
            def _sort_key(m):
                is_pol = m.category == "politics"
                try:
                    dt = datetime.fromisoformat(m.end_date.replace("Z", "+00:00"))
                except Exception:
                    dt = datetime(9999, 12, 31, tzinfo=timezone.utc)
                return (0 if is_pol else 1, dt)
            markets = sorted(markets, key=_sort_key)
            for _m in markets:
                log.info("  [market] %s | ends: %s | yes_price: %.3f",
                         _m.question, _m.end_date, _m.yes_price)
            # 2. Get external signals
            ext_data = await external.get_all_signals()
-            # Reset per-cycle GNews counter so the limit applies fresh each cycle
+            # 3. Build occupied_families from the current open portfolio positions.
            #    This prevents re-entering a family where we already hold a position.
            #    We also pull from DB to survive pod restarts.
            portfolio = executor.get_portfolio()
            occupied_families: set[str] = set()
            for market_id in portfolio.positions:
                mkt = next((m for m in markets if m.id == market_id), None)
                if mkt:
                    occupied_families.add(market_family_key(mkt))
            # Also seed from DB in case a family was traded in a prior cycle
            # that isn't reflected in the current markets list
            db_families = await db.get_open_families()
            occupied_families |= db_families
            if occupied_families:
                log.info("Occupied families (from portfolio): %s", sorted(occupied_families))
            # 4. Sort markets.
            #    Politics: sort by gnews_priority DESC (highest-value markets get
            #              GNews budget first — Phase 3).
            #    Others:   sort by end_date ASC (soonest-resolving first).
            def _sort_key(m):
                try:
                    dt = datetime.fromisoformat(m.end_date.replace("Z", "+00:00"))
                except Exception:
                    dt = datetime(9999, 12, 31, tzinfo=timezone.utc)
                if m.category == "politics":
                    priority = gnews_priority(m, strategy._news) if strategy._news else 0.0
                    # Bucket 0 = politics, sort by priority DESC (negate for asc sort)
                    return (0, -priority, dt)
                return (1, 0.0, dt)
            markets = sorted(markets, key=_sort_key)
            for _m in markets:
                log.info(
                    "  [market] %-55s | cat=%-12s | family=%-28s | ends=%s | yes=%.3f",
                    _m.question[:55], _m.category, market_family_key(_m),
                    _m.end_date[:10] if _m.end_date else "?", _m.yes_price,
                )
            # Reset per-cycle GNews counter
            strategy.reset_cycle()
            # 5. Evaluate each market
            cycle_trades = 0
            for market in markets:
-                # 3. Estimate true probability
+                # evaluate() returns None for all skips — reasons are logged internally
-                signal = await strategy.evaluate(market, ext_data)
+                signal = await strategy.evaluate(market, ext_data, occupied_families)
                if signal is None:
                    continue
                log.info(
-                    "Signal: market=%s poly_price=%.3f our_estimate=%.3f confidence=%.2f",
+                    "Signal generated: market=%-50s | edge_gross=%+.3f | edge_net=%+.3f | "
                    "regime_min=%.2f | family=%s | conf=%.2f",
                    market.question[:50],
-                    signal.polymarket_price,
+                    signal.edge_gross,
-                    signal.estimated_prob,
+                    signal.edge_net,
                    signal.regime_min_edge,
                    signal.family_key,
                    signal.confidence,
                )
-                # 4. Risk check + position sizing
+                # 6. Risk check + position sizing
-                order = risk.size_order(signal, executor.get_portfolio())
+                order = risk.size_order(signal, portfolio)
                if order is None:
                    log.debug("Risk manager rejected order for %s", market.id)
                    continue
-                # 5. Execute (paper or real)
+                # 7. Execute (paper)
                trade = await executor.execute(order)
                if trade:
                    await metrics.record_trade(trade)
                    log.info("Trade executed: %s", trade)
                    # Block this family for the rest of the cycle (Phase 2)
                    occupied_families.add(signal.family_key)
                    cycle_trades += 1
-            # 6. Update daily metrics
+            log.info("Cycle complete — trades this cycle: %d", cycle_trades)
            # 8. Update daily metrics
            await metrics.update_daily_summary()
        except Exception as e:
@@ -123,7 +157,6 @@ async def main() -> None:
    metrics = MetricsTracker(db=db)
    if executor is None:
        # Import real executor only when explicitly needed
        from bot.executor.real import RealExecutor  # noqa
        executor = RealExecutor(db=db)
@@ -131,7 +164,7 @@ async def main() -> None:
        await executor.initialize()
    try:
-        await run_trading_loop(poly, external, strategy, risk, executor, metrics)
+        await run_trading_loop(poly, external, strategy, risk, executor, metrics, db)
    finally:
        await db.disconnect()
        await news.close()
@@ -45,6 +45,17 @@ class Order:
    signal_edge: float
    signal_confidence: float
    reasoning: str
    # Phase 1 — edge neto audit fields (passed through from TradingSignal)
    edge_gross: float = 0.0
    edge_net: float = 0.0
    prior_prob: float = 0.0
    final_prob: float = 0.0
    mid_price: float = 0.0
    spread_estimate: float = 0.02
    # Phase 2 — market family
    family_key: str = ""
    # Phase 4 — regime threshold applied
    regime_min_edge: float = 0.10
 class RiskManager:
@@ -125,4 +136,15 @@ class RiskManager:
            signal_edge=signal.edge,
            signal_confidence=signal.confidence,
            reasoning=signal.reasoning,
            # Phase 1 — pass audit fields through to executor
            edge_gross=signal.edge_gross,
            edge_net=signal.edge_net,
            prior_prob=signal.prior_prob,
            final_prob=signal.final_prob,
            mid_price=signal.mid_price,
            spread_estimate=signal.spread_estimate,
            # Phase 2 — family
            family_key=signal.family_key,
            # Phase 4 — regime
            regime_min_edge=signal.regime_min_edge,
        )
@@ -12,20 +12,39 @@ Polymarket might reflect in a slow-moving order book.
 """
 import logging
 import math
-from dataclasses import dataclass
+from dataclasses import dataclass, field
-from typing import Optional
+from datetime import datetime, timezone
 from typing import Optional, TYPE_CHECKING
-from bot.data.polymarket import Market
+from bot.data.polymarket import Market, market_family_key
 from bot.data.external import ExternalSignals
 if TYPE_CHECKING:
    from bot.data.news import NewsClient
 log = logging.getLogger(__name__)
-# Minimum edge required to place a trade.
+# ─────────────────────────────────────────────────────────────────────────────
-# With an informed prior (poly price), 10% means our signals strongly disagree
+# Cost constants (Phase 1 — heuristics, not exact Polymarket exchange costs)
-# with the market — much higher bar than before, but necessary to avoid noise.
+# ─────────────────────────────────────────────────────────────────────────────
-MIN_EDGE = 0.10  # 10% edge minimum
+# spread_estimate: approximate half-spread for medium-liquidity Polymarket
-MIN_CONFIDENCE = 0.55  # Minimum confidence in our estimate
+#   markets.  Real spread varies by market and time; 0.02 is a conservative
 #   starting estimate.  Replace with live order-book data when available.
 SPREAD_ESTIMATE: float = 0.02
 # commission_rate: Polymarket taker fee approximation.  Current Polymarket fee
 #   is 0% on CLOB but was 2% historically; keeping 2% as a conservative buffer
 #   against future fee changes and exchange rate effects.
 COMMISSION_RATE: float = 0.02
 # Combined cost floor deducted from edge_gross to get edge_net.
 # edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
 TOTAL_COST_RATE: float = SPREAD_ESTIMATE + COMMISSION_RATE  # 0.04
 # ─────────────────────────────────────────────────────────────────────────────
 # Other strategy constants
 # ─────────────────────────────────────────────────────────────────────────────
 MIN_CONFIDENCE = 0.55   # Minimum confidence to generate a signal
 # Log-odds weight applied to the GNews sentiment score (range ±1.0).
 # A weight of 1.5 means a fully negative/positive signal shifts log-odds by ±1.5,
@@ -37,17 +56,103 @@ NEWS_LOGODDS_WEIGHT = 1.5
 MAX_NEWS_QUERIES_PER_CYCLE = 5
 # ─────────────────────────────────────────────────────────────────────────────
 # Phase 4 — Regime-based minimum edge (uses edge_NET, not edge_gross)
 # ─────────────────────────────────────────────────────────────────────────────
 def _regime_min_edge(category: str, days_to_resolution: int) -> float:
    """
    Return the minimum edge_net required to execute a trade.
    Thresholds are higher for far-future politics markets (less signal, more
    noise) and lower for near-term politics (time pressure makes any edge
    actionable).  Tech/crypto use a flat threshold.
    category              | days_to_resolution | min_edge_net
    ──────────────────────┼────────────────────┼─────────────
    politics              | > 60 d             | 0.12
    politics              | 30–60 d            | 0.10
    politics              | < 30 d             | 0.08
    tech / crypto/finance | any                | 0.10
    other / unknown       | any                | 0.10
    """
    if category == "politics":
        if days_to_resolution > 60:
            return 0.12
        if days_to_resolution > 30:
            return 0.10
        return 0.08
    return 0.10  # tech, crypto/finance, events, default
 def _days_to_resolution(end_date: str) -> int:
    """Return calendar days until market resolution, or 30 if unknown."""
    if not end_date:
        return 30  # conservative: treat as medium-term
    try:
        dt = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        days = (dt - datetime.now(timezone.utc)).days
        return max(0, days)
    except (ValueError, TypeError):
        return 30
 # ─────────────────────────────────────────────────────────────────────────────
 # Phase 3 — GNews priority scoring
 # ─────────────────────────────────────────────────────────────────────────────
 def gnews_priority(market: Market, news: "NewsClient") -> float:
    """
    Rank a market for the GNews query budget (higher = more valuable to query).

    Formula:  priority = uncertainty × volume_score × freshness
      uncertainty  = 1 - |prior - 0.5| × 2, where prior is yes_price clamped
                     to [0.05, 0.95]: 1.0 at a 50% price, about 0.1 at or
                     beyond the clamp limits
      volume_score = min(volume_24h / 10_000, 1.0)
      freshness    = news.get_freshness(market.question)

    Family occupancy is not an input to this score.
    """
    clamped_price = max(0.05, min(0.95, market.yes_price))
    distance_from_even = abs(clamped_price - 0.5)
    uncertainty = 1.0 - distance_from_even * 2
    volume_score = min(market.volume_24h / 10_000, 1.0)
    return uncertainty * volume_score * news.get_freshness(market.question)
 # ─────────────────────────────────────────────────────────────────────────────
 # Signal and strategy classes
 # ─────────────────────────────────────────────────────────────────────────────
@dataclass
class TradingSignal:
    """Trade recommendation for one market, plus the audit fields stored with it.

    The first nine fields are required. Every later field has a default, so it
    may be omitted when constructing a signal.
    """

    market_id: str
    question: str
    polymarket_price: float     # Current market price for YES (0-1)
    estimated_prob: float       # Our Bayesian estimate (0-1)
    edge: float                 # Kept for backward compat — equals edge_gross
    confidence: float           # How confident we are (0-1)
    direction: str              # "BUY_YES" | "BUY_NO"
    reasoning: str              # Human-readable explanation for logging
    sources: list[str]          # Data sources used
    # ── Phase 1: edge neto ───────────────────────────────────────────────────
    edge_gross: float = 0.0         # |estimated_prob - polymarket_price|
    edge_net: float = 0.0           # edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
    prior_prob: float = 0.0         # market.yes_price clamped to [0.05, 0.95]
    final_prob: float = 0.0         # estimated_prob (explicit alias)
    # mid_price is meant to be (bid+ask)/2 from the order book. Order-book
    # fetching is a future enhancement, so the strategy currently stores
    # market.yes_price here.
    # NOTE(review): the original note calls this fallback conservative on the
    # assumption that yes_price is the ask side — confirm against the price feed.
    mid_price: float = 0.0
    spread_estimate: float = SPREAD_ESTIMATE
    # ── Phase 2: market families ─────────────────────────────────────────────
    family_key: str = ""
    # ── Phase 4: regime ──────────────────────────────────────────────────────
    regime_min_edge: float = 0.10
 class BayesianStrategy:
@@ -59,34 +164,59 @@ class BayesianStrategy:
    - BTC/ETH price momentum
    - Fear & Greed index
    - Market cap trend / BTC dominance
-    We only bet when our signals move the estimate far enough from the prior
+    - GNews sentiment (politics only, capped at MAX_NEWS_QUERIES_PER_CYCLE)
-    to justify the fee + slippage cost (MIN_EDGE).
+
    Execution gate (Phase 1 + 4):
    - Compute edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
    - Only trade when edge_net >= regime_min_edge(category, days_to_resolution)
      and confidence >= MIN_CONFIDENCE
    Family deduplication (Phase 2):
    - At most 1 open position per market family per cycle.
    - Caller passes occupied_families; this method skips and logs SKIP_FAMILY.
    GNews prioritisation (Phase 3):
    - Caller pre-sorts politics markets by gnews_priority() (desc) so the
      highest-value markets consume the GNews budget first.
    - Within evaluate(), the per-cycle cap is enforced.
    """
-    def __init__(self, news: Optional[NewsClient] = None) -> None:
+    def __init__(self, news: Optional["NewsClient"] = None) -> None:
        self._signal_count = 0
-        self._news = news  # Optional; degrades gracefully when None or key missing
+        self._news = news
        self._news_queries_this_cycle = 0
    def reset_cycle(self) -> None:
        """Call once at the start of each trading cycle to reset per-cycle counters.

        Only the GNews query counter is cleared; ``_signal_count`` keeps
        accumulating across cycles.
        """
        self._news_queries_this_cycle = 0
    async def evaluate(
        self,
        market: Market,
        ext: ExternalSignals,
        occupied_families: set[str],
    ) -> Optional[TradingSignal]:
        """
-        Evaluate a market and return a signal if edge exists.
+        Evaluate a market and return a TradingSignal if actionable.
-        Returns None if no actionable opportunity.
+
        Returns None with a structured log line in all skip cases.
        Skip reasons (Phase 5 observability):
          SKIP_UNSUPPORTED  — category not supported
          SKIP_NO_SIGNALS   — external data unavailable
          SKIP_PRIOR_EXTREME — prior < 0.08 or > 0.92
          SKIP_FAMILY        — family already has an open/pending position
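          SKIP_EDGE_NET      — edge_net < regime_min_edge and/or
                               confidence < MIN_CONFIDENCE
          SKIP_CONFIDENCE    — confidence < MIN_CONFIDENCE; currently reported
                               inside the SKIP_EDGE_NET line, not as its own label
          SKIP_GNEWS_PRIORITY — GNews cycle budget reached; informational only,
                               evaluation continues without the news signal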
        """
        question_lower = market.question.lower()
-        category = market.category  # set by PolymarketClient
+        category = market.category
-        # Classify what kind of market this is
+        # ── Classify market type ─────────────────────────────────────────────
-        is_price_above = any(w in question_lower for w in ["above", "over", "exceed", "higher", "atleast", "reach"])
+        is_price_above = any(w in question_lower for w in [
-        is_price_below = any(w in question_lower for w in ["below", "under", "less than", "lower", "drop"])
+            "above", "over", "exceed", "higher", "atleast", "reach",
        ])
        is_price_below = any(w in question_lower for w in [
            "below", "under", "less than", "lower", "drop",
        ])
        is_btc = "btc" in question_lower or "bitcoin" in question_lower
        is_eth = "eth" in question_lower or "ethereum" in question_lower
@@ -100,7 +230,9 @@ class BayesianStrategy:
            w in question_lower for w in ["crypto", "market cap", "total market", "altcoin", "defi"]
        )
        is_macro = any(
-            w in question_lower for w in ["nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff"]
+            w in question_lower for w in [
                "nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff",
            ]
        )
        is_politics = category == "politics"
        is_tech = category == "tech"
@@ -112,45 +244,52 @@ class BayesianStrategy:
        )
        if not is_any_supported:
            log.info(
-                "SKIP  %-50s | reason=unsupported category=%r",
+                "SKIP_UNSUPPORTED  %-50s | cat=%r",
                market.question[:50], category,
            )
            return None
        if not ext.valid:
            log.info(
-                "SKIP  %-50s | reason=no external signals",
+                "SKIP_NO_SIGNALS   %-50s | reason=external data unavailable",
                market.question[:50],
            )
-            return None  # Can't reason without external data
+            return None
-        # --- Bayesian probability estimation ---
+        # ── Phase 1: prior + prior-extreme filter ────────────────────────────
        # Prior = Polymarket consensus price, clamped away from extremes.
        # The market already aggregates information from many traders;
        # our signals update from that informed baseline, not from 0.5.
        prior = max(0.05, min(0.95, market.yes_price))
        # Skip markets where the crowd has already reached near-certainty.
        # Below 0.08 or above 0.92 there is not enough room for our signals
        # to clear the regime minimum edge (applied to edge_net) — any trade
        # would be fighting near-certain consensus.
        if market.yes_price < 0.08:
            log.info(
-                "SKIP  %-50s | cat=%-12s | prior=%.3f | reason=prior too low, market already certain",
+                "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior<0.08",
                market.question[:50], category, market.yes_price,
            )
            return None
        if market.yes_price > 0.92:
            log.info(
-                "SKIP  %-50s | cat=%-12s | prior=%.3f | reason=prior too high, market already certain",
+                "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior>0.92",
                market.question[:50], category, market.yes_price,
            )
            return None
        # ── Phase 2: family deduplication ────────────────────────────────────
        family = market_family_key(market)
        if family in occupied_families:
            log.info(
                "SKIP_FAMILY        %-50s | cat=%-12s | family=%s",
                market.question[:50], category, family,
            )
            return None
        # ── Phase 4: regime min-edge ─────────────────────────────────────────
        days = _days_to_resolution(market.end_date)
        regime_min = _regime_min_edge(category, days)
        # ── Bayesian probability estimation ──────────────────────────────────
        sources: list[str] = [f"Prior=poly({prior:.3f})"]
        adjustments: list[float] = []
-        # Signal 1: Price momentum (asset-specific or total market cap as proxy)
+        # Signal 1: price momentum (asset-specific or BTC as sentiment proxy)
        # For politics/tech/events use BTC as a broad sentiment proxy.
        if is_btc:
            momentum = ext.btc_change_24h
            asset_label = "BTC"
@@ -158,17 +297,14 @@ class BayesianStrategy:
            momentum = ext.eth_change_24h
            asset_label = "ETH"
        elif is_politics or is_tech or is_events:
            # BTC as risk-sentiment proxy for non-crypto categories
            momentum = ext.btc_change_24h
            asset_label = "BTC(sentiment)"
        else:
            # Altcoins and general crypto: use total market cap change as proxy
            momentum = ext.total_market_cap_change
            asset_label = "total mktcap"
        if abs(momentum) > 2:
-            momentum_adj = math.tanh(momentum / 20) * 0.15  # Max ±15%
+            momentum_adj = math.tanh(momentum / 20) * 0.15
            # For non-directional markets (politics/events/tech), momentum is weaker signal
            if is_politics or is_tech or is_events:
                momentum_adj *= 0.5
            adjustments.append(momentum_adj if is_price_above else -momentum_adj)
@@ -185,26 +321,19 @@ class BayesianStrategy:
        else:
            fg_adj = (fg - 50) / 50 * 0.04
            sources.append(f"Fear&Greed: {fg} (neutral)")
        adjustments.append(fg_adj if is_price_above else -fg_adj)
        # Signal 3: BTC dominance — hurts altcoins when high
        if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
-            dom_adj = -0.03 if is_price_above else 0.03
+            adjustments.append(-0.03 if is_price_above else 0.03)
            adjustments.append(dom_adj)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)")
        elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45:
-            dom_adj = 0.03 if is_price_above else -0.03
+            adjustments.append(0.03 if is_price_above else -0.03)
            adjustments.append(dom_adj)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")
-        # Signal 4: GNews sentiment — politics markets only.
+        # Signal 4: GNews sentiment (politics only, budget-gated)
-        # BTC/F&G already cover crypto and macro; GNews budget is too tight to
+        # Phase 3: caller has pre-sorted markets by gnews_priority() so the
-        # waste on tech/events.  Cap at MAX_NEWS_QUERIES_PER_CYCLE per cycle so
+        # highest-value markets reach this block first.
        # we prioritise the soonest-resolving markets (caller sorts by end_date).
        # Applied as a direct log-odds shift — stronger signal than macro proxies.
        # Weight NEWS_LOGODDS_WEIGHT=1.5 means a ±1.0 sentiment score shifts
        # log-odds by ±1.5 (e.g. 50% prior → ~82% / ~18%).
        news_log_adj = 0.0
        if is_politics and self._news is not None:
            if self._news_queries_this_cycle < MAX_NEWS_QUERIES_PER_CYCLE:
@@ -214,72 +343,81 @@ class BayesianStrategy:
                    news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT
                    sources.append(f"GNews: {sentiment:+.2f}")
            else:
-                log.debug(
+                log.info(
-                    "GNews cycle limit (%d) reached — skipping news for %r",
+                    "SKIP_GNEWS_PRIORITY %-50s | reason=cycle budget %d reached",
-                    MAX_NEWS_QUERIES_PER_CYCLE, market.question[:50],
+                    market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE,
                )
-        # Macro/politics/tech/events: cap confidence lower to reflect weaker signal quality
+        # Confidence cap: macro/politics/tech signals are weaker proxies
-        if is_macro or is_politics or is_tech or is_events:
+        confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90
            confidence_cap = 0.65
        else:
            confidence_cap = 0.90
-        # Compute posterior using log-odds updating.
+        # Posterior via log-odds updating
        # total_adj (BTC/F&G/dominance) is amplified ×2 because those are weak proxies.
        # news_log_adj is applied at face value — it IS a direct log-odds signal.
        log_odds_prior = math.log(prior / (1 - prior))
        total_adj = sum(adjustments)
        estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj)
        estimated_prob = max(0.05, min(0.95, estimated_prob))
-        # Compute edge
+        # ── Phase 1: edge_gross and edge_net ─────────────────────────────────
-        edge = estimated_prob - market.yes_price
+        raw_edge = estimated_prob - market.yes_price
-        direction = "BUY_YES" if edge > 0 else "BUY_NO"
+        direction = "BUY_YES" if raw_edge > 0 else "BUY_NO"
-        abs_edge = abs(edge)
+        edge_gross = abs(raw_edge)
        # NOTE: commission is modelled as a constant fraction of trade size
        # (commission / size_usdc == COMMISSION_RATE), so it can be subtracted
        # from the edge directly.
        edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
        # mid_price falls back to yes_price; live order-book data is a future enhancement
        mid_price = market.yes_price
        # Confidence based on signal agreement
        agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0))
        confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5)
        # News signal present → boost confidence by 0.10 (still capped at
        # confidence_cap); agreement with the macro signals is not checked.
        if news_log_adj != 0.0:
            confidence = min(confidence_cap, confidence + 0.10)
-        # Log evaluation result for every market
+        # ── Phase 5: structured audit log ────────────────────────────────────
-        action = "TRADE" if (abs_edge >= MIN_EDGE and confidence >= MIN_CONFIDENCE) else "SKIP"
+        passed_gross = edge_gross >= regime_min
-        skip_reason = ""
+        passed_net = edge_net >= regime_min
-        if action == "SKIP":
+        can_trade = passed_net and confidence >= MIN_CONFIDENCE
            reasons = []
            if abs_edge < MIN_EDGE:
                reasons.append(f"edge={abs_edge:.3f}<{MIN_EDGE}")
            if confidence < MIN_CONFIDENCE:
                reasons.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}")
            skip_reason = " | reason=" + ",".join(reasons)
        if not can_trade:
            skip_parts: list[str] = []
            if not passed_gross:
                skip_parts.append(f"edge_gross={edge_gross:.3f}<{regime_min:.2f}(regime)")
            elif not passed_net:
                skip_parts.append(
                    f"edge_net={edge_net:.3f}<{regime_min:.2f}(regime) "
                    f"[gross={edge_gross:.3f} pass]"
                )
            if confidence < MIN_CONFIDENCE:
                skip_parts.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}")
            log.info(
-            "%-5s %-50s | cat=%-12s | prior=%.3f | est=%.3f | edge=%+.3f | conf=%.2f | dir=%-8s | signals=%s%s",
+                "SKIP_EDGE_NET      %-50s | cat=%-12s | family=%-28s | "
-            action,
+                "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
-            market.question[:50],
+                "regime=%.2f | days=%d | conf=%.2f | signals=%s | %s",
-            category,
+                market.question[:50], category, family,
-            prior,
+                prior, estimated_prob, edge_gross, edge_net,
-            estimated_prob,
+                regime_min, days, confidence,
            edge,
            confidence,
            direction,
                ", ".join(sources[1:]) or "none",
-            skip_reason,
+                " | ".join(skip_parts),
            )
        # Filter: only trade if edge and confidence thresholds met
        if abs_edge < MIN_EDGE or confidence < MIN_CONFIDENCE:
            return None
        reasoning = (
            f"Prior=poly({prior:.3f}) → estimate={estimated_prob:.3f} | "
            f"Poly price={market.yes_price:.3f} | "
-            f"Edge={edge:+.3f} | "
+            f"edge_gross={edge_gross:+.3f} | edge_net={edge_net:+.3f} | "
            f"regime_min={regime_min:.2f} | days={days} | "
            f"family={family} | "
            f"Direction={direction} | "
-            f"Signals: {', '.join(sources[1:])}"  # skip the prior label already shown
+            f"Signals: {', '.join(sources[1:])}"
        )
        log.info(
            "TRADE              %-50s | cat=%-12s | family=%-28s | "
            "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
            "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | signals=%s",
            market.question[:50], category, family,
            prior, estimated_prob, edge_gross, edge_net,
            regime_min, days, confidence, direction,
            ", ".join(sources[1:]) or "none",
        )
        self._signal_count += 1
@@ -288,11 +426,22 @@ class BayesianStrategy:
            question=market.question,
            polymarket_price=market.yes_price,
            estimated_prob=estimated_prob,
-            edge=abs_edge,
+            edge=edge_gross,             # backward compat — same as edge_gross
            confidence=confidence,
            direction=direction,
            reasoning=reasoning,
            sources=sources,
            # Phase 1 new fields
            edge_gross=edge_gross,
            edge_net=edge_net,
            prior_prob=prior,
            final_prob=estimated_prob,
            mid_price=mid_price,
            spread_estimate=SPREAD_ESTIMATE,
            # Phase 2 new fields
            family_key=family,
            # Phase 4 new fields
            regime_min_edge=regime_min,
        )