# polymarket-bot/api/main.py

"""
FastAPI Backend — serves metrics and trade data to the React dashboard.
"""
import asyncio
from contextlib import asynccontextmanager
from datetime import datetime, timezone
import os
import re

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from bot.data.db import Database

# Reasoning-string layout written from Phase 6 onward: every captured value is
# already expressed in log-odds space. One capture group per feature, in the
# order fg, mom, news, mfld, btc_dom. Example:
# "fg_lo=+0.1200 mom_lo=+0.0000 news_lo=+0.0000 mfld_lo=-0.7483 btc_dom_lo=+0.0000"
_FEAT_RE_LO = re.compile(
    r"fg_lo=([+-]?[\d.]+).*?"
    r"mom_lo=([+-]?[\d.]+).*?"
    r"news_lo=([+-]?[\d.]+).*?"
    r"mfld_lo=([+-]?[\d.]+).*?"
    r"btc_dom_lo=([+-]?[\d.]+)"
)

# Reasoning-string layout used before Phase 6: raw probability-delta values
# (fg and mom must be multiplied by 2 to obtain log-odds). There is no btc_dom
# field in this layout. Example:
# "fg=+0.0600 mom=+0.0000 news=+0.0000 mfld=-0.7483"
_FEAT_RE_RAW = re.compile(
    r"fg=([+-]?[\d.]+).*?"
    r"mom=([+-]?[\d.]+).*?"
    r"news=([+-]?[\d.]+).*?"
    r"mfld=([+-]?[\d.]+)"
)


def _dominant_feature(sc: dict | None) -> str | None:
    """Return the name of the signal_components key with the largest abs log-odds value.

    Returns None if signal_components is absent or all features are below threshold.
    Threshold 0.0001 matches the "triggered" definition used in /api/metrics/features.
    """
    if not sc:
        return None
    candidates = {
        k: abs(v)
        for k, v in sc.items()
        if k != "unit" and v is not None and abs(v) > 0.0001
    }
    if not candidates:
        return None
    return max(candidates, key=candidates.__getitem__)


def _enrich_trade(trade: dict) -> dict:
    """Attach days_open, signal_components and dominant_feature to *trade*.

    The dict is modified in place and the same object is returned.

    days_open
        Age in days (one decimal) from the trade's "timestamp" to the current
        UTC time. A timestamp without tzinfo is treated as UTC. None when the
        trade has no timestamp.
    signal_components
        Feature contributions in log-odds. Taken from the feat_*_lo columns
        when feat_fg_lo is set; otherwise parsed from the "reasoning" string,
        trying the Phase 6 layout first and the pre-Phase-6 layout second.
        None when neither source yields values.
    dominant_feature
        Result of _dominant_feature() on signal_components.
    """
    opened_at = trade.get("timestamp")
    if opened_at is None:
        trade["days_open"] = None
    else:
        current = datetime.now(timezone.utc)
        if getattr(opened_at, "tzinfo", None) is None:
            opened_at = opened_at.replace(tzinfo=timezone.utc)
        age_seconds = (current - opened_at).total_seconds()
        trade["days_open"] = round(age_seconds / 86400, 1)

    components = None
    if trade.get("feat_fg_lo") is not None:
        # DB columns (Phase 6+) are authoritative: exact values, no parsing.
        components = {
            "unit": "log_odds",
            "fg": trade["feat_fg_lo"],
            "mom": trade["feat_mom_lo"],
            "news": trade["feat_news_lo"],
            "mfld": trade["feat_mfld_lo"],
            "btc_dom": trade.get("feat_btc_dom_lo"),
        }
    else:
        # Older rows carry the numbers only inside the reasoning text.
        text = trade.get("reasoning") or ""
        found = _FEAT_RE_LO.search(text)
        if found is not None:
            fg, mom, news, mfld, btc_dom = (float(g) for g in found.groups())
            components = {
                "unit": "log_odds",
                "fg": fg,
                "mom": mom,
                "news": news,
                "mfld": mfld,
                "btc_dom": btc_dom,
            }
        else:
            found = _FEAT_RE_RAW.search(text)
            if found is not None:
                fg, mom, news, mfld = (float(g) for g in found.groups())
                # Pre-Phase-6 fg/mom are raw probability-deltas; doubling
                # them converts to log-odds. news/mfld are used as-is.
                components = {
                    "unit": "log_odds",
                    "fg": fg * 2,
                    "mom": mom * 2,
                    "news": news,
                    "mfld": mfld,
                    "btc_dom": None,
                }

    trade["signal_components"] = components
    trade["dominant_feature"] = _dominant_feature(components)
    return trade

# Module-level database handle shared by every endpoint below; it is connected
# and disconnected by the lifespan handler.
db = Database()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Hold the shared database connection open while the application runs.

    Connects before control is handed to the application and disconnects once
    control returns. The disconnect sits in a ``finally`` so the connection is
    released even if an exception or cancellation is raised into this
    generator at the ``yield``.
    """
    await db.connect()
    try:
        yield
    finally:
        await db.disconnect()


# Application object; the lifespan handler is registered here.
app = FastAPI(title="Polymarket Bot API", lifespan=lifespan)

# CORS policy: any origin and any request header, but GET requests only.
# NOTE(review): the wildcard origin is presumably for the React dashboard —
# confirm it is acceptable for the deployed environment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET"],
    allow_headers=["*"],
)


@app.get("/health")
async def health():
    """Report a fixed "ok" status plus the raw PAPER_MODE environment value.

    paper_mode is returned as the environment string itself ("true" when the
    variable is unset), not as a boolean.
    """
    paper_mode = os.getenv("PAPER_MODE", "true")
    return {"status": "ok", "paper_mode": paper_mode}


@app.get("/api/metrics")
async def get_metrics():
    """Return metrics history (requested with days=42) and its first row.

    "latest" is the first row of the history as returned by the DB layer, or
    None when there is no history.
    """
    rows = await db.get_metrics_history(days=42)
    if rows:
        return {"history": rows, "latest": rows[0]}
    return {"history": [], "latest": None}


@app.get("/api/trades")
async def get_trades(limit: int = 50, status: str = "open"):
    """
    status: "open" (default) | "closed" | "all"; any other value is treated as "open".
    Only the "open" view is enriched: each trade gains days_open,
    signal_components {unit, fg, mom, news, mfld, btc_dom} and dominant_feature.
    The "closed" and "all" views return trades as the DB layer provides them.
    """
    if status not in {"open", "closed", "all"}:
        status = "open"

    # The DB layer takes None to mean "no status filter".
    if status == "all":
        filter_status = None
    else:
        filter_status = status

    rows = await db.get_recent_trades(limit=limit, status=filter_status)
    if filter_status == "open":
        rows = list(map(_enrich_trade, rows))

    return {"trades": rows, "count": len(rows), "status_filter": status}


@app.get("/api/metrics/features")
async def get_feature_metrics():
    """Performance breakdown per signal feature, with all values in log-odds space.

    The breakdown is computed by the DB layer and returned under "features".
    Each feature key contains:
      unit                     "log_odds" (common unit for all features)
      materiality_threshold    |lo| threshold for "material" classification
      triggered_count          trades where |feat_lo| > 0.0001 (signal fired)
      material_count           trades where |feat_lo| >= threshold (moved the model)
      avg_contribution_lo      mean signed contribution (triggered trades)
      avg_abs_contribution_lo  mean absolute contribution (triggered trades)
      avg_edge_net_when_material  mean edge_net for material trades
      unrealized_pnl_est       estimated open-position PnL (triggered trades)
      realized_pnl             sum close_pnl for resolved triggered trades
      resolved_count           closed triggered trades with known outcome
      win_rate                 null if resolved_count < 5
      net_positive_count       triggered trades where feature pushed BUY direction
      net_negative_count       triggered trades where feature pushed SELL direction

    NULL values in resolved_count / win_rate are expected early in the paper run.
    """
    return {"features": await db.compute_feature_metrics_from_db()}


@app.get("/api/trades/legacy")
async def get_legacy_trades():
    """List trades with NULL edge_net (pre-Phase-1 data) together with their count.

    These trades carry no signal quality information (edge_net, final_prob)
    and are left out of unrealized_pnl_est in /api/summary. They may also lack
    feat_*_lo columns if their reasoning string predates the Phase 6 format.
    """
    legacy = await db.get_legacy_incomplete_trades()
    payload = {"trades": legacy}
    payload["count"] = len(legacy)
    return payload


@app.get("/api/metrics/attribution")
async def get_attribution():
    """Alpha attribution grouped by dominant signal feature.

    Phase 6 trades are grouped by their dominant feature — the feat_*_lo with
    the largest absolute log-odds value — to show which signal is actually
    generating opportunities.

    Each key in "attribution" contains:
      trade_count          trades where this feature was dominant
      avg_edge_net         mean net edge for those trades
      unrealized_pnl_est   open-position PnL estimate (edge_net × net_cost − fee)
      realized_pnl         sum close_pnl for resolved trades in this group
      resolved_count       closed trades with known outcome
      win_rate             null if resolved_count < 5

    "none" appears for Phase 6 trades where all feat_*_lo values are < 0.0001
    (signals fired but below trigger threshold).

    Only Phase 6 trades (feat_fg_lo IS NOT NULL) are included.
    Pre-Phase-6 trades appear in /api/trades/legacy instead.

    "total_attributed_trades" is the sum of trade_count over all groups.
    """
    by_feature = await db.compute_attribution_from_db()
    attributed = 0
    for group in by_feature.values():
        attributed += group["trade_count"]
    return {"attribution": by_feature, "total_attributed_trades": attributed}


@app.get("/api/metrics/manifold-matches")
async def get_manifold_matches():
    """Manifold match audit: summary statistics plus recent match attempts.

    summary:
      total_accepted           — matches accepted (score >= 0.40, inversion unambiguous)
      total_rejected           — matches rejected (low score or ambiguous inversion)
      total_no_results         — no Manifold market found or API error
      avg_match_score          — mean Jaccard score for accepted matches
      trades_dominated_by_mfld — open trades where feat_mfld_lo is the largest signal

    recent_matches: last 50 rows from manifold_match_audit, newest first.
      used_in_trade=True only when status='accepted' AND a trade was actually executed.

    Timestamps in recent_matches that expose isoformat() are replaced in place
    by their ISO-8601 string.
    """
    audit = await db.get_manifold_matches(limit=50)
    for row in audit["recent_matches"]:
        stamp = row.get("timestamp")
        if stamp is None or not hasattr(stamp, "isoformat"):
            continue
        row["timestamp"] = stamp.isoformat()
    return audit


@app.get("/api/summary")
async def get_summary():
    """Dashboard summary card data.

    Portfolio figures (total_trades, open_trades_count, total_deployed,
    cash_available) are computed from the DB on every request.

    NOTE: both trade lists are fetched with limit=500, so the trade counts and
    total_deployed reflect at most 500 rows each (assuming get_recent_trades
    honours its limit). Beyond that size the counts under-report, and
    closed_trades_count — a difference of two capped counts — is no longer
    reliable.

    PnL and performance metrics come from the latest metrics_daily snapshot,
    which is written by the bot every cycle via MetricsTracker.update_daily_summary().
    After Fix 3, that snapshot is also DB-computed — not dependent on pod restarts.
    """
    latest_metrics, open_trades, all_trades, inverted, legacy_count = await asyncio.gather(
        db.get_metrics_history(days=1),
        db.get_recent_trades(limit=500, status="open"),
        db.get_recent_trades(limit=500),
        db.get_recently_closed_inverted(hours=24),
        db.get_legacy_incomplete_count(),
    )

    latest = latest_metrics[0] if latest_metrics else {}
    paper_bankroll = float(os.getenv("PAPER_BANKROLL", "10000"))
    # `or 0` rather than a .get() default: the default only covers a missing
    # key, while a row whose net_cost is present but None would otherwise put
    # None into the sum and raise TypeError.
    total_deployed = sum((t.get("net_cost") or 0) for t in open_trades)

    total_count = len(all_trades)
    open_count = len(open_trades)

    # Snapshot values reused by both the payload and the promotion gate.
    sharpe_ratio = latest.get("sharpe_ratio") or 0
    win_rate = latest.get("win_rate")
    calibration_score = latest.get("calibration_score")

    return {
        # ── Portfolio state (live from DB, each list capped at 500 rows) ────
        "paper_mode":              os.getenv("PAPER_MODE", "true") == "true",
        "paper_bankroll":          paper_bankroll,
        "total_trades":            total_count,               # from DB, max 500
        "open_trades_count":       open_count,                # from DB, max 500
        "closed_trades_count":     total_count - open_count,  # difference of the two
        "total_deployed":          total_deployed,            # sum over fetched open trades
        "cash_available":          max(0.0, paper_bankroll - total_deployed),  # floored at 0
        "legacy_incomplete_count": legacy_count,              # from DB
        "reentry_guard_blocks_24h": len(inverted),            # from DB, 24h window

        # ── P&L (from latest metrics_daily snapshot) ────────────────────────
        # unrealized_pnl_est: open positions, edge_net × net_cost − fee.
        #   Estimated — uses model signal, not live price. Source: open trades.
        # realized_pnl: closed positions with known resolution.
        #   Exact — computed from (resolution − entry_price) × shares.
        # total_pnl: sum of both.
        "unrealized_pnl_est": latest.get("unrealized_pnl_est") or 0,
        "realized_pnl":       latest.get("realized_pnl") or 0,
        "total_pnl":          latest.get("total_pnl") or 0,

        # ── Performance metrics (from latest metrics_daily snapshot) ─────────
        # win_rate: fraction of resolved closed trades where close_pnl > 0.
        #   null if fewer than 5 resolved trades. Source: closed+resolved trades.
        # sharpe_ratio: 0.0 — requires daily-return time series (not yet tracked).
        # calibration_score: 1 − Brier score on resolved trades (higher = better).
        #   null if fewer than 10 resolved trades. Source: closed+resolved trades.
        "win_rate":           win_rate,                       # null if < 5 resolved
        "sharpe_ratio":       sharpe_ratio,                   # 0.0 until tracked
        "calibration_score":  calibration_score,              # null if < 10 resolved

        # ── Counters from snapshot ───────────────────────────────────────────
        "resolved_count":     latest.get("resolved_count") or 0,

        # ── Promotion gate ───────────────────────────────────────────────────
        # All thresholds must pass; null metrics count as not-ready.
        "promotion_ready": (
            sharpe_ratio >= 0.5
            and (win_rate or 0) >= 0.52
            and (calibration_score or 0) >= 0.7
            and total_count >= 50
        ),
    }