polymarket-bot/bot/strategy/bayesian.py

"""
Bayesian Market Making Strategy.

Core idea:
1. Compute a prior probability for a market outcome using external data
2. Compare with Polymarket's current price
3. If the divergence exceeds the threshold and confidence is high enough → generate a signal

For crypto markets: if BTC is up 5% and fear/greed is 75 (greed),
a market asking "Will BTC be above $X?" should be priced higher than
Polymarket might reflect in a slow-moving order book.
"""
import logging
import math
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Optional, TYPE_CHECKING

from bot.data.polymarket import Market, market_family_key
from bot.data.external import ExternalSignals

if TYPE_CHECKING:
    from bot.data.news import NewsClient

log = logging.getLogger(__name__)

# ─────────────────────────────────────────────────────────────────────────────
# Cost constants (Phase 1 — heuristics, not exact Polymarket exchange costs)
# ─────────────────────────────────────────────────────────────────────────────
# SPREAD_ESTIMATE: approximate half-spread for medium-liquidity Polymarket
#   markets, expressed in probability points (0.02 = 2 cents on a $1 share).
#   Real spread varies by market and time; 0.02 is a conservative starting
#   estimate.  Replace with live order-book data when available.
#   Also used as the default of TradingSignal.spread_estimate.
SPREAD_ESTIMATE: float = 0.02

# COMMISSION_RATE: Polymarket taker fee approximation.  Per the original
#   author's note the CLOB fee is currently 0% but was 2% historically; 2% is
#   kept as a conservative buffer against future fee changes and exchange
#   rate effects.
COMMISSION_RATE: float = 0.02

# Combined cost floor deducted from edge_gross to get edge_net:
#   edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
# NOTE: BayesianStrategy.evaluate() subtracts the two components individually
# rather than reading this constant, so the two must be kept in sync by hand.
TOTAL_COST_RATE: float = SPREAD_ESTIMATE + COMMISSION_RATE  # 0.04

# ─────────────────────────────────────────────────────────────────────────────
# Other strategy constants
# ─────────────────────────────────────────────────────────────────────────────
MIN_CONFIDENCE = 0.55   # Minimum confidence (0-1) required to emit a signal

# Log-odds weight applied to the GNews sentiment score (range ±1.0).
# A weight of 1.5 means a fully negative/positive signal shifts log-odds by ±1.5,
# which moves a 50% prior to ~18%/82% — strong but not overwhelming.
NEWS_LOGODDS_WEIGHT = 1.5

# GNews free tier: 100 req/day.  We limit to 5 queries per trading cycle
# (politics markets only) and rely on a 6 h cache to stay within budget.
# The cap is enforced inside BayesianStrategy.evaluate().
MAX_NEWS_QUERIES_PER_CYCLE = 5


# ─────────────────────────────────────────────────────────────────────────────
# Phase 4 — Regime-based minimum edge (uses edge_NET, not edge_gross)
# ─────────────────────────────────────────────────────────────────────────────

def _regime_min_edge(category: str, days_to_resolution: int) -> float:
    """
    Return the minimum edge_net required to execute a trade.

    Thresholds are higher for far-future politics markets (less signal, more
    noise) and lower for near-term politics (time pressure makes any edge
    actionable).  Tech/crypto use a flat threshold.

    category              | days_to_resolution | min_edge_net
    ──────────────────────┼────────────────────┼─────────────
    politics              | > 60 d             | 0.12
    politics              | 30–60 d            | 0.10
    politics              | < 30 d             | 0.08
    tech / crypto/finance | any                | 0.10
    other / unknown       | any                | 0.10
    """
    if category == "politics":
        if days_to_resolution > 60:
            return 0.12
        if days_to_resolution > 30:
            return 0.10
        return 0.08
    return 0.10  # tech, crypto/finance, events, default


def _days_to_resolution(end_date: str) -> int:
    """Return calendar days until market resolution, or 30 if unknown."""
    if not end_date:
        return 30  # conservative: treat as medium-term
    try:
        dt = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        days = (dt - datetime.now(timezone.utc)).days
        return max(0, days)
    except (ValueError, TypeError):
        return 30


# ─────────────────────────────────────────────────────────────────────────────
# Phase 3 — GNews priority scoring
# ─────────────────────────────────────────────────────────────────────────────

def gnews_priority(market: Market, news: "NewsClient") -> float:
    """
    Rank a market for GNews budget allocation (larger score = query sooner).

    The score is the product of three factors:

      uncertainty  — 1.0 when the clamped YES price is 0.50, decreasing
                     linearly with distance from 0.50 (the price is clamped
                     to [0.05, 0.95] first, so the minimum is about 0.10)
      volume_score — market.volume_24h / 10_000, capped at 1.0
      freshness    — whatever news.get_freshness(market.question) returns

    Only the market's price, 24h volume and question are consulted.
    """
    yes_capped = min(0.95, market.yes_price)
    prior = max(0.05, yes_capped)

    distance_from_even = abs(prior - 0.5)
    uncertainty = 1.0 - distance_from_even * 2

    volume_ratio = market.volume_24h / 10_000
    volume_score = min(volume_ratio, 1.0)

    freshness = news.get_freshness(market.question)
    return uncertainty * volume_score * freshness


# ─────────────────────────────────────────────────────────────────────────────
# Signal and strategy classes
# ─────────────────────────────────────────────────────────────────────────────

@dataclass
class TradingSignal:
    """
    Trade recommendation produced by BayesianStrategy.evaluate().

    The first nine fields are required.  The later fields all have defaults,
    so callers that construct a signal with only the original nine arguments
    keep working.
    """
    market_id: str
    question: str
    polymarket_price: float     # Current market price for YES (0-1)
    estimated_prob: float       # Our Bayesian estimate (0-1)
    edge: float                 # Kept for backward compat — equals edge_gross
    confidence: float           # How confident we are (0-1)
    direction: str              # "BUY_YES" | "BUY_NO"
    reasoning: str              # Human-readable explanation for logging
    sources: list[str]          # Data sources used
    # ── Phase 1: net edge ────────────────────────────────────────────────────
    edge_gross: float = 0.0         # |estimated_prob - polymarket_price|
    edge_net: float = 0.0           # edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
    prior_prob: float = 0.0         # market.yes_price clamped to [0.05, 0.95]
    final_prob: float = 0.0         # estimated_prob (explicit alias)
    # mid_price: intended to be (bid+ask)/2 from the order book.  Order-book
    # fetching is a future enhancement; evaluate() currently stores
    # market.yes_price here.
    # NOTE(review): the original comment called yes_price "conservative
    # (already the ask side)" — confirm against the data source.
    mid_price: float = 0.0
    spread_estimate: float = SPREAD_ESTIMATE
    # ── Phase 2: market families ─────────────────────────────────────────────
    family_key: str = ""            # value returned by market_family_key(market)
    # ── Phase 4: regime ──────────────────────────────────────────────────────
    regime_min_edge: float = 0.10   # edge_net threshold that applied to this market


class BayesianStrategy:
    """
    Estimates true probability using external signals and Bayesian updating.

    Prior: Polymarket's current YES price (market consensus — not 0.5)
    Likelihood updates from:
    - 24h price momentum (BTC, ETH or total market cap, chosen by market type)
    - Fear & Greed index
    - BTC dominance (ETH / altcoin / general-crypto markets only)
    - GNews sentiment (politics only, capped at MAX_NEWS_QUERIES_PER_CYCLE)

    Execution gate (Phase 1 + 4):
    - Compute edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
    - Only trade when edge_net >= regime_min_edge(category, days_to_resolution)
      and confidence >= MIN_CONFIDENCE

    Family deduplication (Phase 2):
    - Caller passes occupied_families; evaluate() skips any market whose
      family key is in that set and logs SKIP_FAMILY.

    GNews prioritisation (Phase 3):
    - Caller is expected to pre-sort politics markets by gnews_priority()
      (desc) so the highest-value markets consume the GNews budget first.
    - Within evaluate(), the per-cycle cap is enforced.
    """

    # Ticker symbols are matched as whole words: a plain substring test would
    # read "whether" as ETH, "resolve" as SOL and "Canada" as ADA.  Full asset
    # names stay substring matches so inflected forms ("bitcoins") still count.
    _ALTCOIN_TICKERS = ("ltc", "bnb", "ada", "avax")
    _ALTCOIN_NAMES = ("litecoin", "cardano", "avalanche")

    def __init__(self, news: Optional["NewsClient"] = None) -> None:
        """
        Create a strategy.

        Args:
            news: Optional news client.  When None, the GNews sentiment signal
                is never queried.
        """
        self._signal_count = 0
        self._news = news
        self._news_queries_this_cycle = 0
        # Per-cycle counters — reset by reset_cycle(), read by get_cycle_stats()
        self._skip_family: int = 0
        self._skip_prior_extreme: int = 0
        self._skip_edge_net_nonpositive: int = 0   # edge_net <= 0
        self._skip_edge_net_below_regime: int = 0  # 0 < edge_net < regime_min
        self._skip_confidence: int = 0             # edge ok, confidence too low
        # (edge_gross, edge_net, regime_min) for every market that reached the
        # edge computation stage (passed prior-extreme, family, unsupported filters)
        self._evaluated_edges: list[tuple[float, float, float]] = []

    def reset_cycle(self) -> None:
        """Call once at the start of each trading cycle to reset per-cycle counters."""
        self._news_queries_this_cycle = 0
        self._skip_family = 0
        self._skip_prior_extreme = 0
        self._skip_edge_net_nonpositive = 0
        self._skip_edge_net_below_regime = 0
        self._skip_confidence = 0
        self._evaluated_edges = []

    def get_cycle_stats(self) -> dict:
        """
        Return per-cycle counters for the [CYCLE SUMMARY] log block.

        The max_* values are 0.0 when no market reached the edge computation
        stage in the current cycle.
        """
        edges = self._evaluated_edges
        all_gross = [gross for gross, _net, _regime in edges]
        all_net = [net for _gross, net, _regime in edges]
        return {
            "skip_family":                self._skip_family,
            "skip_prior_extreme":         self._skip_prior_extreme,
            "skip_edge_net_nonpositive":  self._skip_edge_net_nonpositive,
            "skip_edge_net_below_regime": self._skip_edge_net_below_regime,
            "skip_confidence":            self._skip_confidence,
            "gnews_queries_used":         self._news_queries_this_cycle,
            "max_edge_gross": max(all_gross) if all_gross else 0.0,
            "max_edge_net":   max(all_net)   if all_net   else 0.0,
            "evaluated_count": len(edges),
            "gross_gt_002": sum(1 for g in all_gross if g > 0.02),
            "gross_gt_004": sum(1 for g in all_gross if g > 0.04),
        }

    @staticmethod
    def _question_tokens(question_lower: str) -> set[str]:
        """Split a lower-cased question into its alphanumeric word tokens."""
        spaced = "".join(ch if ch.isalnum() else " " for ch in question_lower)
        return set(spaced.split())

    @staticmethod
    def _mentions(
        question_lower: str,
        tokens: set[str],
        tickers: tuple[str, ...],
        names: tuple[str, ...],
    ) -> bool:
        """Return True if any ticker is a whole word or any name is a substring."""
        return (
            any(ticker in tokens for ticker in tickers)
            or any(name in question_lower for name in names)
        )

    @staticmethod
    def _blocking_gate(
        edge_net: float,
        regime_min: float,
        confidence: float,
        min_confidence: float,
    ) -> Optional[str]:
        """
        Name the execution gate that blocks a trade, or None if it may proceed.

        The edge gate is checked first, so a market failing both gates is
        attributed to the edge.  Returns "edge_nonpositive",
        "edge_below_regime", "confidence" or None.
        """
        if not edge_net >= regime_min:
            return "edge_nonpositive" if edge_net <= 0 else "edge_below_regime"
        if not confidence >= min_confidence:
            return "confidence"
        return None

    async def evaluate(
        self,
        market: Market,
        ext: ExternalSignals,
        occupied_families: set[str],
    ) -> Optional[TradingSignal]:
        """
        Evaluate a market and return a TradingSignal if actionable.

        Returns None with a structured log line in all skip cases.
        Skip reasons (Phase 5 observability):
          SKIP_UNSUPPORTED   — no supported asset/topic keyword or category
          SKIP_NO_SIGNALS    — external data unavailable (ext.valid is falsy)
          SKIP_PRIOR_EXTREME — market.yes_price < 0.08 or > 0.92
          SKIP_FAMILY        — family key is in occupied_families
          SKIP_EDGE_NET      — edge_net < regime_min_edge
          SKIP_CONFIDENCE    — edge_net passed but confidence < MIN_CONFIDENCE

        Side effects: updates the per-cycle counters, records the computed
        edges for get_cycle_stats(), and for politics markets may consume one
        GNews query from the per-cycle budget (even if the market is later
        skipped on edge or confidence).
        """
        question_lower = market.question.lower()
        category = market.category
        tokens = self._question_tokens(question_lower)

        # ── Classify market type ─────────────────────────────────────────────
        # "at least" sits next to the legacy "atleast" spelling, which cannot
        # match a normally written question.
        # NOTE(review): these direction words are substring tests, so "over"
        # also fires on e.g. "government".  Any question without an upside
        # keyword is treated as a downside question when signing adjustments
        # below; no separate "below" keyword list is consulted.
        is_price_above = any(w in question_lower for w in [
            "above", "over", "exceed", "higher", "atleast", "at least", "reach",
        ])

        is_btc = self._mentions(question_lower, tokens, ("btc",), ("bitcoin",))
        is_eth = self._mentions(question_lower, tokens, ("eth",), ("ethereum",))
        is_sol = self._mentions(question_lower, tokens, ("sol",), ("solana",))
        is_xrp = self._mentions(question_lower, tokens, ("xrp",), ("ripple",))
        is_doge = self._mentions(question_lower, tokens, ("doge",), ("dogecoin",))
        is_altcoin = is_sol or is_xrp or is_doge or self._mentions(
            question_lower, tokens, self._ALTCOIN_TICKERS, self._ALTCOIN_NAMES,
        )
        # "defi" must be a whole word, otherwise "deficit"/"define" would match.
        is_general_crypto = self._mentions(
            question_lower, tokens,
            ("defi",),
            ("crypto", "market cap", "total market", "altcoin"),
        )
        is_macro = any(
            w in question_lower for w in [
                "nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff",
            ]
        )
        is_politics = category == "politics"
        is_tech = category == "tech"
        is_events = category == "events"

        is_any_supported = (
            is_btc or is_eth or is_altcoin or is_general_crypto or is_macro
            or is_politics or is_tech or is_events
        )
        if not is_any_supported:
            log.info(
                "SKIP_UNSUPPORTED  %-50s | cat=%r",
                market.question[:50], category,
            )
            return None

        if not ext.valid:
            log.info(
                "SKIP_NO_SIGNALS   %-50s | reason=external data unavailable",
                market.question[:50],
            )
            return None

        # ── Phase 1: prior + prior-extreme filter ────────────────────────────
        prior = max(0.05, min(0.95, market.yes_price))

        if market.yes_price < 0.08:
            self._skip_prior_extreme += 1
            log.info(
                "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior<0.08",
                market.question[:50], category, market.yes_price,
            )
            return None
        if market.yes_price > 0.92:
            self._skip_prior_extreme += 1
            log.info(
                "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior>0.92",
                market.question[:50], category, market.yes_price,
            )
            return None

        # ── Phase 2: family deduplication ────────────────────────────────────
        family = market_family_key(market)
        if family in occupied_families:
            self._skip_family += 1
            log.info(
                "SKIP_FAMILY        %-50s | cat=%-12s | family=%s",
                market.question[:50], category, family,
            )
            return None

        # ── Phase 4: regime min-edge ─────────────────────────────────────────
        days = _days_to_resolution(market.end_date)
        regime_min = _regime_min_edge(category, days)

        # ── Bayesian probability estimation ──────────────────────────────────
        # sources[0] is always the prior; log lines print sources[1:] only.
        sources: list[str] = [f"Prior=poly({prior:.3f})"]
        adjustments: list[float] = []

        # Signal 1: price momentum (asset-specific or BTC as sentiment proxy)
        if is_btc:
            momentum = ext.btc_change_24h
            asset_label = "BTC"
        elif is_eth:
            momentum = ext.eth_change_24h
            asset_label = "ETH"
        elif is_politics or is_tech or is_events:
            momentum = ext.btc_change_24h
            asset_label = "BTC(sentiment)"
        else:
            momentum = ext.total_market_cap_change
            asset_label = "total mktcap"

        # Moves of 2% or less are ignored as noise; larger moves are squashed
        # with tanh so the adjustment saturates at ±0.15.
        if abs(momentum) > 2:
            momentum_adj = math.tanh(momentum / 20) * 0.15
            if is_politics or is_tech or is_events:
                # BTC is only a sentiment proxy for these categories: halve it.
                momentum_adj *= 0.5
            adjustments.append(momentum_adj if is_price_above else -momentum_adj)
            sources.append(f"{asset_label} 24h: {momentum:+.1f}%")

        # Signal 2: Fear & Greed (always contributes exactly one adjustment)
        fg = ext.fear_greed_index
        if fg > 70:
            fg_adj = 0.06
            sources.append(f"Fear&Greed: {fg} (greed)")
        elif fg < 30:
            fg_adj = -0.06
            sources.append(f"Fear&Greed: {fg} (fear)")
        else:
            fg_adj = (fg - 50) / 50 * 0.04
            sources.append(f"Fear&Greed: {fg} (neutral)")
        adjustments.append(fg_adj if is_price_above else -fg_adj)

        # Signal 3: BTC dominance — hurts altcoins when high
        if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
            adjustments.append(-0.03 if is_price_above else 0.03)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)")
        elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45:
            adjustments.append(0.03 if is_price_above else -0.03)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")

        # Signal 4: GNews sentiment (politics only, budget-gated)
        # Phase 3: caller is expected to pre-sort markets by gnews_priority()
        # so the highest-value markets reach this block first.
        news_log_adj = 0.0
        if is_politics and self._news is not None:
            if self._news_queries_this_cycle < MAX_NEWS_QUERIES_PER_CYCLE:
                # Count the query before awaiting it so the budget is spent
                # whether or not the sentiment turns out to be usable.
                self._news_queries_this_cycle += 1
                sentiment = await self._news.get_sentiment(market.question)
                if abs(sentiment) > 0.05:
                    news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT
                    sources.append(f"GNews: {sentiment:+.2f}")
            else:
                log.info(
                    "SKIP_GNEWS_PRIORITY %-50s | reason=cycle budget %d reached",
                    market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE,
                )

        # Confidence cap: macro/politics/tech signals are weaker proxies
        confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90

        # Posterior via log-odds updating.  The summed probability-scale
        # adjustments are doubled to act as a log-odds shift; the news term is
        # already expressed in log-odds.
        log_odds_prior = math.log(prior / (1 - prior))
        total_adj = sum(adjustments)
        estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj)
        estimated_prob = max(0.05, min(0.95, estimated_prob))

        # ── Phase 1: edge_gross and edge_net ─────────────────────────────────
        raw_edge = estimated_prob - market.yes_price
        direction = "BUY_YES" if raw_edge > 0 else "BUY_NO"
        edge_gross = abs(raw_edge)
        # Costs are modelled as a constant fraction, independent of order size.
        edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
        # mid_price falls back to yes_price; live order-book data is a future enhancement
        mid_price = market.yes_price

        # Record for cycle summary — every market that reached edge computation
        self._evaluated_edges.append((edge_gross, edge_net, regime_min))

        # Confidence based on signal agreement: the share of adjustments whose
        # sign matches the sign of their sum, mapped onto [0.4, 0.9] and capped.
        agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0))
        confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5)
        if news_log_adj != 0.0:
            confidence = min(confidence_cap, confidence + 0.10)

        # ── Phase 5: structured audit log ────────────────────────────────────
        passed_gross = edge_gross >= regime_min
        passed_net = edge_net >= regime_min
        gate = self._blocking_gate(edge_net, regime_min, confidence, MIN_CONFIDENCE)

        if gate is not None:
            # Attribute the skip to the gate that actually blocked it, so a
            # confidence-only skip no longer inflates the edge-net counters.
            if gate == "confidence":
                self._skip_confidence += 1
                skip_tag = "SKIP_CONFIDENCE"
            elif gate == "edge_nonpositive":
                self._skip_edge_net_nonpositive += 1
                skip_tag = "SKIP_EDGE_NET"
            else:
                self._skip_edge_net_below_regime += 1
                skip_tag = "SKIP_EDGE_NET"

            skip_parts: list[str] = []
            if not passed_gross:
                skip_parts.append(f"edge_gross={edge_gross:.3f}<{regime_min:.2f}(regime)")
            elif not passed_net:
                skip_parts.append(
                    f"edge_net={edge_net:.3f}<{regime_min:.2f}(regime) "
                    f"[gross={edge_gross:.3f} pass]"
                )
            if confidence < MIN_CONFIDENCE:
                skip_parts.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}")
            # "%-18s " pads the tag to the same 19-column prefix the other
            # audit lines use.
            log.info(
                "%-18s %-50s | cat=%-12s | family=%-28s | "
                "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
                "regime=%.2f | days=%d | conf=%.2f | signals=%s | %s",
                skip_tag, market.question[:50], category, family,
                prior, estimated_prob, edge_gross, edge_net,
                regime_min, days, confidence,
                ", ".join(sources[1:]) or "none",
                " | ".join(skip_parts),
            )
            return None

        reasoning = (
            f"Prior=poly({prior:.3f}) → estimate={estimated_prob:.3f} | "
            f"Poly price={market.yes_price:.3f} | "
            f"edge_gross={edge_gross:+.3f} | edge_net={edge_net:+.3f} | "
            f"regime_min={regime_min:.2f} | days={days} | "
            f"family={family} | "
            f"Direction={direction} | "
            f"Signals: {', '.join(sources[1:])}"
        )

        log.info(
            "TRADE              %-50s | cat=%-12s | family=%-28s | "
            "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
            "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | signals=%s",
            market.question[:50], category, family,
            prior, estimated_prob, edge_gross, edge_net,
            regime_min, days, confidence, direction,
            ", ".join(sources[1:]) or "none",
        )

        self._signal_count += 1
        return TradingSignal(
            market_id=market.id,
            question=market.question,
            polymarket_price=market.yes_price,
            estimated_prob=estimated_prob,
            edge=edge_gross,             # backward compat — same as edge_gross
            confidence=confidence,
            direction=direction,
            reasoning=reasoning,
            sources=sources,
            # Phase 1 new fields
            edge_gross=edge_gross,
            edge_net=edge_net,
            prior_prob=prior,
            final_prob=estimated_prob,
            mid_price=mid_price,
            spread_estimate=SPREAD_ESTIMATE,
            # Phase 2 new fields
            family_key=family,
            # Phase 4 new fields
            regime_min_edge=regime_min,
        )


def _sigmoid(x: float) -> float:
    return 1 / (1 + math.exp(-x))