feat(bot): 5-phase strategy upgrade — edge neto, families, GNews priority, regimes
CI/CD / build-and-push (push) Successful in 2m30s

Phase 1 — Edge neto real (paper.py, bayesian.py, risk/manager.py, db.py):
- Trade records now store edge_gross, edge_net, prior_prob, final_prob,
  mid_price, spread_estimate, commission, family_key
- edge_net = edge_gross - SPREAD_ESTIMATE(0.02) - COMMISSION_RATE(0.02)
  NOTE: both constants are heuristics, not exact Polymarket exchange costs
- Execution gate changed from edge_gross > MIN_EDGE to edge_net > regime_min_edge

Phase 2 — Market families (polymarket.py):
- market_family_key(market) groups related markets:
    texas-republican-2026, fed-april-2026, openai-2026, etc.
- At most 1 trade per family per cycle; occupied_families propagated via main.py
- Family key logged on every TRADE and SKIP line

Phase 3 — GNews priority (news.py, bayesian.py, main.py):
- NewsClient.get_freshness() returns 1.0/0.75/0.40/0.10 by cache age
- gnews_priority(market, news) = uncertainty × volume_score × freshness
- Politics markets sorted by priority DESC before eval so best markets get
  the 5-query/cycle GNews budget first

Phase 4 — Regime min-edge by category/horizon (bayesian.py):
- politics >60d → 0.12, 30-60d → 0.10, <30d → 0.08
- tech / crypto/finance → 0.10
- All thresholds applied to edge_net (not edge_gross)

Phase 5 — Observability (bayesian.py, main.py):
- Structured skip labels: SKIP_UNSUPPORTED, SKIP_NO_SIGNALS,
  SKIP_PRIOR_EXTREME, SKIP_FAMILY, SKIP_GNEWS_PRIORITY, SKIP_EDGE_NET
- TRADE lines now include family_key, edge_gross, edge_net, regime_min, days
- schema.sql: 8 new cols on trades, 7 new cols on signals (via ALTER TABLE ... ADD COLUMN IF NOT EXISTS)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
chemavx
2026-04-16 15:34:46 +00:00
parent a0cbdc0256
commit 63d9f637ff
8 changed files with 620 additions and 141 deletions
+22 -2
View File
@@ -33,13 +33,21 @@ class Database:
await conn.execute("""
INSERT INTO trades (
id, market_id, question, direction, size_usdc,
entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12)
entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
edge_gross, edge_net, prior_prob, final_prob,
mid_price, spread_estimate, commission, family_key
) VALUES (
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
$13,$14,$15,$16,$17,$18,$19,$20
)
ON CONFLICT (id) DO NOTHING
""",
trade.id, trade.market_id, trade.question, trade.direction,
trade.size_usdc, trade.entry_price, trade.shares, trade.fee_usdc,
trade.net_cost, trade.timestamp, trade.reasoning, trade.paper,
# Phase 1 fields
trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
)
async def save_daily_metrics(self, metrics: dict) -> None:
@@ -69,6 +77,18 @@ class Database:
)
return {r["market_id"]: float(r["total"]) for r in rows}
async def get_open_families(self) -> set[str]:
    """Collect the distinct, non-empty ``family_key`` values stored in ``trades``.

    Meant to be called at startup so the in-memory set of occupied families
    can be rebuilt from the database and family de-duplication keeps working
    across pod restarts.

    NOTE(review): the query has no open/closed filter, so every trade that
    carries a family_key contributes to the result, not only open
    positions. Confirm this is the intended behaviour.

    Returns:
        A set of family-key strings; NULL and empty keys are excluded.
    """
    async with self._pool.acquire() as conn:
        records = await conn.fetch(
            "SELECT DISTINCT family_key FROM trades WHERE family_key IS NOT NULL"
        )

    families: set[str] = set()
    for record in records:
        key = record["family_key"]
        # The SQL already drops NULLs; this also drops empty strings.
        if key:
            families.add(key)
    return families
async def get_recent_trades(self, limit: int = 100) -> list[dict]:
async with self._pool.acquire() as conn:
rows = await conn.fetch(
+26
View File
@@ -155,6 +155,32 @@ class NewsClient:
async def close(self) -> None:
    """Shut down the wrapped client by awaiting its ``aclose()`` coroutine."""
    client = self._client
    await client.aclose()
def get_freshness(self, question: str) -> float:
    """
    Score how much a new GNews query for *question* is worth, in [0.1, 1.0].

    The score is used by the GNews priority calculation and depends on the
    age of the cache entry stored under the lower-cased query built from
    *question*:

        1.00 — no cache entry for this query (highest priority)
        0.75 — entry is more than 6 h old
        0.40 — entry is more than 2 h and at most 6 h old
        0.10 — entry is at most 2 h old (lowest re-query value)

    When no API key is configured the result is always 1.0, so a missing
    key never lowers a market's priority score.
    """
    if self._api_key:
        entry = self._cache.get(self._build_query(question).lower())
        if entry is not None:
            # Assumes entry[0] is the time.monotonic() stamp taken when the
            # entry was cached (verify against the cache writer).
            elapsed = time.monotonic() - entry[0]
            # Thresholds are checked from oldest to newest.
            for min_age_hours, score in ((6, 0.75), (2, 0.40)):
                if elapsed > min_age_hours * 3600:
                    return score
            return 0.10
    return 1.0
# ------------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------------
+153
View File
@@ -5,6 +5,7 @@ Docs: https://docs.polymarket.com
import asyncio
import logging
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone, timedelta
from typing import Optional
@@ -15,6 +16,158 @@ log = logging.getLogger(__name__)
# Base URLs of the two Polymarket HTTP endpoints referenced by this module.
POLYMARKET_API = "https://clob.polymarket.com"
GAMMA_API = "https://gamma-api.polymarket.com"
# ─────────────────────────────────────────────────────────────────────────────
# Phase 2 — Market family classification helpers
# Used by market_family_key() below.
# ─────────────────────────────────────────────────────────────────────────────
# A four-digit year in the range 2020–2039, as a whole word.
_YEAR_RE = re.compile(r"\b(202\d|203\d)\b")
# A full English month name (case-insensitive). Note that "may" and "march"
# also match when they are used as ordinary words.
_MONTH_RE = re.compile(
r"\b(january|february|march|april|may|june|july|august|"
r"september|october|november|december)\b",
re.IGNORECASE,
)
# Phrases that mark a question as a Fed / interest-rate market. There is no
# trailing \b, so plural forms such as "interest rates" or "basis points"
# match as well.
_FED_TRIGGER_RE = re.compile(
r"\b(federal reserve|interest rate|bps|basis point|fed\s+(rate|meeting|decision))",
re.IGNORECASE,
)
# Any of the 50 US state names (case-insensitive); multi-word names tolerate
# arbitrary whitespace between the words.
_US_STATE_RE = re.compile(
r"\b(Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|"
r"Delaware|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|"
r"Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|"
r"Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|"
r"New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|"
r"Ohio|Oklahoma|Oregon|Pennsylvania|Rhode\s+Island|South\s+Carolina|"
r"South\s+Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Washington|"
r"West\s+Virginia|Wisconsin|Wyoming)\b",
re.IGNORECASE,
)
# A US party mention: Republican, Democrat(s), Democratic or GOP.
_PARTY_RE = re.compile(r"\b(Republican|Democrats?|Democratic|GOP)\b", re.IGNORECASE)
# Words that identify the kind of election a question is about.
_ELECTION_TYPE_RE = re.compile(
r"\b(presidential|president|mayoral|mayor|gubernatorial|governor|"
r"senate|congress(?:ional)?|primary|election)\b",
re.IGNORECASE,
)
# Ordered list of (pattern, place_slug) for named cities and countries; the
# first pattern that matches wins in market_family_key(). The entries are
# mostly non-US, although Los Angeles is included. The list is consulted only
# after the "US state + party" rule has failed to match. Patterns without a
# trailing \b (Colomb, Chungcheong) deliberately match longer words that start
# with that prefix, e.g. "Colombia" and "Colombian".
_NAMED_PLACES: list[tuple[re.Pattern, str]] = [
(re.compile(r"\bColomb", re.IGNORECASE), "colombia"),
(re.compile(r"\bSeoul\b", re.IGNORECASE), "seoul"),
(re.compile(r"\bBusan\b", re.IGNORECASE), "busan"),
(re.compile(r"\bGyeonggi\b", re.IGNORECASE), "gyeonggi"),
(re.compile(r"\bChungcheong", re.IGNORECASE), "chungcheong"),
(re.compile(r"\bSouth\s+Korean?\b", re.IGNORECASE), "south-korea"),
(re.compile(r"\bLos\s+Angeles\b", re.IGNORECASE), "los-angeles"),
(re.compile(r"\bCuba\b", re.IGNORECASE), "cuba"),
(re.compile(r"\bLebanon\b", re.IGNORECASE), "lebanon"),
(re.compile(r"\bIsrael\b", re.IGNORECASE), "israel"),
(re.compile(r"\bUkraine\b", re.IGNORECASE), "ukraine"),
(re.compile(r"\bRussia\b", re.IGNORECASE), "russia"),
]
# Ordered list of (pattern, company_slug) for tech/company markets; the first
# pattern that matches wins in market_family_key().
_NAMED_COMPANIES: list[tuple[re.Pattern, str]] = [
(re.compile(r"\bopenai\b", re.IGNORECASE), "openai"),
(re.compile(r"\banthropic\b", re.IGNORECASE), "anthropic"),
(re.compile(r"\bnvidia\b", re.IGNORECASE), "nvidia"),
(re.compile(r"\bapple\b", re.IGNORECASE), "apple"),
(re.compile(r"\bmicrosoft\b", re.IGNORECASE), "microsoft"),
(re.compile(r"\bgoogle\b", re.IGNORECASE), "google"),
(re.compile(r"\btesla\b", re.IGNORECASE), "tesla"),
# \bmeta\b does NOT match MetaMask (no word boundary mid-compound-word)
(re.compile(r"\bmeta\b", re.IGNORECASE), "meta"),
]
def _end_month(market: "Market") -> str:
    """Format ``market.end_date`` as ``YYYY-MM``.

    Returns an empty string when the end date is missing/empty or cannot be
    parsed as an ISO-8601 timestamp.
    """
    end_date = market.end_date
    if end_date:
        try:
            # A trailing "Z" is rewritten as an explicit UTC offset before
            # parsing.
            parsed = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
            return parsed.strftime("%Y-%m")
        except (ValueError, TypeError):
            pass
    return ""
def market_family_key(market: "Market") -> str:
    """
    Return a stable slug that groups related markets together.

    Markets in the same family share an underlying event (same election,
    same Fed meeting decision, same company). The bot allows at most one
    open position per family per cycle to avoid correlated exposure.

    Only ``market.question``, ``market.category`` and ``market.end_date``
    (through ``_end_month``) are read.

    The year component is the first 2020–2039 year found in the question;
    when the question has none, the year of ``end_date`` is used, and
    ``"unknown"`` when that is unavailable too.

    Priority order (first match wins):
      1. Fed / interest-rate wording        → fed-{month}-{year}, or
                                              fed-{year} if no month is named
      2. US state AND party both mentioned  → {state}-{party}-{year}
      3. Named place from _NAMED_PLACES     → {place}-{event_type}-{year}
                                              (event_type is "event" when no
                                              election word is present)
      4. Named tech company                 → {company}-{year}
      5. Fallback                           → {category}-{end_YYYY-MM}, or
                                              {category}-{year} when end_date
                                              is unusable; category defaults
                                              to "misc"

    Examples:
      "Will Ken Paxton win the 2026 Texas Republican Primary"
        → texas-republican-2026
      "Will the Fed decrease rates by 25 bps after April 2026 meeting"
        → fed-april-2026
      "Will OpenAI IPO by December 31 2026?"
        → openai-2026
    """
    q = market.question

    # Prefer year from question text; fall back to end_date year if absent
    year_m = _YEAR_RE.search(q)
    if year_m:
        year = year_m.group(1)
    else:
        end_m = _end_month(market)  # e.g. "2026-06"
        year = end_m[:4] if end_m else "unknown"

    # 1. Fed / interest-rate meeting
    if _FED_TRIGGER_RE.search(q):
        month_m = _MONTH_RE.search(q)
        if month_m:
            return f"fed-{month_m.group(1).lower()}-{year}"
        return f"fed-{year}"

    # 2. US state + party (primary, senate, governor, etc.)
    state_m = _US_STATE_RE.search(q)
    party_m = _PARTY_RE.search(q)
    if state_m and party_m:
        state = re.sub(r"\s+", "-", state_m.group(1).lower())
        raw_party = party_m.group(1).lower()
        # "democrat" prefix covers "democrat", "democrats", "democratic";
        # everything else the pattern can match (Republican, GOP) is
        # republican.
        party = "democrat" if "democrat" in raw_party else "republican"
        return f"{state}-{party}-{year}"

    # 3. Named city / country
    # Noun and adjective forms of the same office must map to one slug,
    # otherwise two markets on the same race end up in different families
    # and the one-trade-per-family rule is bypassed. "congress" is included
    # because _ELECTION_TYPE_RE accepts both "congress" and "congressional".
    etype_synonyms = {
        "president": "presidential",
        "mayor": "mayoral",
        "governor": "gubernatorial",
        "congress": "congressional",
    }
    for place_re, place_slug in _NAMED_PLACES:
        if place_re.search(q):
            etype_m = _ELECTION_TYPE_RE.search(q)
            if etype_m:
                raw_etype = etype_m.group(1).lower()
                etype = etype_synonyms.get(raw_etype, raw_etype)
            else:
                etype = "event"
            return f"{place_slug}-{etype}-{year}"

    # 4. Named tech company
    for company_re, company_slug in _NAMED_COMPANIES:
        if company_re.search(q):
            return f"{company_slug}-{year}"

    # 5. Fallback: category + end_date month
    end_month = _end_month(market)
    base = market.category if market.category else "misc"
    return f"{base}-{end_month}" if end_month else f"{base}-{year}"
@dataclass
class Market:
+43
View File
@@ -55,3 +55,46 @@ CREATE INDEX IF NOT EXISTS idx_trades_timestamp ON trades(timestamp DESC);
-- Lookup indexes: trades by market_id, and newest-first timestamp indexes on
-- metrics_daily and signals. IF NOT EXISTS keeps the script re-runnable.
CREATE INDEX IF NOT EXISTS idx_trades_market ON trades(market_id);
CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics_daily(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_signals_timestamp ON signals(timestamp DESC);
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 1 migrations: real net edge
--
-- spread_estimate and commission are HEURISTICS, not exact Polymarket exchange
-- costs. spread_estimate ≈ estimated half-spread for medium-liquidity markets.
-- commission = COMMISSION_RATE (0.02) * size_usdc — mirrors Polymarket taker fee.
-- edge_net = edge_gross - spread_estimate - commission/size_usdc
--          = edge_gross - 0.02 - 0.02  (always 0.04 deduction at current rates)
--
-- These are stored per-trade so we can audit whether the model's cost assumptions
-- were met in practice once markets resolve.
--
-- Every column is added with ADD COLUMN IF NOT EXISTS, so re-running this
-- script skips columns that are already present. No NOT NULL or DEFAULT is
-- declared, so rows written before the migration hold NULL in these columns.
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_gross DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_net DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS prior_prob DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS final_prob DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mid_price DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS spread_estimate DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS commission DOUBLE PRECISION;
-- Market family slug of the traded market (see idx_trades_family).
ALTER TABLE trades ADD COLUMN IF NOT EXISTS family_key TEXT;
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 2 / Phase 5 migrations: market families + observability
--
-- Signals table extended so each evaluated market carries its audit trail:
--   edge_gross      — edge before the cost deductions
--   edge_net        — edge after the cost deductions (compared to regime_min_edge)
--   skip_reason     — why the market was not traded ("edge_net", "family",
--                     "gnews_priority", "regime", "prior_extreme", etc.)
--   passed_gross    — True if edge_gross alone met regime_min_edge
--   passed_net      — True if edge_net met regime_min_edge (the actual gate)
--   family_key      — market family slug (e.g. "texas-republican-2026")
--   regime_min_edge — threshold that applied to this market/category
--
-- As with the trades columns, ADD COLUMN IF NOT EXISTS makes the statements
-- safe to re-run, and the new columns are nullable.
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_gross DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_net DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS family_key TEXT;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS regime_min_edge DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS skip_reason TEXT;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_gross BOOLEAN;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_net BOOLEAN;
-- Indexes for per-market lookups on signals and per-family lookups on trades.
CREATE INDEX IF NOT EXISTS idx_signals_market ON signals(market_id);
CREATE INDEX IF NOT EXISTS idx_trades_family ON trades(family_key);