feat(bot): 5-phase strategy upgrade — edge neto, families, GNews priority, regimes
CI/CD / build-and-push (push) Successful in 2m30s
CI/CD / build-and-push (push) Successful in 2m30s
Phase 1 — Real net edge ("edge neto real") (paper.py, bayesian.py, risk/manager.py, db.py):
- Trade records now store edge_gross, edge_net, prior_prob, final_prob,
mid_price, spread_estimate, commission, family_key
- edge_net = edge_gross - SPREAD_ESTIMATE(0.02) - COMMISSION_RATE(0.02)
NOTE: both constants are heuristics, not exact Polymarket exchange costs
- Execution gate changed from edge_gross > MIN_EDGE to edge_net > regime_min_edge
Phase 2 — Market families (polymarket.py):
- market_family_key(market) groups related markets:
texas-republican-2026, fed-april-2026, openai-2026, etc.
- At most 1 trade per family per cycle; occupied_families propagated via main.py
- Family key logged on every TRADE and SKIP line
Phase 3 — GNews priority (news.py, bayesian.py, main.py):
- NewsClient.get_freshness() returns 1.0/0.75/0.40/0.10 by cache age
- gnews_priority(market, news) = uncertainty × volume_score × freshness
- Politics markets sorted by priority DESC before eval so best markets get
the 5-query/cycle GNews budget first
Phase 4 — Regime min-edge by category/horizon (bayesian.py):
- politics >60d → 0.12, 30-60d → 0.10, <30d → 0.08
- tech / crypto/finance → 0.10
- All thresholds applied to edge_net (not edge_gross)
Phase 5 — Observability (bayesian.py, main.py):
- Structured skip labels: SKIP_UNSUPPORTED, SKIP_NO_SIGNALS,
SKIP_PRIOR_EXTREME, SKIP_FAMILY, SKIP_GNEWS_PRIORITY, SKIP_EDGE_NET
- TRADE lines now include family_key, edge_gross, edge_net, regime_min, days
- schema.sql: 8 new cols on trades, 7 new cols on signals (via ALTER TABLE … ADD COLUMN IF NOT EXISTS)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+22
-2
@@ -33,13 +33,21 @@ class Database:
|
||||
await conn.execute("""
|
||||
INSERT INTO trades (
|
||||
id, market_id, question, direction, size_usdc,
|
||||
entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper
|
||||
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12)
|
||||
entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
|
||||
edge_gross, edge_net, prior_prob, final_prob,
|
||||
mid_price, spread_estimate, commission, family_key
|
||||
) VALUES (
|
||||
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
|
||||
$13,$14,$15,$16,$17,$18,$19,$20
|
||||
)
|
||||
ON CONFLICT (id) DO NOTHING
|
||||
""",
|
||||
trade.id, trade.market_id, trade.question, trade.direction,
|
||||
trade.size_usdc, trade.entry_price, trade.shares, trade.fee_usdc,
|
||||
trade.net_cost, trade.timestamp, trade.reasoning, trade.paper,
|
||||
# Phase 1 fields
|
||||
trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
|
||||
trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
|
||||
)
|
||||
|
||||
async def save_daily_metrics(self, metrics: dict) -> None:
|
||||
@@ -69,6 +77,18 @@ class Database:
|
||||
)
|
||||
return {r["market_id"]: float(r["total"]) for r in rows}
|
||||
|
||||
async def get_open_families(self) -> set[str]:
    """Return the distinct, non-empty ``family_key`` values stored in ``trades``.

    Per the original author note, this is used at startup to rebuild
    ``occupied_families`` from DB state so the family-deduplication logic
    survives pod restarts.

    NOTE(review): despite the method name, the query applies no
    open-position filter — every trade row with a non-NULL ``family_key``
    contributes, so a family that was traded once keeps being reported.
    Confirm whether a status/resolution condition is missing here.
    """
    async with self._pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT DISTINCT family_key FROM trades WHERE family_key IS NOT NULL"
        )
        # The truthiness check additionally drops empty-string keys, which
        # the SQL ``IS NOT NULL`` predicate lets through.
        return {r["family_key"] for r in rows if r["family_key"]}
|
||||
|
||||
async def get_recent_trades(self, limit: int = 100) -> list[dict]:
|
||||
async with self._pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
|
||||
@@ -155,6 +155,32 @@ class NewsClient:
|
||||
async def close(self) -> None:
    """Shut down the underlying client by awaiting ``self._client.aclose()``."""
    await self._client.aclose()
|
||||
|
||||
def get_freshness(self, question: str) -> float:
    """
    Return a freshness score in [0.1, 1.0] for GNews priority calculation.

    A higher score means spending GNews budget on this question is
    considered more worthwhile:
        1.00 — no API key configured, or no cache entry for the query
        0.75 — cache entry older than 6 h
        0.40 — cache entry older than 2 h (up to and including 6 h)
        0.10 — cache entry at most 2 h old

    The cache is looked up with the lower-cased result of
    ``self._build_query(question)``. Entry age is
    ``time.monotonic() - entry[0]``, so element 0 of a cache entry is
    presumably the monotonic timestamp at which it was stored.
    """
    # Without a key the score is never penalised (original author note:
    # the query would be skipped anyway).
    if not self._api_key:
        return 1.0

    entry = self._cache.get(self._build_query(question).lower())
    if entry is None:
        return 1.0

    elapsed = time.monotonic() - entry[0]
    # Oldest band first: the first threshold strictly exceeded wins.
    for min_age_hours, score in ((6, 0.75), (2, 0.40)):
        if elapsed > min_age_hours * 3600:
            return score
    return 0.10
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -5,6 +5,7 @@ Docs: https://docs.polymarket.com
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Optional
|
||||
@@ -15,6 +16,158 @@ log = logging.getLogger(__name__)
|
||||
POLYMARKET_API = "https://clob.polymarket.com"
|
||||
GAMMA_API = "https://gamma-api.polymarket.com"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
# Phase 2 — Market family classification helpers
# Used by market_family_key() below.
# ─────────────────────────────────────────────────────────────────────────────

# Four-digit year in the range 2020–2039.
_YEAR_RE = re.compile(r"\b(202\d|203\d)\b")

# Full English month names, case-insensitive.
# NOTE(review): because of IGNORECASE, "may" also matches the modal verb
# ("... may cut rates ..."), not only the month.
_MONTH_RE = re.compile(
    r"\b(january|february|march|april|may|june|july|august|"
    r"september|october|november|december)\b",
    re.IGNORECASE,
)

# Phrases that mark a Fed / interest-rate question. There is no trailing \b,
# so plural forms ("interest rates", "basis points", "fed rates") match too.
_FED_TRIGGER_RE = re.compile(
    r"\b(federal reserve|interest rate|bps|basis point|fed\s+(rate|meeting|decision))",
    re.IGNORECASE,
)

# US state names; multi-word states tolerate any whitespace between words.
_US_STATE_RE = re.compile(
    r"\b(Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|"
    r"Delaware|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|"
    r"Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|"
    r"Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|"
    r"New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|"
    r"Ohio|Oklahoma|Oregon|Pennsylvania|Rhode\s+Island|South\s+Carolina|"
    r"South\s+Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Washington|"
    r"West\s+Virginia|Wisconsin|Wyoming)\b",
    re.IGNORECASE,
)

# US party mentions; market_family_key() folds these into two slugs.
_PARTY_RE = re.compile(r"\b(Republican|Democrats?|Democratic|GOP)\b", re.IGNORECASE)

# Election-type keywords, used to label named-place markets.
_ELECTION_TYPE_RE = re.compile(
    r"\b(presidential|president|mayoral|mayor|gubernatorial|governor|"
    r"senate|congress(?:ional)?|primary|election)\b",
    re.IGNORECASE,
)

# Ordered list of (pattern, place_slug) for named places — mostly non-US
# countries and cities, plus Los Angeles. First match wins.
# market_family_key() consults this list only after the US state + party rule
# has failed to match. Patterns without a trailing \b ("Colomb",
# "Chungcheong") also match longer words that start with that prefix.
_NAMED_PLACES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bColomb", re.IGNORECASE), "colombia"),
    (re.compile(r"\bSeoul\b", re.IGNORECASE), "seoul"),
    (re.compile(r"\bBusan\b", re.IGNORECASE), "busan"),
    (re.compile(r"\bGyeonggi\b", re.IGNORECASE), "gyeonggi"),
    (re.compile(r"\bChungcheong", re.IGNORECASE), "chungcheong"),
    (re.compile(r"\bSouth\s+Korean?\b", re.IGNORECASE), "south-korea"),
    (re.compile(r"\bLos\s+Angeles\b", re.IGNORECASE), "los-angeles"),
    (re.compile(r"\bCuba\b", re.IGNORECASE), "cuba"),
    (re.compile(r"\bLebanon\b", re.IGNORECASE), "lebanon"),
    (re.compile(r"\bIsrael\b", re.IGNORECASE), "israel"),
    (re.compile(r"\bUkraine\b", re.IGNORECASE), "ukraine"),
    (re.compile(r"\bRussia\b", re.IGNORECASE), "russia"),
]

# Ordered list of (pattern, company_slug) for tech/company markets.
# First match wins.
_NAMED_COMPANIES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bopenai\b", re.IGNORECASE), "openai"),
    (re.compile(r"\banthropic\b", re.IGNORECASE), "anthropic"),
    (re.compile(r"\bnvidia\b", re.IGNORECASE), "nvidia"),
    (re.compile(r"\bapple\b", re.IGNORECASE), "apple"),
    (re.compile(r"\bmicrosoft\b", re.IGNORECASE), "microsoft"),
    (re.compile(r"\bgoogle\b", re.IGNORECASE), "google"),
    (re.compile(r"\btesla\b", re.IGNORECASE), "tesla"),
    # \bmeta\b does NOT match MetaMask (no word boundary mid-compound-word)
    (re.compile(r"\bmeta\b", re.IGNORECASE), "meta"),
]
|
||||
|
||||
|
||||
def _end_month(market: "Market") -> str:
    """Format ``market.end_date`` as ``YYYY-MM``.

    Returns the empty string when ``end_date`` is falsy or cannot be parsed
    as an ISO-8601 timestamp.
    """
    end_date = market.end_date
    if end_date:
        try:
            # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
            parsed = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
            return parsed.strftime("%Y-%m")
        except (ValueError, TypeError):
            pass
    return ""
|
||||
|
||||
|
||||
def market_family_key(market: "Market") -> str:
    """
    Return a stable slug that groups related markets together.

    Markets in the same family share an underlying event (same election,
    same Fed meeting decision, same company). The bot allows at most one
    open position per family per cycle to avoid correlated exposure.

    Priority order (first match wins):
      1. Fed / interest-rate decision  → fed-{month}-{year}, or fed-{year}
                                         when no month name is present
      2. US state + party election     → {state}-{party}-{year}
      3. Named place (_NAMED_PLACES)   → {place}-{event_type}-{year}
      4. Named tech company            → {company}-{year}
      5. Fallback                      → {category}-{end_YYYY-MM}, or
                                         {category}-{year} without an end month

    ``year`` is the first 2020–2039 year found in the question; otherwise the
    year of ``end_date``; otherwise the literal ``"unknown"``.

    Args:
        market: Object exposing ``question``, ``end_date`` and ``category``.
            A missing (``None``) question is treated as empty text, so only
            the fallback rule can apply to it.

    Returns:
        The family slug, lower-case and hyphen-separated.

    Examples:
      "Will Ken Paxton win the 2026 Texas Republican Primary"
          → texas-republican-2026
      "Will the Fed decrease rates by 25 bps after April 2026 meeting"
          → fed-april-2026
      "Will OpenAI IPO by December 31 2026?"
          → openai-2026
    """
    # Tolerate a missing question instead of raising inside re.search().
    q = market.question or ""

    # Parsed once; feeds both the year fallback and the final fallback rule.
    end_month = _end_month(market)  # e.g. "2026-06", or "" if unparseable

    # Prefer year from question text; fall back to end_date year if absent
    year_m = _YEAR_RE.search(q)
    if year_m:
        year = year_m.group(1)
    else:
        year = end_month[:4] if end_month else "unknown"

    # 1. Fed / interest-rate meeting
    if _FED_TRIGGER_RE.search(q):
        month_m = _MONTH_RE.search(q)
        if month_m:
            return f"fed-{month_m.group(1).lower()}-{year}"
        return f"fed-{year}"

    # 2. US state + party (primary, senate, governor, etc.)
    state_m = _US_STATE_RE.search(q)
    party_m = _PARTY_RE.search(q)
    if state_m and party_m:
        state = re.sub(r"\s+", "-", state_m.group(1).lower())
        raw_party = party_m.group(1).lower()
        # "democrat" prefix covers "democrat", "democrats", "democratic";
        # everything else the pattern can match ("republican", "gop") is
        # the Republican side.
        party = "democrat" if "democrat" in raw_party else "republican"
        return f"{state}-{party}-{year}"

    # 3. Named place
    for place_re, place_slug in _NAMED_PLACES:
        if place_re.search(q):
            etype_m = _ELECTION_TYPE_RE.search(q)
            if etype_m:
                raw_etype = etype_m.group(1).lower()
                # Normalise noun forms to the adjective so both spellings of
                # the same event land in one family. "congress" is included:
                # the pattern matches it and "congressional" through a single
                # alternative, so they must not yield two different keys.
                etype = {
                    "president": "presidential",
                    "mayor": "mayoral",
                    "governor": "gubernatorial",
                    "congress": "congressional",
                }.get(raw_etype, raw_etype)
            else:
                etype = "event"
            return f"{place_slug}-{etype}-{year}"

    # 4. Named tech company
    for company_re, company_slug in _NAMED_COMPANIES:
        if company_re.search(q):
            return f"{company_slug}-{year}"

    # 5. Fallback: category + end_date month
    base = market.category if market.category else "misc"
    return f"{base}-{end_month}" if end_month else f"{base}-{year}"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Market:
|
||||
|
||||
@@ -55,3 +55,46 @@ CREATE INDEX IF NOT EXISTS idx_trades_timestamp ON trades(timestamp DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_trades_market ON trades(market_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics_daily(timestamp DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_signals_timestamp ON signals(timestamp DESC);
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 1 migrations: real net edge ("edge neto real")
--
-- spread_estimate and commission are HEURISTICS, not exact Polymarket exchange
-- costs.  spread_estimate ≈ estimated half-spread for medium-liquidity markets.
-- commission = COMMISSION_RATE (0.02) * size_usdc — a flat-rate approximation
-- of the taker fee, not the exact fee charged.
-- edge_net = edge_gross - spread_estimate - commission/size_usdc
--          = edge_gross - 0.02 - 0.02   (always 0.04 deduction at current rates)
--
-- These are stored per-trade so we can audit whether the model's cost assumptions
-- were met in practice once markets resolve.
--
-- Each statement uses ADD COLUMN IF NOT EXISTS, so re-running this file does not
-- fail when a column is already present. The columns are added without NOT NULL
-- or DEFAULT, so rows written before this migration hold NULL in them.
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_gross      DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_net        DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS prior_prob      DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS final_prob      DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mid_price       DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS spread_estimate DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS commission      DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS family_key      TEXT;
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 2 / Phase 5 migrations: market families + observability
--
-- Signals table extended so each evaluated market carries its audit trail:
--   skip_reason     — why the market was not traded ("edge_net", "family",
--                     "gnews_priority", "regime", "prior_extreme", etc.)
--   passed_gross    — True if edge_gross alone met regime_min_edge
--   passed_net      — True if edge_net met regime_min_edge (the actual gate)
--   family_key      — market family slug (e.g. "texas-republican-2026")
--   regime_min_edge — threshold that applied to this market/category
--
-- As with the trades columns, every column is added with IF NOT EXISTS and
-- without NOT NULL or DEFAULT, so pre-existing signal rows hold NULL in them.
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_gross      DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_net        DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS family_key      TEXT;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS regime_min_edge DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS skip_reason     TEXT;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_gross    BOOLEAN;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_net      BOOLEAN;

-- Lookup indexes: signals by market_id, trades by family_key.
CREATE INDEX IF NOT EXISTS idx_signals_market ON signals(market_id);
CREATE INDEX IF NOT EXISTS idx_trades_family  ON trades(family_key);
|
||||
|
||||
Reference in New Issue
Block a user