feat(manifold): add matcher versioning to separate legacy accepted matches from v3_outcome_guard metrics
CI/CD / build-and-push (push) Successful in 9s
CI/CD / build-and-push (push) Successful in 9s
Add MANIFOLD_MATCHER_VERSION="v3_outcome_guard" tag persisted to manifold_match_audit.matcher_version so metrics can isolate current-matcher stats from pre-versioning records, whose accepted matches the outcome guard would now reject.

- schema: add matcher_version column + index; idempotent startup backfill tagging NULL rows as legacy_pre_outcome_guard (no outcome types) or v2_outcome_guard_no_version (has outcome type, version not persisted)
- save_manifold_audit: write matcher_version on every new record
- get_manifold_matches: split summary into current_version / all_time / legacy; recent_matches now carry matcher_version

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+10
-4
@@ -211,16 +211,22 @@ async def get_attribution():
|
|||||||
|
|
||||||
@app.get("/api/metrics/manifold-matches")
|
@app.get("/api/metrics/manifold-matches")
|
||||||
async def get_manifold_matches():
|
async def get_manifold_matches():
|
||||||
"""Manifold match audit — summary stats and recent match attempts.
|
"""Manifold match audit — version-split summary and recent match attempts.
|
||||||
|
|
||||||
summary:
|
summary.current_version — stats for the active matcher (MANIFOLD_MATCHER_VERSION):
|
||||||
|
version — the matcher version string
|
||||||
total_accepted — matches accepted (score >= 0.40, inversion unambiguous)
|
total_accepted — matches accepted (score >= 0.40, inversion unambiguous)
|
||||||
total_rejected — matches rejected (low score or ambiguous inversion)
|
total_rejected — matches rejected (low score or ambiguous inversion)
|
||||||
total_no_results — no Manifold market found or API error
|
total_no_results — no Manifold market found or API error
|
||||||
avg_match_score — mean Jaccard score for accepted matches
|
avg_match_score — mean Jaccard score for accepted matches
|
||||||
trades_dominated_by_mfld — open trades where feat_mfld_lo is the largest signal
|
used_in_trade — accepted matches that were actually executed
|
||||||
|
summary.all_time — accepted/rejected/no_results across every matcher version.
|
||||||
|
summary.legacy.accepted_without_outcome_type — pre-outcome-guard accepted
|
||||||
|
records that the current matcher would reject (not counted in current_version).
|
||||||
|
summary.trades_dominated_by_mfld — open trades where feat_mfld_lo is the largest signal.
|
||||||
|
|
||||||
recent_matches: last 50 rows from manifold_match_audit, newest first.
|
recent_matches: last 50 rows from manifold_match_audit, newest first, each
|
||||||
|
tagged with matcher_version.
|
||||||
used_in_trade=True only when status='accepted' AND a trade was actually executed.
|
used_in_trade=True only when status='accepted' AND a trade was actually executed.
|
||||||
"""
|
"""
|
||||||
data = await db.get_manifold_matches(limit=50)
|
data = await db.get_manifold_matches(limit=50)
|
||||||
|
|||||||
+47
-9
@@ -4,6 +4,8 @@ import os
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
import asyncpg
|
import asyncpg
|
||||||
|
|
||||||
|
from bot.data.manifold import MANIFOLD_MATCHER_VERSION
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -532,6 +534,7 @@ class Database:
|
|||||||
match_status: str,
|
match_status: str,
|
||||||
poly_outcome_type: Optional[str] = None,
|
poly_outcome_type: Optional[str] = None,
|
||||||
mfld_outcome_type: Optional[str] = None,
|
mfld_outcome_type: Optional[str] = None,
|
||||||
|
matcher_version: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
async with self._pool.acquire() as conn:
|
async with self._pool.acquire() as conn:
|
||||||
await conn.execute("""
|
await conn.execute("""
|
||||||
@@ -540,14 +543,14 @@ class Database:
|
|||||||
mfld_market_id, mfld_market_title, mfld_market_url,
|
mfld_market_id, mfld_market_title, mfld_market_url,
|
||||||
prob_raw, prob_final, inverted,
|
prob_raw, prob_final, inverted,
|
||||||
match_score, match_reason, match_status, used_in_trade,
|
match_score, match_reason, match_status, used_in_trade,
|
||||||
poly_outcome_type, mfld_outcome_type
|
poly_outcome_type, mfld_outcome_type, matcher_version
|
||||||
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,FALSE,$14,$15)
|
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,FALSE,$14,$15,$16)
|
||||||
""",
|
""",
|
||||||
audit_id, poly_market_id, poly_question, search_query,
|
audit_id, poly_market_id, poly_question, search_query,
|
||||||
mfld_market_id, mfld_market_title, mfld_market_url,
|
mfld_market_id, mfld_market_title, mfld_market_url,
|
||||||
prob_raw, prob_final, inverted,
|
prob_raw, prob_final, inverted,
|
||||||
match_score, match_reason, match_status,
|
match_score, match_reason, match_status,
|
||||||
poly_outcome_type, mfld_outcome_type,
|
poly_outcome_type, mfld_outcome_type, matcher_version,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def mark_manifold_audit_used(self, audit_id: str) -> None:
|
async def mark_manifold_audit_used(self, audit_id: str) -> None:
|
||||||
@@ -558,14 +561,36 @@ class Database:
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def get_manifold_matches(self, limit: int = 50) -> dict:
|
async def get_manifold_matches(self, limit: int = 50) -> dict:
|
||||||
|
"""Manifold match audit, with summary split by matcher version.
|
||||||
|
|
||||||
|
The summary separates the current matcher (MANIFOLD_MATCHER_VERSION) from
|
||||||
|
all-time totals and from legacy pre-outcome-guard records, whose accepted
|
||||||
|
matches would now be rejected by the outcome-compatibility guard and so
|
||||||
|
must not be conflated with current-version stats.
|
||||||
|
"""
|
||||||
async with self._pool.acquire() as conn:
|
async with self._pool.acquire() as conn:
|
||||||
summary = await conn.fetchrow("""
|
current = await conn.fetchrow("""
|
||||||
SELECT
|
SELECT
|
||||||
COUNT(*) FILTER (WHERE match_status = 'accepted') AS total_accepted,
|
COUNT(*) FILTER (WHERE match_status = 'accepted') AS total_accepted,
|
||||||
COUNT(*) FILTER (WHERE match_status = 'rejected') AS total_rejected,
|
COUNT(*) FILTER (WHERE match_status = 'rejected') AS total_rejected,
|
||||||
COUNT(*) FILTER (WHERE match_status = 'no_results') AS total_no_results,
|
COUNT(*) FILTER (WHERE match_status = 'no_results') AS total_no_results,
|
||||||
AVG(match_score) FILTER (WHERE match_status = 'accepted') AS avg_match_score
|
AVG(match_score) FILTER (WHERE match_status = 'accepted') AS avg_match_score,
|
||||||
|
COUNT(*) FILTER (WHERE used_in_trade = TRUE) AS used_in_trade
|
||||||
FROM manifold_match_audit
|
FROM manifold_match_audit
|
||||||
|
WHERE matcher_version = $1
|
||||||
|
""", MANIFOLD_MATCHER_VERSION)
|
||||||
|
all_time = await conn.fetchrow("""
|
||||||
|
SELECT
|
||||||
|
COUNT(*) FILTER (WHERE match_status = 'accepted') AS total_accepted,
|
||||||
|
COUNT(*) FILTER (WHERE match_status = 'rejected') AS total_rejected,
|
||||||
|
COUNT(*) FILTER (WHERE match_status = 'no_results') AS total_no_results
|
||||||
|
FROM manifold_match_audit
|
||||||
|
""")
|
||||||
|
legacy = await conn.fetchrow("""
|
||||||
|
SELECT COUNT(*) AS accepted_without_outcome_type
|
||||||
|
FROM manifold_match_audit
|
||||||
|
WHERE matcher_version = 'legacy_pre_outcome_guard'
|
||||||
|
AND match_status = 'accepted'
|
||||||
""")
|
""")
|
||||||
mfld_dominated = await conn.fetchrow("""
|
mfld_dominated = await conn.fetchrow("""
|
||||||
SELECT COUNT(*) AS cnt FROM trades
|
SELECT COUNT(*) AS cnt FROM trades
|
||||||
@@ -582,10 +607,23 @@ class Database:
|
|||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
"summary": {
|
"summary": {
|
||||||
"total_accepted": int(summary["total_accepted"] or 0),
|
"current_version": {
|
||||||
"total_rejected": int(summary["total_rejected"] or 0),
|
"version": MANIFOLD_MATCHER_VERSION,
|
||||||
"total_no_results": int(summary["total_no_results"] or 0),
|
"total_accepted": int(current["total_accepted"] or 0),
|
||||||
"avg_match_score": _f(summary["avg_match_score"]),
|
"total_rejected": int(current["total_rejected"] or 0),
|
||||||
|
"total_no_results": int(current["total_no_results"] or 0),
|
||||||
|
"avg_match_score": _f(current["avg_match_score"]),
|
||||||
|
"used_in_trade": int(current["used_in_trade"] or 0),
|
||||||
|
},
|
||||||
|
"all_time": {
|
||||||
|
"total_accepted": int(all_time["total_accepted"] or 0),
|
||||||
|
"total_rejected": int(all_time["total_rejected"] or 0),
|
||||||
|
"total_no_results": int(all_time["total_no_results"] or 0),
|
||||||
|
},
|
||||||
|
"legacy": {
|
||||||
|
"accepted_without_outcome_type":
|
||||||
|
int(legacy["accepted_without_outcome_type"] or 0),
|
||||||
|
},
|
||||||
"trades_dominated_by_mfld": int(mfld_dominated["cnt"] or 0),
|
"trades_dominated_by_mfld": int(mfld_dominated["cnt"] or 0),
|
||||||
},
|
},
|
||||||
"recent_matches": [dict(r) for r in rows],
|
"recent_matches": [dict(r) for r in rows],
|
||||||
|
|||||||
@@ -33,6 +33,12 @@ from typing import Optional
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
# Version tag for every audit record this matcher produces. Persisted to
|
||||||
|
# manifold_match_audit.matcher_version so metrics can isolate current-version
|
||||||
|
# stats from legacy/pre-versioning records. Never edit or reuse a released tag;
|
||||||
|
# bump to a new string only when matcher semantics change materially.
|
||||||
|
MANIFOLD_MATCHER_VERSION = "v3_outcome_guard"
|
||||||
|
|
||||||
MANIFOLD_API = "https://api.manifold.markets/v0"
|
MANIFOLD_API = "https://api.manifold.markets/v0"
|
||||||
CACHE_TTL_SEC = 1800 # 30 minutes
|
CACHE_TTL_SEC = 1800 # 30 minutes
|
||||||
|
|
||||||
|
|||||||
@@ -220,6 +220,36 @@ CREATE INDEX IF NOT EXISTS idx_mfld_audit_poly_mkt ON manifold_match_audit(poly
|
|||||||
ALTER TABLE manifold_match_audit ADD COLUMN IF NOT EXISTS poly_outcome_type TEXT;
|
ALTER TABLE manifold_match_audit ADD COLUMN IF NOT EXISTS poly_outcome_type TEXT;
|
||||||
ALTER TABLE manifold_match_audit ADD COLUMN IF NOT EXISTS mfld_outcome_type TEXT;
|
ALTER TABLE manifold_match_audit ADD COLUMN IF NOT EXISTS mfld_outcome_type TEXT;
|
||||||
|
|
||||||
|
-- ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
-- Matcher versioning — separate current-matcher metrics from legacy records
|
||||||
|
--
|
||||||
|
-- matcher_version tags each audit row with the matcher that produced it
|
||||||
|
-- (MANIFOLD_MATCHER_VERSION in bot/data/manifold.py). This lets the metrics
|
||||||
|
-- endpoint isolate current_version stats from pre-versioning records, whose
|
||||||
|
-- accepted matches would now be rejected by the outcome-compatibility guard.
|
||||||
|
--
|
||||||
|
-- Backfill is one-shot and idempotent (only touches NULL matcher_version rows):
|
||||||
|
-- * rows with no outcome types → 'legacy_pre_outcome_guard' (pre outcome-guard;
|
||||||
|
-- accepted without any outcome-type validation)
|
||||||
|
-- * rows with an outcome type → 'v2_outcome_guard_no_version' (existed between
|
||||||
|
-- the outcome-guard and this versioning; real version not persisted)
|
||||||
|
-- We tag rather than infer the exact version that wasn't recorded.
|
||||||
|
-- ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
ALTER TABLE manifold_match_audit ADD COLUMN IF NOT EXISTS matcher_version TEXT;
|
||||||
|
|
||||||
|
UPDATE manifold_match_audit
|
||||||
|
SET matcher_version = 'legacy_pre_outcome_guard'
|
||||||
|
WHERE matcher_version IS NULL
|
||||||
|
AND poly_outcome_type IS NULL
|
||||||
|
AND mfld_outcome_type IS NULL;
|
||||||
|
|
||||||
|
UPDATE manifold_match_audit
|
||||||
|
SET matcher_version = 'v2_outcome_guard_no_version'
|
||||||
|
WHERE matcher_version IS NULL
|
||||||
|
AND (poly_outcome_type IS NOT NULL OR mfld_outcome_type IS NOT NULL);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_mfld_audit_version ON manifold_match_audit(matcher_version);
|
||||||
|
|
||||||
-- ─────────────────────────────────────────────────────────────────────────────
|
-- ─────────────────────────────────────────────────────────────────────────────
|
||||||
-- Metric exclusion — administrative closure flag
|
-- Metric exclusion — administrative closure flag
|
||||||
--
|
--
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ from typing import Optional, TYPE_CHECKING
|
|||||||
|
|
||||||
from bot.data.polymarket import Market, market_family_key
|
from bot.data.polymarket import Market, market_family_key
|
||||||
from bot.data.external import ExternalSignals
|
from bot.data.external import ExternalSignals
|
||||||
from bot.data.manifold import ManifoldMatchResult
|
from bot.data.manifold import MANIFOLD_MATCHER_VERSION, ManifoldMatchResult
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from bot.data.news import NewsClient
|
from bot.data.news import NewsClient
|
||||||
@@ -472,6 +472,7 @@ class BayesianStrategy:
|
|||||||
match_status=manifold_result.status,
|
match_status=manifold_result.status,
|
||||||
poly_outcome_type=manifold_result.poly_outcome_type,
|
poly_outcome_type=manifold_result.poly_outcome_type,
|
||||||
mfld_outcome_type=manifold_result.mfld_outcome_type,
|
mfld_outcome_type=manifold_result.mfld_outcome_type,
|
||||||
|
matcher_version=MANIFOLD_MATCHER_VERSION,
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
log.warning("Failed to save manifold audit: %s", exc)
|
log.warning("Failed to save manifold audit: %s", exc)
|
||||||
|
|||||||
Reference in New Issue
Block a user