feat: PCOC校准(logit_bias单调偏移,AUC不变,免费+0.34) + bench自动拟合建议bias

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 20:20:50 +08:00
parent 575b32f263
commit 264130df0f
2 changed files with 27 additions and 1 deletions
@@ -265,6 +265,7 @@ def run_once(config_override=None, batch_size=50, max_batches=None,
        print(f"[BENCH] rep cache built from batches: {logids.numel()} items")

    logid2p = {}
+    logid2logit = {}
    t_sum = 0.0
    with torch.inference_mode():
        for b in batches:
@@ -278,8 +279,11 @@ def run_once(config_override=None, batch_size=50, max_batches=None,
            if cuda:
                torch.cuda.synchronize()
            t_sum += time.time() - t0
-            for lid, p in zip(b["logid"][pm].cpu().tolist(), probs[pm].cpu().tolist()):
+            lg = logits.squeeze(-1)
+            for lid, p, lv in zip(b["logid"][pm].cpu().tolist(),
+                                  probs[pm].cpu().tolist(), lg[pm].cpu().tolist()):
                logid2p[lid] = p
+                logid2logit[lid] = lv

    order = [int(l.split(",")[0]) for l in open(test_csv) if l.strip()]
    missing = [lid for lid in order if lid not in logid2p]
@@ -297,6 +301,21 @@ def run_once(config_override=None, batch_size=50, max_batches=None,
        f" -> AUC={res['auc']:.5f} PCOC={res['pcoc']:.4f}"
        f" lat={res['latency']:.2f}s score={res['score_all']:.2f}"
    )
+    # 拟合 PCOC 校准 logit_bias（使 mean(sigmoid(logit+b))=mean(label)）
+    try:
+        ol = np.array([logid2logit.get(lid, 0.0) for lid in order], dtype=np.float64)
+        labels = infer._read_label(str(label_file))
+        ml = float(labels.mean())
+        lo, hi = -3.0, 3.0
+        for _ in range(60):
+            mid = 0.5 * (lo + hi)
+            if (1.0 / (1.0 + np.exp(-(ol + mid)))).mean() > ml:
+                hi = mid
+            else:
+                lo = mid
+        print(f"[BENCH] 建议 logit_bias={0.5*(lo+hi):.4f}（PCOC→1.0，免费+~0.34分）")
+    except Exception as e:
+        print(f"[BENCH] logit_bias 拟合跳过: {e}")
    return res


@@ -327,6 +346,7 @@ def _parse_args():
    ap.add_argument("--emb-bag", action="store_true", help="F.embedding_bag 融合查表+池化")
    ap.add_argument("--no-moe-baddbmm", action="store_true", help="关闭 MoE baddbmm(用 einsum 对照)")
    ap.add_argument("--no-skip-moe-loss", action="store_true", help="不跳过 moe_loss(对照)")
+    ap.add_argument("--logit-bias", type=float, default=None, help="PCOC校准:logit偏移(本地验证PCOC→1.0)")
    ap.add_argument("--sparse-pool", action="store_true", help="稀疏矩阵乘做池化(段内高重复时省)")
    ap.add_argument("--precompute-rep", action="store_true",
                    help="预计算RepEncoder缓存,model(batch)跳过embedding层(从batches自建)")
@@ -379,6 +399,8 @@ if __name__ == "__main__":
        cfg["moe_baddbmm"] = False
    if a.no_skip_moe_loss:
        cfg["skip_moe_loss"] = False
+    if a.logit_bias is not None:
+        cfg["logit_bias"] = a.logit_bias
    if a.sparse_pool:
        cfg["sparse_pool"] = True
    if a.precompute_rep: