"""本地测量闭环:设置 infer.CONFIG,跑推理,同步计时,打印 AUC/PCOC/延迟/总分。 不进提交包。在 AI Studio notebook(带 dataset/ 与 ckpt.pt)里运行: %cd /home/aistudio/code !python bench.py # 默认配置基准 或在 notebook cell 里逐配置扫描: import bench bench.run_once({"fp16": False, "expert_merge": False}) # FP32 参考跑 bench.run_once({"signid_mode": "modulo"}) # 取模 vs clamp """ import os import sys import time from pathlib import Path # baseline 把依赖装在 --target 目录(非默认 site-packages),在 kernel 里 import # 之前必须先把它加到 sys.path,否则 import torch 会 ModuleNotFoundError。 for _p in ("/home/aistudio/external-libraries", "/home/aistudio/libraries", os.path.abspath("../libraries"), os.path.abspath("./libraries")): if os.path.isdir(_p) and _p not in sys.path: sys.path.insert(0, _p) import torch from torch.utils.data import DataLoader import infer # 同目录 def run_once(config_override=None, batch_size=50, max_batches=None, max_feasign_per_slot=None): """跑一次本地推理并打分。 Args: config_override: 覆盖 infer.CONFIG 的字典(如 {"fp16": False}) batch_size: DataLoader 的 batch 大小(本地参考;评测端可能自有设定) max_batches: 只跑前 N 个 batch(快速冒烟用),None=全量 max_feasign_per_slot: 传给 CTRTestSeqDataset 的截断字典,None=不截断; 默认沿用 baseline 的 {1: 2} Returns: infer._cal_score 的结果 dict """ if config_override is None: config_override = {} if max_feasign_per_slot is None: max_feasign_per_slot = {1: 2} infer.CONFIG.update(config_override) infer.CONFIG["sync_timing"] = True cur = Path(__file__).parent ref = cur / "dataset" history = ref / "history" test_csv = ref / "test.csv" label_file = ref / "label_data.txt" # ----- 加载数据 ----- files = (sorted(history.glob("*.csv")) if history.exists() else []) + [test_csv] item_dict, user_seq = infer.load_sample_files(files) test_logids = infer.load_logids_from_file(test_csv) ds = infer.CTRTestSeqDataset( test_logids_ordered=list(test_logids), item_dict=item_dict, user_seq=user_seq, max_feasign_per_slot=max_feasign_per_slot, max_ctx_len=None, ) loader = DataLoader( ds, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=infer.make_collate_fn(ds.max_slot_id), ) batches = [] for b in loader: batches.append(infer.move_batch_to_device(b, torch.device("cpu"))) if max_batches is not None and len(batches) >= max_batches: break # ----- 加载模型 ----- model, dev = infer.load_model(ckpt_path=None) # ----- 推理 + 同步计时 ----- logid2p = {} t_sum = 0.0 cuda = (dev.type == "cuda") with torch.inference_mode(): for b in batches: b = infer.move_batch_to_device(b, dev) pm = b["pred_mask"].bool() if cuda: torch.cuda.synchronize() t0 = time.time() logits, _ = model(b) probs = torch.sigmoid(logits.squeeze(-1)) if cuda: torch.cuda.synchronize() t_sum += time.time() - t0 for lid, p in zip(b["logid"][pm].cpu().tolist(), probs[pm].cpu().tolist()): logid2p[lid] = p # ----- 按 test.csv 顺序写 predict.txt 并打分 ----- order = [int(l.split(",")[0]) for l in open(test_csv) if l.strip()] pred_path = cur / "predict.txt" with open(pred_path, "w") as f: for lid in order: f.write(f"{logid2p[lid]}\n") res = infer._cal_score(pred_path, label_file, default_latency=t_sum) print( f"[BENCH] cfg={config_override} bs={batch_size}" f"{'' if max_batches is None else f' (first {max_batches} batches)'}" f" -> AUC={res['auc']:.5f} PCOC={res['pcoc']:.4f}" f" lat={res['latency']:.2f}s score={res['score_all']:.2f}" ) return res if __name__ == "__main__": run_once({}) # 默认配置基准