From 5488ad02fd8efb481252e2b42c1389a09f673a9e Mon Sep 17 00:00:00 2001
From: OwnerSunshine530
Date: Sat, 20 Jun 2026 15:34:48 +0800
Subject: [PATCH] =?UTF-8?q?revert:=20collate=5Fdedup=E9=BB=98=E8=AE=A4?=
 =?UTF-8?q?=E5=85=B3(=E8=AF=84=E6=B5=8B33.44>33.00,per=5Fsample=5Fweights?=
 =?UTF-8?q?=E5=8A=A0=E6=9D=83kernel=E6=9B=B4=E6=85=A2+=E8=AF=84=E6=B5=8B?=
 =?UTF-8?q?=E9=87=8D=E5=A4=8D=E7=8E=87=E4=B8=8D=E5=A4=9F)=E3=80=82?=
 =?UTF-8?q?=E9=94=81=E5=AE=9A71.34?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 代码/code/infer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/代码/code/infer.py b/代码/code/infer.py
index 74a7050..160a238 100644
--- a/代码/code/infer.py
+++ b/代码/code/infer.py
@@ -168,7 +168,8 @@ CONFIG = {
     "syncfree_mask": True, # True=用searchsorted构造因果mask(无同步);False=repeat_interleave(同步)
     "emb_fp16": True, # True=Embedding表转FP16(查表带宽减半,实测AUC 0.75932≈无损)
     "use_embedding_bag": True, # F.embedding_bag 融合查表+池化(单kernel,消dedup的unique同步,AUC≈无损)
-    "collate_dedup": True, # collate(不计时)段内去重+计数→embedding_bag per_sample_weights,减查表带宽(本地4.10→3.98,AUC精确不变)
+    # 评测净负33.44>33.00:per_sample_weights走更慢的加权kernel+评测重复率不够,盖过带宽节省。退回。
+    "collate_dedup": False, # True=collate段内去重+计数(本地快评测慢,勿开)
     "dedup_embedding": True, # True=查表前对sign去重(只查唯一值再展开),本地7.80->6.49s,AUC逐位等价
     "sparse_pool": False, # True=用(段×唯一)稀疏矩阵乘做池化,避免materialize整个[M,512](段内高重复时省)
     "compile": False, # 是否 torch.compile(实测慢5×,勿开)