From a358dfd0a3f1f97cebf847ab79a03047ddd94e6d Mon Sep 17 00:00:00 2001 From: OwnerSunshine530 Date: Mon, 15 Jun 2026 14:21:45 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20dedup=5Fembedding=20=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E5=BC=80=E5=90=AF=20=E2=80=94=20=E6=9C=AC=E5=9C=B07.80->6.49s(?= =?UTF-8?q?=E5=BF=AB17%),AUC=E9=80=90=E4=BD=8D=E4=B8=8D=E5=8F=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- 代码/code/infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/代码/code/infer.py b/代码/code/infer.py index ac078e6..6bad1a1 100644 --- a/代码/code/infer.py +++ b/代码/code/infer.py @@ -52,7 +52,7 @@ CONFIG = { "fuse_embedding": True, # True=28个slot的查表+池化融合为1次(减per-batch kernel启动) "syncfree_mask": True, # True=用searchsorted构造因果mask(无同步);False=repeat_interleave(同步) "emb_fp16": True, # True=Embedding表转FP16(查表带宽减半,实测AUC 0.75932≈无损) - "dedup_embedding": False, # True=查表前对sign去重(只查唯一值再展开),减少大表随机访存。数学等价 + "dedup_embedding": True, # True=查表前对sign去重(只查唯一值再展开),本地7.80->6.49s,AUC逐位等价 "compile": False, # 是否 torch.compile(实测慢5×,勿开) }