From 887a8cff86914a03de63006ae495757ced5686f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Mon, 15 Jun 2026 17:33:54 +0800 Subject: [PATCH] =?UTF-8?q?chore:=20=E7=A7=BB=E9=99=A4=20emb=5Ffp16=20?= =?UTF-8?q?=E5=BC=80=E5=85=B3=EF=BC=8C=E6=9A=82=E4=B8=8D=E5=90=AF=E7=94=A8?= =?UTF-8?q?=20Embedding=20FP16?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude --- 代码/code/infer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/代码/code/infer.py b/代码/code/infer.py index 058a353..066652b 100644 --- a/代码/code/infer.py +++ b/代码/code/infer.py @@ -34,7 +34,6 @@ except Exception: # ============================================================ CONFIG = { "fp16": True, # True=半精度推理;False=FP32 参考跑(确立 AUC 天花板) - "emb_fp16": False, # True=Embedding 也 FP16(省 ~10GB 显存带宽,AUC 可能微降) "keep_fp32_modules": (), # fp16 下仍保留 FP32 的子模块名前缀,如 ("linear",) "expert_merge": True, # 是否做 expert 权重相似度合并 "merge_threshold": 0.90, # 合并的余弦相似度阈值