From 7b429cf7fb88d0afdf69b6d101ae2e6264dc9433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Sat, 13 Jun 2026 14:37:38 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20torch.compile=20=E5=85=A8=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=20+=20dynamic=3DTrue=EF=BC=88=E5=91=8A=E7=9F=A5?= =?UTF-8?q?=E7=BC=96=E8=AF=91=E5=99=A8=E5=BD=A2=E7=8A=B6=E5=8F=AF=E5=8F=98?= =?UTF-8?q?=EF=BC=8C=E9=81=BF=E5=85=8D=E9=87=8D=E7=BC=96=E8=AF=91=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 代码/code/infer.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/代码/code/infer.py b/代码/code/infer.py index 7436b70..77f5250 100644 --- a/代码/code/infer.py +++ b/代码/code/infer.py @@ -505,16 +505,12 @@ def load_model(ckpt_path, device='cuda:0'): model.to(dev) - # === torch.compile 融合 Expert FFN(fc1→relu→fc2),不含动态分支 === + # === torch.compile + dynamic=True:告知编译器形状可变,避免重编译 === try: - cc = 0 - for moe_layer in model.seq_encoder.moe: - for expert in moe_layer.experts: - expert.forward = torch.compile(expert.forward, mode="default") - cc += 1 - print(f"[INFO] torch.compile applied to {cc} Expert.forward methods") + model = torch.compile(model, dynamic=True) + print(f"[INFO] torch.compile applied (dynamic=True)") except Exception as e: - print(f"[WARNING] Expert torch.compile failed ({e}), using original forward") + print(f"[WARNING] torch.compile failed ({e}), using original model") model.eval() print(f"[INFO] Model ready. Device: {dev}")