fix: change torch.compile mode to "default" (avoids CUDA Graph recompilation when N changes)
This commit is contained in:
+1
-1
@@ -510,7 +510,7 @@ def load_model(ckpt_path, device='cuda:0'):
|
|||||||
cc = 0
|
cc = 0
|
||||||
for moe_layer in model.seq_encoder.moe:
|
for moe_layer in model.seq_encoder.moe:
|
||||||
for expert in moe_layer.experts:
|
for expert in moe_layer.experts:
|
||||||
expert.forward = torch.compile(expert.forward, mode="reduce-overhead")
|
expert.forward = torch.compile(expert.forward, mode="default")
|
||||||
cc += 1
|
cc += 1
|
||||||
print(f"[INFO] torch.compile applied to {cc} Expert.forward methods")
|
print(f"[INFO] torch.compile applied to {cc} Expert.forward methods")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user