revert: 移除 torch.compile(default 模式下,动态 batch 形状同样导致编译开销大于收益)
保留 inference_mode + FP16 + Flash Attention(当前最优成绩:56.98 分)
This commit is contained in:
@@ -511,11 +511,6 @@ def load_model(ckpt_path, device='cuda:0'):
|
||||
|
||||
model.to(dev)
|
||||
model.eval()
|
||||
|
||||
# === torch.compile(default):算子融合,不用 CUDA Graph,兼容动态 batch 形状 ===
|
||||
model = torch.compile(model, mode="default")
|
||||
print("[INFO] torch.compile applied (mode=default)")
|
||||
|
||||
print(f"[INFO] Model ready. Device: {dev}")
|
||||
|
||||
return model, dev
|
||||
|
||||
Reference in New Issue
Block a user