-    if is_speculative and use_tp: # and ("cuda" in device):
+    if is_speculative and use_tp and ("cuda" in device):
         torch._inductor.config.triton.cudagraph_trees = False  # Bug with cudagraph trees in this case

     if is_speculative:
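The change above narrows the existing cudagraph-trees workaround so it only applies when the device is CUDA; on XPU the Inductor default is left alone. A minimal sketch of that gating, assuming `device`, `is_speculative`, and `use_tp` are the same locals the surrounding function already defines (the helper name is hypothetical; the real diff inlines the check):

```python
import torch

def maybe_disable_cudagraph_trees(device: str, is_speculative: bool, use_tp: bool) -> None:
    # Hypothetical helper; the actual change performs this check inline.
    if is_speculative and use_tp and ("cuda" in device):
        # Speculative decoding + tensor parallel currently trips a cudagraph-trees
        # bug on the CUDA backend, so the feature is turned off only there.
        torch._inductor.config.triton.cudagraph_trees = False
```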
@@ -354,8 +358,15 @@ def callback(x):
         if (i != num_samples - 1 or not profile) or (use_tp and rank != 0):
             prof = contextlib.nullcontext()
         else:
-            torch.profiler._utils._init_for_cuda_graphs()
-            prof = torch.profiler.profile()
+            if "cuda" in device:
+                torch.profiler._utils._init_for_cuda_graphs()
+                prof = torch.profiler.profile()
+            elif "xpu" in device:
+                prof = torch.profiler.profile(
+                    activities=[
+                        torch.profiler.ProfilerActivity.CPU,
+                        torch.profiler.ProfilerActivity.XPU],
+                )
         with prof:
             y, metrics = generate(
                 model,
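This hunk replaces the unconditional CUDA profiler setup with a per-device branch: CUDA keeps the cudagraph-aware initialization, while XPU requests the CPU and XPU profiler activities explicitly. A self-contained sketch of the same selection logic, assuming a PyTorch build where `torch.profiler.ProfilerActivity.XPU` exists (the helper name and `enabled` flag are illustrative):

```python
import contextlib
import torch

def make_profiler(device: str, enabled: bool = True):
    """Return a profiling context manager for CUDA or XPU, or a no-op otherwise."""
    if not enabled:
        return contextlib.nullcontext()
    if "cuda" in device:
        # Private helper used by the original code so profiling coexists with CUDA graphs.
        torch.profiler._utils._init_for_cuda_graphs()
        return torch.profiler.profile()
    if "xpu" in device:
        # XPU kernels only show up in the trace when the XPU activity is requested.
        return torch.profiler.profile(
            activities=[
                torch.profiler.ProfilerActivity.CPU,
                torch.profiler.ProfilerActivity.XPU,
            ]
        )
    return contextlib.nullcontext()

# Mirrors the `with prof:` block above:
# with make_profiler(device, enabled=profile):
#     y, metrics = generate(model, ...)
```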
@@ -419,6 +430,11 @@ def callback(x):
     parser.add_argument('--device', type=str, default=default_device, help='Device to use')

     args = parser.parse_args()
+    if "xpu" in args.device:
+        try:
+            import intel_extension_for_pytorch as ipex
+        except:
+            raise ModuleNotFoundError(f"Intel Extension for PyTorch (intel_extension_for_pytorch) is required to run PyTorch code on Intel GPU (XPU). Please check https://github.com/intel/intel-extension-for-pytorch for details.")
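Each entry point gets the same guard: when `--device` names an XPU, the script fails fast with an actionable message if Intel Extension for PyTorch is not installed. Below is one way to factor that guard out; it is a sketch, not the code the PR ships, and it narrows the bare `except` to `ImportError`:

```python
def require_ipex_for_xpu(device: str) -> None:
    """Raise early when an XPU device is requested but IPEX cannot be imported."""
    if "xpu" not in device:
        return
    try:
        import intel_extension_for_pytorch as ipex  # noqa: F401  imported for its side effects
    except ImportError as exc:
        raise ModuleNotFoundError(
            "Intel Extension for PyTorch (intel_extension_for_pytorch) is required "
            "to run PyTorch code on Intel GPU (XPU). "
            "Please check https://github.com/intel/intel-extension-for-pytorch for details."
        ) from exc

# Usage, mirroring the diff:
# args = parser.parse_args()
# require_ipex_for_xpu(args.device)
```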
         if (i != num_samples - 1 or not profile) or (use_tp and rank != 0):
             prof = contextlib.nullcontext()
         else:
-            torch.profiler._utils._init_for_cuda_graphs()
-            prof = torch.profiler.profile()
+            if "cuda" in device:
+                torch.profiler._utils._init_for_cuda_graphs()
+                prof = torch.profiler.profile()
+            elif "xpu" in device:
+                prof = torch.profiler.profile(
+                    activities=[
+                        torch.profiler.ProfilerActivity.CPU,
+                        torch.profiler.ProfilerActivity.XPU],
+                )
         with prof:
             y = generate(
                 model,
@@ -302,6 +313,11 @@ def callback(x):
     parser.add_argument('--device', type=str, default="cuda", help='device to use')

     args = parser.parse_args()
+    if "xpu" in args.device:
+        try:
+            import intel_extension_for_pytorch as ipex
+        except:
+            raise ModuleNotFoundError(f"Intel Extension for PyTorch (intel_extension_for_pytorch) is required to run PyTorch code on Intel GPU (XPU). Please check https://github.com/intel/intel-extension-for-pytorch for details.")
            raise ModuleNotFoundError(f"OneCCL bindings for PyTorch (oneccl_bindings_for_pytorch) is required to run tensor parallel on Intel GPU (XPU). Please check https://github.com/intel/torch-ccl for details.")
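The line above comes from the tensor-parallel code path: running tensor parallel on XPU additionally requires the oneCCL bindings, which register the "ccl" collective backend for torch.distributed. A hedged sketch of that guard, based on the torch-ccl README rather than on the exact file in this diff:

```python
import torch.distributed as dist

def init_ccl_process_group() -> None:
    """Illustrative guard before initializing a distributed group on XPU."""
    try:
        import oneccl_bindings_for_pytorch  # noqa: F401  registers the "ccl" backend
    except ImportError as exc:
        raise ModuleNotFoundError(
            "OneCCL bindings for PyTorch (oneccl_bindings_for_pytorch) is required "
            "to run tensor parallel on Intel GPU (XPU). "
            "Please check https://github.com/intel/torch-ccl for details."
        ) from exc
    # RANK / WORLD_SIZE / MASTER_ADDR / MASTER_PORT are expected in the environment.
    dist.init_process_group(backend="ccl")
```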
quantize.py: 5 additions & 1 deletion
@@ -539,7 +539,6 @@ def quantize(
     device: str = default_device,
 ) -> None:
     assert checkpoint_path.is_file(), checkpoint_path
-    device = 'cpu'
     precision = torch.bfloat16

     print("Loading model ...")
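Removing the hard-coded device = 'cpu' means quantize() now honors the --device argument instead of silently overriding it, so a 'cuda' or 'xpu' device string passed on the command line is actually used. A minimal illustrative sketch (not the repo's actual quantize.py) of the behavior that changes when the override goes away:

```python
import torch

def load_checkpoint(checkpoint_path, device: str = "cpu"):
    """Load a checkpoint onto the device selected on the command line.

    With the old override, `device` was always rewritten to 'cpu' before this
    point; without it, the caller's device choice is respected.
    """
    # map_location places the tensors on the requested device while loading.
    return torch.load(str(checkpoint_path), map_location=device)
```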
@@ -621,4 +620,9 @@ def quantize(
     parser.add_argument('--device', type=str, default=default_device, help='device to use')

     args = parser.parse_args()
+    if "xpu" in args.device:
+        try:
+            import intel_extension_for_pytorch as ipex
+        except:
+            raise ModuleNotFoundError(f"Intel Extension for PyTorch (intel_extension_for_pytorch) is required to run PyTorch code on Intel GPU (XPU). Please check https://github.com/intel/intel-extension-for-pytorch for details.")
            raise ModuleNotFoundError(f"OneCCL bindings for PyTorch (oneccl_bindings_for_pytorch) is required to run tensor parallel on Intel GPU (XPU). Please check https://github.com/intel/torch-ccl for details.")