-        if is_speculative and use_tp: # and ("cuda" in device):
+        if is_speculative and use_tp and ("cuda" in device):
             torch._inductor.config.triton.cudagraph_trees = False # Bug with cudagraph trees in this case
 
         if is_speculative:
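
For context, a minimal sketch (not part of the diff beyond the lines shown above) of how the tightened condition behaves; the helper name maybe_disable_cudagraph_trees is invented for illustration. The point of the change is that cudagraph trees are a CUDA-specific inductor feature, so the workaround no longer fires for speculative + tensor-parallel runs on other devices such as XPU.

import torch

def maybe_disable_cudagraph_trees(is_speculative: bool, use_tp: bool, device: str) -> None:
    # Mirrors the guarded workaround above: only touch the inductor flag when the
    # target device is CUDA, since cudagraph trees do not apply to CPU/XPU runs.
    if is_speculative and use_tp and ("cuda" in device):
        torch._inductor.config.triton.cudagraph_trees = False  # bug with cudagraph trees in this case
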
@@ -354,8 +357,15 @@ def callback(x):
         if (i != num_samples - 1 or not profile) or (use_tp and rank != 0):
             prof = contextlib.nullcontext()
         else:
-            torch.profiler._utils._init_for_cuda_graphs()
-            prof = torch.profiler.profile()
+            if "cuda" in device:
+                torch.profiler._utils._init_for_cuda_graphs()
+                prof = torch.profiler.profile()
+            elif "xpu" in device:
+                prof = torch.profiler.profile(
+                    activities=[
+                        torch.profiler.ProfilerActivity.CPU,
+                        torch.profiler.ProfilerActivity.XPU],
+                )
         with prof:
             y, metrics = generate(
                 model,
@@ -419,6 +429,11 @@ def callback(x):
     parser.add_argument('--device', type=str, default=default_device, help='Device to use')
 
     args = parser.parse_args()
+    if "xpu" in args.device:
+        try:
+            import intel_extension_for_pytorch as ipex
+        except:
+            raise ModuleNotFoundError(f"Intel Extension for PyTorch (intel_extension_for_pytorch) is required to run PyTorch code on Intel GPU (XPU). Please check https://github.com/intel/intel-extension-for-pytorch for details.")
         if (i != num_samples - 1 or not profile) or (use_tp and rank != 0):
             prof = contextlib.nullcontext()
         else:
-            torch.profiler._utils._init_for_cuda_graphs()
-            prof = torch.profiler.profile()
+            if "cuda" in device:
+                torch.profiler._utils._init_for_cuda_graphs()
+                prof = torch.profiler.profile()
+            elif "xpu" in device:
+                prof = torch.profiler.profile(
+                    activities=[
+                        torch.profiler.ProfilerActivity.CPU,
+                        torch.profiler.ProfilerActivity.XPU],
+                )
         with prof:
             y = generate(
                 model,
@@ -302,6 +313,11 @@ def callback(x):
     parser.add_argument('--device', type=str, default="cuda", help='device to use')
 
     args = parser.parse_args()
+    if "xpu" in args.device:
+        try:
+            import intel_extension_for_pytorch as ipex
+        except:
+            raise ModuleNotFoundError(f"Intel Extension for PyTorch (intel_extension_for_pytorch) is required to run PyTorch code on Intel GPU (XPU). Please check https://github.com/intel/intel-extension-for-pytorch for details.")
raiseModuleNotFoundError(f"OneCCL bindings for PyTorch (oneccl_bindings_for_pytorch) is required to run tensor parallel on Intel GPU (XPU). Please check https://github.com/intel/torch-ccl for details.")
quantize.py (5 additions & 1 deletion)
@@ -539,7 +539,6 @@ def quantize(
     device: str = default_device,
 ) -> None:
     assert checkpoint_path.is_file(), checkpoint_path
-    device = 'cpu'
     precision = torch.bfloat16
 
     print("Loading model ...")
@@ -621,4 +620,9 @@ def quantize(
     parser.add_argument('--device', type=str, default=default_device, help='device to use')
 
     args = parser.parse_args()
+    if "xpu" in args.device:
+        try:
+            import intel_extension_for_pytorch as ipex
+        except:
+            raise ModuleNotFoundError(f"Intel Extension for PyTorch (intel_extension_for_pytorch) is required to run PyTorch code on Intel GPU (XPU). Please check https://github.com/intel/intel-extension-for-pytorch for details.")
raiseModuleNotFoundError(f"OneCCL bindings for PyTorch (oneccl_bindings_for_pytorch) is required to run tensor parallel on Intel GPU (XPU). Please check https://github.com/intel/torch-ccl for details.")