31 changes: 16 additions & 15 deletions src/llmcompressor/modifiers/awq/base.py
@@ -1,5 +1,6 @@
 import inspect
 from typing import Dict, List, Optional, Tuple, Union
+import warnings
 
 import torch
 from compressed_tensors.quantization import (
@@ -183,25 +184,25 @@ def validate_model_after(model: "AWQModifier") -> "AWQModifier":

         model._group_size = next(iter(group_size_set))
 
-        in_num_bits_set = set(
+        num_bits_set = set(
             group.input_activations.num_bits
             for group in config.config_groups.values()
             if group.input_activations is not None
+        ).union(
+            set(
+                group.output_activations.num_bits
+                for group in config.config_groups.values()
+                if group.output_activations is not None
+            )
         )
-        assert len(in_num_bits_set) == 0 or in_num_bits_set == {16}, (
-            "AWQ activations must be 16-bit precision, "
-            f"input activations {in_num_bits_set} not allowed"
-        )
-
-        out_num_bits_set = set(
-            group.output_activations.num_bits
-            for group in config.config_groups.values()
-            if group.output_activations is not None
-        )
-        assert len(out_num_bits_set) == 0 or out_num_bits_set == {16}, (
-            "AWQ activations must be 16-bit precision, "
-            f"output activations {out_num_bits_set} not allowed"
-        )
+        if not (len(num_bits_set) == 0 or num_bits_set == {16}):

Collaborator (review comment on the line above):
Suggested change (an equivalent rewrite; see the check after the diff):
-        if not (len(num_bits_set) == 0 or num_bits_set == {16}):
+        if (len(num_bits_set) != 0 and num_bits_set != {16}):

+            warnings.warn(
+                "A strategy including activation quantization was detected. "
+                "AWQ was originally intended for weight-only quantization. "
+                "Lower-precision activations are an experimental feature, and "
+                "overall performance may be poor. If it is, consider using "

Collaborator: @HDCharles I think you reported that AWQ activation quantization is supposed to perform decently?

Collaborator: I've definitely seen people do it, but I think I was echoing the same sentiment as here: the paper is entirely weight-only quantization oriented, which is odd for an equalization technique.

+                "`W4A16` or `W4A16_ASYM` quantization schemes instead."
+            )

         return model
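
For reference on the suggested change above: the reviewer's `if (len(num_bits_set) != 0 and num_bits_set != {16})` and the PR's `if not (len(num_bits_set) == 0 or num_bits_set == {16})` are equivalent by De Morgan's law, so the suggestion is purely stylistic. A quick standalone check in plain Python (the sample sets below are arbitrary):

# De Morgan's law: not (A or B) == (not A) and (not B).
# Either guard fires exactly when some activation bit-width other than 16 is configured.
for num_bits_set in [set(), {16}, {8}, {4, 16}]:
    original = not (len(num_bits_set) == 0 or num_bits_set == {16})
    suggested = len(num_bits_set) != 0 and num_bits_set != {16}
    assert original == suggested, num_bits_set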

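
The warning (and the review thread) steer users toward weight-only schemes. As a rough illustration of that recommendation, here is a hedged sketch of running AWQ with the `W4A16_ASYM` scheme through llm-compressor's `oneshot` entry point; the model id, dataset, and calibration settings are placeholders, and the exact keyword arguments may differ between llm-compressor versions.

# Sketch only: weight-only AWQ (4-bit weights, activations left at 16-bit), the path
# the new warning recommends. Model, dataset, and calibration values are placeholders.
from llmcompressor import oneshot
from llmcompressor.modifiers.awq import AWQModifier

recipe = [
    AWQModifier(
        targets=["Linear"],   # quantize Linear layer weights
        scheme="W4A16_ASYM",  # asymmetric 4-bit weights, 16-bit activations
        ignore=["lm_head"],   # keep the output head unquantized
    )
]

oneshot(
    model="meta-llama/Llama-3.2-1B-Instruct",  # placeholder model id
    dataset="open_platypus",                   # placeholder calibration dataset
    recipe=recipe,
    max_seq_length=512,
    num_calibration_samples=256,
)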