Skip to content

Commit 8948293

Browse files
committed
issue/170: GPTQ matmul now runs successfully on CPU
1 parent 34e1134 commit 8948293

File tree

6 files changed

+462
-385
lines changed

6 files changed

+462
-385
lines changed

include/infiniop/ops/matmul_gptq.h

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,25 +5,34 @@
5 5

6 6
typedef InfiniopDescriptor *infiniopMatmulGptqDescriptor_t;
7 7

8-
__C __export infiniStatus_t infiniopCreateMatmulGptqDescriptor(
9-
infiniopHandle_t handle,
10-
infiniopMatmulGptqDescriptor_t *desc_ptr,
11-
infiniopTensorDescriptor_t c_desc,
12-
infiniopTensorDescriptor_t a_desc,
13-
infiniopTensorDescriptor_t b_desc,
14-
infiniopTensorDescriptor_t b_scale_desc,
15-
infiniopTensorDescriptor_t zero_desc);
8+
__C __export infiniStatus_t infiniopCreateMatmulGptqDescriptor(infiniopHandle_t handle,
9+
infiniopMatmulGptqDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t c_desc,
11+
infiniopTensorDescriptor_t a_desc,
12+
infiniopTensorDescriptor_t packed_weights_desc,
13+
infiniopTensorDescriptor_t b_scale_desc,
14+
infiniopTensorDescriptor_t zero_desc);
16 15

17 16
__C __export infiniStatus_t infiniopGetMatmulGptqWorkspaceSize(infiniopMatmulGptqDescriptor_t desc, size_t *size);
18 17

18+
__C __export infiniStatus_t infiniopMatmulQuant(infiniopMatmulGptqDescriptor_t desc,
19+
void *workspace,
20+
size_t workspace_size,
21+
void *packed_weights,
22+
void *b_scale,
23+
void *zero,
24+
const void *a,
25+
const void *b,
26+
void *stream);
27+
19 28
__C __export infiniStatus_t infiniopMatmulGptq(infiniopMatmulGptqDescriptor_t desc,
20 29
void *workspace,
21 30
size_t workspace_size,
22 31
void *c,
23 32
const void *a,
24-
const void *b,
25-
const void *b_scale,
26-
const void *zero,
33+
void *packed_weights,
34+
void *b_scale,
35+
void *zero,
27 36
void *stream);
28 37

29 38
__C __export infiniStatus_t infiniopDestroyMatmulGptqDescriptor(infiniopMatmulGptqDescriptor_t desc);

0 commit comments

Comments
 (0)