Skip to content

Commit 2c512d5

Browse files
committed
issue/170: quantize_gptq
1 parent 0c80339 commit 2c512d5

File tree

13 files changed

+3329
-0
lines changed

13 files changed

+3329
-0
lines changed

include/infinicore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ typedef enum {
7070
INFINI_DTYPE_C64 = 17,
7171
INFINI_DTYPE_C128 = 18,
7272
INFINI_DTYPE_BF16 = 19,
73+
INFINI_DTYPE_I4 = 20,
7374
} infiniDtype_t;
7475

7576
#endif // __INFINICORE_API_H__

include/infiniop.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "infiniop/ops/max_pool.h"
1414
#include "infiniop/ops/mlp.h"
1515
#include "infiniop/ops/mul.h"
16+
#include "infiniop/ops/quantize_gptq.h"
1617
#include "infiniop/ops/random_sample.h"
1718
#include "infiniop/ops/rearrange.h"
1819
#include "infiniop/ops/relu.h"
include/infiniop/ops/quantize_gptq.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#ifndef __INFINIOP_QUANTIZE_GPTQ_API_H__
2+
#define __INFINIOP_QUANTIZE_GPTQ_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
/** Handle type for the QuantizeGPTQ operator: a pointer to an InfiniopDescriptor. */
typedef InfiniopDescriptor *infiniopQuantizeGPTQDescriptor_t;
7+
8+
/**
 * Declares the creation entry point for a QuantizeGPTQ operator descriptor.
 *
 * @param handle               Library handle.
 * @param desc_ptr             Pointer to a descriptor handle; presumably receives
 *                             the newly created descriptor (implementation not
 *                             visible in this header).
 * @param c_desc               Tensor descriptor for `c`.
 * @param a_desc               Tensor descriptor for `a`.
 * @param packed_weights_desc  Tensor descriptor for `packed_weights`.
 * @param b_scale_desc         Tensor descriptor for `b_scale`.
 * @param zero_desc            Tensor descriptor for `zero`.
 * @return An infiniStatus_t status code.
 *
 * NOTE(review): infiniopQuantizeGPTQ takes a `b` data pointer, but no `b_desc`
 * is passed here; presumably its layout is derived from the other descriptors.
 * Confirm against the implementation.
 */
__C __export infiniStatus_t infiniopCreateQuantizeGPTQDescriptor(infiniopHandle_t handle,
9+
infiniopQuantizeGPTQDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t c_desc,
11+
infiniopTensorDescriptor_t a_desc,
12+
infiniopTensorDescriptor_t packed_weights_desc,
13+
infiniopTensorDescriptor_t b_scale_desc,
14+
infiniopTensorDescriptor_t zero_desc);
15+
16+
/**
 * Declares the workspace-size query for a QuantizeGPTQ descriptor.
 *
 * @param desc  Descriptor to query.
 * @param size  Out-pointer; presumably receives the required workspace size
 *              (unit assumed to be bytes; TODO confirm).
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopGetQuantizeGPTQWorkspaceSize(infiniopQuantizeGPTQDescriptor_t desc, size_t *size);
17+
18+
/**
 * Declares the quantization entry point of the QuantizeGPTQ operator.
 *
 * By const-qualification, `a` and `b` are read-only, while `packed_weights`,
 * `b_scale` and `zero` are writable; presumably these three are the outputs
 * of quantizing `b` (confirm against the implementation).
 *
 * @param desc            Descriptor for this operator.
 * @param workspace       Scratch buffer supplied by the caller.
 * @param workspace_size  Size of `workspace`; presumably must be at least the
 *                        value reported by infiniopGetQuantizeGPTQWorkspaceSize.
 * @param packed_weights  Writable buffer for the packed weights.
 * @param b_scale         Writable buffer for the scales.
 * @param zero            Writable buffer for the zero points.
 * @param a               Read-only input buffer.
 * @param b               Read-only input buffer.
 * @param stream          Opaque stream/queue pointer (backend-specific;
 *                        semantics not visible in this header).
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopQuantizeGPTQ(infiniopQuantizeGPTQDescriptor_t desc,
19+
void *workspace,
20+
size_t workspace_size,
21+
void *packed_weights,
22+
void *b_scale,
23+
void *zero,
24+
const void *a,
25+
const void *b,
26+
void *stream);
27+
28+
/**
 * Declares the linear (matmul) entry point that consumes GPTQ-quantized weights.
 *
 * By const-qualification, `c` is writable and `a` is read-only; presumably
 * `c` is the result computed from `a` and the quantized weights.
 *
 * @param desc            Descriptor for this operator.
 * @param workspace       Scratch buffer supplied by the caller.
 * @param workspace_size  Size of `workspace`.
 * @param c               Writable output buffer.
 * @param a               Read-only input buffer.
 * @param packed_weights  Packed weights buffer.
 * @param b_scale         Scales buffer.
 * @param zero            Zero-points buffer.
 * @param stream          Opaque stream/queue pointer (backend-specific;
 *                        semantics not visible in this header).
 * @return An infiniStatus_t status code.
 *
 * NOTE(review): `packed_weights`, `b_scale` and `zero` are non-const here even
 * though they look like inputs to this call; if the implementation never
 * writes through them, `const void *` would express that. Confirm.
 */
__C __export infiniStatus_t infiniopQuantizeLinearGPTQ(infiniopQuantizeGPTQDescriptor_t desc,
29+
void *workspace,
30+
size_t workspace_size,
31+
void *c,
32+
const void *a,
33+
void *packed_weights,
34+
void *b_scale,
35+
void *zero,
36+
void *stream);
37+
38+
/**
 * Declares the teardown entry point for a QuantizeGPTQ descriptor.
 *
 * @param desc  Descriptor to destroy; presumably must not be used afterwards
 *              (ownership semantics not visible in this header).
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopDestroyQuantizeGPTQDescriptor(infiniopQuantizeGPTQDescriptor_t desc);
39+
40+
#endif

0 commit comments

Comments
 (0)