Skip to content

Commit c7f8aa6

Browse files
committed
issue/170: gptq
1 parent 1a4cfb9 commit c7f8aa6

File tree

9 files changed

+885
-0
lines changed

9 files changed

+885
-0
lines changed

include/infinicore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ typedef enum {
7070
INFINI_DTYPE_C64 = 17,
7171
INFINI_DTYPE_C128 = 18,
7272
INFINI_DTYPE_BF16 = 19,
73+
INFINI_DTYPE_I4 = 20,
7374
} infiniDtype_t;
7475

7576
#endif // __INFINICORE_API_H__

include/infiniop.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "infiniop/ops/expand.h"
1111
#include "infiniop/ops/gemm.h"
1212
#include "infiniop/ops/global_avg_pool.h"
13+
#include "infiniop/ops/matmul_gptq.h"
1314
#include "infiniop/ops/max_pool.h"
1415
#include "infiniop/ops/mlp.h"
1516
#include "infiniop/ops/mul.h"

include/infiniop/ops/matmul_gptq.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#ifndef __INFINIOP_MATMUL_GPTQ_API_H__
2+
#define __INFINIOP_MATMUL_GPTQ_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
/* Handle type for this operator: a plain pointer to InfiniopDescriptor.
 * The typedef hides the pointer, so values of this type are passed by value
 * and `infiniopMatmulGptqDescriptor_t *` is a pointer-to-pointer. */
typedef InfiniopDescriptor *infiniopMatmulGptqDescriptor_t;
7+
8+
/**
 * Exported C entry point; judging by its name, it builds a MatmulGptq
 * operator descriptor (implementation not visible in this header).
 *
 * @param handle               Library handle (infiniopHandle_t).
 * @param desc_ptr             Pointer to a descriptor handle; presumably
 *                             receives the new descriptor -- TODO confirm.
 * @param c_desc               Tensor descriptor for `c` (presumably the output).
 * @param a_desc               Tensor descriptor for `a` (presumably the activation input).
 * @param packed_weights_desc  Tensor descriptor for the packed weights.
 * @param b_scale_desc         Tensor descriptor for the weight scales.
 * @param zero_desc            Tensor descriptor for the zero points.
 * @return infiniStatus_t status code.
 *
 * NOTE(review): expected dtypes/shapes of each descriptor are not stated
 * here; confirm against the implementation.
 */
__C __export infiniStatus_t infiniopCreateMatmulGptqDescriptor(infiniopHandle_t handle,
9+
infiniopMatmulGptqDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t c_desc,
11+
infiniopTensorDescriptor_t a_desc,
12+
infiniopTensorDescriptor_t packed_weights_desc,
13+
infiniopTensorDescriptor_t b_scale_desc,
14+
infiniopTensorDescriptor_t zero_desc);
15+
16+
/**
 * Exported C entry point taking a descriptor and a `size_t *`.
 * Presumably stores the workspace size required by the compute calls into
 * `*size` -- unit (bytes?) to be confirmed against the implementation.
 *
 * @param desc  MatmulGptq descriptor handle.
 * @param size  Pointer to a size_t (presumably an output parameter).
 * @return infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopGetMatmulGptqWorkspaceSize(infiniopMatmulGptqDescriptor_t desc, size_t *size);
17+
18+
/**
 * Exported C entry point operating on a MatmulGptq descriptor.
 * `packed_weights`, `b_scale` and `zero` are non-const while `a` and `b`
 * are const, which suggests this call produces the quantized weights,
 * scales and zero points from `a` and `b` -- TODO confirm.
 *
 * NOTE(review): the name has no `Gptq` infix, unlike the other four entry
 * points declared in this header. Also, `b` has no matching tensor
 * descriptor in infiniopCreateMatmulGptqDescriptor's parameter list.
 *
 * @param desc            MatmulGptq descriptor handle.
 * @param workspace       Caller-provided workspace pointer.
 * @param workspace_size  Size of `workspace`; presumably in bytes.
 * @param packed_weights  Non-const buffer (presumably output).
 * @param b_scale         Non-const buffer (presumably output).
 * @param zero            Non-const buffer (presumably output).
 * @param a               Read-only buffer.
 * @param b               Read-only buffer.
 * @param stream          Opaque stream pointer; meaning is backend-specific
 *                        and not visible here.
 * @return infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopMatmulQuant(infiniopMatmulGptqDescriptor_t desc,
19+
void *workspace,
20+
size_t workspace_size,
21+
void *packed_weights,
22+
void *b_scale,
23+
void *zero,
24+
const void *a,
25+
const void *b,
26+
void *stream);
27+
28+
/**
 * Exported C entry point operating on a MatmulGptq descriptor.
 * `c` is non-const and `a` is const, so `c` is presumably the result of
 * multiplying `a` by the quantized weights -- TODO confirm.
 *
 * NOTE(review): `packed_weights`, `b_scale` and `zero` are declared
 * non-const here; if this call only reads them, `const void *` would
 * document that, but changing it would alter the exported signature.
 *
 * @param desc            MatmulGptq descriptor handle.
 * @param workspace       Caller-provided workspace pointer.
 * @param workspace_size  Size of `workspace`; presumably in bytes.
 * @param c               Non-const buffer (presumably output).
 * @param a               Read-only buffer.
 * @param packed_weights  Packed weight buffer.
 * @param b_scale         Weight scale buffer.
 * @param zero            Zero-point buffer.
 * @param stream          Opaque stream pointer; meaning is backend-specific
 *                        and not visible here.
 * @return infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopMatmulGptq(infiniopMatmulGptqDescriptor_t desc,
29+
void *workspace,
30+
size_t workspace_size,
31+
void *c,
32+
const void *a,
33+
void *packed_weights,
34+
void *b_scale,
35+
void *zero,
36+
void *stream);
37+
38+
/**
 * Exported C entry point taking a descriptor handle by value.
 * Judging by its name, releases a descriptor obtained from
 * infiniopCreateMatmulGptqDescriptor -- implementation not visible here.
 *
 * @param desc  MatmulGptq descriptor handle.
 * @return infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopDestroyMatmulGptqDescriptor(infiniopMatmulGptqDescriptor_t desc);
39+
40+
#endif

0 commit comments

Comments
 (0)