Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/infinicore.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ typedef enum {
INFINI_DTYPE_C64 = 17,
INFINI_DTYPE_C128 = 18,
INFINI_DTYPE_BF16 = 19,

} infiniDtype_t;

#endif // __INFINICORE_API_H__
1 change: 1 addition & 0 deletions include/infiniop.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "infiniop/ops/conv.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/mul.h"
#include "infiniop/ops/quantize_gptq.h"
#include "infiniop/ops/random_sample.h"
#include "infiniop/ops/rearrange.h"
#include "infiniop/ops/relu.h"
Expand Down
40 changes: 40 additions & 0 deletions include/infiniop/ops/quantize_gptq.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef __INFINIOP_QUANTIZE_GPTQ_API_H__
#define __INFINIOP_QUANTIZE_GPTQ_API_H__

#include "../operator_descriptor.h"

/**
 * Handle type for the GPTQ quantization operator: a pointer to an
 * InfiniopDescriptor. It is the `desc` argument of every
 * infiniop*QuantizeGPTQ* function declared in this header.
 */
typedef InfiniopDescriptor *infiniopQuantizeGPTQDescriptor_t;

/**
 * Creates a GPTQ quantization operator descriptor from a library handle and
 * five tensor descriptors.
 *
 * @param handle               Library handle the descriptor is created with.
 * @param desc_ptr             Pointer to a descriptor handle; presumably receives
 *                             the newly created descriptor on success (TODO confirm
 *                             against the implementation).
 * @param c_desc               Tensor descriptor for `c` (cf. the `c` argument of
 *                             infiniopQuantizeLinearGPTQ).
 * @param a_desc               Tensor descriptor for `a`.
 * @param packed_weights_desc  Tensor descriptor for `packed_weights`.
 * @param b_scale_desc         Tensor descriptor for `b_scale`.
 * @param zero_desc            Tensor descriptor for `zero`.
 * @return An infiniStatus_t status code.
 *
 * NOTE(review): there is no descriptor for the `b` buffer that
 * infiniopQuantizeGPTQ takes; presumably its layout is derived from the other
 * descriptors -- confirm.
 */
__C __export infiniStatus_t infiniopCreateQuantizeGPTQDescriptor(infiniopHandle_t handle,
infiniopQuantizeGPTQDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t packed_weights_desc,
infiniopTensorDescriptor_t b_scale_desc,
infiniopTensorDescriptor_t zero_desc);

/**
 * Queries the workspace size associated with a GPTQ quantization descriptor.
 *
 * @param desc  Descriptor to query.
 * @param size  Output pointer; presumably receives the required workspace size
 *              in bytes (unit not visible in this header -- TODO confirm).
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopGetQuantizeGPTQWorkspaceSize(infiniopQuantizeGPTQDescriptor_t desc, size_t *size);

/**
 * Runs the GPTQ quantization step described by `desc`.
 *
 * The const-qualification of the parameters suggests that `a` and `b` are
 * read-only inputs and that `packed_weights`, `b_scale` and `zero` are written
 * by the call (presumably the quantized weights with their scales and zero
 * points -- TODO confirm against the implementation).
 *
 * @param desc            Descriptor for this operator.
 * @param workspace       Caller-provided scratch buffer.
 * @param workspace_size  Size of `workspace`; presumably must be at least the
 *                        value reported by infiniopGetQuantizeGPTQWorkspaceSize.
 * @param packed_weights  Writable buffer for the packed weights.
 * @param b_scale         Writable buffer for the scales.
 * @param zero            Writable buffer for the zero points.
 * @param a               Read-only input buffer.
 * @param b               Read-only input buffer. NOTE(review): no matching
 *                        tensor descriptor is passed to
 *                        infiniopCreateQuantizeGPTQDescriptor -- verify how its
 *                        shape and dtype are determined.
 * @param stream          Opaque execution-stream handle; presumably
 *                        backend-specific.
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopQuantizeGPTQ(infiniopQuantizeGPTQDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *packed_weights,
void *b_scale,
void *zero,
const void *a,
const void *b,
void *stream);

/**
 * Runs the linear (matrix-multiply) step that uses GPTQ-quantized weights.
 *
 * `c` is the only tensor buffer whose role as an output is suggested by the
 * signature; `a` is a read-only input.
 *
 * @param desc            Descriptor for this operator.
 * @param workspace       Caller-provided scratch buffer.
 * @param workspace_size  Size of `workspace`; presumably must be at least the
 *                        value reported by infiniopGetQuantizeGPTQWorkspaceSize.
 * @param c               Writable result buffer.
 * @param a               Read-only input buffer.
 * @param packed_weights  Packed weights; presumably those produced by
 *                        infiniopQuantizeGPTQ.
 * @param b_scale         Scales accompanying `packed_weights`.
 * @param zero            Zero points accompanying `packed_weights`.
 * @param stream          Opaque execution-stream handle; presumably
 *                        backend-specific.
 * @return An infiniStatus_t status code.
 *
 * NOTE(review): `packed_weights`, `b_scale` and `zero` are declared non-const
 * although they look like inputs here -- confirm whether the implementation
 * writes to them; if not, they could be const-qualified together with the
 * definition.
 */
__C __export infiniStatus_t infiniopQuantizeLinearGPTQ(infiniopQuantizeGPTQDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
const void *a,
void *packed_weights,
void *b_scale,
void *zero,
void *stream);

/**
 * Destroys a GPTQ quantization descriptor.
 *
 * @param desc  Descriptor to destroy; presumably one obtained from
 *              infiniopCreateQuantizeGPTQDescriptor, and not usable afterwards.
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopDestroyQuantizeGPTQDescriptor(infiniopQuantizeGPTQDescriptor_t desc);

#endif
Loading
Loading