Skip to content

Commit 728696d

Browse files
Le Yao authored and committed
Update the deployment to get better performance
Signed-off-by: Le Yao <le.yao@intel.com>
1 parent fb5a89e commit 728696d

File tree

2 files changed: 20 additions (+20) and 15 deletions (-15)

2 files changed: 20 additions (+20) and 15 deletions (-15)

deployment/cse-aigc-worker-amx.yaml

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ apiVersion: apps/v1
22
kind: Deployment
33
metadata:
44
name: cse-aigc-model-worker-amx
5-
namespace: amx
5+
namespace: aigc-amx
66
spec:
77
replicas: 1
88
selector:
@@ -16,6 +16,9 @@ spec:
1616
containers:
1717
- name: cse-aigc-model-worker-amx
1818
image: gar-registry.caas.intel.com/cpio/cnagc-fastchat-k8s:latest
19+
resources:
20+
limits:
21+
cpu: "32"
1922
ports:
2023
- containerPort: 8000
2124
name: http
@@ -25,31 +28,30 @@ spec:
2528
protocol: TCP
2629
env:
2730
- name: MODEL_NAME
28-
value: "vicuna-7b-v1.3"
29-
- name: ATEN_CPU_CAPABILITY
30-
value: "amx"
31+
#value: "vicuna-7b-v1.3"
32+
value: "Llama-2-7b-chat-hf-sharded-bf16"
3133
- name: CPU_ISA
3234
value: "amx"
3335
- name: DEPLOY_TYPE
3436
value: "model"
3537
- name: FASTCHAT_ROOT
3638
value: "/fastchat"
3739
- name: MODEL_WORKER_SVC
38-
value: cse-aigc-model-worker-amx.amx.svc.cluster.local
40+
value: cse-aigc-model-worker-amx.aigc-amx.svc.cluster.local
3941
- name: CONTROLLER_SVC
4042
value: cse-aigc-controller.default.svc.cluster.local
4143
- name: MODEL_WORKER_PORT
4244
value: "21003"
4345
- name: OMP_NUM_THREADS
44-
value: "62"
46+
value: "32"
4547
command: ["/entrypoint.sh"]
4648

4749
---
4850
apiVersion: v1
4951
kind: Service
5052
metadata:
5153
name: cse-aigc-model-worker-amx
52-
namespace: amx
54+
namespace: aigc-amx
5355
labels:
5456
app: cse-aigc-model-worker-amx
5557
spec:

deployment/cse-aigc-worker-non.yaml

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ apiVersion: apps/v1
22
kind: Deployment
33
metadata:
44
name: cse-aigc-model-worker-non
5-
namespace: non
5+
namespace: aigc-non
66
spec:
77
replicas: 1
88
selector:
@@ -16,39 +16,42 @@ spec:
1616
containers:
1717
- name: cse-aigc-model-worker-non
1818
image: gar-registry.caas.intel.com/cpio/cnagc-fastchat-k8s:latest
19+
resources:
20+
limits:
21+
cpu: "32"
1922
ports:
2023
- containerPort: 8000
2124
name: http
2225
protocol: TCP
23-
- containerPort: 21002
26+
- containerPort: 21003
2427
name: model
2528
protocol: TCP
2629
env:
2730
- name: MODEL_NAME
28-
value: "vicuna-7b-v1.3"
29-
#value: "opt-1.3b-bf16-8b-samples"
30-
- name: ATEN_CPU_CAPABILITY
31+
#value: "vicuna-7b-v1.3"
32+
value: "Llama-2-7b-chat-hf-sharded-bf16"
33+
- name: CPU_ISA
3134
value: "avx2"
3235
- name: DEPLOY_TYPE
3336
value: "model"
3437
- name: FASTCHAT_ROOT
3538
value: "/fastchat"
3639
- name: MODEL_WORKER_SVC
37-
value: cse-aigc-model-worker-non.non.svc.cluster.local
40+
value: cse-aigc-model-worker-non.aigc-non.svc.cluster.local
3841
- name: MODEL_WORKER_PORT
3942
value: "21002"
4043
- name: CONTROLLER_SVC
4144
value: cse-aigc-controller.default.svc.cluster.local
4245
- name: OMP_NUM_THREADS
43-
value: "62"
46+
value: "32"
4447
command: ["/entrypoint.sh"]
4548

4649
---
4750
apiVersion: v1
4851
kind: Service
4952
metadata:
5053
name: cse-aigc-model-worker-non
51-
namespace: non
54+
namespace: aigc-non
5255
labels:
5356
app: cse-aigc-model-worker-non
5457
spec:

0 commit comments

Comments
 (0)