2 files changed: +20 -15 lines changed
Original file line number Diff line number Diff line change @@ -2,7 +2,7 @@ apiVersion: apps/v1
22kind : Deployment
33metadata :
44 name : cse-aigc-model-worker-amx
5- namespace : amx
5+ namespace : aigc-amx
66spec :
77 replicas : 1
88 selector :
1616 containers :
1717 - name : cse-aigc-model-worker-amx
1818 image : gar-registry.caas.intel.com/cpio/cnagc-fastchat-k8s:latest
19+ resources :
20+ limits :
21+ cpu : " 32"
1922 ports :
2023 - containerPort : 8000
2124 name : http
@@ -25,31 +28,30 @@ spec:
2528 protocol : TCP
2629 env :
2730 - name : MODEL_NAME
28- value : " vicuna-7b-v1.3"
29- - name : ATEN_CPU_CAPABILITY
30- value : " amx"
31+ # value: "vicuna-7b-v1.3"
32+ value : " Llama-2-7b-chat-hf-sharded-bf16"
3133 - name : CPU_ISA
3234 value : " amx"
3335 - name : DEPLOY_TYPE
3436 value : " model"
3537 - name : FASTCHAT_ROOT
3638 value : " /fastchat"
3739 - name : MODEL_WORKER_SVC
38- value : cse-aigc-model-worker-amx.amx.svc.cluster.local
40+ value : cse-aigc-model-worker-amx.aigc-amx.svc.cluster.local
3941 - name : CONTROLLER_SVC
4042 value : cse-aigc-controller.default.svc.cluster.local
4143 - name : MODEL_WORKER_PORT
4244 value : " 21003"
4345 - name : OMP_NUM_THREADS
44- value : " 62 "
46+ value : " 32 "
4547 command : ["/entrypoint.sh"]
4648
4749---
4850apiVersion : v1
4951kind : Service
5052metadata :
5153 name : cse-aigc-model-worker-amx
52- namespace : amx
54+ namespace : aigc-amx
5355 labels :
5456 app : cse-aigc-model-worker-amx
5557spec :
Original file line number Diff line number Diff line change @@ -2,7 +2,7 @@ apiVersion: apps/v1
22kind : Deployment
33metadata :
44 name : cse-aigc-model-worker-non
5- namespace : non
5+ namespace : aigc-non
66spec :
77 replicas : 1
88 selector :
@@ -16,39 +16,42 @@ spec:
1616 containers :
1717 - name : cse-aigc-model-worker-non
1818 image : gar-registry.caas.intel.com/cpio/cnagc-fastchat-k8s:latest
19+ resources :
20+ limits :
21+ cpu : " 32"
1922 ports :
2023 - containerPort : 8000
2124 name : http
2225 protocol : TCP
23- - containerPort : 21002
26+ - containerPort : 21003
2427 name : model
2528 protocol : TCP
2629 env :
2730 - name : MODEL_NAME
28- value : " vicuna-7b-v1.3"
29- # value: "opt-1.3b-bf16-8b-samples "
30- - name : ATEN_CPU_CAPABILITY
31+ # value: "vicuna-7b-v1.3"
32+ value : " Llama-2-7b-chat-hf-sharded-bf16 "
33+ - name : CPU_ISA
3134 value : " avx2"
3235 - name : DEPLOY_TYPE
3336 value : " model"
3437 - name : FASTCHAT_ROOT
3538 value : " /fastchat"
3639 - name : MODEL_WORKER_SVC
37- value : cse-aigc-model-worker-non.non.svc.cluster.local
40+ value : cse-aigc-model-worker-non.aigc-non.svc.cluster.local
3841 - name : MODEL_WORKER_PORT
3942 value : " 21002"
4043 - name : CONTROLLER_SVC
4144 value : cse-aigc-controller.default.svc.cluster.local
4245 - name : OMP_NUM_THREADS
43- value : " 62 "
46+ value : " 32 "
4447 command : ["/entrypoint.sh"]
4548
4649---
4750apiVersion : v1
4851kind : Service
4952metadata :
5053 name : cse-aigc-model-worker-non
51- namespace : non
54+ namespace : aigc-non
5255 labels :
5356 app : cse-aigc-model-worker-non
5457spec :
You can’t perform that action at this time.
0 commit comments