
Commit 13dc664

Document Sync by Tina
1 parent 07ad203 commit 13dc664

5 files changed: +12 −12 lines changed

docs/stable/getting_started/docker_quickstart.md

Lines changed: 2 additions & 2 deletions

@@ -95,7 +95,7 @@ Open a new terminal, activate the `sllm` environment, and set the `LLM_SERVER_URL
 
 ```bash
 conda activate sllm
-export LLM_SERVER_URL=http://localhost:8343/
+export LLM_SERVER_URL=http://127.0.0.1:8343/
 ```
 
 Deploy a model to the ServerlessLLM server using the `sllm-cli`:

@@ -117,7 +117,7 @@ INFO 08-01 07:39:00 deploy.py:49] Model registered successfully.
 
 Now, you can query the model by any OpenAI API client. For example, you can use the following Python code to query the model:
 ```bash
-curl http://localhost:8343/v1/chat/completions \
+curl http://127.0.0.1:8343/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
     "model": "facebook/opt-1.3b",

docs/stable/getting_started/installation.md

Lines changed: 2 additions & 2 deletions

@@ -24,13 +24,13 @@ cd ServerlessLLM
 conda create -n sllm python=3.10 -y
 conda activate sllm
 pip install -e .
-pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ serverless_llm_store==0.0.1.dev4
+pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ serverless_llm_store==0.0.1.dev5
 
 # worker node
 conda create -n sllm-worker python=3.10 -y
 conda activate sllm-worker
 pip install -e ".[worker]"
-pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ serverless_llm_store==0.0.1.dev4
+pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ serverless_llm_store==0.0.1.dev5
 ```
 
 # vLLM Patch
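Since this commit pins the store package to `0.0.1.dev5` instead of `0.0.1.dev4`, a quick way to confirm which build actually landed in an environment is to read the installed distribution metadata; a minimal sketch using only the standard library:

```python
# Minimal sketch: print the installed serverless_llm_store version (expect 0.0.1.dev5).
from importlib.metadata import version

print(version("serverless_llm_store"))
```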

docs/stable/getting_started/quickstart.md

Lines changed: 2 additions & 2 deletions

@@ -19,7 +19,7 @@ ray start --head --port=6379 --num-cpus=4 --num-gpus=0 \
 In a new terminal, start the worker node:
 ```bash
 conda activate sllm-worker
-ray start --address=localhost:6379 --num-cpus=4 --num-gpus=2 \
+ray start --address=0.0.0.0:6379 --num-cpus=4 --num-gpus=2 \
   --resources='{"worker_node": 1, "worker_id_0": 1}' --block
 ```
 

@@ -67,7 +67,7 @@ sllm-cli deploy --model facebook/opt-1.3b
 
 Now, you can query the model by any OpenAI API client. For example, you can use the following Python code to query the model:
 ```bash
-curl http://localhost:8343/v1/chat/completions \
+curl http://127.0.0.1:8343/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
     "model": "facebook/opt-1.3b",

docs/stable/serve/storage_aware_scheduling.md

Lines changed: 3 additions & 3 deletions

@@ -136,7 +136,7 @@ echo '{
 
 ```bash
 conda activate sllm
-export LLM_SERVER_URL=http://localhost:8343/
+export LLM_SERVER_URL=http://127.0.0.1:8343/
 
 sllm-cli deploy --config config-opt-2.7b.json
 sllm-cli deploy --config config-opt-1.3b.json

@@ -145,7 +145,7 @@ sllm-cli deploy --config config-opt-1.3b.json
 3. Verify the deployment.
 
 ```bash
-curl http://localhost:8343/v1/chat/completions \
+curl http://127.0.0.1:8343/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
     "model": "opt-2.7b",

@@ -155,7 +155,7 @@ curl http://localhost:8343/v1/chat/completions \
     ]
   }'
 
-curl http://localhost:8343/v1/chat/completions \
+curl http://127.0.0.1:8343/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
     "model": "opt-1.3b",

docs/stable/store/quickstart.md

Lines changed: 3 additions & 3 deletions

@@ -26,7 +26,7 @@ conda activate sllm-store
 
 ### Install with pip
 ```bash
-pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ serverless_llm_store==0.0.1.dev4
+pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ serverless_llm_store==0.0.1.dev5
 ```
 
 ### Install from source

@@ -69,7 +69,7 @@ save_model(model, './models/facebook/opt-1.3b')
 2. Launch the checkpoint store server in a separate process:
 ```bash
 # 'mem_pool_size' is the maximum size of the memory pool in GB. It should be larger than the model size.
-sllm-store-server --storage_path $PWD/models --mem_pool_size 32
+sllm-store-server --storage_path $PWD/models --mem_pool_size 4
 ```
 
 <!-- Running the server using a container:

@@ -224,7 +224,7 @@ After downloading the model, you can launch the checkpoint store server and load
 2. Launch the checkpoint store server in a separate process:
 ```bash
 # 'mem_pool_size' is the maximum size of the memory pool in GB. It should be larger than the model size.
-sllm-store-server --storage_path $PWD/models --mem_pool_size 32
+sllm-store-server --storage_path $PWD/models --mem_pool_size 4
 ```
 
 3. Load the model in vLLM:
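The hunks above reference `save_model(model, './models/facebook/opt-1.3b')`, and the new `--mem_pool_size 4` fits an OPT-1.3B checkpoint (roughly 2.6 GB in fp16) much more tightly than the old 32 GB pool while still satisfying the "larger than the model size" rule in the comment. A minimal sketch of producing that checkpoint directory, assuming `save_model` is importable from the `serverless_llm_store` package installed above:

```python
# Minimal sketch: save facebook/opt-1.3b in the ServerlessLLM store format.
# Assumes `save_model` is exported by the serverless_llm_store package.
from transformers import AutoModelForCausalLM
from serverless_llm_store import save_model

model = AutoModelForCausalLM.from_pretrained("facebook/opt-1.3b", torch_dtype="auto")
save_model(model, "./models/facebook/opt-1.3b")
```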
