Skip to content

Commit 220051e

Browse files
authored
[Inference Providers] Snippets: prefer the namespace/model:provider syntax for conversational (#1830)
# TL;DR For the conversational task, when using the Inference Client libraries, prefer the `namespace/model:provider` syntax over explicitly specifying the provider in the parameters. Also, don't specify `provider="auto"`, because it's the default.
1 parent e5089f8 commit 220051e

File tree

36 files changed

+36
-53
lines changed

36 files changed

+36
-53
lines changed

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ interface TemplateParams {
6060
importBase64?: boolean; // specific to snippetImportRequests
6161
importJson?: boolean; // specific to snippetImportRequests
6262
endpointUrl?: string;
63+
task?: InferenceTask;
64+
directRequest?: boolean;
6365
}
6466

6567
// Helpers to find + load templates
@@ -263,6 +265,8 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
263265
: providerModelId ?? model.id,
264266
billTo: opts?.billTo,
265267
endpointUrl: opts?.endpointUrl,
268+
task,
269+
directRequest: !!opts?.directRequest,
266270
};
267271

268272
/// Iterate over clients => check if a snippet exists => generate

packages/inference/src/snippets/templates/js/huggingface.js/conversational.jinja

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@ const chatCompletion = await client.chatCompletion({
66
{% if endpointUrl %}
77
endpointUrl: "{{ endpointUrl }}",
88
{% endif %}
9+
{% if directRequest %}
910
provider: "{{ provider }}",
1011
model: "{{ model.id }}",
12+
{% else %}
13+
model: "{{ providerModelId }}",
14+
{% endif %}
1115
{{ inputs.asTsString }}
1216
}{% if billTo %}, {
1317
billTo: "{{ billTo }}",

packages/inference/src/snippets/templates/js/huggingface.js/conversationalStream.jinja

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ const stream = client.chatCompletionStream({
88
{% if endpointUrl %}
99
endpointUrl: "{{ endpointUrl }}",
1010
{% endif %}
11-
provider: "{{ provider }}",
12-
model: "{{ model.id }}",
11+
model: "{{ providerModelId }}",
1312
{{ inputs.asTsString }}
1413
}{% if billTo %}, {
1514
billTo: "{{ billTo }}",

packages/inference/src/snippets/templates/python/huggingface_hub/conversational.jinja

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
completion = client.chat.completions.create(
2+
{% if directRequest %}
23
model="{{ model.id }}",
4+
{% else %}
5+
model="{{ providerModelId }}",
6+
{% endif %}
37
{{ inputs.asPythonString }}
48
)
59

packages/inference/src/snippets/templates/python/huggingface_hub/conversationalStream.jinja

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
stream = client.chat.completions.create(
2-
model="{{ model.id }}",
2+
model="{{ providerModelId }}",
33
{{ inputs.asPythonString }}
44
stream=True,
55
)

packages/inference/src/snippets/templates/python/huggingface_hub/importInferenceClient.jinja

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ client = InferenceClient(
44
{% if endpointUrl %}
55
base_url="{{ baseUrl }}",
66
{% endif %}
7+
{% if task != "conversational" or directRequest %}
78
provider="{{ provider }}",
9+
{% endif %}
810
api_key="{{ accessToken }}",
911
{% if billTo %}
1012
bill_to="{{ billTo }}",

packages/tasks-gen/snippets-fixtures/bill-to-param/js/huggingface.js/0.hf-inference.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ import { InferenceClient } from "@huggingface/inference";
33
const client = new InferenceClient(process.env.HF_TOKEN);
44

55
const chatCompletion = await client.chatCompletion({
6-
provider: "hf-inference",
7-
model: "meta-llama/Llama-3.1-8B-Instruct",
6+
model: "meta-llama/Llama-3.1-8B-Instruct:hf-inference",
87
messages: [
98
{
109
role: "user",

packages/tasks-gen/snippets-fixtures/bill-to-param/python/huggingface_hub/0.hf-inference.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22
from huggingface_hub import InferenceClient
33

44
client = InferenceClient(
5-
provider="hf-inference",
65
api_key=os.environ["HF_TOKEN"],
76
bill_to="huggingface",
87
)
98

109
completion = client.chat.completions.create(
11-
model="meta-llama/Llama-3.1-8B-Instruct",
10+
model="meta-llama/Llama-3.1-8B-Instruct:hf-inference",
1211
messages=[
1312
{
1413
"role": "user",

packages/tasks-gen/snippets-fixtures/conversational-llm-custom-endpoint/js/huggingface.js/0.hf-inference.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ const client = new InferenceClient(process.env.API_TOKEN);
44

55
const chatCompletion = await client.chatCompletion({
66
endpointUrl: "http://localhost:8080/v1",
7-
provider: "hf-inference",
87
model: "meta-llama/Llama-3.1-8B-Instruct",
98
messages: [
109
{

packages/tasks-gen/snippets-fixtures/conversational-llm-custom-endpoint/python/huggingface_hub/0.hf-inference.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
client = InferenceClient(
55
base_url="http://localhost:8080/v1",
6-
provider="hf-inference",
76
api_key=os.environ["API_TOKEN"],
87
)
98

0 commit comments

Comments
 (0)