@@ -22,22 +22,22 @@ async def test_model_single_request(tgi_service):
     greedy_expectations = {
         "llama": " and how does it work?\nDeep learning is a subset of machine learning that uses artificial",
         "qwen2": " - Deep Learning is a subset of Machine Learning that involves the use of artificial neural networks",
-        "granite": "\n\nDeep learning is a subset of machine learning techniques based on artificial neural networks ",
-        "qwen3": " A Deep Learning is a subset of machine learning that uses neural networks with multiple layers to ",
+        "granite": "\n\nDeep Learning is a subset of machine learning that is inspired by the structure and ",
+        "qwen3": " And Why Should You Care?\n\nDeep learning is a subset of machine learning that uses neural",
         "phi3": "\n\nDeep learning is a subfield of machine learning that focuses on creating",
     }
     assert response.generated_text == greedy_expectations[service_name]

     # Greedy bounded with input
-    response = await tgi_service.client.text_generation(
+    greedy_response = await tgi_service.client.text_generation(
         "What is Deep Learning?",
         max_new_tokens=17,
         return_full_text=True,
         details=True,
         decoder_input_details=True,
     )
-    assert response.details.generated_tokens == 17
-    assert response.generated_text == prompt + greedy_expectations[service_name]
+    assert greedy_response.details.generated_tokens == 17
+    assert greedy_response.generated_text == prompt + greedy_expectations[service_name]

     # Sampling
     response = await tgi_service.client.text_generation(
@@ -52,16 +52,12 @@ async def test_model_single_request(tgi_service):
     # The response must be different
     assert not response.startswith(greedy_expectations[service_name])

-    # Sampling with stop sequence (using one of the words returned from the previous test)
-    stop_sequence = response.split(" ")[-5]
+    # Greedy with stop sequence (using one of the words returned from the previous test)
+    stop_sequence = greedy_response.generated_text.split(" ")[-5]
     response = await tgi_service.client.text_generation(
         "What is Deep Learning?",
-        do_sample=True,
-        top_k=50,
-        top_p=0.9,
-        repetition_penalty=1.2,
+        do_sample=False,
         max_new_tokens=128,
-        seed=42,
         stop_sequences=[stop_sequence],
     )
     assert response.endswith(stop_sequence)
@@ -81,8 +77,8 @@ async def test_model_multiple_requests(tgi_service, neuron_generate_load):
     expectations = {
         "llama": "Deep learning is a subset of machine learning that uses artificial",
         "qwen2": "Deep Learning is a subset of Machine Learning that involves",
-        "granite": "Deep learning is a subset of machine learning techniques ",
-        "qwen3": "Deep Learning is a subset of machine learning that uses neural networks ",
+        "granite": "Deep Learning is a subset of machine learning that is inspired by the structure and ",
+        "qwen3": " And Why Should You Care?\n\nDeep learning is a subset of machine learning that uses neural",
         "phi3": "Deep learning is a subfield of machine learning that focuses on creating",
     }
     expected = expectations[tgi_service.client.service_name]
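
For readers reproducing the new greedy stop-sequence check outside the test harness, a minimal sketch is shown below. It assumes the tgi_service.client fixture wraps huggingface_hub's AsyncInferenceClient; the endpoint URL and the stop word are placeholders, not values taken from this diff.

import asyncio

from huggingface_hub import AsyncInferenceClient


async def main():
    # Hypothetical local TGI endpoint; adjust to the deployed service URL.
    client = AsyncInferenceClient("http://127.0.0.1:8080")
    # Greedy decoding (do_sample=False) makes the output deterministic, so a word
    # known to appear in the greedy completion can be used as a stop sequence.
    response = await client.text_generation(
        "What is Deep Learning?",
        do_sample=False,
        max_new_tokens=128,
        stop_sequences=["learning"],  # placeholder stop word
    )
    # TGI includes the stop sequence at the end of the returned text.
    assert response.endswith("learning")


asyncio.run(main())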