@@ -700,32 +700,32 @@ export const LLM_USERNAMES: LLM2String = {
700700 "gpt-3.5-turbo" : "GPT-3.5" ,
701701 "gpt-3.5-turbo-16k" : "GPT-3.5-16k" ,
702702 "gpt-4-turbo-preview" : "GPT-4 Turbo 128k" ,
703- "gpt-4-turbo-preview-8k" : "GPT-4 Turbo 8k " ,
703+ "gpt-4-turbo-preview-8k" : "GPT-4 Turbo" ,
704704 "gpt-4-turbo" : "GPT-4 Turbo 128k" ,
705- "gpt-4-turbo-8k" : "GPT-4 Turbo 8k " ,
706- "gpt-4o" : "GPT-4 Omni 128k" ,
707- "gpt-4o-8k" : "GPT-4 Omni 8k " ,
705+ "gpt-4-turbo-8k" : "GPT-4 Turbo" ,
706+ "gpt-4o" : "GPT-4o 128k" ,
707+ "gpt-4o-8k" : "GPT-4o " ,
708708 "gpt-4o-mini" : "GPT-4o Mini 128k" ,
709- "gpt-4o-mini-8k" : "GPT-4o Mini 8k " ,
709+ "gpt-4o-mini-8k" : "GPT-4o Mini" ,
710710 "text-embedding-ada-002" : "Text Embedding Ada 002" , // TODO: this is for embeddings, should be moved to a different place
711711 "text-bison-001" : "PaLM 2" ,
712712 "chat-bison-001" : "PaLM 2" ,
713713 "gemini-pro" : "Gemini 1.0 Pro" ,
714714 "gemini-1.0-ultra" : "Gemini 1.0 Ultra" ,
715715 "gemini-1.5-pro" : "Gemini 1.5 Pro 1m" ,
716- "gemini-1.5-pro-8k" : "Gemini 1.5 Pro 8k " ,
717- "gemini-1.5-flash-8k" : "Gemini 1.5 Flash 8k " ,
716+ "gemini-1.5-pro-8k" : "Gemini 1.5 Pro" ,
717+ "gemini-1.5-flash-8k" : "Gemini 1.5 Flash" ,
718718 "mistral-small-latest" : "Mistral AI Small" ,
719719 "mistral-medium-latest" : "Mistral AI Medium" ,
720720 "mistral-large-latest" : "Mistral AI Large" ,
721- "claude-3-haiku" : "Claude 3 Haiku" ,
722- "claude-3-haiku-8k" : "Claude 3 Haiku 8k " ,
723- "claude-3-sonnet" : "Claude 3 Sonnet" ,
724- "claude-3-sonnet-4k" : "Claude 3 Sonnet 4k " ,
725- "claude-3-5-sonnet" : "Claude 3.5 Sonnet" ,
726- "claude-3-5-sonnet-4k" : "Claude 3.5 Sonnet 4k " ,
721+ "claude-3-haiku" : "Claude 3 Haiku 200k " ,
722+ "claude-3-haiku-8k" : "Claude 3 Haiku" ,
723+ "claude-3-sonnet" : "Claude 3 Sonnet 200k " ,
724+ "claude-3-sonnet-4k" : "Claude 3 Sonnet" ,
725+ "claude-3-5-sonnet" : "Claude 3.5 Sonnet 200k " ,
726+ "claude-3-5-sonnet-4k" : "Claude 3.5 Sonnet" ,
727727 "claude-3-opus" : "Claude 3 Opus 200k" ,
728- "claude-3-opus-8k" : "Claude 3 Opus 8k " ,
728+ "claude-3-opus-8k" : "Claude 3 Opus" ,
729729} as const ;
730730
731731// similar to the above, we map to short user-visible description texts
@@ -736,17 +736,17 @@ export const LLM_DESCR: LLM2String = {
736736 chatgpt4 :
737737 "Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)" ,
738738 "gpt-4" :
739- "Most powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)" ,
739+ "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)" ,
740740 "gpt-4-32k" : "" ,
741741 "gpt-3.5-turbo" : "Fast, great for everyday tasks. (OpenAI, 4k token context)" ,
742742 "gpt-3.5-turbo-16k" : `Same as ${ LLM_USERNAMES [ "gpt-3.5-turbo" ] } but with larger 16k token context` ,
743743 "gpt-4-turbo-preview-8k" :
744744 "More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)" ,
745745 "gpt-4-turbo-preview" :
746- "Like GPT-4 Turbo 8k , but with up to 128k token context" ,
746+ "Like GPT-4 Turbo, but with up to 128k token context" ,
747747 "gpt-4-turbo-8k" :
748748 "Faster, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)" ,
749- "gpt-4-turbo" : "Like GPT-4 Turbo 8k , but with up to 128k token context" ,
749+ "gpt-4-turbo" : "Like GPT-4 Turbo, but with up to 128k token context" ,
750750 "gpt-4o-8k" :
751751 "Most powerful, fastest, and cheapest (OpenAI, 8k token context)" ,
752752 "gpt-4o" : "Most powerful fastest, and cheapest (OpenAI, 128k token context)" ,
@@ -873,14 +873,14 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
873873 free : false ,
874874 } ,
875875 "gpt-3.5-turbo" : {
876- prompt_tokens : usd1Mtokens ( 1.5 ) ,
877- completion_tokens : usd1Mtokens ( 2 ) ,
876+ prompt_tokens : usd1Mtokens ( 3 ) ,
877+ completion_tokens : usd1Mtokens ( 6 ) ,
878878 max_tokens : 4096 ,
879879 free : true ,
880880 } ,
881881 "gpt-3.5-turbo-16k" : {
882- prompt_tokens : usd1Mtokens ( 0.5 ) ,
883- completion_tokens : usd1Mtokens ( 1.5 ) ,
882+ prompt_tokens : usd1Mtokens ( 3 ) ,
883+ completion_tokens : usd1Mtokens ( 6 ) ,
884884 max_tokens : 16384 ,
885885 free : false ,
886886 } ,
@@ -910,14 +910,14 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
910910 free : false ,
911911 } ,
912912 "gpt-4o-8k" : {
913- prompt_tokens : usd1Mtokens ( 5 ) ,
914- completion_tokens : usd1Mtokens ( 15 ) ,
 913+ prompt_tokens : usd1Mtokens ( 2.5 ) ,
914+ completion_tokens : usd1Mtokens ( 10 ) ,
915915 max_tokens : 8192 , // like gpt-4-turbo-8k
916916 free : false ,
917917 } ,
918918 "gpt-4o" : {
919- prompt_tokens : usd1Mtokens ( 5 ) ,
920- completion_tokens : usd1Mtokens ( 15 ) ,
 919+ prompt_tokens : usd1Mtokens ( 2.5 ) ,
920+ completion_tokens : usd1Mtokens ( 10 ) ,
921921 max_tokens : 128000 , // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
922922 free : false ,
923923 } ,
@@ -966,15 +966,15 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
966966 free : true ,
967967 } ,
968968 "gemini-1.5-flash-8k" : {
969- prompt_tokens : usd1Mtokens ( 0.35 ) ,
970- completion_tokens : usd1Mtokens ( 1.05 ) ,
969+ prompt_tokens : usd1Mtokens ( 0.075 ) ,
970+ completion_tokens : usd1Mtokens ( 0.3 ) ,
971971 max_tokens : 8_000 ,
972972 free : true ,
973973 } ,
974974 // https://mistral.ai/technology/
975975 "mistral-small-latest" : {
976- prompt_tokens : usd1Mtokens ( 1 ) ,
977- completion_tokens : usd1Mtokens ( 3 ) ,
976+ prompt_tokens : usd1Mtokens ( 0.2 ) ,
977+ completion_tokens : usd1Mtokens ( 0.6 ) ,
978978 max_tokens : 4096 , // TODO don't know the real value, see getMaxTokens
979979 free : true ,
980980 } ,
@@ -985,8 +985,8 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
985985 free : true ,
986986 } ,
987987 "mistral-large-latest" : {
988- prompt_tokens : usd1Mtokens ( 4 ) ,
989- completion_tokens : usd1Mtokens ( 12 ) ,
988+ prompt_tokens : usd1Mtokens ( 2 ) ,
989+ completion_tokens : usd1Mtokens ( 6 ) ,
990990 max_tokens : 4096 , // TODO don't know the real value, see getMaxTokens
991991 free : false ,
992992 } ,
@@ -1006,13 +1006,13 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
10061006 "claude-3-5-sonnet" : {
10071007 prompt_tokens : usd1Mtokens ( 3 ) ,
10081008 completion_tokens : usd1Mtokens ( 15 ) ,
1009- max_tokens : 4_000 , // limited to 4k tokens, offered for free
1009+ max_tokens : 200_000 ,
10101010 free : false ,
10111011 } ,
10121012 "claude-3-5-sonnet-4k" : {
10131013 prompt_tokens : usd1Mtokens ( 3 ) ,
10141014 completion_tokens : usd1Mtokens ( 15 ) ,
1015- max_tokens : 4_000 , // limited to 4k tokens, offered for free
1015+ max_tokens : 4_000 , // limited to 4k tokens
10161016 free : false ,
10171017 } ,
10181018 "claude-3-sonnet-4k" : {
0 commit comments