@@ -14,8 +14,24 @@ class ChatTemplateTests: XCTestCase {
1414 " content " : " Describe the Swift programming language. " ,
1515 ] ]
1616
17+ static let phiTokenizerTask = Task { // Shared load of the Phi-3 tokenizer: as a static let this task is created once, and tests await it instead of each calling AutoTokenizer.from themselves.
18+ try await AutoTokenizer . from ( pretrained: " microsoft/Phi-3-mini-128k-instruct " )
19+ }
20+
21+ static func sharedPhiTokenizer( ) async throws -> Tokenizer { // Returns the tokenizer produced by phiTokenizerTask, suspending until the load finishes; rethrows the task's error if the load failed.
22+ try await phiTokenizerTask. value
23+ }
24+
25+ static let tokenizerWithTemplateArrayTask = Task { // Shared load of the Mistral-7B-Instruct tokenizer, created once as a static let. NOTE(review): the name suggests this model's config holds an array of chat templates — confirm against the model config.
26+ try await AutoTokenizer . from ( pretrained: " mlx-community/Mistral-7B-Instruct-v0.3-4bit " )
27+ }
28+
29+ static func sharedTokenizerWithTemplateArray( ) async throws -> Tokenizer { // Returns the tokenizer produced by tokenizerWithTemplateArrayTask, suspending until the load finishes; rethrows the task's error if the load failed.
30+ try await tokenizerWithTemplateArrayTask. value
31+ }
32+
1733 func testTemplateFromConfig( ) async throws {
18- let tokenizer = try await AutoTokenizer . from ( pretrained : " microsoft/Phi-3-mini-128k-instruct " )
34+ let tokenizer = try await Self . sharedPhiTokenizer ( )
1935 let encoded = try tokenizer. applyChatTemplate ( messages: messages)
2036 let encodedTarget = [ 32010 , 4002 , 29581 , 278 , 14156 , 8720 , 4086 , 29889 , 32007 , 32001 ]
2137 let decoded = tokenizer. decode ( tokens: encoded)
@@ -36,7 +52,7 @@ class ChatTemplateTests: XCTestCase {
3652 }
3753
3854 func testDefaultTemplateFromArrayInConfig( ) async throws {
39- let tokenizer = try await AutoTokenizer . from ( pretrained : " mlx-community/Mistral-7B-Instruct-v0.3-4bit " )
55+ let tokenizer = try await Self . sharedTokenizerWithTemplateArray ( )
4056 let encoded = try tokenizer. applyChatTemplate ( messages: messages)
4157 let encodedTarget = [ 1 , 29473 , 3 , 28752 , 1040 , 4672 , 2563 , 17060 , 4610 , 29491 , 29473 , 4 ]
4258 let decoded = tokenizer. decode ( tokens: encoded)
@@ -46,7 +62,7 @@ class ChatTemplateTests: XCTestCase {
4662 }
4763
4864 func testTemplateFromArgumentWithEnum( ) async throws {
49- let tokenizer = try await AutoTokenizer . from ( pretrained : " microsoft/Phi-3-mini-128k-instruct " )
65+ let tokenizer = try await Self . sharedPhiTokenizer ( )
5066 // Purposely not using the correct template for this model to verify that the template from the config is not being used
5167 let mistral7BDefaultTemplate = " {{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %} "
5268 let encoded = try tokenizer. applyChatTemplate ( messages: messages, chatTemplate: . literal( mistral7BDefaultTemplate) )
@@ -58,7 +74,7 @@ class ChatTemplateTests: XCTestCase {
5874 }
5975
6076 func testTemplateFromArgumentWithString( ) async throws {
61- let tokenizer = try await AutoTokenizer . from ( pretrained : " microsoft/Phi-3-mini-128k-instruct " )
77+ let tokenizer = try await Self . sharedPhiTokenizer ( )
6278 // Purposely not using the correct template for this model to verify that the template from the config is not being used
6379 let mistral7BDefaultTemplate = " {{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %} "
6480 let encoded = try tokenizer. applyChatTemplate ( messages: messages, chatTemplate: mistral7BDefaultTemplate)
@@ -70,7 +86,7 @@ class ChatTemplateTests: XCTestCase {
7086 }
7187
7288 func testNamedTemplateFromArgument( ) async throws {
73- let tokenizer = try await AutoTokenizer . from ( pretrained : " mlx-community/Mistral-7B-Instruct-v0.3-4bit " )
89+ let tokenizer = try await Self . sharedTokenizerWithTemplateArray ( )
7490 // Normally it is not necessary to specify the name `default`, but I'm not aware of models with lists of templates in the config that are not `default` or `tool_use`
7591 let encoded = try tokenizer. applyChatTemplate ( messages: messages, chatTemplate: . name( " default " ) )
7692 let encodedTarget = [ 1 , 29473 , 3 , 28752 , 1040 , 4672 , 2563 , 17060 , 4610 , 29491 , 29473 , 4 ]
0 commit comments