@@ -27,7 +27,129 @@ public function __construct(
2727 ) {
2828 }
2929
30- public function vectorizeEmbeddableDocuments (array $ documents , array $ options = []): array
/**
 * Vectorizes a single value or a homogeneous array of values.
 *
 * Dispatch rules, in order:
 *  - a string or Stringable yields a single Vector;
 *  - an EmbeddableDocumentInterface yields a single VectorDocument;
 *  - an empty array yields an empty array;
 *  - a non-empty array is classified by its first element and must be homogeneous
 *    (documents only, or strings/Stringables only); the result is an array.
 *
 * @param string|\Stringable|EmbeddableDocumentInterface|array<mixed> $values
 * @param array<string, mixed>                                        $options Forwarded unchanged to the selected vectorization method
 *
 * @return Vector|VectorDocument|array<Vector>|array<VectorDocument>
 *
 * @throws RuntimeException When the array's first element has an unsupported type, or the array mixes types
 */
public function vectorize(string|\Stringable|EmbeddableDocumentInterface|array $values, array $options = []): Vector|VectorDocument|array
{
    if (!\is_array($values)) {
        // The text check comes first, so an object implementing both Stringable and
        // EmbeddableDocumentInterface is vectorized as plain text.
        return \is_string($values) || $values instanceof \Stringable
            ? $this->vectorizeString($values, $options)
            : $this->vectorizeEmbeddableDocument($values, $options);
    }

    if ([] === $values) {
        return [];
    }

    // The first element decides which kind of list this is; the validator enforces the rest.
    $head = $values[array_key_first($values)];
    $headIsDocument = $head instanceof EmbeddableDocumentInterface;

    if (!$headIsDocument && !\is_string($head) && !$head instanceof \Stringable) {
        throw new RuntimeException('Array must contain only strings, Stringable objects, or EmbeddableDocumentInterface instances. ');
    }

    if ($headIsDocument) {
        $this->validateArray($values, EmbeddableDocumentInterface::class);

        return $this->vectorizeEmbeddableDocuments($values, $options);
    }

    $this->validateArray($values, 'string|stringable ');

    return $this->vectorizeStrings($values, $options);
}
59+
/**
 * Ensures every element of the array matches the expected type.
 *
 * When $expectedType equals the string/Stringable marker compared below, each element
 * must be a string or a Stringable; otherwise $expectedType is used as a class or
 * interface name and each element must be an instance of it. An empty array passes.
 *
 * @param array<mixed> $values
 *
 * @throws RuntimeException On the first element that does not match
 */
private function validateArray(array $values, string $expectedType): void
{
    if ('string|stringable ' === $expectedType) {
        foreach ($values as $item) {
            if (\is_string($item) || $item instanceof \Stringable) {
                continue;
            }

            throw new RuntimeException('Array must contain only strings or Stringable objects. ');
        }

        return;
    }

    foreach ($values as $item) {
        if ($item instanceof $expectedType) {
            continue;
        }

        throw new RuntimeException(\sprintf('Array must contain only "%s" instances. ', $expectedType));
    }
}
75+
/**
 * Vectorizes one piece of text and returns the first vector of the platform result.
 *
 * The input is cast to string once; that value is both logged at debug level and
 * passed to the platform together with the configured model and the given options.
 *
 * @param array<string, mixed> $options
 *
 * @throws RuntimeException When the platform result holds no vector at index 0
 */
private function vectorizeString(string|\Stringable $string, array $options = []): Vector
{
    $text = (string) $string;
    $this->logger->debug('Vectorizing string ', ['string ' => $text]);

    $vectors = $this->platform->invoke($this->model, $text, $options)->asVectors();

    return $vectors[0] ?? throw new RuntimeException('No vector returned for string vectorization. ');
}
93+
/**
 * Vectorizes the content of a single document and wraps the result in a VectorDocument
 * carrying the document's id and metadata.
 *
 * @param array<string, mixed> $options
 */
private function vectorizeEmbeddableDocument(EmbeddableDocumentInterface $document, array $options = []): VectorDocument
{
    $this->logger->debug('Vectorizing embeddable document ', ['document_id ' => $document->getId()]);

    // Embed the content first; id and metadata are read afterwards, when building the result.
    $embedding = $this->vectorizeString($document->getContent(), $options);

    $id = $document->getId();
    $metadata = $document->getMetadata();

    return new VectorDocument($id, $embedding, $metadata);
}
105+
/**
 * Vectorizes a list of strings / Stringable objects.
 *
 * Every input is cast to string first. If the configured model reports the
 * INPUT_MULTIPLE capability, all strings are sent in a single platform call;
 * otherwise one call is made per string and the vectors of all results are
 * concatenated in call order.
 *
 * @param array<string|\Stringable> $strings
 * @param array<string, mixed>      $options
 *
 * @return array<Vector>
 */
private function vectorizeStrings(array $strings, array $options = []): array
{
    $stringCount = \count($strings);
    $this->logger->info('Starting vectorization of strings ', ['string_count ' => $stringCount]);

    // Normalize every input to a plain string (array keys are preserved by array_map).
    $stringValues = array_map(static fn (string|\Stringable $s): string => (string) $s, $strings);

    if ($this->platform->getModelCatalog()->getModel($this->model)->supports(Capability::INPUT_MULTIPLE)) {
        $this->logger->debug('Using batch vectorization with model that supports multiple inputs ');
        $result = $this->platform->invoke($this->model, $stringValues, $options);

        $vectors = $result->asVectors();
        $this->logger->debug('Batch vectorization completed ', ['vector_count ' => \count($vectors)]);
    } else {
        $this->logger->debug('Using sequential vectorization for model without multiple input support ');

        // Issue every invocation before reading any result.
        // NOTE(review): presumably results resolve lazily, so this ordering lets calls overlap — confirm.
        $results = [];
        foreach ($stringValues as $i => $string) {
            $this->logger->debug('Vectorizing string ', ['string_index ' => $i]);
            $results[] = $this->platform->invoke($this->model, $string, $options);
        }

        // Collect the per-result vector arrays and merge them once, instead of calling
        // array_merge() inside the loop (which re-copies the accumulated array each time).
        $vectorBatches = [];
        foreach ($results as $result) {
            $vectorBatches[] = $result->asVectors();
        }
        $vectors = array_merge(...$vectorBatches);

        $this->logger->debug('Sequential vectorization completed ', ['vector_count ' => \count($vectors)]);
    }

    $this->logger->info('Vectorization process completed ', ['string_count ' => $stringCount, 'vector_count ' => \count($vectors)]);

    return $vectors;
}
145+
146+ /**
147+ * @param array<EmbeddableDocumentInterface> $documents
148+ * @param array<string, mixed> $options
149+ *
150+ * @return array<VectorDocument>
151+ */
152+ private function vectorizeEmbeddableDocuments (array $ documents , array $ options = []): array
31153 {
32154 $ documentCount = \count ($ documents );
33155 $ this ->logger ->info ('Starting vectorization process ' , ['document_count ' => $ documentCount ]);
@@ -65,18 +187,4 @@ public function vectorizeEmbeddableDocuments(array $documents, array $options =
65187
66188 return $ vectorDocuments ;
67189 }
68-
69- public function vectorize (string |\Stringable $ string , array $ options = []): Vector
70- {
71- $ this ->logger ->debug ('Vectorizing string ' , ['string ' => (string ) $ string ]);
72-
73- $ result = $ this ->platform ->invoke ($ this ->model , (string ) $ string , $ options );
74- $ vectors = $ result ->asVectors ();
75-
76- if (!isset ($ vectors [0 ])) {
77- throw new RuntimeException ('No vector returned for string vectorization. ' );
78- }
79-
80- return $ vectors [0 ];
81- }
82190}
0 commit comments