@@ -916,6 +916,45 @@ def suggest_terms(self, fields, prefix, handler="terms", **kwargs):
916916 )
917917 return res
918918
919+ def _build_docs (self , docs , boost = None , fieldUpdates = None , commitWithin = None ):
920+ # if no boost needed use json multidocument api
921+ # The JSON API skips the XML conversion and speedup load from 15 to 20 times.
922+ # CPU Usage is drastically lower.
923+ if boost is None :
924+ solrapi = "JSON"
925+ message = docs
926+ # single doc convert to array of docs
927+ if isinstance (message , dict ):
928+ # convert dict to list
929+ message = [message ]
930+ # json array of docs
931+ if isinstance (message , list ):
932+ # convert to string
933+ cleaned_message = [
934+ self ._build_json_doc (doc , fieldUpdates = fieldUpdates )
935+ for doc in message
936+ ]
937+ m = self .encoder .encode (cleaned_message ).encode ("utf-8" )
938+ else :
939+ raise ValueError ("wrong message type" )
940+ else :
941+ solrapi = "XML"
942+ message = ElementTree .Element ("add" )
943+
944+ if commitWithin :
945+ message .set ("commitWithin" , commitWithin )
946+
947+ for doc in docs :
948+ el = self ._build_xml_doc (doc , boost = boost , fieldUpdates = fieldUpdates )
949+ message .append (el )
950+
951+ # This returns a bytestring. Ugh.
952+ m = ElementTree .tostring (message , encoding = "utf-8" )
953+ # Convert back to Unicode please.
954+ m = force_unicode (m )
955+
956+ return (solrapi , m , len (message ))
957+
919958 def _build_json_doc (self , doc , fieldUpdates = None ):
920959 if fieldUpdates is None :
921960 cleaned_doc = {k : v for k , v in doc .items () if not self ._is_null_value (v )}
@@ -928,7 +967,6 @@ def _build_json_doc(self, doc, fieldUpdates=None):
928967 }
929968
930969 return cleaned_doc
931-
932970
933971 def _build_xml_doc (self , doc , boost = None , fieldUpdates = None ):
934972 doc_elem = ElementTree .Element ("doc" )
@@ -1035,43 +1073,13 @@ def add(
10351073 """
10361074 start_time = time .time ()
10371075 self .log .debug ("Starting to build add request..." )
1038- solrapi = "XML"
1039- # if no commands (no boost) needed use json multidocument api
1040- # The JSON API skips the XML conversion and speedup load from 15 to 20 times.
1041- # CPU Usage is drastically lower.
1042- if boost is None :
1043- solrapi = "JSON"
1044- message = docs
1045- # single doc convert to array of docs
1046- if isinstance (message , dict ):
1047- # convert dict to list
1048- message = [message ]
1049- # json array of docs
1050- if isinstance (message , list ):
1051- # convert to string
1052- cleaned_message = [self ._build_json_doc (doc , fieldUpdates = fieldUpdates ) for doc in message ]
1053- m = self .encoder .encode (cleaned_message ).encode ("utf-8" )
1054- else :
1055- raise ValueError ("wrong message type" )
1056- else :
1057- message = ElementTree .Element ("add" )
1058-
1059- if commitWithin :
1060- message .set ("commitWithin" , commitWithin )
1061-
1062- for doc in docs :
1063- el = self ._build_xml_doc (doc , boost = boost , fieldUpdates = fieldUpdates )
1064- message .append (el )
1065-
1066- # This returns a bytestring. Ugh.
1067- m = ElementTree .tostring (message , encoding = "utf-8" )
1068- # Convert back to Unicode please.
1069- m = force_unicode (m )
1070-
1076+ solrapi , m , len_message = self ._build_docs (
1077+ docs , boost , fieldUpdates , commitWithin
1078+ )
10711079 end_time = time .time ()
10721080 self .log .debug (
10731081 "Built add request of %s docs in %0.2f seconds." ,
1074- len ( message ) ,
1082+ len_message ,
10751083 end_time - start_time ,
10761084 )
10771085 return self ._update (
0 commit comments