@@ -919,8 +919,56 @@ def suggest_terms(self, fields, prefix, handler="terms", **kwargs):
919919 )
920920 return res
921921
922- def _build_json_doc (self , doc ):
923- cleaned_doc = {k : v for k , v in doc .items () if not self ._is_null_value (v )}
def _build_docs(self, docs, boost=None, fieldUpdates=None, commitWithin=None):
    """
    Serialize ``docs`` into the request body of an add/update call.

    When ``boost`` is ``None`` the documents are encoded through the JSON
    multi-document API, which skips building an XML tree. Otherwise an XML
    ``<add>`` message is produced, since boosts are only emitted by the XML
    document builder.

    :param docs: a single document (``dict``) or a collection of documents.
        The JSON path requires a ``dict`` or a ``list``.
    :param boost: forwarded to ``_build_xml_doc``; any non-``None`` value
        selects the XML path.
    :param fieldUpdates: forwarded unchanged to the per-document builders.
    :param commitWithin: written as the ``commitWithin`` attribute of the
        ``<add>`` element on the XML path.
        NOTE(review): it is not applied on the JSON path in this method;
        confirm the caller forwards it another way if it is required there.
    :returns: a ``(solrapi, body, count)`` tuple where ``solrapi`` is
        ``"JSON"`` or ``"XML"``, ``body`` is the serialized payload (UTF-8
        bytes for JSON, the result of ``force_unicode`` for XML) and
        ``count`` is the number of documents serialized.
    :raises ValueError: on the JSON path when ``docs`` is neither a ``dict``
        nor a ``list``.
    """
    # Accept a single document on both paths; iterating a bare dict would
    # walk its keys rather than a sequence of documents.
    if isinstance(docs, dict):
        docs = [docs]

    if boost is None:
        if not isinstance(docs, list):
            raise ValueError("wrong message type")

        cleaned_docs = [
            self._build_json_doc(doc, fieldUpdates=fieldUpdates) for doc in docs
        ]
        payload = self.encoder.encode(cleaned_docs).encode("utf-8")
        return ("JSON", payload, len(docs))

    message = ElementTree.Element("add")

    if commitWithin:
        # ElementTree can only serialize string attribute values, so an
        # integer commitWithin must be converted before it is set.
        message.set("commitWithin", str(commitWithin))

    for doc in docs:
        message.append(
            self._build_xml_doc(doc, boost=boost, fieldUpdates=fieldUpdates)
        )

    # tostring() returns a bytestring; convert it back to text.
    payload = force_unicode(ElementTree.tostring(message, encoding="utf-8"))

    # len() of an Element is its number of children, i.e. documents added.
    return ("XML", payload, len(message))
960+
def _build_json_doc(self, doc, fieldUpdates=None):
    """
    Prepare a single document for the JSON update API.

    Fields named in ``fieldUpdates`` are wrapped as ``{modifier: value}``.
    All other fields (for example the document id, which must be sent
    without a modifier) are copied as-is, except that null values (as
    decided by ``self._is_null_value``) are dropped.

    :param doc: mapping of field name to value.
    :param fieldUpdates: optional mapping of field name to the update
        modifier to apply to that field. ``None`` or an empty mapping means
        no field carries a modifier.
    :returns: a new ``dict`` ready to be JSON-encoded; ``doc`` is not
        modified.
    """
    modifiers = fieldUpdates or {}

    cleaned_doc = {}
    for key, value in doc.items():
        if key in modifiers:
            # Keep null values here: Solr atomic updates use a null value
            # with a modifier to remove the field's values.
            cleaned_doc[key] = {modifiers[key]: value}
        elif not self._is_null_value(value):
            # Plain fields follow the same null filtering whether or not
            # other fields of the document carry modifiers.
            cleaned_doc[key] = value

    return cleaned_doc
925973
926974 def _build_xml_doc (self , doc , boost = None , fieldUpdates = None ):
@@ -1028,43 +1076,13 @@ def add(
10281076 """
10291077 start_time = time .time ()
10301078 self .log .debug ("Starting to build add request..." )
1031- solrapi = "XML"
1032- # if no commands (no boost, no atomic updates) needed use json multidocument api
1033- # The JSON API skips the XML conversion and speedup load from 15 to 20 times.
1034- # CPU Usage is drastically lower.
1035- if boost is None and fieldUpdates is None :
1036- solrapi = "JSON"
1037- message = docs
1038- # single doc convert to array of docs
1039- if isinstance (message , dict ):
1040- # convert dict to list
1041- message = [message ]
1042- # json array of docs
1043- if isinstance (message , list ):
1044- # convert to string
1045- cleaned_message = [self ._build_json_doc (doc ) for doc in message ]
1046- m = self .encoder .encode (cleaned_message ).encode ("utf-8" )
1047- else :
1048- raise ValueError ("wrong message type" )
1049- else :
1050- message = ElementTree .Element ("add" )
1051-
1052- if commitWithin :
1053- message .set ("commitWithin" , commitWithin )
1054-
1055- for doc in docs :
1056- el = self ._build_xml_doc (doc , boost = boost , fieldUpdates = fieldUpdates )
1057- message .append (el )
1058-
1059- # This returns a bytestring. Ugh.
1060- m = ElementTree .tostring (message , encoding = "utf-8" )
1061- # Convert back to Unicode please.
1062- m = force_unicode (m )
1063-
1079+ solrapi , m , len_message = self ._build_docs (
1080+ docs , boost , fieldUpdates , commitWithin
1081+ )
10641082 end_time = time .time ()
10651083 self .log .debug (
10661084 "Built add request of %s docs in %0.2f seconds." ,
1067- len ( message ) ,
1085+ len_message ,
10681086 end_time - start_time ,
10691087 )
10701088 return self ._update (
0 commit comments