Skip to content

Commit 1806121

Browse files
Factor out build docs for building XML or JSON doc
Allows testing of logic to choose between XML and JSON
1 parent d086416 commit 1806121

File tree

2 files changed

+73
-35
lines changed

2 files changed

+73
-35
lines changed

pysolr.py

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,45 @@ def suggest_terms(self, fields, prefix, handler="terms", **kwargs):
916916
)
917917
return res
918918

919+
def _build_docs(self, docs, boost=None, fieldUpdates=None, commitWithin=None):
    """
    Serialise ``docs`` into the body of a Solr "add" request.

    When ``boost`` is ``None`` the documents are encoded with the JSON
    multi-document API; otherwise an XML ``<add>`` message is built, since
    boosts are only emitted through ``_build_xml_doc``.

    ``docs`` may be a single document (a ``dict``) or a list of documents.

    Returns a ``(solrapi, body, count)`` tuple where ``solrapi`` is
    ``"JSON"`` or ``"XML"``, ``body`` is the serialised payload (UTF-8
    encoded for JSON, passed through ``force_unicode`` for XML) and
    ``count`` is the number of documents in the payload.

    Raises ``ValueError`` if the JSON path is selected and ``docs`` is
    neither a ``dict`` nor a ``list``.
    """
    # Normalise a single document up front so that BOTH serialisers see a
    # sequence of documents. Previously only the JSON branch did this, so a
    # bare dict combined with ``boost`` was iterated key by key.
    if isinstance(docs, dict):
        docs = [docs]

    if boost is None:
        # The JSON API skips the XML conversion, which makes loading
        # considerably faster and lowers CPU usage.
        if not isinstance(docs, list):
            raise ValueError("wrong message type")

        cleaned_docs = [
            self._build_json_doc(doc, fieldUpdates=fieldUpdates) for doc in docs
        ]
        # NOTE(review): commitWithin is not embedded in the JSON body;
        # presumably the caller forwards it separately - confirm in add().
        body = self.encoder.encode(cleaned_docs).encode("utf-8")
        return ("JSON", body, len(docs))

    root = ElementTree.Element("add")

    if commitWithin:
        # ElementTree can only serialise string attribute values, so coerce
        # here to accept integers as well.
        root.set("commitWithin", str(commitWithin))

    for doc in docs:
        root.append(self._build_xml_doc(doc, boost=boost, fieldUpdates=fieldUpdates))

    # tostring() returns a bytestring; convert it back to Unicode.
    body = force_unicode(ElementTree.tostring(root, encoding="utf-8"))

    # len() of an Element is its number of children, i.e. the <doc> count.
    return ("XML", body, len(root))
957+
919958
def _build_json_doc(self, doc, fieldUpdates=None):
920959
if fieldUpdates is None:
921960
cleaned_doc = {k: v for k, v in doc.items() if not self._is_null_value(v)}
@@ -928,7 +967,6 @@ def _build_json_doc(self, doc, fieldUpdates=None):
928967
}
929968

930969
return cleaned_doc
931-
932970

933971
def _build_xml_doc(self, doc, boost=None, fieldUpdates=None):
934972
doc_elem = ElementTree.Element("doc")
@@ -1035,43 +1073,13 @@ def add(
10351073
"""
10361074
start_time = time.time()
10371075
self.log.debug("Starting to build add request...")
1038-
solrapi = "XML"
1039-
# if no commands (no boost) needed use json multidocument api
1040-
# The JSON API skips the XML conversion and speedup load from 15 to 20 times.
1041-
# CPU Usage is drastically lower.
1042-
if boost is None:
1043-
solrapi = "JSON"
1044-
message = docs
1045-
# single doc convert to array of docs
1046-
if isinstance(message, dict):
1047-
# convert dict to list
1048-
message = [message]
1049-
# json array of docs
1050-
if isinstance(message, list):
1051-
# convert to string
1052-
cleaned_message = [self._build_json_doc(doc, fieldUpdates=fieldUpdates) for doc in message]
1053-
m = self.encoder.encode(cleaned_message).encode("utf-8")
1054-
else:
1055-
raise ValueError("wrong message type")
1056-
else:
1057-
message = ElementTree.Element("add")
1058-
1059-
if commitWithin:
1060-
message.set("commitWithin", commitWithin)
1061-
1062-
for doc in docs:
1063-
el = self._build_xml_doc(doc, boost=boost, fieldUpdates=fieldUpdates)
1064-
message.append(el)
1065-
1066-
# This returns a bytestring. Ugh.
1067-
m = ElementTree.tostring(message, encoding="utf-8")
1068-
# Convert back to Unicode please.
1069-
m = force_unicode(m)
1070-
1076+
solrapi, m, len_message = self._build_docs(
1077+
docs, boost, fieldUpdates, commitWithin
1078+
)
10711079
end_time = time.time()
10721080
self.log.debug(
10731081
"Built add request of %s docs in %0.2f seconds.",
1074-
len(message),
1082+
len_message,
10751083
end_time - start_time,
10761084
)
10771085
return self._update(

tests/test_client.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,36 @@ def test_build_json_doc_matches_xml(self):
772772
self.assertNotIn("title", doc_json)
773773
self.assertIsNone(doc_xml.find("*[name='title']"))
774774

775+
def test__build_docs_plain(self):
    """Without a boost, ``_build_docs`` must select the JSON API."""
    docs = [
        {
            "id": "doc_1",
            "title": "",
            "price": 12.59,
            "popularity": 10,
        }
    ]
    solrapi, m, len_message = self.solr._build_docs(docs)
    self.assertEqual(solrapi, "JSON")
    # The reported count is the number of documents passed in.
    self.assertEqual(len_message, 1)
784+
785+
def test__build_docs_boost(self):
    """Supplying a boost must force ``_build_docs`` onto the XML API."""
    docs = [
        {
            "id": "doc_1",
            "title": "",
            "price": 12.59,
            "popularity": 10,
        }
    ]
    solrapi, m, len_message = self.solr._build_docs(docs, boost={"title": 10.0})
    self.assertEqual(solrapi, "XML")
    # One <doc> element is appended to the <add> message per input document.
    self.assertEqual(len_message, 1)
794+
795+
def test__build_docs_field_updates(self):
    """Field updates without a boost must still use the JSON API."""
    docs = [
        {
            "id": "doc_1",
            "popularity": 10,
        }
    ]
    solrapi, m, len_message = self.solr._build_docs(
        docs, fieldUpdates={"popularity": "inc"}
    )
    self.assertEqual(solrapi, "JSON")
    # The reported count is the number of documents passed in.
    self.assertEqual(len_message, 1)
804+
775805
def test_add(self):
776806
self.assertEqual(len(self.solr.search("doc")), 3)
777807
self.assertEqual(len(self.solr.search("example")), 2)

0 commit comments

Comments
 (0)