Skip to content

Commit 699e387

Browse files
zszia and artemrys authored
fix: handle JSON character encoding for Splunk HEC (#112)
* fix: handle JSON character encoding for HEC (umlauts). After applying the fix, searching for a field with umlauts now works correctly.
* style: pre-commit
* test: update integration tests to include fixed scenario
* test: sleep before search

Co-authored-by: Artem Rys <rysartem@gmail.com>
1 parent 01c28c4 commit 699e387

File tree

6 files changed

+88
-4
lines changed

6 files changed

+88
-4
lines changed

.github/workflows/build-test-release.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ jobs:
112112
test-splunk:
113113
name: test-splunk
114114
runs-on: ubuntu-latest
115+
continue-on-error: true
115116
needs:
116117
- meta
117118
strategy:

solnlib/modular_input/event.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def _to_hec(self, event_field):
216216
if hasattr(self, "_fields"):
217217
event["fields"] = self._fields
218218

219-
return json.dumps(event)
219+
return json.dumps(event, ensure_ascii=False)
220220

221221
@classmethod
222222
def format_events(cls, events: List, event_field: str = "event") -> List:

solnlib/modular_input/event_writer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ def write_events(
442442
try:
443443
self._rest_client.post(
444444
self.HTTP_EVENT_COLLECTOR_ENDPOINT,
445-
body=event,
445+
body=event.encode("utf-8"),
446446
headers=self.headers,
447447
)
448448
except binding.HTTPError as e:

tests/integration/_search.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#
2+
# Copyright 2021 Splunk Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
import os.path as op
17+
import sys
18+
import time
19+
20+
sys.path.insert(0, op.dirname(op.dirname(op.abspath(__file__))))
21+
import context
22+
from splunklib import client
23+
from splunklib import results as splunklib_results
24+
25+
26+
def search(session_key, query):
27+
service = client.connect(host=context.host, token=session_key)
28+
job = service.jobs.create(query)
29+
while True:
30+
while not job.is_ready():
31+
pass
32+
stats = {
33+
"isDone": job["isDone"],
34+
"doneProgress": job["doneProgress"],
35+
"scanCount": job["scanCount"],
36+
"eventCount": job["eventCount"],
37+
"resultCount": job["resultCount"],
38+
}
39+
if stats["isDone"] == "1":
40+
break
41+
time.sleep(0.5)
42+
json_results_reader = splunklib_results.JSONResultsReader(
43+
job.results(output_mode="json")
44+
)
45+
results = []
46+
for result in json_results_reader:
47+
if isinstance(result, dict):
48+
results.append(result)
49+
return results

tests/integration/test_hec_event_writer.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
1716
import os.path as op
1817
import sys
18+
import time
1919

2020
sys.path.insert(0, op.dirname(op.dirname(op.abspath(__file__))))
2121
import context
22+
from _search import search
2223

2324
from solnlib.modular_input import event_writer as hew
2425

@@ -36,3 +37,35 @@ def test_hec_event_writer():
3637
m2[i] = "test2 data %s" % i
3738
e2 = ew.create_event(m2, index="main", host="testing", sourcetype="hec")
3839
ew.write_events([e1, e2])
40+
41+
42+
def test_hec_event_writes_with_non_utf_8():
43+
# To test scenario listed in https://github.com/splunk/addonfactory-solutions-library-python/pull/112.
44+
test_name = "test_hec_event_writes_with_non_utf_8"
45+
session_key = context.get_session_key()
46+
ew = hew.HECEventWriter("test", session_key)
47+
event = ew.create_event(
48+
[
49+
{
50+
"test_name": test_name,
51+
"field_a": "Üü_Öö_Ää_some_text",
52+
"field_b": "some_text_Üü_Öö_Ää",
53+
},
54+
],
55+
index="main",
56+
host="testing",
57+
sourcetype="hec",
58+
)
59+
ew.write_events([event])
60+
time.sleep(2)
61+
62+
search_results = search(
63+
session_key, f"search index=main sourcetype=hec {test_name}"
64+
)
65+
66+
assert len(search_results) == 1
67+
_raw_event = search_results[0]["_raw"]
68+
assert "Üü_Öö_Ää_some_text" in _raw_event
69+
assert "some_text_Üü_Öö_Ää" in _raw_event
70+
assert "\\u00dc\\u00fc_\\u00d6\\u00f6_\\u00c4\\u00e4_some_text" not in _raw_event
71+
assert "some_text_\\u00dc\\u00fc_\\u00d6\\u00f6_\\u00c4\\u00e4" not in _raw_event

tests/unit/test_modular_input_event_writer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ def mock_post(
9696
self, path_segment, owner=None, app=None, sharing=None, headers=None, **query
9797
):
9898
event_strings = [
99-
json.dumps(json.loads(e), sort_keys=True) for e in query["body"].split("\n")
99+
json.dumps(json.loads(e), sort_keys=True)
100+
for e in query["body"].decode("utf-8").split("\n")
100101
]
101102

102103
assert (

0 commit comments

Comments
 (0)