diff --git a/.gitignore b/.gitignore
index 022ac422..700155a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,9 @@ lib/PyLD.egg-info
 profiler
 tests/test_caching.py
 tests/data/test_caching.json
+
+# JetBrains IDEs
+.idea
+
+# pyenv
+.python-version
diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py
index 49d62122..292f71e1 100644
--- a/lib/pyld/jsonld.py
+++ b/lib/pyld/jsonld.py
@@ -3661,12 +3661,33 @@ def _object_to_rdf(self, item, issuer, triples, rdfDirection):
             elif _is_bool(value):
                 object['value'] = 'true' if value else 'false'
                 object['datatype'] = datatype or XSD_BOOLEAN
-            elif _is_double(value) or datatype == XSD_DOUBLE:
-                # canonical double representation
-                object['value'] = re.sub(
-                    r'(\d)0*E\+?0*(\d)', r'\1E\2',
-                    ('%1.15E' % value))
-                object['datatype'] = datatype or XSD_DOUBLE
+
+            elif _is_double(value):
+                return {
+                    **object,
+                    'value': _canonicalize_double(value),
+                    'datatype': datatype or XSD_DOUBLE,
+                }
+
+            elif datatype == XSD_DOUBLE:
+                # Since the previous branch did not activate, we know that `value` is not a float number.
+                try:
+                    float_value = float(value)
+                except (ValueError, TypeError):
+                    # If `value` is not convertible to float, we will return it as-is.
+                    return {
+                        **object,
+                        'value': value,
+                        'datatype': XSD_DOUBLE,
+                    }
+                else:
+                    # We have a float, and canonicalization may proceed.
+                    return {
+                        **object,
+                        'value': _canonicalize_double(float_value),
+                        'datatype': XSD_DOUBLE,
+                    }
+
             elif _is_integer(value):
                 object['value'] = str(value)
                 object['datatype'] = datatype or XSD_INTEGER
@@ -6390,6 +6411,25 @@ def _is_double(v):
     return not isinstance(v, Integral) and isinstance(v, Real)
 
 
+def _canonicalize_double(value: float) -> str:
+    """
+    Convert a number to the canonical lexical form of `xsd:double`.
+
+    :param value: the number to convert; must be usable with `%E` formatting.
+
+    :return: the canonical form, e.g. `4.5E1`, `1.0E-5`, `INF` or `NaN`.
+    """
+    # `%E` renders NaN as `NAN`, but XML Schema spells it `NaN`. NaN is the
+    # only value that compares unequal to itself.
+    if value != value:
+        return 'NaN'
+    # Drop trailing mantissa zeros (keeping one fractional digit), the `+`
+    # exponent sign and leading exponent zeros; a `-` sign must be kept.
+    return re.sub(
+        r'(\d)0*E\+?(-?)0*(\d)', r'\1E\2\3',
+        ('%1.15E' % value))
+
+
 def _is_numeric(v):
     """
     Returns True if the given value is numeric.
diff --git a/tests/test_double_to_rdf.py b/tests/test_double_to_rdf.py
new file mode 100644
index 00000000..d191a538
--- /dev/null
+++ b/tests/test_double_to_rdf.py
@@ -0,0 +1,79 @@
+"""
+Tests for `to_rdf` serialization of `xsd:double` literals.
+"""
+
+import os
+import sys
+import unittest
+
+# Make the in-tree `lib/` directory importable so that the tests exercise
+# this checkout of pyld.
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib'))
+
+import pyld.jsonld
+
+XSD_DOUBLE = 'http://www.w3.org/2001/XMLSchema#double'
+SUBJECT = 'http://www.wikidata.org/entity/Q399'
+LONGITUDE = 'http://www.w3.org/2003/01/geo/wgs84_pos#longitude'
+
+
+def _longitude_document(value):
+    """Build a document whose only statement is an `xsd:double` longitude."""
+    return {
+        "@context": {
+            "xsd": "http://www.w3.org/2001/XMLSchema#",
+            "geoLongitude": LONGITUDE,
+        },
+        "@graph": [
+            {
+                "@id": SUBJECT,
+                "geoLongitude": {"@type": "xsd:double", "@value": value},
+            }
+        ],
+    }
+
+
+def _expected_dataset(lexical):
+    """Build the dataset expected from `to_rdf(_longitude_document(...))`."""
+    return {
+        "@default": [
+            {
+                "subject": {"type": "IRI", "value": SUBJECT},
+                "predicate": {"type": "IRI", "value": LONGITUDE},
+                "object": {
+                    "type": "literal",
+                    "value": lexical,
+                    "datatype": XSD_DOUBLE,
+                },
+            }
+        ]
+    }
+
+
+class TestDoubleToRdf(unittest.TestCase):
+    """Test cases for `to_rdf` with `xsd:double` typed values."""
+
+    def assert_lexical_form(self, value, lexical):
+        """Assert that `value` is emitted with the given lexical form."""
+        result = pyld.jsonld.to_rdf(_longitude_document(value))
+        self.assertEqual(result, _expected_dataset(lexical))
+
+    def test_offline_pyld_bug_reproduction(self):
+        """A numeric string, as captured from Wikidata Q399, is canonicalized.
+
+        Before the fix `_object_to_rdf` %-formatted the raw string as a
+        float, which fails for `str` operands.
+        """
+        self.assert_lexical_form("45", "4.5E1")
+
+    def test_native_float_is_canonicalized(self):
+        """A JSON number takes the `_is_double` branch and is canonicalized."""
+        self.assert_lexical_form(45.0, "4.5E1")
+
+    def test_non_numeric_string_is_kept_verbatim(self):
+        """A string that `float()` rejects is emitted unchanged."""
+        self.assert_lexical_form("not a number", "not a number")
+
+
+if __name__ == '__main__':
+    unittest.main()