Skip to content

Commit bcfe5b2

Browse files
Zero != None fix with CSV quote conversion (#15)
* Fix various nits
* Fix falsey check on zero values with quote conversion
1 parent a54d7db commit bcfe5b2

File tree

1 file changed

+14
-14
lines changed

1 file changed

+14
-14
lines changed

bulk_insert.py

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,17 @@
33
import io
44
import sys
55
import struct
6+
import json
67
from timeit import default_timer as timer
78
import redis
89
import click
9-
import json
1010

1111
# Global variables
1212
CONFIGS = None # thresholds for batching Redis queries
1313
NODE_DICT = {} # global node dictionary
1414
TOP_NODE_ID = 0 # next ID to assign to a node
1515
QUERY_BUF = None # Buffer for query being constructed
16+
QUOTING = None
1617

1718
FIELD_TYPES = None
1819

@@ -204,7 +205,7 @@ def process_entities(self, expected_col_count):
204205
if NODE_DICT is not None:
205206
if row[0] in NODE_DICT:
206207
sys.stderr.write("Node identifier '%s' was used multiple times - second occurrence at %s:%d\n"
207-
% (row[0], self.infile.name, self.reader.line_num))
208+
% (row[0], self.infile.name, self.reader.line_num))
208209
if CONFIGS.skip_invalid_nodes is False:
209210
exit(1)
210211
NODE_DICT[row[0]] = TOP_NODE_ID
@@ -263,8 +264,7 @@ def process_entities(self, expected_col_count):
263264
print("Relationship specified a non-existent identifier. src: %s; dest: %s" % (row[0], row[1]))
264265
if CONFIGS.skip_invalid_edges is False:
265266
raise e
266-
else:
267-
continue
267+
continue
268268
fmt = "=QQ" # 8-byte unsigned ints for src and dest
269269
row_binary = struct.pack(fmt, src, dest) + self.pack_props(row)
270270
row_binary_len = len(row_binary)
@@ -287,37 +287,37 @@ def process_entities(self, expected_col_count):
287287
# Convert a single CSV property field into a binary stream.
288288
# Supported property types are string, numeric, boolean, and NULL.
289289
# type is either Type.NUMERIC, Type.BOOL or Type.STRING, and explicitly sets the value to this type if possible
290-
def prop_to_binary(prop_str, type):
290+
def prop_to_binary(prop_val, type):
291291
# All format strings start with an unsigned char to represent our Type enum
292292
format_str = "=B"
293-
if not prop_str:
293+
if prop_val is None:
294294
# An empty field indicates a NULL property
295295
return struct.pack(format_str, Type.NULL)
296296

297297
# If field can be cast to a float, allow it
298298
if type == None or type == Type.NUMERIC:
299299
try:
300-
numeric_prop = float(prop_str)
300+
numeric_prop = float(prop_val)
301301
return struct.pack(format_str + "d", Type.NUMERIC, numeric_prop)
302302
except:
303303
pass
304304

305305
if type == None or type == Type.BOOL:
306306
# If field is 'false' or 'true', it is a boolean
307-
if prop_str.lower() == 'false':
307+
if prop_val.lower() == 'false':
308308
return struct.pack(format_str + '?', Type.BOOL, False)
309-
elif prop_str.lower() == 'true':
309+
elif prop_val.lower() == 'true':
310310
return struct.pack(format_str + '?', Type.BOOL, True)
311311

312312
if type == None or type == Type.STRING:
313313
# If we've reached this point, the property is a string
314-
encoded_str = str.encode(prop_str) # struct.pack requires bytes objects as arguments
314+
encoded_str = str.encode(prop_val) # struct.pack requires bytes objects as arguments
315315
# Encoding len+1 adds a null terminator to the string
316316
format_str += "%ds" % (len(encoded_str) + 1)
317317
return struct.pack(format_str, Type.STRING, encoded_str)
318318

319319
## if it hasn't returned by this point, it is trying to set it to a type that it can't adopt
320-
raise Exception("unable to parse [" + prop_str + "] with type ["+repr(type)+"]")
320+
raise Exception("unable to parse [" + prop_val + "] with type ["+repr(type)+"]")
321321

322322
# For each node input file, validate contents and convert to binary format.
323323
# If any buffer limits have been reached, flush all enqueued inserts to Redis.
@@ -368,13 +368,13 @@ def bulk_insert(graph, host, port, password, nodes, relations, separator, max_to
368368
if sys.version_info[0] < 3:
369369
raise Exception("Python 3 is required for the RedisGraph bulk loader.")
370370

371-
if field_types != None:
372-
try :
371+
if field_types is not None:
372+
try:
373373
FIELD_TYPES = json.loads(field_types)
374374
except:
375375
raise Exception("Problem parsing field-types. Use the format {<label>:[<col1 type>, <col2 type> ...]} where type can be 0(null),1(bool),2(numeric),3(string) ")
376376

377-
QUOTING=int(quote)
377+
QUOTING = int(quote)
378378

379379
TOP_NODE_ID = 0 # reset global ID variable (in case we are calling bulk_insert from unit tests)
380380
CONFIGS = Configs(max_token_count, max_buffer_size, max_token_size, skip_invalid_nodes, skip_invalid_edges)

0 commit comments

Comments
 (0)