|
1 | 1 | import csv |
2 | 2 | import os |
3 | 3 | import io |
| 4 | +import sys |
4 | 5 | import struct |
5 | 6 | from timeit import default_timer as timer |
6 | 7 | import redis |
7 | 8 | import click |
8 | | -from backports import csv |
9 | 9 |
|
10 | 10 | # Global variables |
11 | 11 | CONFIGS = None # thresholds for batching Redis queries |
@@ -99,17 +99,17 @@ def report_completion(self, runtime): |
99 | 99 | class EntityFile(object): |
100 | 100 | def __init__(self, filename): |
101 | 101 | # The label or relation type string is the basename of the file |
102 | | - self.entity_str = os.path.splitext(os.path.basename(filename))[0].encode('utf-8') |
| 102 | + self.entity_str = os.path.splitext(os.path.basename(filename))[0].encode() |
103 | 103 | # Input file handling |
104 | | - self.infile = io.open(filename, 'rt', encoding='utf-8') |
| 104 | + self.infile = io.open(filename, 'rt') |
105 | 105 | # Initialize CSV reader that ignores leading whitespace in each field |
106 | 106 | # and does not modify input quote characters |
107 | 107 | self.reader = csv.reader(self.infile, skipinitialspace=True, quoting=csv.QUOTE_NONE) |
108 | 108 |
|
109 | 109 | self.prop_offset = 0 # Starting index of properties in row |
110 | 110 | self.prop_count = 0 # Number of properties per entity |
111 | 111 |
|
112 | | - self.packed_header = "" |
| 112 | + self.packed_header = b'' |
113 | 113 | self.binary_entities = [] |
114 | 114 | self.binary_size = 0 # size of binary token |
115 | 115 | self.count_entities() # number of entities/row in file. |
@@ -143,7 +143,7 @@ def pack_header(self, header): |
143 | 143 | fmt = "=%dsI" % (len(self.entity_str) + 1) # Unaligned native, entity_string, count of properties |
144 | 144 | args = [self.entity_str, prop_count] |
145 | 145 | for p in header[self.prop_offset:]: |
146 | | - prop = p.encode('utf-8') |
| 146 | + prop = p.encode() |
147 | 147 | fmt += "%ds" % (len(prop) + 1) # encode string with a null terminator |
148 | 148 | args.append(prop) |
149 | 149 | return struct.pack(fmt, *args) |
@@ -291,8 +291,8 @@ def prop_to_binary(prop_str): |
291 | 291 | return struct.pack(format_str + '?', Type.BOOL, True) |
292 | 292 |
|
293 | 293 | # If we've reached this point, the property is a string |
| 294 | + encoded_str = str.encode(prop_str) # struct.pack requires bytes objects as arguments |
294 | 295 | # Encoding len+1 adds a null terminator to the string |
295 | | - encoded_str = prop_str.encode('utf-8') |
296 | 296 | format_str += "%ds" % (len(encoded_str) + 1) |
297 | 297 | return struct.pack(format_str, Type.STRING, encoded_str) |
298 | 298 |
|
@@ -334,6 +334,9 @@ def bulk_insert(graph, host, port, password, nodes, relations, max_token_count, |
334 | 334 | global TOP_NODE_ID |
335 | 335 | global QUERY_BUF |
336 | 336 |
|
| 337 | + if sys.version_info[0] < 3: |
| 338 | + raise Exception("Python 3 is required for the RedisGraph bulk loader.") |
| 339 | + |
337 | 340 | TOP_NODE_ID = 0 # reset global ID variable (in case we are calling bulk_insert from unit tests) |
338 | 341 | CONFIGS = Configs(max_token_count, max_buffer_size, max_token_size) |
339 | 342 |
|
|
0 commit comments