Merge pull request #5 from RedisLabsModules/enforce-python3

jeffreylovitz · web-flow · commit 01eb9599d914 · 2018-12-18T16:26:10.000-05:00
Require a Python 3 interpreter, better Unicode support
diff --git a/README.md b/README.md
@@ -1,15 +1,18 @@
 # redisgraph-bulk-loader
 A Python utility for building RedisGraph databases from CSV inputs
 
+## Requirements
+The bulk loader utility requires a Python 3 interpreter.
+
+A Redis server with the [RedisGraph](https://github.com/RedisLabsModules/RedisGraph) module must be running. Installation instructions may be found at:
+https://oss.redislabs.com/redisgraph/
+
 ## Installation
 The bulk loader script's dependencies can be resolved using pip:
 ```
 pip install --user -r requirements.txt
 ```
 
-A Redis server with the [RedisGraph](https://github.com/RedisLabsModules/RedisGraph) module must be running. Installation instructions may be found at:
-https://oss.redislabs.com/redisgraph/
-
 ## Usage
 bulk_insert.py GRAPHNAME [OPTIONS]
 
diff --git a/bulk_insert.py b/bulk_insert.py
@@ -1,11 +1,11 @@
 import csv
 import os
 import io
+import sys
 import struct
 from timeit import default_timer as timer
 import redis
 import click
-from backports import csv
 
 # Global variables
 CONFIGS = None # thresholds for batching Redis queries
@@ -99,17 +99,17 @@ def report_completion(self, runtime):
 class EntityFile(object):
     def __init__(self, filename):
         # The label or relation type string is the basename of the file
-        self.entity_str = os.path.splitext(os.path.basename(filename))[0].encode('utf-8')
+        self.entity_str = os.path.splitext(os.path.basename(filename))[0].encode()
         # Input file handling
-        self.infile = io.open(filename, 'rt', encoding='utf-8')
+        self.infile = io.open(filename, 'rt')
         # Initialize CSV reader that ignores leading whitespace in each field
         # and does not modify input quote characters
         self.reader = csv.reader(self.infile, skipinitialspace=True, quoting=csv.QUOTE_NONE)
 
         self.prop_offset = 0 # Starting index of properties in row
         self.prop_count = 0 # Number of properties per entity
 
-        self.packed_header = ""
+        self.packed_header = b''
         self.binary_entities = []
         self.binary_size = 0 # size of binary token
         self.count_entities() # number of entities/row in file.
@@ -143,7 +143,7 @@ def pack_header(self, header):
         fmt = "=%dsI" % (len(self.entity_str) + 1) # Unaligned native, entity_string, count of properties
         args = [self.entity_str, prop_count]
         for p in header[self.prop_offset:]:
-            prop = p.encode('utf-8')
+            prop = p.encode()
             fmt += "%ds" % (len(prop) + 1) # encode string with a null terminator
             args.append(prop)
         return struct.pack(fmt, *args)
@@ -291,8 +291,8 @@ def prop_to_binary(prop_str):
         return struct.pack(format_str + '?', Type.BOOL, True)
 
     # If we've reached this point, the property is a string
+    encoded_str = str.encode(prop_str) # struct.pack requires bytes objects as arguments
     # Encoding len+1 adds a null terminator to the string
-    encoded_str = prop_str.encode('utf-8')
     format_str += "%ds" % (len(encoded_str) + 1)
     return struct.pack(format_str, Type.STRING, encoded_str)
 
@@ -334,6 +334,9 @@ def bulk_insert(graph, host, port, password, nodes, relations, max_token_count,
     global TOP_NODE_ID
     global QUERY_BUF
 
+    if sys.version_info[0] < 3:
+        raise Exception("Python 3 is required for the RedisGraph bulk loader.")
+
     TOP_NODE_ID = 0 # reset global ID variable (in case we are calling bulk_insert from unit tests)
     CONFIGS = Configs(max_token_count, max_buffer_size, max_token_size)
 
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,2 @@
 redis==2.10.6
 click>=6.7
-backports.csv

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,3 +1,2 @@` |
| `1` | `1` | `redis==2.10.6` |
| `2` | `2` | `click>=6.7` |
| `3` |  | `-backports.csv` |