
Commit d88c20e

Merge pull request #6 from maguec/enable_smart_quoting
make smart quoting available as a flag
2 parents: d583a0b + 268e8b1

2 files changed (+14, -6 lines)


README.md

Lines changed: 1 addition & 0 deletions

@@ -26,6 +26,7 @@ bulk_insert.py GRAPHNAME [OPTIONS]
| -t | --max-token-count INT | max number of tokens sent in each Redis query (default 1024) |
| -b | --max-buffer-size INT | max batch size (MBs) of each Redis query (default 4096) |
| -c | --max-token-size INT | max size (MBs) of each token sent to Redis (default 500) |
+| -q | --quote-minimal | enable smart quoting for items within the CSV |

The only required arguments are the name to give the newly-created graph (which can appear anywhere) and at least one node CSV file.
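For example, a run that loads one node CSV with smart quoting enabled might look like the following (the graph name and file name are placeholders, and the --nodes option is assumed from the loader's other flags rather than shown in this hunk):

bulk_insert.py MyGraph --nodes person.csv --quote-minimal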

bulk_insert.py

Lines changed: 13 additions & 6 deletions
@@ -8,10 +8,10 @@
import click

# Global variables
-CONFIGS = None # thresholds for batching Redis queries
-NODE_DICT = {} # global node dictionary
-TOP_NODE_ID = 0 # next ID to assign to a node
-QUERY_BUF = None # Buffer for query being constructed
+CONFIGS = None # thresholds for batching Redis queries
+NODE_DICT = {} # global node dictionary
+TOP_NODE_ID = 0 # next ID to assign to a node
+QUERY_BUF = None # Buffer for query being constructed

# Custom error class for invalid inputs
class CSVError(Exception):
@@ -104,7 +104,7 @@ def __init__(self, filename):
        self.infile = io.open(filename, 'rt')
        # Initialize CSV reader that ignores leading whitespace in each field
        # and does not modify input quote characters
-       self.reader = csv.reader(self.infile, skipinitialspace=True, quoting=csv.QUOTE_NONE)
+       self.reader = csv.reader(self.infile, skipinitialspace=True, quoting=QUOTING)

        self.prop_offset = 0 # Starting index of properties in row
        self.prop_count = 0 # Number of properties per entity
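The quoting argument above is what the new flag controls. As a minimal standalone sketch (sample data invented here, independent of bulk_insert.py), this is how Python's csv.reader treats a quoted field that contains the delimiter under the two quoting modes:

import csv
import io

# One CSV record whose second field holds a comma inside double quotes.
row = 'Alice,"Smith, Jr.",30\n'

# quoting=csv.QUOTE_NONE (the previous hard-coded behaviour): quote characters
# are not treated specially, so the comma inside the quotes still splits the field.
print(next(csv.reader(io.StringIO(row), skipinitialspace=True, quoting=csv.QUOTE_NONE)))
# ['Alice', '"Smith', 'Jr."', '30']

# quoting=csv.QUOTE_MINIMAL (selected by --quote-minimal): the quoted field is
# kept intact and the surrounding quote characters are stripped.
print(next(csv.reader(io.StringIO(row), skipinitialspace=True, quoting=csv.QUOTE_MINIMAL)))
# ['Alice', 'Smith, Jr.', '30']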
@@ -328,16 +328,23 @@ def process_entity_csvs(cls, csvs):
@click.option('--max-token-count', '-c', default=1024, help='max number of processed CSVs to send per query (default 1024)')
@click.option('--max-buffer-size', '-b', default=2048, help='max buffer size in megabytes (default 2048)')
@click.option('--max-token-size', '-t', default=500, help='max size of each token in megabytes (default 500, max 512)')
+@click.option('--quote-minimal/--no-quote-minimal', '-q/-d', default=False, help='only quote those fields which contain special characters such as delimiter, quotechar or any of the characters in lineterminator')

-def bulk_insert(graph, host, port, password, nodes, relations, max_token_count, max_buffer_size, max_token_size):
+def bulk_insert(graph, host, port, password, nodes, relations, max_token_count, max_buffer_size, max_token_size, quote_minimal):
    global CONFIGS
    global NODE_DICT
    global TOP_NODE_ID
    global QUERY_BUF
+   global QUOTING

    if sys.version_info[0] < 3:
        raise Exception("Python 3 is required for the RedisGraph bulk loader.")

+   if quote_minimal:
+       QUOTING=csv.QUOTE_MINIMAL
+   else:
+       QUOTING=csv.QUOTE_NONE
+
    TOP_NODE_ID = 0 # reset global ID variable (in case we are calling bulk_insert from unit tests)
    CONFIGS = Configs(max_token_count, max_buffer_size, max_token_size)

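The added option uses click's paired on/off flag syntax ('--quote-minimal/--no-quote-minimal', '-q/-d'), which resolves to a single boolean parameter. A minimal standalone sketch of that pattern (the command name and echoed messages are illustrative, not part of the loader):

import click

@click.command()
@click.option('--quote-minimal/--no-quote-minimal', '-q/-d', default=False,
              help='enable smart quoting for items within the CSV')
def demo(quote_minimal):
    # click turns whichever switch was passed (or the default) into one boolean,
    # the same value bulk_insert() receives as quote_minimal in the diff above.
    click.echo('smart quoting on' if quote_minimal else 'smart quoting off')

if __name__ == '__main__':
    demo()

Invoked as 'demo.py -q' this prints 'smart quoting on'; with '-d' or no flag it prints 'smart quoting off'.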