From e525cdfb16c9190d06af96ba503e7fcc305587e5 Mon Sep 17 00:00:00 2001 From: tracy loisel Date: Tue, 4 Oct 2022 05:24:59 +0200 Subject: [PATCH 1/8] fix issue with invisible chars while importing csv files --- lib/csv_importer/csv_reader.rb | 4 ++-- lib/csv_importer/header.rb | 2 +- spec/csv_importer_spec.rb | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/csv_importer/csv_reader.rb b/lib/csv_importer/csv_reader.rb index c5ece38..95af8bd 100644 --- a/lib/csv_importer/csv_reader.rb +++ b/lib/csv_importer/csv_reader.rb @@ -70,11 +70,11 @@ def detect_separator(csv_content) end end - # Remove trailing white spaces and ensure we always return a string + # Remove trailing white spaces, invisible characters and ensure we always return a string def sanitize_cells(rows) rows.map do |cells| cells.map do |cell| - cell ? cell.strip : "" + cell ? cell.strip.gsub(/\P{Print}|\p{Cf}/, '') : "" end end end diff --git a/lib/csv_importer/header.rb b/lib/csv_importer/header.rb index b4e550f..1b96c3c 100644 --- a/lib/csv_importer/header.rb +++ b/lib/csv_importer/header.rb @@ -10,7 +10,7 @@ class Header def columns column_names.map do |column_name| # ensure column name escapes invisible characters - column_name = column_name.gsub(/[^[:print:]]/, '') + column_name = column_name.gsub(/\P{Print}|\p{Cf}/, '') Column.new( name: column_name, diff --git a/spec/csv_importer_spec.rb b/spec/csv_importer_spec.rb index e2b610d..932eab6 100644 --- a/spec/csv_importer_spec.rb +++ b/spec/csv_importer_spec.rb @@ -430,7 +430,7 @@ class ImportUserCSVByFirstName bob@example.com , true, bob ,," # insert invisible characters - csv_content.insert(-1, "\u{FEFF}") + csv_content.insert(0, "\u{FEFF}") csv_io = StringIO.new(csv_content) import = ImportUserCSV.new(file: csv_io) From cc691c7eecd49f1826af9d35c9663363da4b0428 Mon Sep 17 00:00:00 2001 From: tracy loisel Date: Tue, 19 Sep 2023 13:16:41 +0200 Subject: [PATCH 2/8] dont try to save a row if it has errors --- 
lib/csv_importer/runner.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/csv_importer/runner.rb b/lib/csv_importer/runner.rb index e4a473f..e2f976f 100644 --- a/lib/csv_importer/runner.rb +++ b/lib/csv_importer/runner.rb @@ -54,6 +54,8 @@ def persist_rows! if row.skip? tags << :skip + elsif row.errors.size > 0 + tags << :failure else if row.model.save tags << :success From b32f55dee1c5db7a00435d49a3aa880bd17caf5d Mon Sep 17 00:00:00 2001 From: tracy loisel Date: Tue, 26 Sep 2023 12:38:57 +0200 Subject: [PATCH 3/8] add full_transaction --- lib/csv_importer/runner.rb | 73 +++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/lib/csv_importer/runner.rb b/lib/csv_importer/runner.rb index e2f976f..79bc11e 100644 --- a/lib/csv_importer/runner.rb +++ b/lib/csv_importer/runner.rb @@ -12,6 +12,7 @@ def self.call(*args) attribute :rows, Array[Row] attribute :when_invalid, Symbol attribute :after_save_blocks, Array[Proc], default: [] + attribute :sql_transaction, Symbol attribute :report, Report, default: proc { Report.new } @@ -41,38 +42,48 @@ def abort_when_invalid? when_invalid == :abort end + def sql_transaction_all_rows? + sql_transaction == :all_rows + end + + def sql_transaction_one_row? + sql_transaction == :each_row + end + def persist_rows! - transaction do + full_transaction do rows.each do |row| - tags = [] + transaction do + tags = [] - if row.model.persisted? - tags << :update - else - tags << :create - end - - if row.skip? - tags << :skip - elsif row.errors.size > 0 - tags << :failure - else - if row.model.save - tags << :success + if row.model.persisted? + tags << :update else + tags << :create + end + + if row.skip? 
+ tags << :skip + elsif row.errors.size > 0 tags << :failure + else + if row.model.save + tags << :success + else + tags << :failure + end end - end - add_to_report(row, tags) + add_to_report(row, tags) - after_save_blocks.each do |block| - case block.arity - when 0 then block.call - when 1 then block.call(row.model) - when 2 then block.call(row.model, row.csv_attributes) - else - raise ArgumentError, "after_save block of arity #{ block.arity } is not supported" + after_save_blocks.each do |block| + case block.arity + when 0 then block.call + when 1 then block.call(row.model) + when 2 then block.call(row.model, row.csv_attributes) + else + raise ArgumentError, "after_save block of arity #{ block.arity } is not supported" + end end end end @@ -102,8 +113,20 @@ def add_to_report(row, tags) raise ImportAborted if abort_when_invalid? && tags[1] == :failure end + def full_transaction(&block) + if sql_transaction_all_rows? + rows.first.model.class.transaction(&block) + else + block.call + end + end + def transaction(&block) - rows.first.model.class.transaction(&block) + if sql_transaction_one_row? 
+ rows.first.model.class.transaction(&block) + else + block.call + end end end end From 1a8cd10aa103ce53260d3ceb19cfca52a000ef6f Mon Sep 17 00:00:00 2001 From: tracy loisel Date: Tue, 26 Sep 2023 12:49:46 +0200 Subject: [PATCH 4/8] fix dsl sql_transaction --- lib/csv_importer/config.rb | 2 ++ lib/csv_importer/dsl.rb | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/lib/csv_importer/config.rb b/lib/csv_importer/config.rb index 1100b7a..127242f 100644 --- a/lib/csv_importer/config.rb +++ b/lib/csv_importer/config.rb @@ -9,6 +9,7 @@ class Config attribute :when_invalid, Symbol, default: proc { :skip } attribute :after_build_blocks, Array[Proc], default: [] attribute :after_save_blocks, Array[Proc], default: [] + attribute :sql_transaction, Symbol, default: proc { :all_rows } def initialize_copy(orig) super @@ -16,6 +17,7 @@ def initialize_copy(orig) self.identifiers = orig.identifiers.dup self.after_save_blocks = orig.after_save_blocks.dup self.after_build_blocks = orig.after_build_blocks.dup + self.sql_transaction = orig.sql_transaction.dup end def after_build(block) diff --git a/lib/csv_importer/dsl.rb b/lib/csv_importer/dsl.rb index 2ca6e72..f0f27e9 100644 --- a/lib/csv_importer/dsl.rb +++ b/lib/csv_importer/dsl.rb @@ -27,5 +27,9 @@ def after_build(&block) def after_save(&block) config.after_save(block) end + + def sql_transaction(behavior) + config.sql_transaction = behavior + end end end From d9ade95b678730986377f3a6b4b89440c2073093 Mon Sep 17 00:00:00 2001 From: tracy loisel Date: Tue, 10 Oct 2023 10:44:15 +0200 Subject: [PATCH 5/8] rm sql_transaction config --- lib/csv_importer/config.rb | 2 -- lib/csv_importer/dsl.rb | 4 --- lib/csv_importer/runner.rb | 73 +++++++++++++------------------------- 3 files changed, 25 insertions(+), 54 deletions(-) diff --git a/lib/csv_importer/config.rb b/lib/csv_importer/config.rb index 127242f..1100b7a 100644 --- a/lib/csv_importer/config.rb +++ b/lib/csv_importer/config.rb @@ -9,7 +9,6 @@ class Config attribute 
:when_invalid, Symbol, default: proc { :skip } attribute :after_build_blocks, Array[Proc], default: [] attribute :after_save_blocks, Array[Proc], default: [] - attribute :sql_transaction, Symbol, default: proc { :all_rows } def initialize_copy(orig) super @@ -17,7 +16,6 @@ def initialize_copy(orig) self.identifiers = orig.identifiers.dup self.after_save_blocks = orig.after_save_blocks.dup self.after_build_blocks = orig.after_build_blocks.dup - self.sql_transaction = orig.sql_transaction.dup end def after_build(block) diff --git a/lib/csv_importer/dsl.rb b/lib/csv_importer/dsl.rb index f0f27e9..2ca6e72 100644 --- a/lib/csv_importer/dsl.rb +++ b/lib/csv_importer/dsl.rb @@ -27,9 +27,5 @@ def after_build(&block) def after_save(&block) config.after_save(block) end - - def sql_transaction(behavior) - config.sql_transaction = behavior - end end end diff --git a/lib/csv_importer/runner.rb b/lib/csv_importer/runner.rb index 79bc11e..e2f976f 100644 --- a/lib/csv_importer/runner.rb +++ b/lib/csv_importer/runner.rb @@ -12,7 +12,6 @@ def self.call(*args) attribute :rows, Array[Row] attribute :when_invalid, Symbol attribute :after_save_blocks, Array[Proc], default: [] - attribute :sql_transaction, Symbol attribute :report, Report, default: proc { Report.new } @@ -42,48 +41,38 @@ def abort_when_invalid? when_invalid == :abort end - def sql_transaction_all_rows? - sql_transaction == :all_rows - end - - def sql_transaction_one_row? - sql_transaction == :each_row - end - def persist_rows! - full_transaction do + transaction do rows.each do |row| - transaction do - tags = [] + tags = [] - if row.model.persisted? - tags << :update - else - tags << :create - end + if row.model.persisted? + tags << :update + else + tags << :create + end - if row.skip? - tags << :skip - elsif row.errors.size > 0 - tags << :failure + if row.skip? 
+ tags << :skip + elsif row.errors.size > 0 + tags << :failure + else + if row.model.save + tags << :success else - if row.model.save - tags << :success - else - tags << :failure - end + tags << :failure end + end - add_to_report(row, tags) + add_to_report(row, tags) - after_save_blocks.each do |block| - case block.arity - when 0 then block.call - when 1 then block.call(row.model) - when 2 then block.call(row.model, row.csv_attributes) - else - raise ArgumentError, "after_save block of arity #{ block.arity } is not supported" - end + after_save_blocks.each do |block| + case block.arity + when 0 then block.call + when 1 then block.call(row.model) + when 2 then block.call(row.model, row.csv_attributes) + else + raise ArgumentError, "after_save block of arity #{ block.arity } is not supported" end end end @@ -113,20 +102,8 @@ def add_to_report(row, tags) raise ImportAborted if abort_when_invalid? && tags[1] == :failure end - def full_transaction(&block) - if sql_transaction_all_rows? - rows.first.model.class.transaction(&block) - else - block.call - end - end - def transaction(&block) - if sql_transaction_one_row? 
- rows.first.model.class.transaction(&block) - else - block.call - end + rows.first.model.class.transaction(&block) end end end From 4ec1c2c78327f5ea58b56353241ea4c7097bc3ee Mon Sep 17 00:00:00 2001 From: tracy loisel Date: Tue, 12 Nov 2024 20:09:53 +0100 Subject: [PATCH 6/8] read columns based on the order of column definitions and not order from csv file --- lib/csv_importer/column.rb | 1 + lib/csv_importer/header.rb | 6 +++++- lib/csv_importer/row.rb | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/csv_importer/column.rb b/lib/csv_importer/column.rb index 1a69773..7aa3aa6 100644 --- a/lib/csv_importer/column.rb +++ b/lib/csv_importer/column.rb @@ -6,5 +6,6 @@ class Column attribute :name, String attribute :definition, ColumnDefinition + attribute :rank, Integer, default: 0 end end diff --git a/lib/csv_importer/header.rb b/lib/csv_importer/header.rb index 1b96c3c..a7312d9 100644 --- a/lib/csv_importer/header.rb +++ b/lib/csv_importer/header.rb @@ -8,13 +8,17 @@ class Header attribute :column_names, Array[String] def columns + max_column = column_names.size + column_names.map do |column_name| # ensure column name escapes invisible characters column_name = column_name.gsub(/\P{Print}|\p{Cf}/, '') + rank = column_definitions.index { |definition| definition.match?(column_name) } Column.new( name: column_name, - definition: find_column_definition(column_name) + definition: find_column_definition(column_name), + rank: rank || max_column ) end end diff --git a/lib/csv_importer/row.rb b/lib/csv_importer/row.rb index eb9021c..a9d3249 100644 --- a/lib/csv_importer/row.rb +++ b/lib/csv_importer/row.rb @@ -33,7 +33,7 @@ def csv_attributes # Set attributes def set_attributes(model) - header.columns.each do |column| + header.columns.sort_by(&:rank).each do |column| value = csv_attributes[column.name] begin value = value.dup if value From e3363bfa8fb58ff6e735fd962f268683541b481d Mon Sep 17 00:00:00 2001 From: tracy loisel Date: Mon, 18 Nov 2024 14:39:36 
+0100 Subject: [PATCH 7/8] if a column has no definition, its rank will be the last one --- lib/csv_importer/header.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/csv_importer/header.rb b/lib/csv_importer/header.rb index a7312d9..6968995 100644 --- a/lib/csv_importer/header.rb +++ b/lib/csv_importer/header.rb @@ -13,6 +13,10 @@ def columns column_names.map do |column_name| # ensure column name escapes invisible characters column_name = column_name.gsub(/\P{Print}|\p{Cf}/, '') + + # the column will be processed not in the order found in the csv + # but in the order of the importation code + # first column declared, first column processed rank = column_definitions.index { |definition| definition.match?(column_name) } Column.new( From f5aa50e38e5bde637df8b16e74b27bf0bc3c3698 Mon Sep 17 00:00:00 2001 From: tracy loisel Date: Fri, 13 Dec 2024 18:08:20 +0100 Subject: [PATCH 8/8] fix issue with max_columns in header columns configuration --- lib/csv_importer/header.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/csv_importer/header.rb b/lib/csv_importer/header.rb index 6968995..d3ce792 100644 --- a/lib/csv_importer/header.rb +++ b/lib/csv_importer/header.rb @@ -8,7 +8,7 @@ class Header attribute :column_names, Array[String] def columns - max_column = column_names.size + max_column = column_definitions.size column_names.map do |column_name| # ensure column name escapes invisible characters