Skip to content

Commit 9f12f4a

Browse files
authored
Merge pull request ClickHouse#33302 from Avogar/formats-with-suffixes
Allow to create new files on insert for File/S3/HDFS engines
2 parents b9fb741 + 1f49acc commit 9f12f4a

25 files changed

+634
-94
lines changed

src/Common/ErrorCodes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,7 @@
609609
M(638, SNAPPY_UNCOMPRESS_FAILED) \
610610
M(639, SNAPPY_COMPRESS_FAILED) \
611611
M(640, NO_HIVEMETASTORE) \
612+
M(641, CANNOT_APPEND_TO_FILE) \
612613
\
613614
M(999, KEEPER_EXCEPTION) \
614615
M(1000, POCO_EXCEPTION) \

src/Core/Settings.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,11 @@ class IColumn;
7575
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
7676
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
7777
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
78+
M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \
79+
M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \
7880
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
81+
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in hdfs engine tables", 0) \
82+
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
7983
M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \
8084
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
8185
M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \
@@ -490,6 +494,7 @@ class IColumn;
490494
\
491495
M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \
492496
M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \
497+
M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \
493498
M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
494499
M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to process previous DDL queue entries", 0) \
495500
M(UInt64, max_distributed_depth, 5, "Maximum distributed query depth", 0) \

src/Formats/FormatFactory.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,27 @@ void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name
394394
target = std::move(non_trivial_prefix_and_suffix_checker);
395395
}
396396

397+
void FormatFactory::registerAppendSupportChecker(const String & name, AppendSupportChecker append_support_checker)
398+
{
399+
auto & target = dict[name].append_support_checker;
400+
if (target)
401+
throw Exception("FormatFactory: Suffix checker " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
402+
target = std::move(append_support_checker);
403+
}
404+
405+
void FormatFactory::markFormatHasNoAppendSupport(const String & name)
406+
{
407+
registerAppendSupportChecker(name, [](const FormatSettings &){ return false; });
408+
}
409+
410+
bool FormatFactory::checkIfFormatSupportAppend(const String & name, ContextPtr context, const std::optional<FormatSettings> & format_settings_)
411+
{
412+
auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context);
413+
auto & append_support_checker = dict[name].append_support_checker;
414+
/// By default we consider that format supports append
415+
return !append_support_checker || append_support_checker(format_settings);
416+
}
417+
397418
void FormatFactory::registerOutputFormat(const String & name, OutputCreator output_creator)
398419
{
399420
auto & target = dict[name].output_creator;

src/Formats/FormatFactory.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ class FormatFactory final : private boost::noncopyable
9393
/// The checker should return true if parallel parsing should be disabled.
9494
using NonTrivialPrefixAndSuffixChecker = std::function<bool(ReadBuffer & buf)>;
9595

96+
/// Some formats can support append depending on settings.
97+
/// The checker should return true if the format supports append.
98+
using AppendSupportChecker = std::function<bool(const FormatSettings & settings)>;
99+
96100
using SchemaReaderCreator = std::function<SchemaReaderPtr(ReadBuffer & in, const FormatSettings & settings, ContextPtr context)>;
97101
using ExternalSchemaReaderCreator = std::function<ExternalSchemaReaderPtr(const FormatSettings & settings)>;
98102

@@ -106,6 +110,7 @@ class FormatFactory final : private boost::noncopyable
106110
bool supports_parallel_formatting{false};
107111
bool is_column_oriented{false};
108112
NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker;
113+
AppendSupportChecker append_support_checker;
109114
};
110115

111116
using FormatsDictionary = std::unordered_map<String, Creators>;
@@ -167,6 +172,14 @@ class FormatFactory final : private boost::noncopyable
167172

168173
void registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker);
169174

175+
void registerAppendSupportChecker(const String & name, AppendSupportChecker append_support_checker);
176+
177+
/// If a format never supports append, you can use this method instead of
178+
/// registerAppendSupportChecker with append_support_checker that always returns false.
179+
void markFormatHasNoAppendSupport(const String & name);
180+
181+
bool checkIfFormatSupportAppend(const String & name, ContextPtr context, const std::optional<FormatSettings> & format_settings_ = std::nullopt);
182+
170183
/// Register format by its name.
171184
void registerInputFormat(const String & name, InputCreator input_creator);
172185
void registerOutputFormat(const String & name, OutputCreator output_creator);

src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ void registerOutputFormatArrow(FormatFactory & factory)
9393
{
9494
return std::make_shared<ArrowBlockOutputFormat>(buf, sample, false, format_settings);
9595
});
96+
factory.markFormatHasNoAppendSupport("Arrow");
9697

9798
factory.registerOutputFormat(
9899
"ArrowStream",
@@ -103,6 +104,7 @@ void registerOutputFormatArrow(FormatFactory & factory)
103104
{
104105
return std::make_shared<ArrowBlockOutputFormat>(buf, sample, true, format_settings);
105106
});
107+
factory.markFormatHasNoAppendSupport("ArrowStream");
106108
}
107109

108110
}

src/Processors/Formats/Impl/AvroRowOutputFormat.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,7 @@ void registerOutputFormatAvro(FormatFactory & factory)
479479
{
480480
return std::make_shared<AvroRowOutputFormat>(buf, sample, params, settings);
481481
});
482+
factory.markFormatHasNoAppendSupport("Avro");
482483
}
483484

484485
}

src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ void registerOutputFormatCustomSeparated(FormatFactory & factory)
9191
});
9292

9393
factory.markOutputFormatSupportsParallelFormatting(format_name);
94+
95+
factory.registerAppendSupportChecker(format_name, [](const FormatSettings & settings)
96+
{
97+
return settings.custom.result_after_delimiter.empty();
98+
});
9499
};
95100

96101
registerWithNamesAndTypes("CustomSeparated", register_func);

src/Processors/Formats/Impl/JSONRowOutputFormat.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ void registerOutputFormatJSON(FormatFactory & factory)
284284
});
285285

286286
factory.markOutputFormatSupportsParallelFormatting("JSON");
287+
factory.markFormatHasNoAppendSupport("JSON");
287288

288289
factory.registerOutputFormat("JSONStrings", [](
289290
WriteBuffer & buf,
@@ -295,6 +296,7 @@ void registerOutputFormatJSON(FormatFactory & factory)
295296
});
296297

297298
factory.markOutputFormatSupportsParallelFormatting("JSONStrings");
299+
factory.markFormatHasNoAppendSupport("JSONStrings");
298300
}
299301

300302
}

src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ void registerOutputFormatORC(FormatFactory & factory)
526526
{
527527
return std::make_shared<ORCBlockOutputFormat>(buf, sample, format_settings);
528528
});
529+
factory.markFormatHasNoAppendSupport("ORC");
529530
}
530531

531532
}

src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ void registerOutputFormatParquet(FormatFactory & factory)
8585
{
8686
return std::make_shared<ParquetBlockOutputFormat>(buf, sample, format_settings);
8787
});
88+
factory.markFormatHasNoAppendSupport("Parquet");
8889
}
8990

9091
}

0 commit comments

Comments
 (0)