@@ -17,13 +17,16 @@ use std::io::BufRead;
1717use std:: io:: Cursor ;
1818use std:: ops:: Not ;
1919use std:: sync:: Arc ;
20+ use std:: time:: Instant ;
2021
2122use aho_corasick:: AhoCorasick ;
2223use common_ast:: ast:: Expr ;
2324use common_ast:: parser:: parse_comma_separated_exprs;
2425use common_ast:: parser:: tokenize_sql;
2526use common_ast:: Backtrace ;
2627use common_base:: runtime:: GlobalIORuntime ;
28+ use common_catalog:: plan:: StageFileStatus ;
29+ use common_catalog:: plan:: StageTableInfo ;
2730use common_catalog:: table:: AppendMode ;
2831use common_datablocks:: DataBlock ;
2932use common_datavalues:: prelude:: * ;
@@ -33,19 +36,26 @@ use common_formats::parse_timezone;
3336use common_formats:: FastFieldDecoderValues ;
3437use common_io:: cursor_ext:: ReadBytesExt ;
3538use common_io:: cursor_ext:: ReadCheckPointExt ;
39+ use common_meta_types:: UserStageInfo ;
40+ use common_pipeline_core:: Pipeline ;
3641use common_pipeline_sources:: processors:: sources:: AsyncSource ;
3742use common_pipeline_sources:: processors:: sources:: AsyncSourcer ;
3843use common_pipeline_transforms:: processors:: transforms:: Transform ;
3944use common_sql:: evaluator:: ChunkOperator ;
4045use common_sql:: evaluator:: CompoundChunkOperator ;
46+ use common_sql:: executor:: table_read_plan:: ToReadDataSourcePlan ;
4147use common_sql:: Metadata ;
4248use common_sql:: MetadataRef ;
49+ use common_storages_factory:: Table ;
50+ use common_storages_stage:: StageTable ;
51+ use common_users:: UserApiProvider ;
4352use parking_lot:: Mutex ;
4453use parking_lot:: RwLock ;
4554
4655use crate :: interpreters:: common:: append2table;
4756use crate :: interpreters:: Interpreter ;
4857use crate :: interpreters:: InterpreterPtr ;
58+ use crate :: pipelines:: processors:: TransformAddOn ;
4959use crate :: pipelines:: PipelineBuildResult ;
5060use crate :: pipelines:: SourcePipeBuilder ;
5161use crate :: schedulers:: build_query_pipeline;
@@ -101,6 +111,113 @@ impl InsertInterpreterV2 {
101111 let cast_needed = select_schema != * output_schema;
102112 Ok ( cast_needed)
103113 }
114+
115+ // TODO:(everpcpc)
116+ async fn build_insert_from_stage_pipeline (
117+ & self ,
118+ table : Arc < dyn Table > ,
119+ stage_location : & str ,
120+ pipeline : & mut Pipeline ,
121+ ) -> Result < ( ) > {
122+ let start = Instant :: now ( ) ;
123+ let ctx = self . ctx . clone ( ) ;
124+ let table_ctx: Arc < dyn TableContext > = ctx. clone ( ) ;
125+ let source_schema = self . plan . schema ( ) ;
126+ let target_schema = table. schema ( ) ;
127+ let catalog_name = self . plan . catalog . clone ( ) ;
128+ let overwrite = self . plan . overwrite ;
129+
130+ let ( stage_info, path) = parse_stage_location ( & self . ctx , stage_location) . await ?;
131+
132+ let mut stage_table_info = StageTableInfo {
133+ schema : source_schema. clone ( ) ,
134+ user_stage_info : stage_info,
135+ path : path. to_string ( ) ,
136+ files : vec ! [ ] ,
137+ pattern : "" . to_string ( ) ,
138+ files_to_copy : None ,
139+ } ;
140+
141+ let all_source_file_infos = StageTable :: list_files ( & table_ctx, & stage_table_info) . await ?;
142+
143+ // TODO: color_copied_files
144+
145+ let mut need_copied_file_infos = vec ! [ ] ;
146+ for file in & all_source_file_infos {
147+ if file. status == StageFileStatus :: NeedCopy {
148+ need_copied_file_infos. push ( file. clone ( ) ) ;
149+ }
150+ }
151+
152+ // DEBUG:
153+ tracing:: warn!(
154+ "insert: read all sideload files finished, all:{}, need copy:{}, elapsed:{}" ,
155+ all_source_file_infos. len( ) ,
156+ need_copied_file_infos. len( ) ,
157+ start. elapsed( ) . as_secs( )
158+ ) ;
159+
160+ if need_copied_file_infos. is_empty ( ) {
161+ return Ok ( ( ) ) ;
162+ }
163+
164+ stage_table_info. files_to_copy = Some ( need_copied_file_infos. clone ( ) ) ;
165+ let stage_table = StageTable :: try_create ( stage_table_info. clone ( ) ) ?;
166+ let read_source_plan = {
167+ stage_table
168+ . read_plan_with_catalog ( ctx. clone ( ) , catalog_name, None )
169+ . await ?
170+ } ;
171+
172+ stage_table. read_data ( table_ctx, & read_source_plan, pipeline) ?;
173+
174+ let need_fill_missing_columns = target_schema != source_schema;
175+ if need_fill_missing_columns {
176+ pipeline. add_transform ( |transform_input_port, transform_output_port| {
177+ TransformAddOn :: try_create (
178+ transform_input_port,
179+ transform_output_port,
180+ source_schema. clone ( ) ,
181+ target_schema. clone ( ) ,
182+ ctx. clone ( ) ,
183+ )
184+ } ) ?;
185+ }
186+ table. append_data ( ctx. clone ( ) , pipeline, AppendMode :: Copy , false ) ?;
187+
188+ pipeline. set_on_finished ( move |may_error| {
189+ // capture out variable
190+ let overwrite = overwrite;
191+ let ctx = ctx. clone ( ) ;
192+ let table = table. clone ( ) ;
193+
194+ match may_error {
195+ Some ( error) => {
196+ tracing:: error!( "insert stage file error: {}" , error) ;
197+ Err ( may_error. as_ref ( ) . unwrap ( ) . clone ( ) )
198+ }
199+ None => {
200+ let append_entries = ctx. consume_precommit_blocks ( ) ;
201+ // We must put the commit operation to global runtime, which will avoid the "dispatch dropped without returning error" in tower
202+ return GlobalIORuntime :: instance ( ) . block_on ( async move {
203+ // DEBUG:
204+ tracing:: warn!(
205+ "insert: try to commit append entries:{}, elapsed:{}" ,
206+ append_entries. len( ) ,
207+ start. elapsed( ) . as_secs( )
208+ ) ;
209+ table
210+ . commit_insertion ( ctx, append_entries, overwrite)
211+ . await ?;
212+ Ok ( ( ) )
213+ // TODO: purge copied files
214+ } ) ;
215+ }
216+ }
217+ } ) ;
218+
219+ Ok ( ( ) )
220+ }
104221}
105222
106223#[ async_trait:: async_trait]
@@ -156,6 +273,27 @@ impl Interpreter for InsertInterpreterV2 {
156273 . format
157274 . exec_stream ( input_context. clone ( ) , & mut build_res. main_pipeline ) ?;
158275 }
276+ InsertInputSource :: Sideload ( opts) => {
277+ // DEBUG:
278+ tracing:: warn!( "==> sideload insert: {:?}" , opts) ;
279+
280+ match & opts. stage {
281+ None => {
282+ return Err ( ErrorCode :: BadDataValueType (
283+ "No stage location provided" . to_string ( ) ,
284+ ) ) ;
285+ }
286+ Some ( stage_location) => {
287+ self . build_insert_from_stage_pipeline (
288+ table. clone ( ) ,
289+ stage_location,
290+ & mut build_res. main_pipeline ,
291+ )
292+ . await ?;
293+ }
294+ }
295+ return Ok ( build_res) ;
296+ }
159297 InsertInputSource :: SelectPlan ( plan) => {
160298 let table1 = table. clone ( ) ;
161299 let ( mut select_plan, select_column_bindings) = match plan. as_ref ( ) {
@@ -602,3 +740,25 @@ async fn exprs_to_datavalue<'a>(
602740 let datavalues: Vec < DataValue > = res. columns ( ) . iter ( ) . skip ( 1 ) . map ( |col| col. get ( 0 ) ) . collect ( ) ;
603741 Ok ( datavalues)
604742}
743+
744+ // FIXME: tmp copy from src/query/sql/src/planner/binder/copy.rs
745+ async fn parse_stage_location (
746+ ctx : & Arc < QueryContext > ,
747+ location : & str ,
748+ ) -> Result < ( UserStageInfo , String ) > {
749+ let s: Vec < & str > = location. split ( '@' ) . collect ( ) ;
750+ // @my_ext_stage/abc/
751+ let names: Vec < & str > = s[ 1 ] . splitn ( 2 , '/' ) . filter ( |v| !v. is_empty ( ) ) . collect ( ) ;
752+
753+ let stage = if names[ 0 ] == "~" {
754+ UserStageInfo :: new_user_stage ( & ctx. get_current_user ( ) ?. name )
755+ } else {
756+ UserApiProvider :: instance ( )
757+ . get_stage ( & ctx. get_tenant ( ) , names[ 0 ] )
758+ . await ?
759+ } ;
760+
761+ let path = names. get ( 1 ) . unwrap_or ( & "" ) . trim_start_matches ( '/' ) ;
762+
763+ Ok ( ( stage, path. to_string ( ) ) )
764+ }
0 commit comments