@@ -17,14 +17,17 @@ use std::io::BufRead;
 use std::io::Cursor;
 use std::ops::Not;
 use std::sync::Arc;
+use std::time::Instant;
 
 use aho_corasick::AhoCorasick;
 use common_ast::ast::Expr;
 use common_ast::parser::parse_comma_separated_exprs;
 use common_ast::parser::tokenize_sql;
 use common_ast::Backtrace;
 use common_base::runtime::GlobalIORuntime;
+use common_catalog::plan::StageTableInfo;
 use common_catalog::table::AppendMode;
+use common_catalog::table_context::StageAttachment;
 use common_datablocks::DataBlock;
 use common_datavalues::prelude::*;
 use common_exception::ErrorCode;
@@ -33,19 +36,26 @@ use common_formats::parse_timezone;
 use common_formats::FastFieldDecoderValues;
 use common_io::cursor_ext::ReadBytesExt;
 use common_io::cursor_ext::ReadCheckPointExt;
+use common_meta_types::UserStageInfo;
+use common_pipeline_core::Pipeline;
 use common_pipeline_sources::processors::sources::AsyncSource;
 use common_pipeline_sources::processors::sources::AsyncSourcer;
 use common_pipeline_transforms::processors::transforms::Transform;
 use common_sql::evaluator::ChunkOperator;
 use common_sql::evaluator::CompoundChunkOperator;
+use common_sql::executor::table_read_plan::ToReadDataSourcePlan;
 use common_sql::Metadata;
 use common_sql::MetadataRef;
+use common_storages_factory::Table;
+use common_storages_stage::StageTable;
+use common_users::UserApiProvider;
 use parking_lot::Mutex;
 use parking_lot::RwLock;
 
 use crate::interpreters::common::append2table;
 use crate::interpreters::Interpreter;
 use crate::interpreters::InterpreterPtr;
+use crate::pipelines::processors::TransformAddOn;
 use crate::pipelines::PipelineBuildResult;
 use crate::pipelines::SourcePipeBuilder;
 use crate::schedulers::build_query_pipeline;
@@ -101,6 +111,100 @@ impl InsertInterpreterV2 {
         let cast_needed = select_schema != *output_schema;
         Ok(cast_needed)
     }
+
+    async fn build_insert_from_stage_pipeline(
+        &self,
+        table: Arc<dyn Table>,
+        attachment: Arc<StageAttachment>,
+        pipeline: &mut Pipeline,
+    ) -> Result<()> {
+        let start = Instant::now();
+        let ctx = self.ctx.clone();
+        let table_ctx: Arc<dyn TableContext> = ctx.clone();
+        let source_schema = self.plan.schema();
+        let target_schema = table.schema();
+        let catalog_name = self.plan.catalog.clone();
+        let overwrite = self.plan.overwrite;
+
+        let (mut stage_info, path) = parse_stage_location(&self.ctx, &attachment.location).await?;
+        stage_info.apply_format_options(&attachment.format_options)?;
+        stage_info.apply_copy_options(&attachment.copy_options)?;
+
+        let mut stage_table_info = StageTableInfo {
+            schema: source_schema.clone(),
+            user_stage_info: stage_info,
+            path: path.to_string(),
+            files: vec![],
+            pattern: "".to_string(),
+            files_to_copy: None,
+        };
+
+        let all_source_file_infos = StageTable::list_files(&table_ctx, &stage_table_info).await?;
+
+        tracing::info!(
+            "insert: read all stage attachment files finished: {}, elapsed:{}",
+            all_source_file_infos.len(),
+            start.elapsed().as_secs()
+        );
+
+        stage_table_info.files_to_copy = Some(all_source_file_infos.clone());
+        let stage_table = StageTable::try_create(stage_table_info.clone())?;
+        let read_source_plan = {
+            stage_table
+                .read_plan_with_catalog(ctx.clone(), catalog_name, None)
+                .await?
+        };
+
+        stage_table.read_data(table_ctx, &read_source_plan, pipeline)?;
+
+        let need_fill_missing_columns = target_schema != source_schema;
+        if need_fill_missing_columns {
+            pipeline.add_transform(|transform_input_port, transform_output_port| {
+                TransformAddOn::try_create(
+                    transform_input_port,
+                    transform_output_port,
+                    source_schema.clone(),
+                    target_schema.clone(),
+                    ctx.clone(),
+                )
+            })?;
+        }
+        table.append_data(ctx.clone(), pipeline, AppendMode::Copy, false)?;
+
+        pipeline.set_on_finished(move |may_error| {
+            // capture out variable
+            let overwrite = overwrite;
+            let ctx = ctx.clone();
+            let table = table.clone();
+
+            match may_error {
+                Some(error) => {
+                    tracing::error!("insert stage file error: {}", error);
+                    Err(may_error.as_ref().unwrap().clone())
+                }
+                None => {
+                    let append_entries = ctx.consume_precommit_blocks();
+                    // We must put the commit operation to global runtime, which will avoid the "dispatch dropped without returning error" in tower
+                    GlobalIORuntime::instance().block_on(async move {
+                        tracing::info!(
+                            "insert: try to commit append entries:{}, elapsed:{}",
+                            append_entries.len(),
+                            start.elapsed().as_secs()
+                        );
+                        table
+                            .commit_insertion(ctx, append_entries, overwrite)
+                            .await?;
+
+                        // TODO:(everpcpc) purge copied files
+
+                        Ok(())
+                    })
+                }
+            }
+        });
+
+        Ok(())
+    }
 }
 
 #[async_trait::async_trait]
@@ -156,6 +260,16 @@ impl Interpreter for InsertInterpreterV2 {
                     .format
                     .exec_stream(input_context.clone(), &mut build_res.main_pipeline)?;
             }
+            InsertInputSource::Stage(opts) => {
+                tracing::info!("insert: from stage with options {:?}", opts);
+                self.build_insert_from_stage_pipeline(
+                    table.clone(),
+                    opts.clone(),
+                    &mut build_res.main_pipeline,
+                )
+                .await?;
+                return Ok(build_res);
+            }
             InsertInputSource::SelectPlan(plan) => {
                 let table1 = table.clone();
                 let (mut select_plan, select_column_bindings) = match plan.as_ref() {
@@ -602,3 +716,26 @@ async fn exprs_to_datavalue<'a>(
     let datavalues: Vec<DataValue> = res.columns().iter().skip(1).map(|col| col.get(0)).collect();
     Ok(datavalues)
 }
+
+// TODO:(everpcpc) tmp copy from src/query/sql/src/planner/binder/copy.rs
+// move to user stage module
+async fn parse_stage_location(
+    ctx: &Arc<QueryContext>,
+    location: &str,
+) -> Result<(UserStageInfo, String)> {
+    let s: Vec<&str> = location.split('@').collect();
+    // @my_ext_stage/abc/
+    let names: Vec<&str> = s[1].splitn(2, '/').filter(|v| !v.is_empty()).collect();
+
+    let stage = if names[0] == "~" {
+        UserStageInfo::new_user_stage(&ctx.get_current_user()?.name)
+    } else {
+        UserApiProvider::instance()
+            .get_stage(&ctx.get_tenant(), names[0])
+            .await?
+    };
+
+    let path = names.get(1).unwrap_or(&"").trim_start_matches('/');
+
+    Ok((stage, path.to_string()))
+}