@@ -14,7 +14,7 @@ use crate::{LlvmCodegenBackend, SimpleCx, attributes};
1414
1515pub ( crate ) fn handle_gpu_code < ' ll > (
1616 _cgcx : & CodegenContext < LlvmCodegenBackend > ,
17- _cx : & ' ll SimpleCx < ' _ > ,
17+ cx : & ' ll SimpleCx < ' _ > ,
1818) {
1919 /*
2020 // The offload memory transfer type for each kernel
@@ -256,15 +256,14 @@ pub(crate) fn add_global<'ll>(
256256// This function returns a memtransfer value which encodes how arguments to this kernel shall be
257257// mapped to/from the gpu. It also returns a region_id with the name of this kernel, to be
258258// concatenated into the list of region_ids.
259- pub ( crate ) fn gen_define_handling < ' ll , ' tcx > (
259+ pub ( crate ) fn gen_define_handling < ' ll > (
260260 cx : & SimpleCx < ' ll > ,
261- tcx : TyCtxt < ' tcx > ,
262- kernel : & ' ll llvm:: Value ,
261+ llfn : & ' ll llvm:: Value ,
263262 offload_entry_ty : & ' ll llvm:: Type ,
264- metadata : Vec < OffloadMetadata > ,
263+ metadata : & Vec < OffloadMetadata > ,
265264 symbol : & str ,
266265) -> ( & ' ll llvm:: Value , & ' ll llvm:: Value ) {
267- let types = cx. func_params_types ( cx. get_type_of_global ( kernel ) ) ;
266+ let types = cx. func_params_types ( cx. get_type_of_global ( llfn ) ) ;
268267 // It seems like non-pointer values are automatically mapped. So here, we focus on pointer (or
269268 // reference) types.
270269 let ptr_meta = types
@@ -274,7 +273,7 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
274273 rustc_codegen_ssa:: common:: TypeKind :: Pointer => Some ( meta) ,
275274 _ => None ,
276275 } )
277- . collect :: < Vec < OffloadMetadata > > ( ) ;
276+ . collect :: < Vec < _ > > ( ) ;
278277
279278 let ptr_sizes = ptr_meta. iter ( ) . map ( |m| m. payload_size ) . collect :: < Vec < _ > > ( ) ;
280279 let ptr_transfer = ptr_meta. iter ( ) . map ( |m| m. mode as u64 | 0x20 ) . collect :: < Vec < _ > > ( ) ;
@@ -283,7 +282,7 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
283282 // A follow-up pr will track these from the frontend, where we still have Rust types.
284283 // Then, we will be able to figure out that e.g. `&[f32;256]` will result in 4*256 bytes.
285284 // I decided that 1024 bytes is a great placeholder value for now.
286- add_priv_unnamed_arr ( & cx, & format ! ( ".offload_sizes.{symbol}" ) , & ptr_sizes) ;
285+ let offload_sizes = add_priv_unnamed_arr ( & cx, & format ! ( ".offload_sizes.{symbol}" ) , & ptr_sizes) ;
287286 // Here we figure out whether something needs to be copied to the gpu (=1), from the gpu (=2),
288287 // or both to and from the gpu (=3). Other values shouldn't affect us for now.
289288 // A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten
@@ -323,6 +322,8 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
323322 llvm:: set_alignment ( llglobal, Align :: EIGHT ) ;
324323 let c_section_name = CString :: new ( "llvm_offload_entries" ) . unwrap ( ) ;
325324 llvm:: set_section ( llglobal, & c_section_name) ;
325+
326+ add_to_llvm_used ( cx, & [ offload_sizes, memtransfer_types, region_id, llglobal] ) ;
326327 ( memtransfer_types, region_id)
327328}
328329
@@ -364,11 +365,10 @@ fn declare_offload_fn<'ll>(
364365pub ( crate ) fn gen_call_handling < ' ll > (
365366 cx : & SimpleCx < ' ll > ,
366367 bb : & BasicBlock ,
367- kernel : & ' ll llvm:: Value ,
368368 memtransfer_types : & [ & ' ll llvm:: Value ] ,
369369 region_ids : & [ & ' ll llvm:: Value ] ,
370370 llfn : & ' ll Value ,
371- metadata : Vec < OffloadMetadata > ,
371+ metadata : & Vec < OffloadMetadata > ,
372372) {
373373 let ( tgt_decl, tgt_target_kernel_ty) = generate_launcher ( & cx) ;
374374 // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
@@ -383,7 +383,7 @@ pub(crate) fn gen_call_handling<'ll>(
383383
384384 let mut builder = SBuilder :: build ( cx, bb) ;
385385
386- let types = cx. func_params_types ( cx. get_type_of_global ( kernel ) ) ;
386+ let types = cx. func_params_types ( cx. get_type_of_global ( llfn ) ) ;
387387 let num_args = types. len ( ) as u64 ;
388388
389389 // Step 0)
@@ -439,7 +439,7 @@ pub(crate) fn gen_call_handling<'ll>(
439439 // As mentioned above, we don't use Rust type information yet. So for now we will just
440440 // assume that we have 1024 bytes, 256 f32 values.
441441 // FIXME(offload): write an offload frontend and handle arbitrary types.
442- builder. store ( cx. get_const_i64 ( metadata[ i] . payload_size ) , gep3, Align :: EIGHT ) ;
442+ builder. store ( cx. get_const_i64 ( metadata[ i as usize ] . payload_size ) , gep3, Align :: EIGHT ) ;
443443 }
444444
445445 // For now we have a very simplistic indexing scheme into our
@@ -514,3 +514,41 @@ pub(crate) fn gen_call_handling<'ll>(
514514
515515 drop ( builder) ;
516516}
517+
518+ // TODO(Sa4dUs): check if there's a better way of doing this, also move to a proper location
519+ fn add_to_llvm_used < ' ll > ( cx : & ' ll SimpleCx < ' _ > , globals : & [ & ' ll Value ] ) {
520+ let ptr_ty = cx. type_ptr ( ) ;
521+ let arr_ty = cx. type_array ( ptr_ty, globals. len ( ) as u64 ) ;
522+ let arr_val = cx. const_array ( ptr_ty, globals) ;
523+
524+ let name = CString :: new ( "llvm.used" ) . unwrap ( ) ;
525+
526+ let used_global_opt = unsafe { llvm:: LLVMGetNamedGlobal ( cx. llmod , name. as_ptr ( ) ) } ;
527+
528+ if used_global_opt. is_none ( ) {
529+ let new_global = unsafe { llvm:: LLVMAddGlobal ( cx. llmod , arr_ty, name. as_ptr ( ) ) } ;
530+ unsafe { llvm:: LLVMSetLinkage ( new_global, llvm:: Linkage :: AppendingLinkage ) } ;
531+ unsafe {
532+ llvm:: LLVMSetSection ( new_global, CString :: new ( "llvm.metadata" ) . unwrap ( ) . as_ptr ( ) )
533+ } ;
534+ unsafe { llvm:: LLVMSetInitializer ( new_global, arr_val) } ;
535+ llvm:: LLVMSetGlobalConstant ( new_global, llvm:: TRUE ) ;
536+ return ;
537+ }
538+
539+ let used_global = used_global_opt. expect ( "expected @llvm.used" ) ;
540+ let mut combined: Vec < & ' ll Value > = Vec :: new ( ) ;
541+
542+ if let Some ( existing_init) = llvm:: LLVMGetInitializer ( used_global) {
543+ let num_elems = unsafe { llvm:: LLVMGetNumOperands ( existing_init) } ;
544+ for i in 0 ..num_elems {
545+ if let Some ( elem) = unsafe { llvm:: LLVMGetOperand ( existing_init, i) } {
546+ combined. push ( elem) ;
547+ }
548+ }
549+ }
550+
551+ combined. extend_from_slice ( globals) ;
552+ let new_arr = cx. const_array ( ptr_ty, & combined) ;
553+ unsafe { llvm:: LLVMSetInitializer ( used_global, new_arr) } ;
554+ }
0 commit comments