@@ -14,7 +14,7 @@ use crate::{LlvmCodegenBackend, SimpleCx, attributes};
1414
1515pub ( crate ) fn handle_gpu_code < ' ll > (
1616 _cgcx : & CodegenContext < LlvmCodegenBackend > ,
17- _cx : & ' ll SimpleCx < ' _ > ,
17+ cx : & ' ll SimpleCx < ' _ > ,
1818) {
1919 /*
2020 // The offload memory transfer type for each kernel
@@ -259,15 +259,14 @@ pub(crate) fn add_global<'ll>(
259259// This function returns a memtransfer value which encodes how arguments to this kernel shall be
260260// mapped to/from the gpu. It also returns a region_id with the name of this kernel, to be
261261// concatenated into the list of region_ids.
262- pub ( crate ) fn gen_define_handling < ' ll , ' tcx > (
262+ pub ( crate ) fn gen_define_handling < ' ll > (
263263 cx : & SimpleCx < ' ll > ,
264- tcx : TyCtxt < ' tcx > ,
265- kernel : & ' ll llvm:: Value ,
264+ llfn : & ' ll llvm:: Value ,
266265 offload_entry_ty : & ' ll llvm:: Type ,
267- metadata : Vec < OffloadMetadata > ,
266+ metadata : & Vec < OffloadMetadata > ,
268267 symbol : & str ,
269268) -> ( & ' ll llvm:: Value , & ' ll llvm:: Value ) {
270- let types = cx. func_params_types ( cx. get_type_of_global ( kernel ) ) ;
269+ let types = cx. func_params_types ( cx. get_type_of_global ( llfn ) ) ;
271270 // It seems like non-pointer values are automatically mapped. So here, we focus on pointer (or
272271 // reference) types.
273272 let ptr_meta = types
@@ -277,7 +276,7 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
277276 rustc_codegen_ssa:: common:: TypeKind :: Pointer => Some ( meta) ,
278277 _ => None ,
279278 } )
280- . collect :: < Vec < OffloadMetadata > > ( ) ;
279+ . collect :: < Vec < _ > > ( ) ;
281280
282281 let ptr_sizes = ptr_meta. iter ( ) . map ( |m| m. payload_size ) . collect :: < Vec < _ > > ( ) ;
283282 let ptr_transfer = ptr_meta. iter ( ) . map ( |m| m. mode as u64 | 0x20 ) . collect :: < Vec < _ > > ( ) ;
@@ -286,7 +285,7 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
286285 // A follow-up pr will track these from the frontend, where we still have Rust types.
287286 // Then, we will be able to figure out that e.g. `&[f32;256]` will result in 4*256 bytes.
288287 // I decided that 1024 bytes is a great placeholder value for now.
289- add_priv_unnamed_arr ( & cx, & format ! ( ".offload_sizes.{symbol}" ) , & ptr_sizes) ;
288+ let offload_sizes = add_priv_unnamed_arr ( & cx, & format ! ( ".offload_sizes.{symbol}" ) , & ptr_sizes) ;
290289 // Here we figure out whether something needs to be copied to the gpu (=1), from the gpu (=2),
291290 // or both to and from the gpu (=3). Other values shouldn't affect us for now.
292291 // A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten
@@ -326,6 +325,8 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
326325 llvm:: set_alignment ( llglobal, Align :: EIGHT ) ;
327326 let c_section_name = CString :: new ( "llvm_offload_entries" ) . unwrap ( ) ;
328327 llvm:: set_section ( llglobal, & c_section_name) ;
328+
329+ add_to_llvm_used ( cx, & [ offload_sizes, memtransfer_types, region_id, llglobal] ) ;
329330 ( memtransfer_types, region_id)
330331}
331332
@@ -367,11 +368,10 @@ fn declare_offload_fn<'ll>(
367368pub ( crate ) fn gen_call_handling < ' ll > (
368369 cx : & SimpleCx < ' ll > ,
369370 bb : & BasicBlock ,
370- kernel : & ' ll llvm:: Value ,
371371 memtransfer_types : & [ & ' ll llvm:: Value ] ,
372372 region_ids : & [ & ' ll llvm:: Value ] ,
373373 llfn : & ' ll Value ,
374- metadata : Vec < OffloadMetadata > ,
374+ metadata : & Vec < OffloadMetadata > ,
375375) {
376376 let ( tgt_decl, tgt_target_kernel_ty) = generate_launcher ( & cx) ;
377377 // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
@@ -386,7 +386,7 @@ pub(crate) fn gen_call_handling<'ll>(
386386
387387 let mut builder = SBuilder :: build ( cx, bb) ;
388388
389- let types = cx. func_params_types ( cx. get_type_of_global ( kernel ) ) ;
389+ let types = cx. func_params_types ( cx. get_type_of_global ( llfn ) ) ;
390390 let num_args = types. len ( ) as u64 ;
391391
392392 // Step 0)
@@ -442,7 +442,7 @@ pub(crate) fn gen_call_handling<'ll>(
442442 // As mentioned above, we don't use Rust type information yet. So for now we will just
443443 // assume that we have 1024 bytes, 256 f32 values.
444444 // FIXME(offload): write an offload frontend and handle arbitrary types.
445- builder. store ( cx. get_const_i64 ( metadata[ i] . payload_size ) , gep3, Align :: EIGHT ) ;
445+ builder. store ( cx. get_const_i64 ( metadata[ i as usize ] . payload_size ) , gep3, Align :: EIGHT ) ;
446446 }
447447
448448 // For now we have a very simplistic indexing scheme into our
@@ -517,3 +517,41 @@ pub(crate) fn gen_call_handling<'ll>(
517517
518518 drop ( builder) ;
519519}
520+
521+ // TODO(Sa4dUs): check if there's a better way of doing this, also move to a proper location
522+ fn add_to_llvm_used < ' ll > ( cx : & ' ll SimpleCx < ' _ > , globals : & [ & ' ll Value ] ) {
523+ let ptr_ty = cx. type_ptr ( ) ;
524+ let arr_ty = cx. type_array ( ptr_ty, globals. len ( ) as u64 ) ;
525+ let arr_val = cx. const_array ( ptr_ty, globals) ;
526+
527+ let name = CString :: new ( "llvm.used" ) . unwrap ( ) ;
528+
529+ let used_global_opt = unsafe { llvm:: LLVMGetNamedGlobal ( cx. llmod , name. as_ptr ( ) ) } ;
530+
531+ if used_global_opt. is_none ( ) {
532+ let new_global = unsafe { llvm:: LLVMAddGlobal ( cx. llmod , arr_ty, name. as_ptr ( ) ) } ;
533+ unsafe { llvm:: LLVMSetLinkage ( new_global, llvm:: Linkage :: AppendingLinkage ) } ;
534+ unsafe {
535+ llvm:: LLVMSetSection ( new_global, CString :: new ( "llvm.metadata" ) . unwrap ( ) . as_ptr ( ) )
536+ } ;
537+ unsafe { llvm:: LLVMSetInitializer ( new_global, arr_val) } ;
538+ llvm:: LLVMSetGlobalConstant ( new_global, llvm:: TRUE ) ;
539+ return ;
540+ }
541+
542+ let used_global = used_global_opt. expect ( "expected @llvm.used" ) ;
543+ let mut combined: Vec < & ' ll Value > = Vec :: new ( ) ;
544+
545+ if let Some ( existing_init) = llvm:: LLVMGetInitializer ( used_global) {
546+ let num_elems = unsafe { llvm:: LLVMGetNumOperands ( existing_init) } ;
547+ for i in 0 ..num_elems {
548+ if let Some ( elem) = unsafe { llvm:: LLVMGetOperand ( existing_init, i) } {
549+ combined. push ( elem) ;
550+ }
551+ }
552+ }
553+
554+ combined. extend_from_slice ( globals) ;
555+ let new_arr = cx. const_array ( ptr_ty, & combined) ;
556+ unsafe { llvm:: LLVMSetInitializer ( used_global, new_arr) } ;
557+ }
0 commit comments