Skip to content

Commit e0fd7be

Browse files
committed
Mark globals as used + some minor fixes
1 parent 26ed1ad commit e0fd7be

File tree

5 files changed

+72
-68
lines changed

5 files changed

+72
-68
lines changed

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::{LlvmCodegenBackend, SimpleCx, attributes};
1414

1515
pub(crate) fn handle_gpu_code<'ll>(
1616
_cgcx: &CodegenContext<LlvmCodegenBackend>,
17-
_cx: &'ll SimpleCx<'_>,
17+
cx: &'ll SimpleCx<'_>,
1818
) {
1919
/*
2020
// The offload memory transfer type for each kernel
@@ -256,15 +256,14 @@ pub(crate) fn add_global<'ll>(
256256
// This function returns a memtransfer value which encodes how arguments to this kernel shall be
257257
// mapped to/from the gpu. It also returns a region_id with the name of this kernel, to be
258258
// concatenated into the list of region_ids.
259-
pub(crate) fn gen_define_handling<'ll, 'tcx>(
259+
pub(crate) fn gen_define_handling<'ll>(
260260
cx: &SimpleCx<'ll>,
261-
tcx: TyCtxt<'tcx>,
262-
kernel: &'ll llvm::Value,
261+
llfn: &'ll llvm::Value,
263262
offload_entry_ty: &'ll llvm::Type,
264-
metadata: Vec<OffloadMetadata>,
263+
metadata: &Vec<OffloadMetadata>,
265264
symbol: &str,
266265
) -> (&'ll llvm::Value, &'ll llvm::Value) {
267-
let types = cx.func_params_types(cx.get_type_of_global(kernel));
266+
let types = cx.func_params_types(cx.get_type_of_global(llfn));
268267
// It seems like non-pointer values are automatically mapped. So here, we focus on pointer (or
269268
// reference) types.
270269
let ptr_meta = types
@@ -274,7 +273,7 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
274273
rustc_codegen_ssa::common::TypeKind::Pointer => Some(meta),
275274
_ => None,
276275
})
277-
.collect::<Vec<OffloadMetadata>>();
276+
.collect::<Vec<_>>();
278277

279278
let ptr_sizes = ptr_meta.iter().map(|m| m.payload_size).collect::<Vec<_>>();
280279
let ptr_transfer = ptr_meta.iter().map(|m| m.mode as u64 | 0x20).collect::<Vec<_>>();
@@ -283,7 +282,7 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
283282
// A follow-up pr will track these from the frontend, where we still have Rust types.
284283
// Then, we will be able to figure out that e.g. `&[f32;256]` will result in 4*256 bytes.
285284
// I decided that 1024 bytes is a great placeholder value for now.
286-
add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{symbol}"), &ptr_sizes);
285+
let offload_sizes = add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{symbol}"), &ptr_sizes);
287286
// Here we figure out whether something needs to be copied to the gpu (=1), from the gpu (=2),
288287
// or both to and from the gpu (=3). Other values shouldn't affect us for now.
289288
// A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten
@@ -323,6 +322,8 @@ pub(crate) fn gen_define_handling<'ll, 'tcx>(
323322
llvm::set_alignment(llglobal, Align::EIGHT);
324323
let c_section_name = CString::new("llvm_offload_entries").unwrap();
325324
llvm::set_section(llglobal, &c_section_name);
325+
326+
add_to_llvm_used(cx, &[offload_sizes, memtransfer_types, region_id, llglobal]);
326327
(memtransfer_types, region_id)
327328
}
328329

@@ -364,11 +365,10 @@ fn declare_offload_fn<'ll>(
364365
pub(crate) fn gen_call_handling<'ll>(
365366
cx: &SimpleCx<'ll>,
366367
bb: &BasicBlock,
367-
kernel: &'ll llvm::Value,
368368
memtransfer_types: &[&'ll llvm::Value],
369369
region_ids: &[&'ll llvm::Value],
370370
llfn: &'ll Value,
371-
metadata: Vec<OffloadMetadata>,
371+
metadata: &Vec<OffloadMetadata>,
372372
) {
373373
let (tgt_decl, tgt_target_kernel_ty) = generate_launcher(&cx);
374374
// %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
@@ -383,7 +383,7 @@ pub(crate) fn gen_call_handling<'ll>(
383383

384384
let mut builder = SBuilder::build(cx, bb);
385385

386-
let types = cx.func_params_types(cx.get_type_of_global(kernel));
386+
let types = cx.func_params_types(cx.get_type_of_global(llfn));
387387
let num_args = types.len() as u64;
388388

389389
// Step 0)
@@ -439,7 +439,7 @@ pub(crate) fn gen_call_handling<'ll>(
439439
// As mentioned above, we don't use Rust type information yet. So for now we will just
440440
// assume that we have 1024 bytes, 256 f32 values.
441441
// FIXME(offload): write an offload frontend and handle arbitrary types.
442-
builder.store(cx.get_const_i64(metadata[i].payload_size), gep3, Align::EIGHT);
442+
builder.store(cx.get_const_i64(metadata[i as usize].payload_size), gep3, Align::EIGHT);
443443
}
444444

445445
// For now we have a very simplistic indexing scheme into our
@@ -514,3 +514,41 @@ pub(crate) fn gen_call_handling<'ll>(
514514

515515
drop(builder);
516516
}
517+
518+
// TODO(Sa4dUs): check if there's a better way of doing this, also move to a proper location
519+
fn add_to_llvm_used<'ll>(cx: &'ll SimpleCx<'_>, globals: &[&'ll Value]) {
520+
let ptr_ty = cx.type_ptr();
521+
let arr_ty = cx.type_array(ptr_ty, globals.len() as u64);
522+
let arr_val = cx.const_array(ptr_ty, globals);
523+
524+
let name = CString::new("llvm.used").unwrap();
525+
526+
let used_global_opt = unsafe { llvm::LLVMGetNamedGlobal(cx.llmod, name.as_ptr()) };
527+
528+
if used_global_opt.is_none() {
529+
let new_global = unsafe { llvm::LLVMAddGlobal(cx.llmod, arr_ty, name.as_ptr()) };
530+
unsafe { llvm::LLVMSetLinkage(new_global, llvm::Linkage::AppendingLinkage) };
531+
unsafe {
532+
llvm::LLVMSetSection(new_global, CString::new("llvm.metadata").unwrap().as_ptr())
533+
};
534+
unsafe { llvm::LLVMSetInitializer(new_global, arr_val) };
535+
llvm::LLVMSetGlobalConstant(new_global, llvm::TRUE);
536+
return;
537+
}
538+
539+
let used_global = used_global_opt.expect("expected @llvm.used");
540+
let mut combined: Vec<&'ll Value> = Vec::new();
541+
542+
if let Some(existing_init) = llvm::LLVMGetInitializer(used_global) {
543+
let num_elems = unsafe { llvm::LLVMGetNumOperands(existing_init) };
544+
for i in 0..num_elems {
545+
if let Some(elem) = unsafe { llvm::LLVMGetOperand(existing_init, i) } {
546+
combined.push(elem);
547+
}
548+
}
549+
}
550+
551+
combined.extend_from_slice(globals);
552+
let new_arr = cx.const_array(ptr_ty, &combined);
553+
unsafe { llvm::LLVMSetInitializer(used_global, new_arr) };
554+
}

compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,9 +1262,6 @@ fn codegen_offload<'ll, 'tcx>(
12621262
};
12631263

12641264
let target_symbol = symbol_name_for_instance_in_crate(tcx, fn_target.clone(), LOCAL_CRATE);
1265-
let Some(kernel) = cx.get_function(&target_symbol) else {
1266-
bug!("could not find target function")
1267-
};
12681265

12691266
let offload_entry_ty = TgtOffloadEntry::new_decl(&cx);
12701267

@@ -1273,29 +1270,26 @@ fn codegen_offload<'ll, 'tcx>(
12731270
let inputs = sig.inputs();
12741271

12751272
let metadata = inputs.iter().map(|ty| OffloadMetadata::from_ty(tcx, *ty)).collect::<Vec<_>>();
1273+
let llfn = bx.llfn();
12761274

12771275
// TODO(Sa4dUs): separate globals from call-independent headers and use typetrees to reserve the correct amount of memory
12781276
let (memtransfer_type, region_id) = crate::builder::gpu_offload::gen_define_handling(
12791277
cx,
1280-
tcx,
1281-
kernel,
1278+
llfn,
12821279
offload_entry_ty,
1283-
metadata,
1280+
&metadata,
12841281
&target_symbol,
12851282
);
12861283

1287-
let llfn = bx.llfn();
1288-
12891284
// TODO(Sa4dUs): this is just to avoid lifetime issues
12901285
let bb = unsafe { llvm::LLVMGetInsertBlock(bx.llbuilder) };
12911286
crate::builder::gpu_offload::gen_call_handling(
12921287
cx,
12931288
bb,
1294-
kernel,
12951289
&[memtransfer_type],
12961290
&[region_id],
12971291
llfn,
1298-
metadata,
1292+
&metadata,
12991293
);
13001294
}
13011295

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,7 @@ unsafe extern "C" {
11491149
pub(crate) fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>;
11501150
pub(crate) fn LLVMGetNextInstruction(Val: &Value) -> Option<&Value>;
11511151
pub(crate) fn LLVMInstructionEraseFromParent(Val: &Value);
1152+
pub(crate) fn LLVMGetNumOperands(Val: &Value) -> c_uint;
11521153

11531154
// Operations on call sites
11541155
pub(crate) fn LLVMSetInstructionCallConv(Instr: &Value, CC: c_uint);

tests/codegen-llvm/gpu_offload/gpu_host.rs

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@
1111
// when inside of a function called main. This, too, is a temporary workaround for not having a
1212
// frontend.
1313

14+
#![feature(core_intrinsics)]
1415
#![no_main]
1516

1617
#[unsafe(no_mangle)]
1718
fn main() {
1819
let mut x = [3.0; 256];
19-
kernel_1(&mut x);
20+
kernel(&mut x);
2021
core::hint::black_box(&x);
2122
}
2223

@@ -25,13 +26,14 @@ fn main() {
2526
// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
2627
// CHECK: %struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }
2728

28-
// CHECK: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 1024]
29-
// CHECK: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 35]
30-
// CHECK: @.kernel_1.region_id = weak unnamed_addr constant i8 0
31-
// CHECK: @.offloading.entry_name.1 = internal unnamed_addr constant [9 x i8] c"kernel_1\00", section ".llvm.rodata.offloading", align 1
32-
// CHECK: @.offloading.entry.kernel_1 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.kernel_1.region_id, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
33-
// CHECK: @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
34-
// CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
29+
// CHECK: @.offload_sizes._kernel = private unnamed_addr constant [1 x i64] [i64 1024]
30+
// CHECK: @.offload_maptypes._kernel = private unnamed_addr constant [1 x i64] [i64 35]
31+
// CHECK: @._kernel.region_id = weak unnamed_addr constant i8 0
32+
// CHECK: @.offloading.entry_name._kernel = internal unnamed_addr constant [8 x i8] c"_kernel\00", section ".llvm.rodata.offloading", align 1
33+
// CHECK: @.offloading.entry._kernel = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel.region_id, ptr @.offloading.entry_name._kernel, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
34+
35+
// CHECK: @anon.{{.*}}.0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
36+
// CHECK: @anon.{{.*}}.1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @anon.{{.*}}.0 }, align 8
3537

3638
// CHECK: Function Attrs:
3739
// CHECK-NEXT: define{{( dso_local)?}} void @main()
@@ -99,7 +101,13 @@ fn main() {
99101

100102
#[unsafe(no_mangle)]
101103
#[inline(never)]
102-
pub fn kernel_1(x: &mut [f32; 256]) {
104+
pub fn kernel(x: &mut [f32; 256]) {
105+
core::intrinsics::offload(_kernel)
106+
}
107+
108+
#[unsafe(no_mangle)]
109+
#[inline(never)]
110+
pub fn _kernel(x: &mut [f32; 256]) {
103111
for i in 0..256 {
104112
x[i] = 21.0;
105113
}

tests/codegen-llvm/gpu_offload/offload_intrinsic.rs

Lines changed: 0 additions & 37 deletions
This file was deleted.

0 commit comments

Comments
 (0)