Skip to content

Commit 34eae40

Browse files
committed
addressing feedback
1 parent 37ceb03 commit 34eae40

File tree

8 files changed

+29
-6
lines changed

8 files changed

+29
-6
lines changed

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,8 @@ pub(crate) fn run_pass_manager(
613613
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage);
614614
}
615615

616-
if enable_gpu && !thin && !(cgcx.target_arch == "nvptx64" || cgcx.target_arch == "amdgpu") {
616+
// Here we only handle the GPU host (=cpu) code.
617+
if enable_gpu && !thin && !cgcx.target_is_like_gpu {
617618
let cx =
618619
SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size);
619620
crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx);

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -671,16 +671,15 @@ pub(crate) unsafe fn llvm_optimize(
671671
let first_arg_name = str::from_utf8(&c_name).unwrap();
672672
// We might call llvm_optimize (and thus this code) multiple times on the same IR,
673673
// but we shouldn't add this helper ptr multiple times.
674+
// FIXME(offload): This could break if the user names their first argument `dyn_ptr`.
674675
if first_arg_name == "dyn_ptr" {
675676
return;
676677
}
677678

678679
// Create the new parameter list, with ptr as the first argument
679680
let mut new_param_types = Vec::with_capacity(old_param_count as usize + 1);
680681
new_param_types.push(cx.type_ptr());
681-
for old_param in old_param_types {
682-
new_param_types.push(old_param);
683-
}
682+
new_param_types.extend(old_param_types);
684683

685684
// Create the new function type
686685
let ret_ty = unsafe { llvm::LLVMGetReturnType(old_fn_ty) };
@@ -714,10 +713,11 @@ pub(crate) unsafe fn llvm_optimize(
714713
llvm::set_value_name(new_fn, &name);
715714
}
716715

717-
let consider_offload = config.offload.contains(&config::Offload::Enable);
718-
if consider_offload && (cgcx.target_arch == "amdgpu" || cgcx.target_arch == "nvptx64") {
716+
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
719717
let cx =
720718
SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
719+
// For now we only find kernels named kernel_0 ... kernel_8 (`0..9` excludes 9), a follow-up PR is
720+
// introducing a proper offload intrinsic to solve this limitation.
721721
for num in 0..9 {
722722
let name = format!("kernel_{num}");
723723
if let Some(kernel) = cx.get_function(&name) {

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ pub(crate) fn handle_gpu_code<'ll>(
1919
let mut memtransfer_types = vec![];
2020
let mut region_ids = vec![];
2121
let offload_entry_ty = TgtOffloadEntry::new_decl(&cx);
22+
// This is a temporary hack: we only search for kernel_0 to kernel_8 functions (`0..9` excludes 9).
23+
// There is a draft PR in progress which will introduce a proper offload intrinsic to remove
24+
// this limitation.
2225
for num in 0..9 {
2326
let kernel = cx.get_function(&format!("kernel_{num}"));
2427
if let Some(kernel) = kernel {

compiler/rustc_codegen_ssa/src/back/write.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
342342
pub target_arch: String,
343343
pub target_is_like_darwin: bool,
344344
pub target_is_like_aix: bool,
345+
pub target_is_like_gpu: bool,
345346
pub split_debuginfo: rustc_target::spec::SplitDebuginfo,
346347
pub split_dwarf_kind: rustc_session::config::SplitDwarfKind,
347348
pub pointer_size: Size,
@@ -1165,6 +1166,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
11651166
target_arch: tcx.sess.target.arch.to_string(),
11661167
target_is_like_darwin: tcx.sess.target.is_like_darwin,
11671168
target_is_like_aix: tcx.sess.target.is_like_aix,
1169+
target_is_like_gpu: tcx.sess.target.is_like_gpu,
11681170
split_debuginfo: tcx.sess.split_debuginfo(),
11691171
split_dwarf_kind: tcx.sess.opts.unstable_opts.split_dwarf_kind,
11701172
parallel: backend.supports_parallel() && !sess.opts.unstable_opts.no_parallel_backend,

compiler/rustc_target/src/spec/json.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ impl Target {
147147
forward!(is_like_darwin);
148148
forward!(is_like_solaris);
149149
forward!(is_like_windows);
150+
forward!(is_like_gpu);
150151
forward!(is_like_msvc);
151152
forward!(is_like_wasm);
152153
forward!(is_like_android);
@@ -337,6 +338,7 @@ impl ToJson for Target {
337338
target_option_val!(is_like_darwin);
338339
target_option_val!(is_like_solaris);
339340
target_option_val!(is_like_windows);
341+
target_option_val!(is_like_gpu);
340342
target_option_val!(is_like_msvc);
341343
target_option_val!(is_like_wasm);
342344
target_option_val!(is_like_android);
@@ -556,6 +558,7 @@ struct TargetSpecJson {
556558
is_like_darwin: Option<bool>,
557559
is_like_solaris: Option<bool>,
558560
is_like_windows: Option<bool>,
561+
is_like_gpu: Option<bool>,
559562
is_like_msvc: Option<bool>,
560563
is_like_wasm: Option<bool>,
561564
is_like_android: Option<bool>,

compiler/rustc_target/src/spec/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2090,6 +2090,8 @@ pub struct TargetOptions {
20902090
/// Also indicates whether to use Apple-specific ABI changes, such as extending function
20912091
/// parameters to 32-bits.
20922092
pub is_like_darwin: bool,
2093+
/// Whether the target is a GPU (e.g. NVIDIA, AMD, Intel).
2094+
pub is_like_gpu: bool,
20932095
/// Whether the target toolchain is like Solaris's.
20942096
/// Only useful for compiling against Illumos/Solaris,
20952097
/// as they have a different set of linker flags. Defaults to false.
@@ -2493,6 +2495,7 @@ impl Default for TargetOptions {
24932495
abi_return_struct_as_int: false,
24942496
is_like_aix: false,
24952497
is_like_darwin: false,
2498+
is_like_gpu: false,
24962499
is_like_solaris: false,
24972500
is_like_windows: false,
24982501
is_like_msvc: false,
@@ -2658,6 +2661,11 @@ impl Target {
26582661
self.os == "solaris" || self.os == "illumos",
26592662
"`is_like_solaris` must be set if and only if `os` is `solaris` or `illumos`"
26602663
);
2664+
check_eq!(
2665+
self.is_like_gpu,
2666+
self.os == "nvptx64" || self.os == "amdgcn",
2667+
"`is_like_gpu` must be set if and only if `target` is `nvptx64` or `amdgcn`"
2668+
);
26612669
check_eq!(
26622670
self.is_like_windows,
26632671
self.os == "windows" || self.os == "uefi" || self.os == "cygwin",

compiler/rustc_target/src/spec/targets/amdgcn_amd_amdhsa.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ pub(crate) fn target() -> Target {
3232
no_builtins: true,
3333
simd_types_indirect: false,
3434

35+
// Clearly a GPU
36+
is_like_gpu: true,
37+
3538
// Allow `cdylib` crate type.
3639
dynamic_linking: true,
3740
only_cdylib: true,

compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ pub(crate) fn target() -> Target {
4242
// Let the `ptx-linker` handle LLVM lowering into MC / assembly.
4343
obj_is_bitcode: true,
4444

45+
// Clearly a GPU
46+
is_like_gpu: true,
47+
4548
// Convenient and predictable naming scheme.
4649
dll_prefix: "".into(),
4750
dll_suffix: ".ptx".into(),

0 commit comments

Comments
 (0)