Skip to content

Commit 96f36d3

Browse files
committed
addressing feedback
1 parent 14da7a0 commit 96f36d3

File tree

8 files changed

+29
-6
lines changed

8 files changed

+29
-6
lines changed

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,8 @@ pub(crate) fn run_pass_manager(
616616
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage);
617617
}
618618

619-
if enable_gpu && !thin && !(cgcx.target_arch == "nvptx64" || cgcx.target_arch == "amdgpu") {
619+
// Here we only handle the GPU host (=cpu) code.
620+
if enable_gpu && !thin && !cgcx.target_is_like_gpu {
620621
let cx =
621622
SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size);
622623
crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx);

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -658,16 +658,15 @@ pub(crate) unsafe fn llvm_optimize(
658658
let first_arg_name = str::from_utf8(&c_name).unwrap();
659659
// We might call llvm_optimize (and thus this code) multiple times on the same IR,
660660
// but we shouldn't add this helper ptr multiple times.
661+
// FIXME(offload): This could break if the user names their first argument `dyn_ptr`.
661662
if first_arg_name == "dyn_ptr" {
662663
return;
663664
}
664665

665666
// Create the new parameter list, with ptr as the first argument
666667
let mut new_param_types = Vec::with_capacity(old_param_count as usize + 1);
667668
new_param_types.push(cx.type_ptr());
668-
for old_param in old_param_types {
669-
new_param_types.push(old_param);
670-
}
669+
new_param_types.extend(old_param_types);
671670

672671
// Create the new function type
673672
let ret_ty = unsafe { llvm::LLVMGetReturnType(old_fn_ty) };
@@ -701,10 +700,11 @@ pub(crate) unsafe fn llvm_optimize(
701700
llvm::set_value_name(new_fn, &name);
702701
}
703702

704-
let consider_offload = config.offload.contains(&config::Offload::Enable);
705-
if consider_offload && (cgcx.target_arch == "amdgpu" || cgcx.target_arch == "nvptx64") {
703+
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
706704
let cx =
707705
SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
706+
// For now we only support up to 9 kernels named kernel_0 ... kernel_8 (`0..9` is exclusive), a follow-up PR is
707+
// introducing a proper offload intrinsic to solve this limitation.
708708
for num in 0..9 {
709709
let name = format!("kernel_{num}");
710710
if let Some(kernel) = cx.get_function(&name) {

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ pub(crate) fn handle_gpu_code<'ll>(
1919
let mut memtransfer_types = vec![];
2020
let mut region_ids = vec![];
2121
let offload_entry_ty = TgtOffloadEntry::new_decl(&cx);
22+
// This is a temporary hack: we only search for the functions kernel_0 to kernel_8 (`0..9` is exclusive).
23+
// There is a draft PR in progress which will introduce a proper offload intrinsic to remove
24+
// this limitation.
2225
for num in 0..9 {
2326
let kernel = cx.get_function(&format!("kernel_{num}"));
2427
if let Some(kernel) = kernel {

compiler/rustc_codegen_ssa/src/back/write.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
342342
pub target_arch: String,
343343
pub target_is_like_darwin: bool,
344344
pub target_is_like_aix: bool,
345+
pub target_is_like_gpu: bool,
345346
pub split_debuginfo: rustc_target::spec::SplitDebuginfo,
346347
pub split_dwarf_kind: rustc_session::config::SplitDwarfKind,
347348
pub pointer_size: Size,
@@ -1309,6 +1310,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
13091310
target_arch: tcx.sess.target.arch.to_string(),
13101311
target_is_like_darwin: tcx.sess.target.is_like_darwin,
13111312
target_is_like_aix: tcx.sess.target.is_like_aix,
1313+
target_is_like_gpu: tcx.sess.target.is_like_gpu,
13121314
split_debuginfo: tcx.sess.split_debuginfo(),
13131315
split_dwarf_kind: tcx.sess.opts.unstable_opts.split_dwarf_kind,
13141316
parallel: backend.supports_parallel() && !sess.opts.unstable_opts.no_parallel_backend,

compiler/rustc_target/src/spec/json.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ impl Target {
147147
forward!(is_like_darwin);
148148
forward!(is_like_solaris);
149149
forward!(is_like_windows);
150+
forward!(is_like_gpu);
150151
forward!(is_like_msvc);
151152
forward!(is_like_wasm);
152153
forward!(is_like_android);
@@ -337,6 +338,7 @@ impl ToJson for Target {
337338
target_option_val!(is_like_darwin);
338339
target_option_val!(is_like_solaris);
339340
target_option_val!(is_like_windows);
341+
target_option_val!(is_like_gpu);
340342
target_option_val!(is_like_msvc);
341343
target_option_val!(is_like_wasm);
342344
target_option_val!(is_like_android);
@@ -556,6 +558,7 @@ struct TargetSpecJson {
556558
is_like_darwin: Option<bool>,
557559
is_like_solaris: Option<bool>,
558560
is_like_windows: Option<bool>,
561+
is_like_gpu: Option<bool>,
559562
is_like_msvc: Option<bool>,
560563
is_like_wasm: Option<bool>,
561564
is_like_android: Option<bool>,

compiler/rustc_target/src/spec/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2180,6 +2180,8 @@ pub struct TargetOptions {
21802180
/// Also indicates whether to use Apple-specific ABI changes, such as extending function
21812181
/// parameters to 32-bits.
21822182
pub is_like_darwin: bool,
2183+
/// Whether the target is a GPU (e.g. NVIDIA, AMD, Intel).
2184+
pub is_like_gpu: bool,
21832185
/// Whether the target toolchain is like Solaris's.
21842186
/// Only useful for compiling against Illumos/Solaris,
21852187
/// as they have a different set of linker flags. Defaults to false.
@@ -2583,6 +2585,7 @@ impl Default for TargetOptions {
25832585
abi_return_struct_as_int: false,
25842586
is_like_aix: false,
25852587
is_like_darwin: false,
2588+
is_like_gpu: false,
25862589
is_like_solaris: false,
25872590
is_like_windows: false,
25882591
is_like_msvc: false,
@@ -2748,6 +2751,11 @@ impl Target {
27482751
self.os == "solaris" || self.os == "illumos",
27492752
"`is_like_solaris` must be set if and only if `os` is `solaris` or `illumos`"
27502753
);
2754+
check_eq!(
2755+
self.is_like_gpu,
2756+
self.os == "nvptx64" || self.os == "amdgcn",
2757+
"`is_like_gpu` must be set if and only if `target` is `nvptx64` or `amdgcn`"
2758+
);
27512759
check_eq!(
27522760
self.is_like_windows,
27532761
self.os == "windows" || self.os == "uefi" || self.os == "cygwin",

compiler/rustc_target/src/spec/targets/amdgcn_amd_amdhsa.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ pub(crate) fn target() -> Target {
3434
no_builtins: true,
3535
simd_types_indirect: false,
3636

37+
// Clearly a GPU
38+
is_like_gpu: true,
39+
3740
// Allow `cdylib` crate type.
3841
dynamic_linking: true,
3942
only_cdylib: true,

compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ pub(crate) fn target() -> Target {
4242
// Let the `ptx-linker` handle LLVM lowering into MC / assembly.
4343
obj_is_bitcode: true,
4444

45+
// Clearly a GPU
46+
is_like_gpu: true,
47+
4548
// Convenient and predictable naming scheme.
4649
dll_prefix: "".into(),
4750
dll_suffix: ".ptx".into(),

0 commit comments

Comments
 (0)