From a389e141cb1b73b87ad10b4cd5f388a2d07f22f7 Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Wed, 18 Jun 2025 20:41:31 +0300 Subject: [PATCH] Optionally, keep radix for integer literals in generated bindings The radices of integer literals in input C and C++ code are retained in the generated bindings, provided that the newly added builder option is set to true, or the corresponding CLI flag is used. If an input header contains constants that have binary, octal, or hexadecimal representation, the literals in the Rust bindings are output with the same number base. E.g., 0x10 in C/C++ header would be output as 0x10 in Rust code. The functionality is accessed with the builder option `keep_integer_radices`, which defaults to `false` for now. `--keep-integer-radices` is the corresponding CLI flag. The existing tests with header files and corresponding expectations that were affected by this change have been adjusted to accommodate the change: The CLI flag for keeping the integer radices was added to the bindgen command for those tests, and the values in the expectations were updated to have the original radix; the new values were confirmed by hand to equal the original ones. 
--- .../expectations/tests/class_static_const.rs | 4 +- .../expectations/tests/constant-evaluate.rs | 2 +- .../default-macro-constant-type-signed.rs | 8 +- .../default-macro-constant-type-unsigned.rs | 8 +- .../tests/default-macro-constant-type.rs | 8 +- .../tests/different_radix_literals.rs | 45 +++ .../tests/fit-macro-constant-types-signed.rs | 8 +- .../tests/fit-macro-constant-types.rs | 8 +- .../expectations/tests/jsval_layout_opaque.rs | 30 +- .../expectations/tests/layout_eth_conf.rs | 6 +- .../expectations/tests/overflowed_enum.rs | 6 +- .../tests/prepend-enum-constified-variant.rs | 2 +- .../tests/expectations/tests/short-enums.rs | 6 +- .../expectations/tests/wrap-static-fns.rs | 2 +- .../tests/headers/class_static_const.hpp | 2 +- .../tests/headers/constant-evaluate.h | 2 +- .../default-macro-constant-type-signed.h | 2 +- .../default-macro-constant-type-unsigned.h | 2 +- .../headers/default-macro-constant-type.h | 1 + .../headers/different_radix_literals.hpp | 77 +++++ .../headers/fit-macro-constant-types-signed.h | 2 +- .../tests/headers/fit-macro-constant-types.h | 2 +- .../tests/headers/jsval_layout_opaque.hpp | 2 +- bindgen-tests/tests/headers/layout_eth_conf.h | 2 +- .../tests/headers/overflowed_enum.hpp | 2 +- .../headers/prepend-enum-constified-variant.h | 2 +- bindgen-tests/tests/headers/short-enums.hpp | 2 +- bindgen-tests/tests/headers/wrap-static-fns.h | 2 +- bindgen/clang.rs | 43 +++ bindgen/codegen/helpers.rs | 225 +++++++++++++- bindgen/codegen/mod.rs | 17 +- bindgen/ir/enum_ty.rs | 17 ++ bindgen/ir/var.rs | 276 ++++++++++++++++-- bindgen/options/cli.rs | 5 + bindgen/options/mod.rs | 16 + 35 files changed, 745 insertions(+), 99 deletions(-) create mode 100644 bindgen-tests/tests/expectations/tests/different_radix_literals.rs create mode 100644 bindgen-tests/tests/headers/different_radix_literals.hpp diff --git a/bindgen-tests/tests/expectations/tests/class_static_const.rs b/bindgen-tests/tests/expectations/tests/class_static_const.rs index 
d628239c4c..c09d181d0f 100644 --- a/bindgen-tests/tests/expectations/tests/class_static_const.rs +++ b/bindgen-tests/tests/expectations/tests/class_static_const.rs @@ -5,8 +5,8 @@ pub struct A { pub _address: u8, } pub const A_a: ::std::os::raw::c_int = 0; -pub const A_b: i32 = 63; -pub const A_c: u32 = 255; +pub const A_b: i32 = 0o77; +pub const A_c: u32 = 0xff; #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of A"][::std::mem::size_of::<A>() - 1usize]; diff --git a/bindgen-tests/tests/expectations/tests/constant-evaluate.rs b/bindgen-tests/tests/expectations/tests/constant-evaluate.rs index bbcf6d5450..8506729427 100644 --- a/bindgen-tests/tests/expectations/tests/constant-evaluate.rs +++ b/bindgen-tests/tests/expectations/tests/constant-evaluate.rs @@ -8,7 +8,7 @@ pub enum _bindgen_ty_1 { bar = 8, } pub type EasyToOverflow = ::std::os::raw::c_ulonglong; -pub const k: EasyToOverflow = 2147483648; +pub const k: EasyToOverflow = 0x80000000; pub const k_expr: EasyToOverflow = 1152921504606846976; pub const wow: EasyToOverflow = 2147483648; pub const BAZ: ::std::os::raw::c_longlong = 24; diff --git a/bindgen-tests/tests/expectations/tests/default-macro-constant-type-signed.rs b/bindgen-tests/tests/expectations/tests/default-macro-constant-type-signed.rs index 7fca57b6b9..ec0032fc2c 100644 --- a/bindgen-tests/tests/expectations/tests/default-macro-constant-type-signed.rs +++ b/bindgen-tests/tests/expectations/tests/default-macro-constant-type-signed.rs @@ -4,8 +4,8 @@ pub const N1: i32 = 1; pub const N2: i32 = 2; pub const N_1: i32 = -1; pub const N_2: i32 = -2; -pub const MAX_U16: i32 = 65535; -pub const MAX_I16: i32 = 32767; +pub const MAX_U16: i32 = 0xffff; +pub const MAX_I16: i32 = 0x7fff; pub const MAX_I16_Plus1: i32 = 32768; pub const MAX_U16_Plus1: i32 = 65536; pub const MAX_I16_Minus1: i32 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: i32 = 1; pub const MIN_I16_Plus1: i32 = -32767; pub const MIN_U16_Minus1: i32 = -1; pub
const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: i64 = 4294967295; -pub const MAX_I32: i32 = 2147483647; +pub const MAX_U32: i64 = 0xffffffff; +pub const MAX_I32: i32 = 0x7fffffff; pub const MAX_I32_Plus1: i64 = 2147483648; pub const MAX_U32_Plus1: i64 = 4294967296; pub const MAX_I32_Minus1: i32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/default-macro-constant-type-unsigned.rs b/bindgen-tests/tests/expectations/tests/default-macro-constant-type-unsigned.rs index d34d050a1a..f492b6bfc4 100644 --- a/bindgen-tests/tests/expectations/tests/default-macro-constant-type-unsigned.rs +++ b/bindgen-tests/tests/expectations/tests/default-macro-constant-type-unsigned.rs @@ -4,8 +4,8 @@ pub const N1: u32 = 1; pub const N2: u32 = 2; pub const N_1: i32 = -1; pub const N_2: i32 = -2; -pub const MAX_U16: u32 = 65535; -pub const MAX_I16: u32 = 32767; +pub const MAX_U16: u32 = 0xffff; +pub const MAX_I16: u32 = 0x7fff; pub const MAX_I16_Plus1: u32 = 32768; pub const MAX_U16_Plus1: u32 = 65536; pub const MAX_I16_Minus1: u32 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: u32 = 1; pub const MIN_I16_Plus1: i32 = -32767; pub const MIN_U16_Minus1: i32 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: u32 = 4294967295; -pub const MAX_I32: u32 = 2147483647; +pub const MAX_U32: u32 = 0xffffffff; +pub const MAX_I32: u32 = 0x7fffffff; pub const MAX_I32_Plus1: u32 = 2147483648; pub const MAX_U32_Plus1: u64 = 4294967296; pub const MAX_I32_Minus1: u32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/default-macro-constant-type.rs b/bindgen-tests/tests/expectations/tests/default-macro-constant-type.rs index d34d050a1a..f492b6bfc4 100644 --- a/bindgen-tests/tests/expectations/tests/default-macro-constant-type.rs +++ b/bindgen-tests/tests/expectations/tests/default-macro-constant-type.rs @@ -4,8 +4,8 @@ pub const N1: u32 = 1; pub const N2: u32 = 2; pub const N_1: i32 = -1; pub const N_2: i32 = -2; -pub const MAX_U16: u32 = 65535; -pub 
const MAX_I16: u32 = 32767; +pub const MAX_U16: u32 = 0xffff; +pub const MAX_I16: u32 = 0x7fff; pub const MAX_I16_Plus1: u32 = 32768; pub const MAX_U16_Plus1: u32 = 65536; pub const MAX_I16_Minus1: u32 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: u32 = 1; pub const MIN_I16_Plus1: i32 = -32767; pub const MIN_U16_Minus1: i32 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: u32 = 4294967295; -pub const MAX_I32: u32 = 2147483647; +pub const MAX_U32: u32 = 0xffffffff; +pub const MAX_I32: u32 = 0x7fffffff; pub const MAX_I32_Plus1: u32 = 2147483648; pub const MAX_U32_Plus1: u64 = 4294967296; pub const MAX_I32_Minus1: u32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/different_radix_literals.rs b/bindgen-tests/tests/expectations/tests/different_radix_literals.rs new file mode 100644 index 0000000000..c1a5ca170b --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/different_radix_literals.rs @@ -0,0 +1,45 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +pub const DEFINE_BIN_LITERAL: u32 = 0b10; +pub const DEFINE_NEG_BIN_LITERAL: i32 = -0b10; +pub const DEFINE_OCT_LITERAL: u32 = 0o10; +pub const DEFINE_NEG_OCT_LITERAL: i32 = -0o10; +pub const DEFINE_HEX_LITERAL: u32 = 0x10; +pub const DEFINE_NEG_HEX_LITERAL: i32 = -0x10; +pub const DEFINE_DEC_LITERAL: u32 = 10; +pub const DEFINE_NEG_DEC_LITERAL: i32 = -10; +pub const CONST_INT_BIN_LITERAL: ::std::os::raw::c_int = 0b10; +pub const CONST_INT_NEG_BIN_LITERAL: ::std::os::raw::c_int = -0b10; +pub const CONST_INT_OCT_LITERAL: ::std::os::raw::c_int = 0o10; +pub const CONST_INT_NEG_OCT_LITERAL: ::std::os::raw::c_int = -0o10; +pub const CONST_INT_HEX_LITERAL: ::std::os::raw::c_int = 0x10; +pub const CONST_INT_NEG_HEX_LITERAL: ::std::os::raw::c_int = -0x10; +pub const CONST_INT_DEC_LITERAL: ::std::os::raw::c_int = 10; +pub const CONST_INT_NEG_DEC_LITERAL: ::std::os::raw::c_int = -10; +pub const MultiRadixLiteral_ENUM_BIN_LITERAL: MultiRadixLiteral 
= 0b10; +pub const MultiRadixLiteral_ENUM_NEG_BIN_LITERAL: MultiRadixLiteral = -0b10; +pub const MultiRadixLiteral_ENUM_OCT_LITERAL: MultiRadixLiteral = 0o10; +pub const MultiRadixLiteral_ENUM_NEG_OCT_LITERAL: MultiRadixLiteral = -0o10; +pub const MultiRadixLiteral_ENUM_HEX_LITERAL: MultiRadixLiteral = 0x10; +pub const MultiRadixLiteral_ENUM_NEG_HEX_LITERAL: MultiRadixLiteral = -0x10; +pub const MultiRadixLiteral_ENUM_DEC_LITERAL: MultiRadixLiteral = 10; +pub const MultiRadixLiteral_ENUM_NEG_DEC_LITERAL: MultiRadixLiteral = -10; +pub type MultiRadixLiteral = ::std::os::raw::c_int; +pub const MIN_I64_BIN: ::std::os::raw::c_longlong = -0b1000000000000000000000000000000000000000000000000000000000000000; +pub const MIN_I64_OCT: ::std::os::raw::c_longlong = -0o1000000000000000000000; +pub const MIN_I64_DEC: ::std::os::raw::c_longlong = -9223372036854775808; +pub const MIN_I64_HEX: ::std::os::raw::c_longlong = -0x8000000000000000; +pub const BIG_B_BIN: ::std::os::raw::c_int = 0b1; +pub const BIG_X_HEX: ::std::os::raw::c_int = 0xf; +pub const AGENT: ::std::os::raw::c_char = 0o7; +pub const SEP_BIN: ::std::os::raw::c_ulonglong = 0b1111111100000000; +pub const SEP_OCT: ::std::os::raw::c_ulonglong = 0o777777777777; +pub const SEP_DEC: ::std::os::raw::c_ulonglong = 299792458; +pub const SEP_HEX: ::std::os::raw::c_ulonglong = 0x1111bbbbccccdddd; +pub const BIN_1ST: ::std::os::raw::c_long = 0b10101010; +pub const OCT_2ND: ::std::os::raw::c_long = 0o777; +pub const DEC_3RD: ::std::os::raw::c_long = 1234; +pub const HEX_4TH: ::std::os::raw::c_long = 0xffff; +pub const USHORT_HEX: ::std::os::raw::c_ushort = 0xffff; +pub const SHORT_HEX: ::std::os::raw::c_short = 0x7fff; +pub const UCHAR_HEX: ::std::os::raw::c_uchar = 0xff; +pub const CHAR_HEX: ::std::os::raw::c_char = 0x7f; diff --git a/bindgen-tests/tests/expectations/tests/fit-macro-constant-types-signed.rs b/bindgen-tests/tests/expectations/tests/fit-macro-constant-types-signed.rs index d4ad5e0fcc..9d3588473b 100644 --- 
a/bindgen-tests/tests/expectations/tests/fit-macro-constant-types-signed.rs +++ b/bindgen-tests/tests/expectations/tests/fit-macro-constant-types-signed.rs @@ -4,8 +4,8 @@ pub const N1: i8 = 1; pub const N2: i8 = 2; pub const N_1: i8 = -1; pub const N_2: i8 = -2; -pub const MAX_U16: i32 = 65535; -pub const MAX_I16: i16 = 32767; +pub const MAX_U16: i32 = 0xffff; +pub const MAX_I16: i16 = 0x7fff; pub const MAX_I16_Plus1: i32 = 32768; pub const MAX_U16_Plus1: i32 = 65536; pub const MAX_I16_Minus1: i16 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: i8 = 1; pub const MIN_I16_Plus1: i16 = -32767; pub const MIN_U16_Minus1: i8 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: i64 = 4294967295; -pub const MAX_I32: i32 = 2147483647; +pub const MAX_U32: i64 = 0xffffffff; +pub const MAX_I32: i32 = 0x7fffffff; pub const MAX_I32_Plus1: i64 = 2147483648; pub const MAX_U32_Plus1: i64 = 4294967296; pub const MAX_I32_Minus1: i32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/fit-macro-constant-types.rs b/bindgen-tests/tests/expectations/tests/fit-macro-constant-types.rs index 5542a645da..0c8d33c493 100644 --- a/bindgen-tests/tests/expectations/tests/fit-macro-constant-types.rs +++ b/bindgen-tests/tests/expectations/tests/fit-macro-constant-types.rs @@ -4,8 +4,8 @@ pub const N1: u8 = 1; pub const N2: u8 = 2; pub const N_1: i8 = -1; pub const N_2: i8 = -2; -pub const MAX_U16: u16 = 65535; -pub const MAX_I16: u16 = 32767; +pub const MAX_U16: u16 = 0xffff; +pub const MAX_I16: u16 = 0x7fff; pub const MAX_I16_Plus1: u16 = 32768; pub const MAX_U16_Plus1: u32 = 65536; pub const MAX_I16_Minus1: u16 = 32766; @@ -16,8 +16,8 @@ pub const MIN_U16_Plus1: u8 = 1; pub const MIN_I16_Plus1: i16 = -32767; pub const MIN_U16_Minus1: i8 = -1; pub const MIN_I16_Minus1: i32 = -32769; -pub const MAX_U32: u32 = 4294967295; -pub const MAX_I32: u32 = 2147483647; +pub const MAX_U32: u32 = 0xffffffff; +pub const MAX_I32: u32 = 0x7fffffff; pub const MAX_I32_Plus1: u32 = 
2147483648; pub const MAX_U32_Plus1: u64 = 4294967296; pub const MAX_I32_Minus1: u32 = 2147483646; diff --git a/bindgen-tests/tests/expectations/tests/jsval_layout_opaque.rs b/bindgen-tests/tests/expectations/tests/jsval_layout_opaque.rs index dc0ef8ed7f..6be6a4d2f7 100644 --- a/bindgen-tests/tests/expectations/tests/jsval_layout_opaque.rs +++ b/bindgen-tests/tests/expectations/tests/jsval_layout_opaque.rs @@ -146,27 +146,27 @@ where } } pub const JSVAL_TAG_SHIFT: u32 = 47; -pub const JSVAL_PAYLOAD_MASK: u64 = 140737488355327; -pub const JSVAL_TAG_MASK: i64 = -140737488355328; +pub const JSVAL_PAYLOAD_MASK: u64 = 0x7fffffffffff; +pub const JSVAL_TAG_MASK: i64 = -0x800000000000; #[repr(u8)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum JSValueType { - JSVAL_TYPE_DOUBLE = 0, - JSVAL_TYPE_INT32 = 1, - JSVAL_TYPE_UNDEFINED = 2, - JSVAL_TYPE_BOOLEAN = 3, - JSVAL_TYPE_MAGIC = 4, - JSVAL_TYPE_STRING = 5, - JSVAL_TYPE_SYMBOL = 6, - JSVAL_TYPE_NULL = 7, - JSVAL_TYPE_OBJECT = 8, - JSVAL_TYPE_UNKNOWN = 32, - JSVAL_TYPE_MISSING = 33, + JSVAL_TYPE_DOUBLE = 0x0, + JSVAL_TYPE_INT32 = 0x1, + JSVAL_TYPE_UNDEFINED = 0x2, + JSVAL_TYPE_BOOLEAN = 0x3, + JSVAL_TYPE_MAGIC = 0x4, + JSVAL_TYPE_STRING = 0x5, + JSVAL_TYPE_SYMBOL = 0x6, + JSVAL_TYPE_NULL = 0x7, + JSVAL_TYPE_OBJECT = 0x8, + JSVAL_TYPE_UNKNOWN = 0x20, + JSVAL_TYPE_MISSING = 0x21, } #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum JSValueTag { - JSVAL_TAG_MAX_DOUBLE = 131056, + JSVAL_TAG_MAX_DOUBLE = 0x1fff0, JSVAL_TAG_INT32 = 131057, JSVAL_TAG_UNDEFINED = 131058, JSVAL_TAG_STRING = 131061, @@ -179,7 +179,7 @@ pub enum JSValueTag { #[repr(u64)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum JSValueShiftedTag { - JSVAL_SHIFTED_TAG_MAX_DOUBLE = 18444492278190833663, + JSVAL_SHIFTED_TAG_MAX_DOUBLE = 0xfff80000ffffffff, JSVAL_SHIFTED_TAG_INT32 = 18444633011384221696, JSVAL_SHIFTED_TAG_UNDEFINED = 18444773748872577024, JSVAL_SHIFTED_TAG_STRING = 18445195961337643008, diff --git 
a/bindgen-tests/tests/expectations/tests/layout_eth_conf.rs b/bindgen-tests/tests/expectations/tests/layout_eth_conf.rs index 2686d8f5fa..b3dee3da4a 100644 --- a/bindgen-tests/tests/expectations/tests/layout_eth_conf.rs +++ b/bindgen-tests/tests/expectations/tests/layout_eth_conf.rs @@ -87,9 +87,9 @@ where } } } -pub const ETH_MQ_RX_RSS_FLAG: u32 = 1; -pub const ETH_MQ_RX_DCB_FLAG: u32 = 2; -pub const ETH_MQ_RX_VMDQ_FLAG: u32 = 4; +pub const ETH_MQ_RX_RSS_FLAG: u32 = 0x1; +pub const ETH_MQ_RX_DCB_FLAG: u32 = 0x2; +pub const ETH_MQ_RX_VMDQ_FLAG: u32 = 0x4; pub const ETH_VMDQ_MAX_VLAN_FILTERS: u32 = 64; pub const ETH_DCB_NUM_USER_PRIORITIES: u32 = 8; pub const ETH_VMDQ_DCB_NUM_QUEUES: u32 = 128; diff --git a/bindgen-tests/tests/expectations/tests/overflowed_enum.rs b/bindgen-tests/tests/expectations/tests/overflowed_enum.rs index 2c67ba6903..24f3057257 100644 --- a/bindgen-tests/tests/expectations/tests/overflowed_enum.rs +++ b/bindgen-tests/tests/expectations/tests/overflowed_enum.rs @@ -2,9 +2,9 @@ #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum Foo { - BAP_ARM = 9698489, - BAP_X86 = 11960045, - BAP_X86_64 = 3128633167, + BAP_ARM = 0x93fcb9, + BAP_X86 = 0xb67eed, + BAP_X86_64 = 0xba7b274f, } #[repr(u16)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] diff --git a/bindgen-tests/tests/expectations/tests/prepend-enum-constified-variant.rs b/bindgen-tests/tests/expectations/tests/prepend-enum-constified-variant.rs index ff49d684f1..4250e1e1f8 100644 --- a/bindgen-tests/tests/expectations/tests/prepend-enum-constified-variant.rs +++ b/bindgen-tests/tests/expectations/tests/prepend-enum-constified-variant.rs @@ -5,5 +5,5 @@ impl AVCodecID { #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum AVCodecID { - AV_CODEC_ID_FIRST_UNKNOWN = 98304, + AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, } diff --git a/bindgen-tests/tests/expectations/tests/short-enums.rs b/bindgen-tests/tests/expectations/tests/short-enums.rs index 
493bb5b419..9295f5b715 100644 --- a/bindgen-tests/tests/expectations/tests/short-enums.rs +++ b/bindgen-tests/tests/expectations/tests/short-enums.rs @@ -2,15 +2,15 @@ #[repr(u8)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum one_byte_t { - SOME_VALUE = 1, + SOME_VALUE = 0x1, } #[repr(u16)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum two_byte_t { - SOME_OTHER_VALUE = 256, + SOME_OTHER_VALUE = 0x100, } #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum four_byte_t { - SOME_BIGGER_VALUE = 16777216, + SOME_BIGGER_VALUE = 0x1000000, } diff --git a/bindgen-tests/tests/expectations/tests/wrap-static-fns.rs b/bindgen-tests/tests/expectations/tests/wrap-static-fns.rs index bafcad8a7e..4e054d21aa 100644 --- a/bindgen-tests/tests/expectations/tests/wrap-static-fns.rs +++ b/bindgen-tests/tests/expectations/tests/wrap-static-fns.rs @@ -40,7 +40,7 @@ unsafe extern "C" { arg: *const *const ::std::os::raw::c_int, ) -> ::std::os::raw::c_int; } -pub const foo_BAR: foo = 0; +pub const foo_BAR: foo = 0x0; pub type foo = ::std::os::raw::c_uint; unsafe extern "C" { #[link_name = "takes_enum__extern"] diff --git a/bindgen-tests/tests/headers/class_static_const.hpp b/bindgen-tests/tests/headers/class_static_const.hpp index 3e320edcbe..d870f7e290 100644 --- a/bindgen-tests/tests/headers/class_static_const.hpp +++ b/bindgen-tests/tests/headers/class_static_const.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq +// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --keep-integer-radices using int32_t = int; typedef unsigned int uint32_t; diff --git a/bindgen-tests/tests/headers/constant-evaluate.h b/bindgen-tests/tests/headers/constant-evaluate.h index 812553ed3e..846fe0b072 100644 --- a/bindgen-tests/tests/headers/constant-evaluate.h +++ b/bindgen-tests/tests/headers/constant-evaluate.h @@ -1,5 +1,5 @@ // bindgen-unstable -// bindgen-flags: --rustified-enum ".*" 
+// bindgen-flags: --rustified-enum ".*" --keep-integer-radices enum { foo = 4, diff --git a/bindgen-tests/tests/headers/default-macro-constant-type-signed.h b/bindgen-tests/tests/headers/default-macro-constant-type-signed.h index da3f134467..769be60e08 100644 --- a/bindgen-tests/tests/headers/default-macro-constant-type-signed.h +++ b/bindgen-tests/tests/headers/default-macro-constant-type-signed.h @@ -1,3 +1,3 @@ -// bindgen-flags: --default-macro-constant-type signed +// bindgen-flags: --default-macro-constant-type signed --keep-integer-radices // All values are i32 if they fit; otherwise i64. #include "default-macro-constant-type.h" diff --git a/bindgen-tests/tests/headers/default-macro-constant-type-unsigned.h b/bindgen-tests/tests/headers/default-macro-constant-type-unsigned.h index 1078e852ee..e22abb2ae3 100644 --- a/bindgen-tests/tests/headers/default-macro-constant-type-unsigned.h +++ b/bindgen-tests/tests/headers/default-macro-constant-type-unsigned.h @@ -1,3 +1,3 @@ -// bindgen-flags: --default-macro-constant-type unsigned +// bindgen-flags: --default-macro-constant-type unsigned --keep-integer-radices // Negative values are i32 or i64; others are u32 or u64. #include "default-macro-constant-type.h" diff --git a/bindgen-tests/tests/headers/default-macro-constant-type.h b/bindgen-tests/tests/headers/default-macro-constant-type.h index a863362c98..36b7042dd6 100644 --- a/bindgen-tests/tests/headers/default-macro-constant-type.h +++ b/bindgen-tests/tests/headers/default-macro-constant-type.h @@ -1,3 +1,4 @@ +// bindgen-flags: --keep-integer-radices // Test default of --default-macro-constant-type // Negative values are i32 or i64; others are u32 or u64. 
diff --git a/bindgen-tests/tests/headers/different_radix_literals.hpp b/bindgen-tests/tests/headers/different_radix_literals.hpp new file mode 100644 index 0000000000..af3ea5db6a --- /dev/null +++ b/bindgen-tests/tests/headers/different_radix_literals.hpp @@ -0,0 +1,77 @@ +// bindgen-flags: --keep-integer-radices -- -std=c++14 +// (C23 is not available in clang 9.0, but C++14 supports the same literals) + +// Binary integer literals (C23) - 0b10 is 2 in decimal + +#define DEFINE_BIN_LITERAL 0b10 +#define DEFINE_NEG_BIN_LITERAL -0b10 +const int CONST_INT_BIN_LITERAL = 0b10; +const int CONST_INT_NEG_BIN_LITERAL = -0b10; + +// Octal integer literals - 010 is 8 in decimal + +#define DEFINE_OCT_LITERAL 010 +#define DEFINE_NEG_OCT_LITERAL -010 +const int CONST_INT_OCT_LITERAL = 010; +const int CONST_INT_NEG_OCT_LITERAL = -010; + +// Hexadecimal integer literals - 0x10 is 16 in decimal + +#define DEFINE_HEX_LITERAL 0x10 +#define DEFINE_NEG_HEX_LITERAL -0x10 +const int CONST_INT_HEX_LITERAL = 0x10; +const int CONST_INT_NEG_HEX_LITERAL = -0x10; + +// Default decimal integer literals - 10 is 10 in decimal + +#define DEFINE_DEC_LITERAL 10 +#define DEFINE_NEG_DEC_LITERAL -10 +const int CONST_INT_DEC_LITERAL = 10; +const int CONST_INT_NEG_DEC_LITERAL = -10; + +// Enums with binary, octal, and hexadecimal integer literals + +enum MultiRadixLiteral { + ENUM_BIN_LITERAL = 0b10, + ENUM_NEG_BIN_LITERAL = -0b10, + ENUM_OCT_LITERAL = 010, + ENUM_NEG_OCT_LITERAL = -010, + ENUM_HEX_LITERAL = 0x10, + ENUM_NEG_HEX_LITERAL = -0x10, + ENUM_DEC_LITERAL = 10, + ENUM_NEG_DEC_LITERAL = -10, +}; + +// Edge cases: minimum i64s + +const long long MIN_I64_BIN = -0b1000000000000000000000000000000000000000000000000000000000000000; +const long long MIN_I64_OCT = -01000000000000000000000; +const long long MIN_I64_DEC = -9223372036854775808; +const long long MIN_I64_HEX = -0x8000000000000000; + +// Big B or big X + +const int BIG_B_BIN = 0B1; +const int BIG_X_HEX = 0XF; + +// Octal with extra leading 
zero + +const char AGENT = 007; + +// C23 and C++14 thousands'/digit separator ' + +const unsigned long long SEP_BIN = 0b11111111'00000000; +const unsigned long long SEP_OCT = 07777'7777'7777; +const unsigned long long SEP_DEC = 299'792'458; +const unsigned long long SEP_HEX = 0x1111'bbbb'cccc'dddd; + +// Multiple declarations + +const long BIN_1ST = 0b10101010, OCT_2ND = 0777, DEC_3RD = 1234, HEX_4TH = 0xffff; + +// Smaller integer types + +const unsigned short USHORT_HEX = 0xFFFF; +const short SHORT_HEX = 0x7FFF; +const unsigned char UCHAR_HEX = 0xFF; +const char CHAR_HEX = 0x7F; diff --git a/bindgen-tests/tests/headers/fit-macro-constant-types-signed.h b/bindgen-tests/tests/headers/fit-macro-constant-types-signed.h index dba20937df..fbd89365a2 100644 --- a/bindgen-tests/tests/headers/fit-macro-constant-types-signed.h +++ b/bindgen-tests/tests/headers/fit-macro-constant-types-signed.h @@ -1,2 +1,2 @@ -// bindgen-flags: --default-macro-constant-type=signed --fit-macro-constant-types +// bindgen-flags: --default-macro-constant-type=signed --fit-macro-constant-types --keep-integer-radices #include "default-macro-constant-type.h" diff --git a/bindgen-tests/tests/headers/fit-macro-constant-types.h b/bindgen-tests/tests/headers/fit-macro-constant-types.h index b995bfc0d4..9d87c2cd69 100644 --- a/bindgen-tests/tests/headers/fit-macro-constant-types.h +++ b/bindgen-tests/tests/headers/fit-macro-constant-types.h @@ -1,4 +1,4 @@ -// bindgen-flags: --fit-macro-constant-types +// bindgen-flags: --fit-macro-constant-types --keep-integer-radices // Test fitting macro constants into smaller integer types // Negative values are i8, i16, i32 or i64; others are u8, u16, u32 or u64. 
#include "default-macro-constant-type.h" \ No newline at end of file diff --git a/bindgen-tests/tests/headers/jsval_layout_opaque.hpp b/bindgen-tests/tests/headers/jsval_layout_opaque.hpp index ef13b85b25..380f91072f 100644 --- a/bindgen-tests/tests/headers/jsval_layout_opaque.hpp +++ b/bindgen-tests/tests/headers/jsval_layout_opaque.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --rustified-enum ".*" +// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --rustified-enum ".*" --keep-integer-radices // bindgen-flags: -- -std=c++11 /** diff --git a/bindgen-tests/tests/headers/layout_eth_conf.h b/bindgen-tests/tests/headers/layout_eth_conf.h index 1c821c9769..d0d806cda6 100644 --- a/bindgen-tests/tests/headers/layout_eth_conf.h +++ b/bindgen-tests/tests/headers/layout_eth_conf.h @@ -1,4 +1,4 @@ -// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --rustified-enum ".*" --rust-target 1.40 +// bindgen-flags: --with-derive-hash --with-derive-partialeq --with-derive-eq --rustified-enum ".*" --rust-target 1.40 --keep-integer-radices typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; diff --git a/bindgen-tests/tests/headers/overflowed_enum.hpp b/bindgen-tests/tests/headers/overflowed_enum.hpp index 18d3fe4173..42b25022f7 100644 --- a/bindgen-tests/tests/headers/overflowed_enum.hpp +++ b/bindgen-tests/tests/headers/overflowed_enum.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: --rustified-enum ".*" -- -std=c++11 -Wno-narrowing +// bindgen-flags: --rustified-enum ".*" --keep-integer-radices -- -std=c++11 -Wno-narrowing enum Foo { BAP_ARM = 0x93fcb9, diff --git a/bindgen-tests/tests/headers/prepend-enum-constified-variant.h b/bindgen-tests/tests/headers/prepend-enum-constified-variant.h index e9ba0e732b..6c6bc78a24 100644 --- a/bindgen-tests/tests/headers/prepend-enum-constified-variant.h +++ 
b/bindgen-tests/tests/headers/prepend-enum-constified-variant.h @@ -1,4 +1,4 @@ -// bindgen-flags: --no-prepend-enum-name --rustified-enum ".*" +// bindgen-flags: --no-prepend-enum-name --rustified-enum ".*" --keep-integer-radices enum AVCodecID { AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, diff --git a/bindgen-tests/tests/headers/short-enums.hpp b/bindgen-tests/tests/headers/short-enums.hpp index 14f833de64..0af05b9718 100644 --- a/bindgen-tests/tests/headers/short-enums.hpp +++ b/bindgen-tests/tests/headers/short-enums.hpp @@ -1,4 +1,4 @@ -// bindgen-flags: --rustified-enum ".*" -- -std=c++11 -fshort-enums +// bindgen-flags: --rustified-enum ".*" --keep-integer-radices -- -std=c++11 -fshort-enums typedef enum { SOME_VALUE = 0x1, diff --git a/bindgen-tests/tests/headers/wrap-static-fns.h b/bindgen-tests/tests/headers/wrap-static-fns.h index a35e713f2b..38c506858e 100644 --- a/bindgen-tests/tests/headers/wrap-static-fns.h +++ b/bindgen-tests/tests/headers/wrap-static-fns.h @@ -1,4 +1,4 @@ -// bindgen-flags: --wrap-static-fns +// bindgen-flags: --wrap-static-fns --keep-integer-radices // bindgen-parse-callbacks: wrap-as-variadic-fn // to avoid polluting the expectation tests we put the stdarg.h behind a conditional diff --git a/bindgen/clang.rs b/bindgen/clang.rs index e52fed0d4a..3ee5f5abf0 100644 --- a/bindgen/clang.rs +++ b/bindgen/clang.rs @@ -5,6 +5,7 @@ #![deny(clippy::missing_docs_in_private_items)] use crate::ir::context::BindgenContext; +use crate::ir::var::LiteralRadix; use clang_sys::*; use std::cmp; @@ -973,6 +974,48 @@ impl Cursor { pub(crate) fn is_inline_namespace(&self) -> bool { unsafe { clang_Cursor_isInlineNamespace(self.x) != 0 } } + + /// Obtain the number base (radix) of an integer literal definition + /// corresponding to the cursor. + /// + /// Returns `None` if unable to infer a base. 
+    pub(crate) fn get_literal_radix(&self) -> Option<LiteralRadix> {
+        self.tokens().iter().find_map(|token| {
+            if token.kind == CXToken_Literal {
+                LiteralRadix::from_integer_literal_token(token.spelling())
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Obtain the number base (radix) of an integer literal definition
+    /// corresponding to the cursor, ensuring that the radix is from the literal
+    /// following a given identifier in the list of tokens.
+    ///
+    /// Returns `None` if unable to infer a base.
+    pub(crate) fn get_literal_radix_of_identifier(
+        &self,
+        identifier: &str,
+    ) -> Option<LiteralRadix> {
+        self.tokens()
+            .iter()
+            .scan(false, |identifier_found, token| {
+                if token.kind == CXToken_Identifier &&
+                    token.spelling() == identifier.as_bytes()
+                {
+                    *identifier_found = true;
+                }
+                Some((*identifier_found, token))
+            })
+            .find_map(|(identifier_found, token)| {
+                if identifier_found && token.kind == CXToken_Literal {
+                    LiteralRadix::from_integer_literal_token(token.spelling())
+                } else {
+                    None
+                }
+            })
+    }
 }
 
 /// A struct that owns the tokenizer result from a given cursor.
diff --git a/bindgen/codegen/helpers.rs b/bindgen/codegen/helpers.rs index 82172f3488..c1f1668156 100644 --- a/bindgen/codegen/helpers.rs +++ b/bindgen/codegen/helpers.rs @@ -139,6 +139,7 @@ pub(crate) mod ast_ty { use crate::ir::function::FunctionSig; use crate::ir::layout::Layout; use crate::ir::ty::{FloatKind, IntKind}; + use crate::ir::var::LiteralRadix; use crate::RustTarget; use proc_macro2::TokenStream; use std::str::FromStr; @@ -291,16 +292,50 @@ pub(crate) mod ast_ty { } } - pub(crate) fn int_expr(val: i64) -> TokenStream { + fn integer_with_radix( + val: u64, + is_negative: bool, + radix: &LiteralRadix, + ) -> TokenStream { + let sign = if is_negative { "-" } else { "" }; + let val = match radix { + LiteralRadix::Binary => format!("{sign}0b{val:b}"), + LiteralRadix::Octal => format!("{sign}0o{val:o}"), + LiteralRadix::Hexadecimal => format!("{sign}0x{val:x}"), + LiteralRadix::Decimal => format!("{sign}{val}"), + }; + TokenStream::from_str(val.as_str()) + .expect("val was infallibly constructed") + } + + pub(crate) fn int_expr( + val: i64, + radix: Option<&LiteralRadix>, + ) -> TokenStream { // Don't use quote! { #val } because that adds the type suffix. - let val = proc_macro2::Literal::i64_unsuffixed(val); - quote!(#val) + match radix { + None | Some(LiteralRadix::Decimal) => { + let val = proc_macro2::Literal::i64_unsuffixed(val); + quote!(#val) + } + Some(radix) => { + integer_with_radix(val.unsigned_abs(), val.is_negative(), radix) + } + } } - pub(crate) fn uint_expr(val: u64) -> TokenStream { + pub(crate) fn uint_expr( + val: u64, + radix: Option<&LiteralRadix>, + ) -> TokenStream { // Don't use quote! { #val } because that adds the type suffix. 
- let val = proc_macro2::Literal::u64_unsuffixed(val); - quote!(#val) + match radix { + None | Some(LiteralRadix::Decimal) => { + let val = proc_macro2::Literal::u64_unsuffixed(val); + quote!(#val) + } + Some(radix) => integer_with_radix(val, false, radix), + } } pub(crate) fn cstr_expr(mut string: String) -> TokenStream { @@ -392,4 +427,182 @@ pub(crate) mod ast_ty { }) .collect() } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn integer_with_radix_outputs_correct_tokens() { + use super::LiteralRadix as R; + struct Ar { + v: u64, + n: bool, + r: R, + } + let inputs_and_expected_results = &[ + (Ar { v: 0b0, n: false, r: R::Binary }, quote! { 0b0 }), + (Ar { v: 0o0, n: false, r: R::Octal }, quote! { 0o0 }), + (Ar { v: 0, n: false, r: R::Decimal }, quote! { 0 }), + (Ar { v: 0x0, n: false, r: R::Hexadecimal }, quote! { 0x0 }), + + (Ar { v: 0b1, n: false, r: R::Binary }, quote! { 0b1 }), + (Ar { v: 0o1, n: false, r: R::Octal }, quote! { 0o1 }), + (Ar { v: 1, n: false, r: R::Decimal }, quote! { 1 }), + (Ar { v: 0x1, n: false, r: R::Hexadecimal }, quote! { 0x1 }), + + (Ar { v: 0b1, n: true, r: R::Binary }, quote! { -0b1 }), + (Ar { v: 0o1, n: true, r: R::Octal }, quote! { -0o1 }), + (Ar { v: 1, n: true, r: R::Decimal }, quote! { -1 }), + (Ar { v: 0x1, n: true, r: R::Hexadecimal }, quote! { -0x1 }), + + (Ar { v: 0b1000000000000000000000000000000000000000000000000000000000000000, n: false, r: R::Binary }, quote! { 0b1000000000000000000000000000000000000000000000000000000000000000 }), + (Ar { v: 0o1000000000000000000000, n: false, r: R::Octal }, quote! { 0o1000000000000000000000 }), + (Ar { v: 9223372036854775808, n: false, r: R::Decimal }, quote! { 9223372036854775808 }), + (Ar { v: 0x8000000000000000, n: false, r: R::Hexadecimal }, quote! { 0x8000000000000000 }), + + (Ar { v: 0b1000000000000000000000000000000000000000000000000000000000000000, n: true, r: R::Binary }, quote! 
{ -0b1000000000000000000000000000000000000000000000000000000000000000 }), + (Ar { v: 0o1000000000000000000000, n: true, r: R::Octal }, quote! { -0o1000000000000000000000 }), + (Ar { v: 9223372036854775808, n: true, r: R::Decimal }, quote! { -9223372036854775808 }), + (Ar { v: 0x8000000000000000, n: true, r: R::Hexadecimal }, quote! { -0x8000000000000000 }), + + (Ar { v: u64::MAX, n: false, r: R::Binary }, quote! { 0b1111111111111111111111111111111111111111111111111111111111111111 }), + (Ar { v: u64::MAX, n: false, r: R::Octal }, quote! { 0o1777777777777777777777 }), + (Ar { v: u64::MAX, n: false, r: R::Decimal }, quote! { 18446744073709551615 }), + (Ar { v: u64::MAX, n: false, r: R::Hexadecimal }, quote! { 0xffffffffffffffff }), + ]; + for (i, e) in inputs_and_expected_results { + assert_eq!( + integer_with_radix(i.v, i.n, &i.r).to_string(), + e.to_string() + ); + } + } + + #[test] + fn int_expr_outputs_correct_tokens() { + use super::LiteralRadix as R; + let values_and_expected_results = &[ + ( + 0, + ( + quote! { 0b0 }, + quote! { 0o0 }, + quote! { 0 }, + quote! { 0x0 }, + ), + ), + ( + 1, + ( + quote! { 0b1 }, + quote! { 0o1 }, + quote! { 1 }, + quote! { 0x1 }, + ), + ), + ( + -1, + ( + quote! { -0b1 }, + quote! { -0o1 }, + quote! { -1 }, + quote! { -0x1 }, + ), + ), + ( + i64::MIN, + ( + quote! { -0b1000000000000000000000000000000000000000000000000000000000000000 }, + quote! { -0o1000000000000000000000 }, + quote! { -9223372036854775808 }, + quote! { -0x8000000000000000 }, + ), + ), + ( + i64::MAX, + ( + quote! { 0b111111111111111111111111111111111111111111111111111111111111111 }, + quote! { 0o777777777777777777777 }, + quote! { 9223372036854775807 }, + quote! 
{ 0x7fffffffffffffff }, + ), + ), + ]; + + for (val, e) in values_and_expected_results { + assert_eq!( + int_expr(*val, Some(&R::Binary)).to_string(), + e.0.to_string() + ); + assert_eq!( + int_expr(*val, Some(&R::Octal)).to_string(), + e.1.to_string() + ); + assert_eq!(int_expr(*val, None).to_string(), e.2.to_string()); + assert_eq!( + int_expr(*val, Some(&R::Decimal)).to_string(), + e.2.to_string() + ); + assert_eq!( + int_expr(*val, Some(&R::Hexadecimal)).to_string(), + e.3.to_string() + ); + } + } + + #[test] + fn uint_expr_outputs_correct_tokens() { + use super::LiteralRadix as R; + let values_and_expected_results = &[ + ( + 0, + ( + quote! { 0b0 }, + quote! { 0o0 }, + quote! { 0 }, + quote! { 0x0 }, + ), + ), + ( + 1, + ( + quote! { 0b1 }, + quote! { 0o1 }, + quote! { 1 }, + quote! { 0x1 }, + ), + ), + ( + u64::MAX, + ( + quote! { 0b1111111111111111111111111111111111111111111111111111111111111111 }, + quote! { 0o1777777777777777777777 }, + quote! { 18446744073709551615 }, + quote! 
{ 0xffffffffffffffff }, + ), + ), + ]; + + for (val, e) in values_and_expected_results { + assert_eq!( + uint_expr(*val, Some(&R::Binary)).to_string(), + e.0.to_string() + ); + assert_eq!( + uint_expr(*val, Some(&R::Octal)).to_string(), + e.1.to_string() + ); + assert_eq!(uint_expr(*val, None).to_string(), e.2.to_string()); + assert_eq!( + uint_expr(*val, Some(&R::Decimal)).to_string(), + e.2.to_string() + ); + assert_eq!( + uint_expr(*val, Some(&R::Hexadecimal)).to_string(), + e.3.to_string() + ); + } + } + } } diff --git a/bindgen/codegen/mod.rs b/bindgen/codegen/mod.rs index 5425962bac..e969484500 100644 --- a/bindgen/codegen/mod.rs +++ b/bindgen/codegen/mod.rs @@ -691,6 +691,7 @@ impl CodeGenerator for Var { }); } VarType::Int(val) => { + let radix = self.radix(); let int_kind = var_ty .into_resolver() .through_type_aliases() @@ -700,9 +701,9 @@ impl CodeGenerator for Var { .as_integer() .unwrap(); let val = if int_kind.is_signed() { - helpers::ast_ty::int_expr(val) + helpers::ast_ty::int_expr(val, radix) } else { - helpers::ast_ty::uint_expr(val as _) + helpers::ast_ty::uint_expr(val as _, radix) }; result.push(quote! { #(#attrs)* @@ -2430,7 +2431,7 @@ impl CodeGenerator for CompInfo { }; fields.insert(0, align_field); } else { - let explicit = helpers::ast_ty::int_expr(explicit as i64); + let explicit = helpers::ast_ty::int_expr(explicit as i64, None); attributes.push(quote! 
{ #[repr(align(#explicit))] }); @@ -3368,11 +3369,15 @@ impl EnumBuilder { let is_rust_enum = self.is_rust_enum(); let expr = match variant.val() { EnumVariantValue::Boolean(v) if is_rust_enum => { - helpers::ast_ty::uint_expr(u64::from(v)) + helpers::ast_ty::uint_expr(u64::from(v), None) } EnumVariantValue::Boolean(v) => quote!(#v), - EnumVariantValue::Signed(v) => helpers::ast_ty::int_expr(v), - EnumVariantValue::Unsigned(v) => helpers::ast_ty::uint_expr(v), + EnumVariantValue::Signed(v) => { + helpers::ast_ty::int_expr(v, variant.radix()) + } + EnumVariantValue::Unsigned(v) => { + helpers::ast_ty::uint_expr(v, variant.radix()) + } }; match self.kind { diff --git a/bindgen/ir/enum_ty.rs b/bindgen/ir/enum_ty.rs index 9b08da3bce..025f54d6f7 100644 --- a/bindgen/ir/enum_ty.rs +++ b/bindgen/ir/enum_ty.rs @@ -6,6 +6,7 @@ use super::item::Item; use super::ty::{Type, TypeKind}; use crate::clang; use crate::ir::annotations::Annotations; +use crate::ir::var::LiteralRadix; use crate::parse::ParseError; use crate::regex_set::RegexSet; @@ -103,6 +104,11 @@ impl Enum { }; if let Some(val) = value { let name = cursor.spelling(); + let radix = if ctx.options().keep_integer_radices { + cursor.get_literal_radix() + } else { + None + }; let annotations = Annotations::new(&cursor); let custom_behavior = ctx .options() @@ -142,6 +148,7 @@ impl Enum { comment, val, custom_behavior, + radix, )); } } @@ -254,6 +261,9 @@ pub(crate) struct EnumVariant { /// The custom behavior this variant may have, if any. custom_behavior: Option, + + /// The radix of the literal value of the variant. + radix: Option, } /// A constant value assigned to an enumeration variant. @@ -277,6 +287,7 @@ impl EnumVariant { comment: Option, val: EnumVariantValue, custom_behavior: Option, + radix: Option, ) -> Self { EnumVariant { name, @@ -284,6 +295,7 @@ impl EnumVariant { comment, val, custom_behavior, + radix, } } @@ -302,6 +314,11 @@ impl EnumVariant { self.val } + /// Get this variant's radix. 
+ pub(crate) fn radix(&self) -> Option<&LiteralRadix> { + self.radix.as_ref() + } + /// Get this variant's documentation. pub(crate) fn comment(&self) -> Option<&str> { self.comment.as_deref() diff --git a/bindgen/ir/var.rs b/bindgen/ir/var.rs index 45f4ba1ba0..aa054185b5 100644 --- a/bindgen/ir/var.rs +++ b/bindgen/ir/var.rs @@ -30,6 +30,108 @@ pub(crate) enum VarType { String(Vec), } +/// Numeric literal's radix. +#[derive(Debug)] +pub(crate) enum LiteralRadix { + /// Binary (base 2). + Binary, + /// Octal (base 8). + Octal, + /// Decimal (base 10). + Decimal, + /// Hexadecimal (base 16). + Hexadecimal, +} + +/// Possible integer literal suffixes, all cases `[Uu]?(L{0,2}|l{0,2})`, from +/// longest to shortest in number of characters, and the empty case. +#[rustfmt::skip] // hand-formatted for clarity +const INTEGER_SUFFIXES: [&str; 15] = [ + "ULL", "Ull", "uLL", "ull", + "UL", "Ul", "uL", "ul", + "LL", "ll", + "U", "u", + "L", "l", + "", +]; + +impl LiteralRadix { + /// Obtain the number base of a bytestring corresponding to an existing + /// integer literal definition. + /// + /// Returns `None` if unable to infer a base. + pub(crate) fn from_integer_literal_token( + tok: impl AsRef<[u8]>, + ) -> Option { + let tok = tok.as_ref(); + + // Strip integer suffix (e.g. ULL) if exists + let tok = INTEGER_SUFFIXES + .iter() + .find_map(|suffix| tok.strip_suffix::<[u8]>(suffix.as_ref())) + .expect("last suffix is empty bytestring"); + + if tok.is_empty() { + return None; + } else if tok.len() == 1 { + // Single digit numbers from 0 to 9 are designated decimal radix, + // although technically, in C standard, a single 0 is octal + return tok[0].is_ascii_digit().then_some(Self::Decimal); + } + + match tok[0] { + b'0' => match tok[1] { + b'x' | b'X' => { + if tok.len() < 3 { + None // "0x" without actual value + } else { + if tok[2] == b'\'' { + return None; + } + // hexadecimal value: 0-9, a-f, A-F + tok[2..] 
+ .iter() + .all(|chr| chr.is_ascii_hexdigit() || chr == &b'\'') + .then_some(Self::Hexadecimal) + } + } + b'b' | b'B' => { + if tok.len() < 3 { + None // "0b" without actual value + } else { + if tok[2] == b'\'' { + return None; + } + // binary value: zeros and ones + tok[2..] + .iter() + .all(|chr| { + (b'0'..=b'1').contains(chr) || chr == &b'\'' + }) + .then_some(Self::Binary) + } + } + b'0'..=b'7' => { + // octal value: digits 0 to 7 (incl.) + tok[2..] + .iter() + .all(|chr| (b'0'..=b'7').contains(chr) || chr == &b'\'') + .then_some(Self::Octal) + } + _ => None, + }, + b'1'..=b'9' => { + // decimal value: digits 0 to 9 (incl.) + tok[1..] + .iter() + .all(|chr| chr.is_ascii_digit() || chr == &b'\'') + .then_some(Self::Decimal) + } + _ => None, + } + } +} + /// A `Var` is our intermediate representation of a variable. #[derive(Debug)] pub(crate) struct Var { @@ -45,6 +147,8 @@ pub(crate) struct Var { val: Option, /// Whether this variable is const. is_const: bool, + /// The radix of the variable, if integer. + radix: Option, } impl Var { @@ -56,6 +160,7 @@ impl Var { ty: TypeId, val: Option, is_const: bool, + radix: Option, ) -> Var { assert!(!name.is_empty()); Var { @@ -65,9 +170,15 @@ impl Var { ty, val, is_const, + radix, } } + /// The radix of this integer variable, if any. + pub(crate) fn radix(&self) -> Option<&LiteralRadix> { + self.radix.as_ref() + } + /// Is this variable `const` qualified? pub(crate) fn is_const(&self) -> bool { self.is_const @@ -223,11 +334,13 @@ impl ClangSubItemParser for Var { // enforce utf8 there, so we should have already panicked at // this point. 
let name = String::from_utf8(id).unwrap(); - let (type_kind, val) = match value { + let (type_kind, val, radix) = match value { EvalResult::Invalid => return Err(ParseError::Continue), - EvalResult::Float(f) => { - (TypeKind::Float(FloatKind::Double), VarType::Float(f)) - } + EvalResult::Float(f) => ( + TypeKind::Float(FloatKind::Double), + VarType::Float(f), + None, + ), EvalResult::Char(c) => { let c = match c { CChar::Char(c) => { @@ -237,7 +350,7 @@ impl ClangSubItemParser for Var { CChar::Raw(c) => u8::try_from(c).unwrap(), }; - (TypeKind::Int(IntKind::U8), VarType::Char(c)) + (TypeKind::Int(IntKind::U8), VarType::Char(c), None) } EvalResult::Str(val) => { let char_ty = Item::builtin_type( @@ -248,7 +361,7 @@ impl ClangSubItemParser for Var { for callbacks in &ctx.options().parse_callbacks { callbacks.str_macro(&name, &val); } - (TypeKind::Pointer(char_ty), VarType::String(val)) + (TypeKind::Pointer(char_ty), VarType::String(val), None) } EvalResult::Int(Wrapping(value)) => { let kind = ctx @@ -258,14 +371,20 @@ impl ClangSubItemParser for Var { default_macro_constant_type(ctx, value) }); - (TypeKind::Int(kind), VarType::Int(value)) + let radix = if ctx.options().keep_integer_radices { + cursor.get_literal_radix() + } else { + None + }; + + (TypeKind::Int(kind), VarType::Int(value), radix) } }; let ty = Item::builtin_type(type_kind, true, ctx); Ok(ParseResult::New( - Var::new(name, None, None, ty, Some(val), true), + Var::new(name, None, None, ty, Some(val), true, radix), Some(cursor), )) } @@ -334,39 +453,55 @@ impl ClangSubItemParser for Var { // TODO: Strings, though the lookup is a bit more hard (we need // to look at the canonical type of the pointee too, and check // is char, u8, or i8 I guess). 
- let value = if is_integer { + let (value, radix) = if is_integer { let TypeKind::Int(kind) = *canonical_ty.unwrap().kind() else { unreachable!() }; let mut val = cursor.evaluate().and_then(|v| v.as_int()); + let radix = if ctx.options().keep_integer_radices { + cursor.get_literal_radix_of_identifier(&name) + } else { + None + }; + if val.is_none() || !kind.signedness_matches(val.unwrap()) { val = get_integer_literal_from_cursor(&cursor); } - val.map(|val| { - if kind == IntKind::Bool { - VarType::Bool(val != 0) - } else { - VarType::Int(val) - } - }) + ( + val.map(|val| { + if kind == IntKind::Bool { + VarType::Bool(val != 0) + } else { + VarType::Int(val) + } + }), + radix, + ) } else if is_float { - cursor - .evaluate() - .and_then(|v| v.as_double()) - .map(VarType::Float) + ( + cursor + .evaluate() + .and_then(|v| v.as_double()) + .map(VarType::Float), + None, + ) } else { - cursor - .evaluate() - .and_then(|v| v.as_literal_string()) - .map(VarType::String) + ( + cursor + .evaluate() + .and_then(|v| v.as_literal_string()) + .map(VarType::String), + None, + ) }; let mangling = cursor_mangling(ctx, &cursor); - let var = - Var::new(name, mangling, link_name, ty, value, is_const); + let var = Var::new( + name, mangling, link_name, ty, value, is_const, radix, + ); Ok(ParseResult::New(var, Some(cursor))) } @@ -521,3 +656,92 @@ fn duplicated_macro_diagnostic( .display(); } } + +#[cfg(test)] +mod test { + use super::*; + + impl PartialEq for LiteralRadix { + fn eq(&self, other: &Self) -> bool { + core::mem::discriminant(self) == core::mem::discriminant(other) + } + } + + #[test] + fn parses_correct_radix_from_valid_raw_token() { + let raw_tok_radix_pairs: &[(&[u8], Option)] = &[ + (b"0", Some(LiteralRadix::Decimal)), + (b"1", Some(LiteralRadix::Decimal)), + (b"18446744073709551615", Some(LiteralRadix::Decimal)), // u64::MAX + (b"9223372036854775808", Some(LiteralRadix::Decimal)), // non-prefixed i64::MIN + (b"0b0", Some(LiteralRadix::Binary)), + (b"0b1", 
Some(LiteralRadix::Binary)), + (b"0B1", Some(LiteralRadix::Binary)), + ( + b"0b10000000'00000000'00000000'00000000", + Some(LiteralRadix::Binary), + ), + (b"00", Some(LiteralRadix::Octal)), + (b"01", Some(LiteralRadix::Octal)), + (b"0x0", Some(LiteralRadix::Hexadecimal)), + (b"0x1", Some(LiteralRadix::Hexadecimal)), + (b"0X1", Some(LiteralRadix::Hexadecimal)), + (b"0ULL", Some(LiteralRadix::Decimal)), + (b"10UL", Some(LiteralRadix::Decimal)), + (b"100L", Some(LiteralRadix::Decimal)), + (b"0b0ULL", Some(LiteralRadix::Binary)), + (b"0b10UL", Some(LiteralRadix::Binary)), + (b"0b100L", Some(LiteralRadix::Binary)), + (b"00ULL", Some(LiteralRadix::Octal)), + (b"010UL", Some(LiteralRadix::Octal)), + (b"0100L", Some(LiteralRadix::Octal)), + (b"0x0ULL", Some(LiteralRadix::Hexadecimal)), + (b"0x10UL", Some(LiteralRadix::Hexadecimal)), + (b"0x100L", Some(LiteralRadix::Hexadecimal)), + ]; + + for (tok, radix) in raw_tok_radix_pairs { + assert_eq!( + LiteralRadix::from_integer_literal_token(tok), + *radix, + "tok = b\"{}\"", + std::str::from_utf8(tok).unwrap() + ); + } + } + + #[test] + fn parses_correct_radix_from_valid_str() { + assert_eq!( + LiteralRadix::from_integer_literal_token("0xf00d"), + Some(LiteralRadix::Hexadecimal), + "tok = \"0xf00d\"" + ); + } + + #[test] + fn parses_none_radix_from_invalid_raw_token() { + let raw_toks: &[&[u8]] = + &[b"", b"0b", b"0b2", b"0x", b"A", b"f", b"0x'", b"0b'"]; + + for tok in raw_toks { + assert_eq!( + LiteralRadix::from_integer_literal_token(tok), + None, + "tok = b\"{}\"", + std::str::from_utf8(tok).unwrap() + ); + } + } + + #[test] + fn parses_none_radix_from_lone_integer_suffixes() { + for suffix in INTEGER_SUFFIXES { + assert_eq!( + LiteralRadix::from_integer_literal_token(suffix), + None, + "tok = \"{suffix}\"" + ); + } + } +} diff --git a/bindgen/options/cli.rs b/bindgen/options/cli.rs index b60de39603..3f399785f6 100644 --- a/bindgen/options/cli.rs +++ b/bindgen/options/cli.rs @@ -525,6 +525,9 @@ struct BindgenCommand { /// 
be called. #[arg(long)] generate_private_functions: bool, + /// Whether to retain integer literal radices in generated Rust code. + #[arg(long)] + keep_integer_radices: bool, /// Whether to emit diagnostics or not. #[cfg(feature = "experimental")] #[arg(long, requires = "experimental")] @@ -676,6 +679,7 @@ where generate_deleted_functions, generate_pure_virtual_functions, generate_private_functions, + keep_integer_radices, #[cfg(feature = "experimental")] emit_diagnostics, generate_shell_completions, @@ -971,6 +975,7 @@ where generate_deleted_functions, generate_pure_virtual_functions, generate_private_functions, + keep_integer_radices, } ); diff --git a/bindgen/options/mod.rs b/bindgen/options/mod.rs index b876b4d5b3..80fa432d77 100644 --- a/bindgen/options/mod.rs +++ b/bindgen/options/mod.rs @@ -2283,4 +2283,20 @@ options! { }, as_args: "--generate-private-functions", }, + /// Whether to retain integer literal radices in generated Rust code. + keep_integer_radices: bool { + default: false, + methods: { + /// Set whether to retain number bases of C/C++ integer literals. + /// + /// Integer literals defined as binary `0b…`, octal `0…`, and hexadecimal `0x…` have the + /// equivalent notation in the generated Rust code, i.e., `0b…`, `0o…`, and `0x…` + /// respectively. + pub fn keep_integer_radices(mut self, doit: bool) -> Self { + self.options.keep_integer_radices = doit; + self + } + }, + as_args: "--keep-integer-radices", + }, }