|
| 1 | += Unambiguous types |
| 2 | + |
| 3 | +Most of these mappings are obvious, but there are some nuances and gotchas with |
| 4 | +Rust FFI (Foreign Function Interface). |
| 5 | + |
| 6 | +This document defines clear, one-to-one mappings between primitive types in C, |
| 7 | +Rust (and possibly other languages in the future). Its purpose is to eliminate |
| 8 | +ambiguity in type widths, signedness, and binary representation across |
| 9 | +platforms and languages. |
| 10 | + |
| 11 | +For Git, the only header required to use these unambiguous types in C is |
| 12 | +`git-compat-util.h`. |
| 13 | + |
| 14 | +== Boolean types |
| 15 | +[cols="1,1", options="header"] |
| 16 | +|=== |
| 17 | +| C Type | Rust Type |
| 18 | +| bool^1^ | bool |
| 19 | +|=== |
| 20 | + |
| 21 | +== Integer types |
| 22 | + |
| 23 | +In C, `<stdint.h>` (or an equivalent) must be included. |
| 24 | + |
| 25 | +[cols="1,1", options="header"] |
| 26 | +|=== |
| 27 | +| C Type | Rust Type |
| 28 | +| uint8_t | u8 |
| 29 | +| uint16_t | u16 |
| 30 | +| uint32_t | u32 |
| 31 | +| uint64_t | u64 |
| 32 | + |
| 33 | +| int8_t | i8 |
| 34 | +| int16_t | i16 |
| 35 | +| int32_t | i32 |
| 36 | +| int64_t | i64 |
| 37 | +|=== |
| 38 | + |
| 39 | +== Floating-point types |
| 40 | + |
| 41 | +Rust requires IEEE-754 semantics. |
| 42 | +In C, that is typically true, but not guaranteed by the standard. |
| 43 | + |
| 44 | +[cols="1,1", options="header"] |
| 45 | +|=== |
| 46 | +| C Type | Rust Type |
| 47 | +| float^2^ | f32 |
| 48 | +| double^2^ | f64 |
| 49 | +|=== |
| 50 | + |
| 51 | +== Size types |
| 52 | + |
| 53 | +These types represent pointer-sized integers and are typically defined in |
| 54 | +`<stddef.h>` or an equivalent header. |
| 55 | + |
| 56 | +Size types should be used any time pointer arithmetic is performed, e.g. |
| 57 | +indexing an array, describing the number of elements in memory, etc. |
| 58 | + |
| 59 | +[cols="1,1", options="header"] |
| 60 | +|=== |
| 61 | +| C Type | Rust Type |
| 62 | +| size_t^3^ | usize |
| 63 | +| ptrdiff_t^4^ | isize |
| 64 | +|=== |
| 65 | + |
| 66 | +== Character types |
| 67 | + |
| 68 | +This is where C and Rust don't have a clean one-to-one mapping. A C `char` is |
| 69 | +an 8-bit type that is signless (neither signed nor unsigned) which causes |
| 70 | +problems with e.g. `make DEVELOPER=1`. Rust's `char` type is an unsigned 32-bit |
| 71 | +integer that is used to describe Unicode code points. Even though a C `char` |
| 72 | +is the same width as `u8`, `char` should be converted to u8 where it is |
| 73 | +describing bytes in memory. If a C `char` is not describing bytes, then it |
| 74 | +should be converted to a more accurate unambiguous type. |
| 75 | + |
| 76 | +While you could specify `char` in the C code and `u8` in Rust code, it's not as |
| 77 | +clear what the appropriate type is, but it would work across the FFI boundary. |
| 78 | +However the bigger problem comes from code generation tools like cbindgen and |
| 79 | +bindgen. When cbindgen sees u8 in Rust it will generate uint8_t on the C side |
| 80 | +which will cause "differ in signedness" warnings/errors. Similarly, if bindgen |
| 81 | +sees `char` on the C side it will generate `std::ffi::c_char` which has its own |
| 82 | +problems. |
| 83 | + |
| 84 | +=== Notes |
| 85 | +^1^ This is only true if stdbool.h (or equivalent) is used. + |
| 86 | +^2^ C does not enforce IEEE-754 compatibility, but Rust expects it. If the |
| 87 | +platform/arch for C does not follow IEEE-754 then this equivalence does not |
| 88 | +hold. Also, it's assumed that `float` is 32 bits and `double` is 64, but |
| 89 | +there may be a strange platform/arch where even this isn't true. + |
| 90 | +^3^ C also defines uintptr_t, but this should not be used in Git. + |
| 91 | +^4^ C also defines ssize_t and intptr_t, but these should not be used in Git. + |
| 92 | + |
| 93 | +== Problems with std::ffi::c_* types in Rust |
| 94 | +TL;DR: They're not guaranteed to match C types for all possible C |
| 95 | +compilers/platforms/architectures. |
| 96 | + |
| 97 | +Only a few of Rust's C FFI types are considered safe and semantically clear to |
| 98 | +use: + |
| 99 | + |
| 100 | +* `c_void` |
| 101 | +* `CStr` |
| 102 | +* `CString` |
| 103 | + |
| 104 | +Even then, they should be used sparingly, and only where the semantics match |
| 105 | +exactly. |
| 106 | + |
| 107 | +The std::os::raw::c_* types (which are deprecated) directly inherit the problems of |
| 108 | +core::ffi, which changes over time and seems to make a best guess at the |
| 109 | +correct definition for a given platform/target. This probably isn't a problem |
| 110 | +for all platforms that Rust supports currently, but can anyone say that Rust |
| 111 | +got it right for all C compilers of all platforms/targets? |
| 112 | + |
| 113 | +On top of all of that we're targeting an older version of Rust which doesn't |
| 114 | +have the latest mappings. |
| 115 | + |
| 116 | +To give an example, the definition of c_long differs between Rust versions |
| 117 | +footnote:[https://doc.rust-lang.org/1.63.0/src/core/ffi/mod.rs.html#175-189[c_long in 1.63.0]] |
| 118 | +footnote:[https://doc.rust-lang.org/1.89.0/src/core/ffi/primitives.rs.html#135-151[c_long in 1.89.0]] |
| 119 | + |
| 120 | +=== Rust version 1.63.0 |
| 121 | + |
| 122 | +[source] |
| 123 | +---- |
| 124 | +mod c_long_definition { |
| 125 | + cfg_if! { |
| 126 | + if #[cfg(all(target_pointer_width = "64", not(windows)))] { |
| 127 | + pub type c_long = i64; |
| 128 | + pub type NonZero_c_long = crate::num::NonZeroI64; |
| 129 | + pub type c_ulong = u64; |
| 130 | + pub type NonZero_c_ulong = crate::num::NonZeroU64; |
| 131 | + } else { |
| 132 | + // The minimal size of `long` in the C standard is 32 bits |
| 133 | + pub type c_long = i32; |
| 134 | + pub type NonZero_c_long = crate::num::NonZeroI32; |
| 135 | + pub type c_ulong = u32; |
| 136 | + pub type NonZero_c_ulong = crate::num::NonZeroU32; |
| 137 | + } |
| 138 | + } |
| 139 | +} |
| 140 | +---- |
| 141 | + |
| 142 | +=== Rust version 1.89.0 |
| 143 | + |
| 144 | +[source] |
| 145 | +---- |
| 146 | +mod c_long_definition { |
| 147 | + crate::cfg_select! { |
| 148 | + any( |
| 149 | + all(target_pointer_width = "64", not(windows)), |
| 150 | + // wasm32 Linux ABI uses 64-bit long |
| 151 | + all(target_arch = "wasm32", target_os = "linux") |
| 152 | + ) => { |
| 153 | + pub(super) type c_long = i64; |
| 154 | + pub(super) type c_ulong = u64; |
| 155 | + } |
| 156 | + _ => { |
| 157 | + // The minimal size of `long` in the C standard is 32 bits |
| 158 | + pub(super) type c_long = i32; |
| 159 | + pub(super) type c_ulong = u32; |
| 160 | + } |
| 161 | + } |
| 162 | +} |
| 163 | +---- |
| 164 | + |
| 165 | +Even for the cases where C types are correctly mapped to Rust types via |
| 166 | +std::ffi::c_* there are still problems. Let's take c_char for example. On some |
| 167 | +platforms it's u8, on others it's i8. |
| 168 | + |
| 169 | +=== Subtraction underflow in debug mode |
| 170 | + |
| 171 | +The following code will panic in debug on platforms that define c_char as u8, |
| 172 | +but won't if it's an i8. |
| 173 | + |
| 174 | +[source] |
| 175 | +---- |
| 176 | +let mut x: std::ffi::c_char = 0; |
| 177 | +x -= 1; |
| 178 | +---- |
| 179 | + |
| 180 | +=== Inconsistent shift behavior |
| 181 | + |
| 182 | +`x` will be 0xC0 for platforms that use i8, but will be 0x40 where it's u8. |
| 183 | + |
| 184 | +[source] |
| 185 | +---- |
| 186 | +let mut x: std::ffi::c_char = 0x80; |
| 187 | +x >>= 1; |
| 188 | +---- |
| 189 | + |
| 190 | +=== Equality fails to compile on some platforms |
| 191 | + |
| 192 | +The following will not compile on platforms that define c_char as i8, but will |
| 193 | +if it's u8. You can cast x e.g. `assert_eq!(x as u8, b'a');`, but then you get |
| 194 | +a warning on platforms that use u8 and a clean compilation where i8 is used. |
| 195 | + |
| 196 | +[source] |
| 197 | +---- |
| 198 | +let mut x: std::ffi::c_char = 0x61; |
| 199 | +assert_eq!(x, b'a'); |
| 200 | +---- |
| 201 | + |
| 202 | +== Enum types |
| 203 | +Rust enum types should not be used as FFI types. Rust enum types are more like |
| 204 | +C union types than C enums. For something like: |
| 205 | + |
| 206 | +[source] |
| 207 | +---- |
| 208 | +#[repr(C, u8)] |
| 209 | +enum Fruit { |
| 210 | + Apple, |
| 211 | + Banana, |
| 212 | + Cherry, |
| 213 | +} |
| 214 | +---- |
| 215 | + |
| 216 | +It's easy enough to make sure the Rust enum matches what C would expect, but a |
| 217 | +more complex type like: |
| 218 | + |
| 219 | +[source] |
| 220 | +---- |
| 221 | +enum HashResult { |
| 222 | + SHA1([u8; 20]), |
| 223 | + SHA256([u8; 32]), |
| 224 | +} |
| 225 | +---- |
| 226 | + |
| 227 | +The Rust compiler has to add a discriminant to the enum to distinguish between |
| 228 | +the variants. The width, location, and values for that discriminant are up to |
| 229 | +the Rust compiler and is not ABI stable. |
0 commit comments