|
8 | 8 | using namespace zoo; |
9 | 9 | using namespace zoo::swar; |
10 | 10 |
|
// Shorthand aliases for the SWAR instantiations used by the tests in this file.
// Naming scheme: S<first template argument>_<bit width of the base integer type>,
// e.g. S4_32 is SWAR<4, uint32_t>.
// NOTE(review): the first template argument appears to be the lane width in bits
// (the SWAR<4, ...> tests in this file expect one result per hex digit) — confirm
// against the SWAR definition.
| 11 | +using S4_64 = SWAR<4, uint64_t>; |
| 12 | +using S4_32 = SWAR<4, uint32_t>; |
| 13 | +using S4_16 = SWAR<4, uint16_t>; |
| 14 | +using S4_8 = SWAR<4, uint8_t>; |
| 15 | + |
| 16 | +using S8_64 = SWAR<8, uint64_t>; |
| 17 | +using S8_32 = SWAR<8, uint32_t>; |
| 18 | +using S8_16 = SWAR<8, uint16_t>; |
| 19 | +using S8_8 = SWAR<8, uint8_t>; |
| 20 | + |
| 21 | +using S16_64 = SWAR<16, uint64_t>; |
| 22 | +using S16_32 = SWAR<16, uint32_t>; |
| 23 | +using S16_16 = SWAR<16, uint16_t>; |
| 24 | + |
11 | 25 | namespace Multiplication { |
12 | 26 |
|
13 | 27 | using S4_64 = SWAR<4, uint64_t>; |
@@ -85,19 +99,19 @@ TEST_CASE( |
85 | 99 | "[swar]" |
86 | 100 | ) { |
87 | 101 | for (auto i = 0; i < 63; ++i) { |
88 | | - CHECK(i == isolate<8>(i)); |
89 | | - CHECK(i == isolate<8>(0xFF00+i)); |
90 | | - CHECK(i == isolate<8>(0xFFFF00+i)); |
| 102 | + CHECK(i == isolate<8>(i)); |
| 103 | + CHECK(i == isolate<8>(0xFF00+i)); |
| 104 | + CHECK(i == isolate<8>(0xFFFF00+i)); |
91 | 105 | } |
92 | 106 | for (auto i = 0; i < 31; ++i) { |
93 | | - CHECK(i == isolate<7>(i)); |
94 | | - CHECK(i == isolate<7>(0xFF00+i)); |
95 | | - CHECK(i == isolate<7>(0xFFFF00+i)); |
| 107 | + CHECK(i == isolate<7>(i)); |
| 108 | + CHECK(i == isolate<7>(0xFF00+i)); |
| 109 | + CHECK(i == isolate<7>(0xFFFF00+i)); |
96 | 110 | } |
97 | 111 | for (auto i = 0; i < 31; ++i) { |
98 | | - CHECK(i == isolate<11>(i)); |
99 | | - CHECK(i == isolate<11>(0xF800+i)); |
100 | | - CHECK(i == isolate<11>(0xFFF800+i)); |
| 112 | + CHECK(i == isolate<11>(i)); |
| 113 | + CHECK(i == isolate<11>(0xF800+i)); |
| 114 | + CHECK(i == isolate<11>(0xFFF800+i)); |
101 | 115 | } |
102 | 116 | } |
103 | 117 |
|
@@ -282,3 +296,73 @@ constexpr auto aBooleansWithTrue = booleans(SWAR<4, u32>{0x1}); |
// Compile-time checks on aBooleansWithTrue, i.e. booleans(SWAR<4, u32>{0x1}).
// Both the value and its `!` are asserted to be true, which the original author
// flags as a pitfall. Presumably `!` produces another SWAR boolean value (itself
// convertible to true) rather than a plain bool — TODO confirm against the
// operator's definition. The last line converts to bool first, so the negation
// there is ordinary boolean negation.
282 | 296 | static_assert(aBooleansWithTrue); |
283 | 297 | static_assert(!aBooleansWithTrue); // this is a pitfall, but lesser evil? |
284 | 298 | static_assert(false == !bool(aBooleansWithTrue)); |
| 299 | + |
// Catch2 test case for fullAddition on SWAR<4, u32> operands.
// Every section adds two values that are non-zero only in the hex digit at bits
// 12..15 and compares the .carry, .overflow and .result members of the returned
// object against expected raw values. In the expectations a flagged lane shows up
// as 0x8 in that hex digit.
// NOTE(review): the expected values are consistent with .carry meaning unsigned
// carry-out of the lane and .overflow meaning signed (two's-complement) overflow
// of the lane — confirm against the fullAddition documentation.
| 300 | +TEST_CASE( |
| 301 | + "fullAddition", |
| 302 | + "[swar][signed-swar][unsigned-swar]" |
| 303 | +) { |
// 0x1 + 0x7 = 0x8: expected overflow flag, no carry.
| 304 | + SECTION("fullAddition overflow") { |
| 305 | + const auto sum = fullAddition(SWAR<4, u32>(0x0000'1000), SWAR<4, u32>(0x0000'7000)); |
| 306 | + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); |
| 307 | + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); |
| 308 | + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.result.value()); |
| 309 | + } |
// 0x8 + 0x7 = 0xF: neither flag is expected.
| 310 | + SECTION("no carry or overflow for safe values") { |
| 311 | + const auto sum = fullAddition(SWAR<4, u32>(0x0000'8000), SWAR<4, u32>(0x0000'7000)); |
| 312 | + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); |
| 313 | + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.overflow.value()); |
| 314 | + CHECK(SWAR<4, u32>(0x0000'F000).value() == sum.result.value()); |
| 315 | + } |
// 0x5 + 0x5 = 0xA: expected overflow flag, no carry.
| 316 | + SECTION("fullAddition signed overflow") { |
| 317 | + const auto sum = fullAddition(SWAR<4, u32>(0x0000'5000), SWAR<4, u32>(0x0000'5000)); |
| 318 | + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); |
| 319 | + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); |
| 320 | + CHECK(SWAR<4, u32>(0x0000'A000).value() == sum.result.value()); |
| 321 | + } |
// 0x7 + 0x7 = 0xE: expected overflow flag, no carry.
// NOTE(review): the 0x-prefixed four-digit groups in this section title are bit
// patterns, not hexadecimal (0111 is labelled 7). Read as a 4-bit
// two's-complement value, 1110 is -2; the title's "0x2 signed" appears to give
// only the magnitude.
| 322 | + SECTION("0x0111 (7) + 0x0111 (7) is 0x1110 (0x1110->0x1101->0x0010) (0xe unsigned, 0x2 signed) (signed and unsigned check)") { |
| 323 | + const auto sum = fullAddition(SWAR<4, u32>(0x0000'7000), SWAR<4, u32>(0x0000'7000)); |
| 324 | + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); |
| 325 | + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); |
| 326 | + CHECK(SWAR<4, u32>(0x0000'e000).value() == sum.result.value()); |
| 327 | + } |
// 0xA + 0xA: both flags are expected.
// NOTE(review): unlike the other sections, sum.result is not checked here.
| 328 | + SECTION("both carry and overflow") { |
| 329 | + const auto sum = fullAddition(SWAR<4, u32>(0x0000'a000), SWAR<4, u32>(0x0000'a000)); |
| 330 | + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.carry.value()); |
| 331 | + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); |
| 332 | + } |
| 333 | +} |
| 334 | + |
// Catch2 test case for BooleanSWAR<4, u32>::MSBtoLaneMask().
// The input 0x0808'0000 has only the top bit (0x8) set in two hex digits; the
// expected output 0x0F0F'0000 has exactly those two digits fully set and all
// other digits zero.
| 335 | +TEST_CASE( |
| 336 | + "BooleanSWAR MSBtoLaneMask", |
| 337 | + "[swar]" |
| 338 | +) { |
| 339 | + // BooleanSWAR as a mask: |
| 340 | + auto bswar =BooleanSWAR<4, u32>(0x0808'0000); |
| 341 | + auto mask = S4_32(0x0F0F'0000); |
| 342 | + CHECK(bswar.MSBtoLaneMask().value() == mask.value()); |
| 343 | +} |
| 344 | + |
// Compile-time check of fullAddition on S4_32 values.
// In 0x0111'1101 + 0x1000'0010 every hex digit position has exactly one operand
// digit equal to 1 and the other equal to 0, so the expected result is
// 0x1111'1111 with no carry and no overflow reported in any digit.
| 345 | +constexpr auto fullAddSumTest = fullAddition(S4_32(0x0111'1101), S4_32(0x1000'0010)); |
| 346 | +static_assert( S4_32(0x1111'1111).value() == fullAddSumTest.result.value()); |
| 347 | +static_assert( S4_32(0x0000'0000).value() == fullAddSumTest.carry.value()); |
| 348 | +static_assert( S4_32(0x0000'0000).value() == fullAddSumTest.overflow.value()); |
| 349 | + |
// Compile-time checks of saturatingUnsignedAddition on S4_16 values. Only the hex
// digit at bits 8..11 is non-zero in the operands; in order the lines cover
// 0+0=0, 1+1=2, 3+1=4, 3+7=A, 8+7=F (exactly the largest digit value, reached
// without clamping) and 8+8, whose true sum 0x10 does not fit in one hex digit
// and is expected to come back as F.
| 350 | +// Verify that saturation works (saturates and doesn't saturate as appropriate) |
| 351 | +static_assert( S4_16(0x0000).value() == saturatingUnsignedAddition(S4_16(0x0000), S4_16(0x0000)).value()); |
| 352 | +static_assert( S4_16(0x0200).value() == saturatingUnsignedAddition(S4_16(0x0100), S4_16(0x0100)).value()); |
| 353 | +static_assert( S4_16(0x0400).value() == saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0100)).value()); |
| 354 | +static_assert( S4_16(0x0A00).value() == saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0700)).value()); |
| 355 | +static_assert( S4_16(0x0F00).value() == saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0700)).value()); |
| 356 | +static_assert( S4_16(0x0F00).value() == saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0800)).value()); |
| 357 | + |
// Catch2 (runtime) test case for saturatingUnsignedAddition.
// The first five checks use SWAR<4, u16> with a single non-zero hex digit:
// 1+1=2, 1+3=4, 8+3=B, 8+7=F, and 8+8 expected to clamp to F.
// The last check uses S4_32 with several non-zero digits at once: 8+8 -> F,
// 4+8 -> C, F+F -> F, and the all-zero digits stay 0.
| 358 | +TEST_CASE( |
| 359 | + "saturatingUnsignedAddition", |
| 360 | + "[swar][saturation]" |
| 361 | +) { |
| 362 | + CHECK(SWAR<4, u16>(0x0200).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0100)).value()); |
| 363 | + CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); |
| 364 | + CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); |
| 365 | + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); |
| 366 | + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); |
| 367 | + CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); |
| 368 | +} |
0 commit comments