Skip to content

Commit 9a68fa1

Browse files
ngzhian and Commit Bot
authored and committed
[wasm-simd] Prototype f32x4 and f64x2 pmin and pmax
This patch implements f32x4.pmin, f32x4.pmax, f64x2.pmin, and f64x2.pmax for x64 and the interpreter. Pseudo-min and pseudo-max instructions were proposed in WebAssembly/simd#122. These instructions exactly match std::min and std::max in the C++ STL, and thus have different semantics from the existing min and max. The instruction selector for x64 swaps the operands, because that allows dst to be defined as same-as-first (where the instruction's first operand is really the node's second input), which gives better codegen. For example, b = f32x4.pmin(a, b) maps directly to vminps(b, b, a) or minps(b, a), as long as we can define dst == b, and swapping the instruction operands allows us to do that. Bug: v8:10501 Change-Id: I06f983fc1764caf673e600ac91d9c0ac5166e17e Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2186630 Commit-Queue: Zhi An Ng <zhin@chromium.org> Reviewed-by: Tobias Tebbi <tebbi@chromium.org> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Cr-Commit-Position: refs/heads/master@{#67688}
1 parent 6adf7e8 commit 9a68fa1

13 files changed

+146
-3
lines changed

src/compiler/backend/instruction-selector.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,6 +1884,10 @@ void InstructionSelector::VisitNode(Node* node) {
18841884
return MarkAsSimd128(node), VisitF64x2Qfma(node);
18851885
case IrOpcode::kF64x2Qfms:
18861886
return MarkAsSimd128(node), VisitF64x2Qfms(node);
1887+
case IrOpcode::kF64x2Pmin:
1888+
return MarkAsSimd128(node), VisitF64x2Pmin(node);
1889+
case IrOpcode::kF64x2Pmax:
1890+
return MarkAsSimd128(node), VisitF64x2Pmax(node);
18871891
case IrOpcode::kF32x4Splat:
18881892
return MarkAsSimd128(node), VisitF32x4Splat(node);
18891893
case IrOpcode::kF32x4ExtractLane:
@@ -1930,6 +1934,10 @@ void InstructionSelector::VisitNode(Node* node) {
19301934
return MarkAsSimd128(node), VisitF32x4Qfma(node);
19311935
case IrOpcode::kF32x4Qfms:
19321936
return MarkAsSimd128(node), VisitF32x4Qfms(node);
1937+
case IrOpcode::kF32x4Pmin:
1938+
return MarkAsSimd128(node), VisitF32x4Pmin(node);
1939+
case IrOpcode::kF32x4Pmax:
1940+
return MarkAsSimd128(node), VisitF32x4Pmax(node);
19331941
case IrOpcode::kI64x2Splat:
19341942
return MarkAsSimd128(node), VisitI64x2Splat(node);
19351943
case IrOpcode::kI64x2SplatI32Pair:
@@ -2653,6 +2661,14 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
26532661
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
26542662
// && !V8_TARGET_ARCH_X64
26552663

2664+
// TODO(v8:10501) Prototyping pmin and pmax instructions.
// Targets other than x64 get the UNIMPLEMENTED() stubs below for the four
// pseudo-min/max selectors.
2665+
#if !V8_TARGET_ARCH_X64
2666+
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
2667+
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
2668+
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
2669+
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
2670+
#endif // !V8_TARGET_ARCH_X64
2671+
26562672
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
26572673

26582674
void InstructionSelector::VisitParameter(Node* node) {

src/compiler/backend/x64/code-generator-x64.cc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2607,6 +2607,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
26072607
}
26082608
break;
26092609
}
2610+
case kX64F32x4Pmin: {
2611+
XMMRegister dst = i.OutputSimd128Register();
2612+
DCHECK_EQ(dst, i.InputSimd128Register(0));
2613+
__ Minps(dst, i.InputSimd128Register(1));
2614+
break;
2615+
}
2616+
case kX64F32x4Pmax: {
2617+
XMMRegister dst = i.OutputSimd128Register();
2618+
DCHECK_EQ(dst, i.InputSimd128Register(0));
2619+
__ Maxps(dst, i.InputSimd128Register(1));
2620+
break;
2621+
}
2622+
case kX64F64x2Pmin: {
2623+
XMMRegister dst = i.OutputSimd128Register();
2624+
DCHECK_EQ(dst, i.InputSimd128Register(0));
2625+
__ Minpd(dst, i.InputSimd128Register(1));
2626+
break;
2627+
}
2628+
case kX64F64x2Pmax: {
2629+
XMMRegister dst = i.OutputSimd128Register();
2630+
DCHECK_EQ(dst, i.InputSimd128Register(0));
2631+
__ Maxpd(dst, i.InputSimd128Register(1));
2632+
break;
2633+
}
26102634
case kX64I64x2Splat: {
26112635
XMMRegister dst = i.OutputSimd128Register();
26122636
if (HasRegisterInput(instr, 0)) {

src/compiler/backend/x64/instruction-codes-x64.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ namespace compiler {
172172
V(X64F64x2Le) \
173173
V(X64F64x2Qfma) \
174174
V(X64F64x2Qfms) \
175+
V(X64F64x2Pmin) \
176+
V(X64F64x2Pmax) \
175177
V(X64F32x4Splat) \
176178
V(X64F32x4ExtractLane) \
177179
V(X64F32x4ReplaceLane) \
@@ -195,6 +197,8 @@ namespace compiler {
195197
V(X64F32x4Le) \
196198
V(X64F32x4Qfma) \
197199
V(X64F32x4Qfms) \
200+
V(X64F32x4Pmin) \
201+
V(X64F32x4Pmax) \
198202
V(X64I64x2Splat) \
199203
V(X64I64x2ExtractLane) \
200204
V(X64I64x2ReplaceLane) \

src/compiler/backend/x64/instruction-scheduler-x64.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
144144
case kX64F64x2Le:
145145
case kX64F64x2Qfma:
146146
case kX64F64x2Qfms:
147+
case kX64F64x2Pmin:
148+
case kX64F64x2Pmax:
147149
case kX64F32x4Splat:
148150
case kX64F32x4ExtractLane:
149151
case kX64F32x4ReplaceLane:
@@ -167,6 +169,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
167169
case kX64F32x4Le:
168170
case kX64F32x4Qfma:
169171
case kX64F32x4Qfms:
172+
case kX64F32x4Pmin:
173+
case kX64F32x4Pmax:
170174
case kX64I64x2Splat:
171175
case kX64I64x2ExtractLane:
172176
case kX64I64x2ReplaceLane:

src/compiler/backend/x64/instruction-selector-x64.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3380,6 +3380,34 @@ void InstructionSelector::VisitS8x16Swizzle(Node* node) {
33803380
arraysize(temps), temps);
33813381
}
33823382

3383+
namespace {
3384+
// Emits |opcode| for a two-input pmin/pmax |node| with the node's inputs
// swapped: input 1 becomes the instruction's first operand and input 0 its
// second operand.
void VisitPminOrPmax(InstructionSelector* selector, Node* node,
3385+
ArchOpcode opcode) {
3386+
// Due to the way minps/minpd work, we want the dst to be the same as the
3387+
// node's second input: b = pmin(a, b) directly maps to minps b a.
// The output is defined same-as-first, so passing InputAt(1) first is what
// makes dst alias the node's second input.
3388+
X64OperandGenerator g(selector);
3389+
selector->Emit(opcode, g.DefineSameAsFirst(node),
3390+
g.UseRegister(node->InputAt(1)),
3391+
g.UseRegister(node->InputAt(0)));
3392+
}
3393+
} // namespace
3394+
3395+
// x64 selectors for the four pseudo-min/max operations. Each one forwards to
// VisitPminOrPmax with the matching kX64* arch opcode.
void InstructionSelector::VisitF32x4Pmin(Node* node) {
3396+
VisitPminOrPmax(this, node, kX64F32x4Pmin);
3397+
}
3398+
3399+
void InstructionSelector::VisitF32x4Pmax(Node* node) {
3400+
VisitPminOrPmax(this, node, kX64F32x4Pmax);
3401+
}
3402+
3403+
void InstructionSelector::VisitF64x2Pmin(Node* node) {
3404+
VisitPminOrPmax(this, node, kX64F64x2Pmin);
3405+
}
3406+
3407+
void InstructionSelector::VisitF64x2Pmax(Node* node) {
3408+
VisitPminOrPmax(this, node, kX64F64x2Pmax);
3409+
}
3410+
33833411
// static
33843412
MachineOperatorBuilder::Flags
33853413
InstructionSelector::SupportedMachineOperatorFlags() {

src/compiler/machine-operator.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
337337
V(F64x2Le, Operator::kNoProperties, 2, 0, 1) \
338338
V(F64x2Qfma, Operator::kNoProperties, 3, 0, 1) \
339339
V(F64x2Qfms, Operator::kNoProperties, 3, 0, 1) \
340+
V(F64x2Pmin, Operator::kNoProperties, 2, 0, 1) \
341+
V(F64x2Pmax, Operator::kNoProperties, 2, 0, 1) \
340342
V(F32x4Splat, Operator::kNoProperties, 1, 0, 1) \
341343
V(F32x4SConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
342344
V(F32x4UConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
@@ -358,6 +360,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
358360
V(F32x4Le, Operator::kNoProperties, 2, 0, 1) \
359361
V(F32x4Qfma, Operator::kNoProperties, 3, 0, 1) \
360362
V(F32x4Qfms, Operator::kNoProperties, 3, 0, 1) \
363+
V(F32x4Pmin, Operator::kNoProperties, 2, 0, 1) \
364+
V(F32x4Pmax, Operator::kNoProperties, 2, 0, 1) \
361365
V(I64x2Splat, Operator::kNoProperties, 1, 0, 1) \
362366
V(I64x2SplatI32Pair, Operator::kNoProperties, 2, 0, 1) \
363367
V(I64x2Neg, Operator::kNoProperties, 1, 0, 1) \

src/compiler/machine-operator.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
574574
const Operator* F64x2Le();
575575
const Operator* F64x2Qfma();
576576
const Operator* F64x2Qfms();
577+
const Operator* F64x2Pmin();
578+
const Operator* F64x2Pmax();
577579

578580
const Operator* F32x4Splat();
579581
const Operator* F32x4ExtractLane(int32_t);
@@ -598,6 +600,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
598600
const Operator* F32x4Le();
599601
const Operator* F32x4Qfma();
600602
const Operator* F32x4Qfms();
603+
const Operator* F32x4Pmin();
604+
const Operator* F32x4Pmax();
601605

602606
const Operator* I64x2Splat();
603607
const Operator* I64x2SplatI32Pair();

src/compiler/opcodes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,8 @@
763763
V(F64x2Le) \
764764
V(F64x2Qfma) \
765765
V(F64x2Qfms) \
766+
V(F64x2Pmin) \
767+
V(F64x2Pmax) \
766768
V(F32x4Splat) \
767769
V(F32x4ExtractLane) \
768770
V(F32x4ReplaceLane) \
@@ -788,6 +790,8 @@
788790
V(F32x4Ge) \
789791
V(F32x4Qfma) \
790792
V(F32x4Qfms) \
793+
V(F32x4Pmin) \
794+
V(F32x4Pmax) \
791795
V(I64x2Splat) \
792796
V(I64x2SplatI32Pair) \
793797
V(I64x2ExtractLane) \

src/compiler/wasm-compiler.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4128,6 +4128,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
41284128
case wasm::kExprF64x2Qfms:
41294129
return graph()->NewNode(mcgraph()->machine()->F64x2Qfms(), inputs[0],
41304130
inputs[1], inputs[2]);
4131+
case wasm::kExprF64x2Pmin:
4132+
return graph()->NewNode(mcgraph()->machine()->F64x2Pmin(), inputs[0],
4133+
inputs[1]);
4134+
case wasm::kExprF64x2Pmax:
4135+
return graph()->NewNode(mcgraph()->machine()->F64x2Pmax(), inputs[0],
4136+
inputs[1]);
41314137
case wasm::kExprF32x4Splat:
41324138
return graph()->NewNode(mcgraph()->machine()->F32x4Splat(), inputs[0]);
41334139
case wasm::kExprF32x4SConvertI32x4:
@@ -4193,6 +4199,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
41934199
case wasm::kExprF32x4Qfms:
41944200
return graph()->NewNode(mcgraph()->machine()->F32x4Qfms(), inputs[0],
41954201
inputs[1], inputs[2]);
4202+
case wasm::kExprF32x4Pmin:
4203+
return graph()->NewNode(mcgraph()->machine()->F32x4Pmin(), inputs[0],
4204+
inputs[1]);
4205+
case wasm::kExprF32x4Pmax:
4206+
return graph()->NewNode(mcgraph()->machine()->F32x4Pmax(), inputs[0],
4207+
inputs[1]);
41964208
case wasm::kExprI64x2Splat:
41974209
return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
41984210
case wasm::kExprI64x2Neg:

src/wasm/wasm-interpreter.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2299,12 +2299,16 @@ class ThreadImpl {
22992299
BINOP_CASE(F64x2Div, f64x2, float2, 2, base::Divide(a, b))
23002300
BINOP_CASE(F64x2Min, f64x2, float2, 2, JSMin(a, b))
23012301
BINOP_CASE(F64x2Max, f64x2, float2, 2, JSMax(a, b))
2302+
BINOP_CASE(F64x2Pmin, f64x2, float2, 2, std::min(a, b))
2303+
BINOP_CASE(F64x2Pmax, f64x2, float2, 2, std::max(a, b))
23022304
BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
23032305
BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
23042306
BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)
23052307
BINOP_CASE(F32x4Div, f32x4, float4, 4, a / b)
23062308
BINOP_CASE(F32x4Min, f32x4, float4, 4, JSMin(a, b))
23072309
BINOP_CASE(F32x4Max, f32x4, float4, 4, JSMax(a, b))
2310+
BINOP_CASE(F32x4Pmin, f32x4, float4, 4, std::min(a, b))
2311+
BINOP_CASE(F32x4Pmax, f32x4, float4, 4, std::max(a, b))
23082312
BINOP_CASE(I64x2Add, i64x2, int2, 2, base::AddWithWraparound(a, b))
23092313
BINOP_CASE(I64x2Sub, i64x2, int2, 2, base::SubWithWraparound(a, b))
23102314
BINOP_CASE(I64x2Mul, i64x2, int2, 2, base::MulWithWraparound(a, b))

0 commit comments

Comments
 (0)