11/*
2- * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
2+ * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
33 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44 *
55 * This code is free software; you can redistribute it and/or modify it
2424 */
2525package jdk .graal .compiler .lir .amd64 ;
2626
27+ import static jdk .graal .compiler .lir .amd64 .AMD64LIRHelper .pointerConstant ;
28+ import static jdk .graal .compiler .lir .amd64 .AMD64LIRHelper .recordExternalAddress ;
2729import static jdk .vm .ci .amd64 .AMD64 .xmm0 ;
2830import static jdk .vm .ci .amd64 .AMD64 .xmm1 ;
31+ import static jdk .vm .ci .amd64 .AMD64 .xmm10 ;
32+ import static jdk .vm .ci .amd64 .AMD64 .xmm11 ;
33+ import static jdk .vm .ci .amd64 .AMD64 .xmm12 ;
34+ import static jdk .vm .ci .amd64 .AMD64 .xmm13 ;
35+ import static jdk .vm .ci .amd64 .AMD64 .xmm14 ;
36+ import static jdk .vm .ci .amd64 .AMD64 .xmm15 ;
2937import static jdk .vm .ci .amd64 .AMD64 .xmm2 ;
3038import static jdk .vm .ci .amd64 .AMD64 .xmm3 ;
3139import static jdk .vm .ci .amd64 .AMD64 .xmm4 ;
3543import static jdk .vm .ci .amd64 .AMD64 .xmm8 ;
3644import static jdk .vm .ci .amd64 .AMD64 .xmm9 ;
3745import static jdk .vm .ci .code .ValueUtil .asRegister ;
38- import static jdk .graal .compiler .lir .amd64 .AMD64LIRHelper .pointerConstant ;
39- import static jdk .graal .compiler .lir .amd64 .AMD64LIRHelper .recordExternalAddress ;
4046
4147import jdk .graal .compiler .asm .Label ;
4248import jdk .graal .compiler .asm .amd64 .AMD64Address ;
4349import jdk .graal .compiler .asm .amd64 .AMD64Assembler .ConditionFlag ;
4450import jdk .graal .compiler .asm .amd64 .AMD64MacroAssembler ;
51+ import jdk .graal .compiler .core .amd64 .AMD64LIRGenerator ;
4552import jdk .graal .compiler .debug .GraalError ;
4653import jdk .graal .compiler .lir .LIRInstructionClass ;
4754import jdk .graal .compiler .lir .SyncPort ;
4855import jdk .graal .compiler .lir .asm .ArrayDataPointerConstant ;
4956import jdk .graal .compiler .lir .asm .CompilationResultBuilder ;
50- import jdk .graal .compiler .lir .gen .LIRGeneratorTool ;
51-
57+ import jdk .vm .ci .amd64 .AMD64 .CPUFeature ;
5258import jdk .vm .ci .amd64 .AMD64Kind ;
5359import jdk .vm .ci .code .Register ;
5460import jdk .vm .ci .meta .AllocatableValue ;
@@ -76,11 +82,11 @@ public final class AMD64SHA1Op extends AMD64LIRInstruction {
7682 @ Temp ({OperandFlag .REG }) private Value [] temps ;
7783 private final boolean multiBlock ;
7884
79- public AMD64SHA1Op (LIRGeneratorTool tool , AllocatableValue bufValue , AllocatableValue stateValue ) {
85+ public AMD64SHA1Op (AMD64LIRGenerator tool , AllocatableValue bufValue , AllocatableValue stateValue ) {
8086 this (tool , bufValue , stateValue , Value .ILLEGAL , Value .ILLEGAL , Value .ILLEGAL , false );
8187 }
8288
83- public AMD64SHA1Op (LIRGeneratorTool tool , AllocatableValue bufValue , AllocatableValue stateValue , AllocatableValue ofsValue ,
89+ public AMD64SHA1Op (AMD64LIRGenerator tool , AllocatableValue bufValue , AllocatableValue stateValue , AllocatableValue ofsValue ,
8490 AllocatableValue limitValue , AllocatableValue resultValue , boolean multiBlock ) {
8591 super (TYPE );
8692
@@ -92,18 +98,40 @@ public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatable
9298
9399 this .multiBlock = multiBlock ;
94100
95- this .temps = new Value []{
96- xmm0 .asValue (),
97- xmm1 .asValue (),
98- xmm2 .asValue (),
99- xmm3 .asValue (),
100- xmm4 .asValue (),
101- xmm5 .asValue (),
102- xmm6 .asValue (),
103- xmm7 .asValue (),
104- xmm8 .asValue (),
105- xmm9 .asValue (),
106- };
101+ if (tool .supportsCPUFeature (CPUFeature .AVX )) {
102+ // vzeroupper zeroes bits 128 and above of ymm0-ymm15, so all of xmm0-xmm15 are listed as temps
103+ this .temps = new Value []{
104+ xmm0 .asValue (),
105+ xmm1 .asValue (),
106+ xmm2 .asValue (),
107+ xmm3 .asValue (),
108+ xmm4 .asValue (),
109+ xmm5 .asValue (),
110+ xmm6 .asValue (),
111+ xmm7 .asValue (),
112+ xmm8 .asValue (),
113+ xmm9 .asValue (),
114+ xmm10 .asValue (),
115+ xmm11 .asValue (),
116+ xmm12 .asValue (),
117+ xmm13 .asValue (),
118+ xmm14 .asValue (),
119+ xmm15 .asValue (),
120+ };
121+ } else {
122+ this .temps = new Value []{
123+ xmm0 .asValue (),
124+ xmm1 .asValue (),
125+ xmm2 .asValue (),
126+ xmm3 .asValue (),
127+ xmm4 .asValue (),
128+ xmm5 .asValue (),
129+ xmm6 .asValue (),
130+ xmm7 .asValue (),
131+ xmm8 .asValue (),
132+ xmm9 .asValue (),
133+ };
134+ }
107135
108136 if (multiBlock ) {
109137 this .bufTempValue = tool .newVariable (bufValue .getValueKind ());
@@ -168,6 +196,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
168196 Label labelDoneHash = new Label ();
169197 Label labelLoop0 = new Label ();
170198
199+ if (masm .supports (CPUFeature .AVX )) {
200+ // Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
201+ // previously executed AVX instructions and the following SHA-1 instructions.
202+ masm .vzeroupper ();
203+ }
204+
171205 masm .movdqu (abcd , new AMD64Address (state , 0 ));
172206 masm .pinsrd (e0 , new AMD64Address (state , 16 ), 3 );
173207 masm .movdqu (shufMask , recordExternalAddress (crb , upperWordMask ));
0 commit comments