|
79 | 79 | // The magic number m for c is ⌈2^k/c⌉, so we can use |
80 | 80 | // (m+1)/2 = ⌈2^k/(c/2)⌉ instead. |
81 | 81 | // |
82 | | -// 8. An unsigned divide on systems with an avg instruction. |
| 82 | +// 8. A general unsigned divide using an avg instruction. |
83 | 83 | // We noted above that (x*((1<<N)+m))>>N>>s = ((x*m)>>N+x)>>s. |
84 | 84 | // Let hi = (x*m)>>N, so we want (hi+x) >> s = avg(hi, x) >> (s-1). |
85 | | -// |
86 | | -// 9. Unsigned 64-bit divide by 16-bit constant on 32-bit systems. |
87 | | -// Use long division with 16-bit digits. |
88 | | -// |
89 | | -// Note: All systems have Hmul and Avg except for wasm, and the |
90 | | -// wasm JITs may well apply all these optimizations already anyway, |
91 | | -// so it may be worth looking into avoiding this pass entirely on wasm |
92 | | -// and dropping all the useAvg useHmul uncertainty. |
93 | 85 |
|
94 | 86 | // Case 1. Signed divides where 2N ≤ register size. |
95 | 87 | (Div8 <t> x (Const8 [c])) && smagicOK8(c) => |
|
112 | 104 | (Rsh64x64 <t> (SignExt32to64 x) (Const64 <typ.UInt64> [63]))) |
113 | 105 |
|
114 | 106 | // Case 2. Signed divides where m is even. |
115 | | -(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 && config.useHmul => |
| 107 | +(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 => |
116 | 108 | (Sub32 <t> |
117 | 109 | (Rsh32x64 <t> |
118 | 110 | (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m/2)])) |
119 | 111 | (Const64 <typ.UInt64> [smagic32(c).s - 1])) |
120 | 112 | (Rsh32x64 <t> x (Const64 <typ.UInt64> [31]))) |
121 | | -(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul => |
| 113 | +(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 => |
122 | 114 | (Sub64 <t> |
123 | 115 | (Rsh64x64 <t> |
124 | 116 | (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)])) |
125 | 117 | (Const64 <typ.UInt64> [smagic64(c).s - 1])) |
126 | 118 | (Rsh64x64 <t> x (Const64 <typ.UInt64> [63]))) |
127 | 119 |
|
128 | 120 | // Case 3. Signed divides where m is odd. |
129 | | -(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 && config.useHmul => |
| 121 | +(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 => |
130 | 122 | (Sub32 <t> |
131 | 123 | (Rsh32x64 <t> |
132 | 124 | (Add32 <t> x (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m)]))) |
133 | 125 | (Const64 <typ.UInt64> [smagic32(c).s])) |
134 | 126 | (Rsh32x64 <t> x (Const64 <typ.UInt64> [31]))) |
135 | | -(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul => |
| 127 | +(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 => |
136 | 128 | (Sub64 <t> |
137 | 129 | (Rsh64x64 <t> |
138 | 130 | (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))) |
|
149 | 141 | (Rsh64Ux64 <t> |
150 | 142 | (Mul64 <typ.UInt64> (SignExt32to64 x) (Const64 <typ.UInt64> [int64(smagic32(c).m)])) |
151 | 143 | (Const64 <typ.UInt64> [32 + smagic32(c).s])) |
152 | | -(Div32u <t> x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 4 && config.useHmul => |
| 144 | +(Div32u <t> x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 4 => |
153 | 145 | (Rsh32Ux64 <t> |
154 | 146 | (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(smagic32(c).m)])) |
155 | 147 | (Const64 <typ.UInt64> [smagic32(c).s])) |
156 | | -(Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul => |
| 148 | +(Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) => |
157 | 149 | (Rsh64Ux64 <t> |
158 | 150 | (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])) |
159 | 151 | (Const64 <typ.UInt64> [smagic64(c).s])) |
|
181 | 173 | (Rsh64Ux64 <typ.UInt64> |
182 | 174 | (Mul64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [int64(1<<31 + umagic32(c).m/2)])) |
183 | 175 | (Const64 <typ.UInt64> [32 + umagic32(c).s - 1]))) |
184 | | -(Div32u <t> x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 && config.useHmul => |
| 176 | +(Div32u <t> x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 => |
185 | 177 | (Rsh32Ux64 <t> |
186 | 178 | (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(1<<31 + umagic32(c).m/2)])) |
187 | 179 | (Const64 <typ.UInt64> [umagic32(c).s - 1])) |
188 | | -(Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul => |
| 180 | +(Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 => |
189 | 181 | (Rsh64Ux64 <t> |
190 | 182 | (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)])) |
191 | 183 | (Const64 <typ.UInt64> [umagic64(c).s - 1])) |
|
205 | 197 | (Rsh64Ux64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [1])) |
206 | 198 | (Const64 <typ.UInt64> [int64(1<<31 + (umagic32(c).m+1)/2)])) |
207 | 199 | (Const64 <typ.UInt64> [32 + umagic32(c).s - 2]))) |
208 | | -(Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 && config.useHmul => |
| 200 | +(Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 => |
209 | 201 | (Rsh32Ux64 <t> |
210 | 202 | (Hmul32u <typ.UInt32> |
211 | 203 | (Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1])) |
212 | 204 | (Const32 <typ.UInt32> [int32(1<<31 + (umagic32(c).m+1)/2)])) |
213 | 205 | (Const64 <typ.UInt64> [umagic32(c).s - 2])) |
214 | | -(Div64u <t> x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul => |
| 206 | +(Div64u <t> x (Const64 [c])) && umagicOK64(c) && c&1 == 0 => |
215 | 207 | (Rsh64Ux64 <t> |
216 | 208 | (Hmul64u <typ.UInt64> |
217 | 209 | (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1])) |
218 | 210 | (Const64 <typ.UInt64> [int64(1<<63 + (umagic64(c).m+1)/2)])) |
219 | 211 | (Const64 <typ.UInt64> [umagic64(c).s - 2])) |
220 | 212 |
|
221 | | -// Case 8. Unsigned divide on systems with avg. |
222 | | -(Div16u <t> x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && config.useAvg => |
| 213 | +// Case 8. Unsigned divide using avg. |
| 214 | +(Div16u <t> x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 => |
223 | 215 | (Trunc32to16 <t> |
224 | 216 | (Rsh32Ux64 <typ.UInt32> |
225 | 217 | (Avg32u |
226 | 218 | (Lsh32x64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [16])) |
227 | 219 | (Mul32 <typ.UInt32> (ZeroExt16to32 x) (Const32 <typ.UInt32> [int32(umagic16(c).m)]))) |
228 | 220 | (Const64 <typ.UInt64> [16 + umagic16(c).s - 1]))) |
229 | | -(Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && config.useAvg =>
| 221 | +(Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 =>
230 | 222 | (Trunc64to32 <t>
231 | 223 | (Rsh64Ux64 <typ.UInt64>
232 | 224 | (Avg64u
233 | 225 | (Lsh64x64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [32]))
234 | 226 | (Mul64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [int64(umagic32(c).m)]))) |
235 | 227 | (Const64 <typ.UInt64> [32 + umagic32(c).s - 1]))) |
236 | | -(Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && config.useAvg && config.useHmul => |
| 228 | +(Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 => |
237 | 229 | (Rsh32Ux64 <t> |
238 | 230 | (Avg32u x (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(umagic32(c).m)]))) |
239 | 231 | (Const64 <typ.UInt64> [umagic32(c).s - 1])) |
240 | | -(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul => |
| 232 | +(Div64u <t> x (Const64 [c])) && umagicOK64(c) => |
241 | 233 | (Rsh64Ux64 <t> |
242 | 234 | (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)]))) |
243 | 235 | (Const64 <typ.UInt64> [umagic64(c).s - 1])) |
0 commit comments