xref: /llvm-project/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp (revision 3a3377579f137a0a6e14b60d891a9736707e7e8d)
1 //===- ExpandPatterns.cpp - Code to expand various math operations. -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements expansion of various math operations.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Dialect/Arith/IR/Arith.h"
14 #include "mlir/Dialect/Math/IR/Math.h"
15 #include "mlir/Dialect/Math/Transforms/Passes.h"
16 #include "mlir/Dialect/SCF/IR/SCF.h"
17 #include "mlir/Dialect/Vector/IR/VectorOps.h"
18 #include "mlir/IR/Builders.h"
19 #include "mlir/IR/ImplicitLocOpBuilder.h"
20 #include "mlir/IR/TypeUtilities.h"
21 #include "mlir/Transforms/DialectConversion.h"
22 
23 using namespace mlir;
24 
25 /// Create a float constant.
26 static Value createFloatConst(Location loc, Type type, APFloat value,
27                               OpBuilder &b) {
28   bool losesInfo = false;
29   auto eltType = getElementTypeOrSelf(type);
30   // Convert double to the given `FloatType` with round-to-nearest-ties-to-even.
31   value.convert(cast<FloatType>(eltType).getFloatSemantics(),
32                 APFloat::rmNearestTiesToEven, &losesInfo);
33   auto attr = b.getFloatAttr(eltType, value);
34   if (auto shapedTy = dyn_cast<ShapedType>(type)) {
35     return b.create<arith::ConstantOp>(loc,
36                                        DenseElementsAttr::get(shapedTy, attr));
37   }
38 
39   return b.create<arith::ConstantOp>(loc, attr);
40 }
41 
42 static Value createFloatConst(Location loc, Type type, double value,
43                               OpBuilder &b) {
44   return createFloatConst(loc, type, APFloat(value), b);
45 }
46 
47 /// Create an integer constant.
48 static Value createIntConst(Location loc, Type type, int64_t value,
49                             OpBuilder &b) {
50   auto attr = b.getIntegerAttr(getElementTypeOrSelf(type), value);
51   if (auto shapedTy = dyn_cast<ShapedType>(type)) {
52     return b.create<arith::ConstantOp>(loc,
53                                        DenseElementsAttr::get(shapedTy, attr));
54   }
55 
56   return b.create<arith::ConstantOp>(loc, attr);
57 }
58 
59 static Value createTruncatedFPValue(Value operand, ImplicitLocOpBuilder &b) {
60   Type opType = operand.getType();
61   Type i64Ty = b.getI64Type();
62   if (auto shapedTy = dyn_cast<ShapedType>(opType))
63     i64Ty = shapedTy.clone(i64Ty);
64   Value fixedConvert = b.create<arith::FPToSIOp>(i64Ty, operand);
65   Value fpFixedConvert = b.create<arith::SIToFPOp>(opType, fixedConvert);
66   // The truncation does not preserve the sign when the truncated
67   // value is -0. So here the sign is copied again.
68   return b.create<math::CopySignOp>(fpFixedConvert, operand);
69 }
70 
71 // sinhf(float x) -> (exp(x) - exp(-x)) / 2
72 static LogicalResult convertSinhOp(math::SinhOp op, PatternRewriter &rewriter) {
73   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
74   Value operand = op.getOperand();
75   Type opType = operand.getType();
76 
77   Value exp = b.create<math::ExpOp>(operand);
78   Value neg = b.create<arith::NegFOp>(operand);
79   Value nexp = b.create<math::ExpOp>(neg);
80   Value sub = b.create<arith::SubFOp>(exp, nexp);
81   Value half = createFloatConst(op->getLoc(), opType, 0.5, rewriter);
82   Value res = b.create<arith::MulFOp>(sub, half);
83   rewriter.replaceOp(op, res);
84   return success();
85 }
86 
87 // coshf(float x) -> (exp(x) + exp(-x)) / 2
88 static LogicalResult convertCoshOp(math::CoshOp op, PatternRewriter &rewriter) {
89   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
90   Value operand = op.getOperand();
91   Type opType = operand.getType();
92 
93   Value exp = b.create<math::ExpOp>(operand);
94   Value neg = b.create<arith::NegFOp>(operand);
95   Value nexp = b.create<math::ExpOp>(neg);
96   Value add = b.create<arith::AddFOp>(exp, nexp);
97   Value half = createFloatConst(op->getLoc(), opType, 0.5, rewriter);
98   Value res = b.create<arith::MulFOp>(add, half);
99   rewriter.replaceOp(op, res);
100   return success();
101 }
102 
103 /// Expands tanh op into
104 /// 1-exp^{-2x} / 1+exp^{-2x}
105 /// To avoid overflow we exploit the reflection symmetry `tanh(-x) = -tanh(x)`.
106 /// We compute a "signs" value which is -1 if input is negative and +1 if input
107 /// is positive.  Then multiply the input by this value, guaranteeing that the
108 /// result is positive, which also guarantees `exp^{-2x * sign(x)}` is in (0,
109 /// 1]. Expand the computation on the input `x * sign(x)`, then multiply the
110 /// result by `sign(x)` to retain sign of the real result.
111 static LogicalResult convertTanhOp(math::TanhOp op, PatternRewriter &rewriter) {
112   auto floatType = op.getOperand().getType();
113   Location loc = op.getLoc();
114   Value zero = createFloatConst(loc, floatType, 0.0, rewriter);
115   Value one = createFloatConst(loc, floatType, 1.0, rewriter);
116   Value negTwo = createFloatConst(loc, floatType, -2.0, rewriter);
117 
118   // Compute sign(x) = cast<float_type>(x < 0) * (-2) + 1
119   Value isNegative = rewriter.create<arith::CmpFOp>(
120       loc, arith::CmpFPredicate::OLT, op.getOperand(), zero);
121   Value isNegativeFloat =
122       rewriter.create<arith::UIToFPOp>(loc, floatType, isNegative);
123   Value isNegativeTimesNegTwo =
124       rewriter.create<arith::MulFOp>(loc, isNegativeFloat, negTwo);
125   Value sign = rewriter.create<arith::AddFOp>(loc, isNegativeTimesNegTwo, one);
126 
127   // Normalize input to positive value: y = sign(x) * x
128   Value positiveX = rewriter.create<arith::MulFOp>(loc, sign, op.getOperand());
129 
130   // Decompose on normalized input
131   Value negDoubledX = rewriter.create<arith::MulFOp>(loc, negTwo, positiveX);
132   Value exp2x = rewriter.create<math::ExpOp>(loc, negDoubledX);
133   Value dividend = rewriter.create<arith::SubFOp>(loc, one, exp2x);
134   Value divisor = rewriter.create<arith::AddFOp>(loc, one, exp2x);
135   Value positiveRes = rewriter.create<arith::DivFOp>(loc, dividend, divisor);
136 
137   // Multiply result by sign(x) to retain signs from negative inputs
138   rewriter.replaceOpWithNewOp<arith::MulFOp>(op, sign, positiveRes);
139 
140   return success();
141 }
142 
143 // Converts math.tan to math.sin, math.cos, and arith.divf.
144 static LogicalResult convertTanOp(math::TanOp op, PatternRewriter &rewriter) {
145   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
146   Value operand = op.getOperand();
147   Type type = operand.getType();
148   Value sin = b.create<math::SinOp>(type, operand);
149   Value cos = b.create<math::CosOp>(type, operand);
150   Value div = b.create<arith::DivFOp>(type, sin, cos);
151   rewriter.replaceOp(op, div);
152   return success();
153 }
154 
155 // asinh(float x) -> log(x + sqrt(x**2 + 1))
156 static LogicalResult convertAsinhOp(math::AsinhOp op,
157                                     PatternRewriter &rewriter) {
158   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
159   Value operand = op.getOperand();
160   Type opType = operand.getType();
161 
162   Value one = createFloatConst(op->getLoc(), opType, 1.0, rewriter);
163   Value fma = b.create<math::FmaOp>(operand, operand, one);
164   Value sqrt = b.create<math::SqrtOp>(fma);
165   Value add = b.create<arith::AddFOp>(operand, sqrt);
166   Value res = b.create<math::LogOp>(add);
167   rewriter.replaceOp(op, res);
168   return success();
169 }
170 
171 // acosh(float x) -> log(x + sqrt(x**2 - 1))
172 static LogicalResult convertAcoshOp(math::AcoshOp op,
173                                     PatternRewriter &rewriter) {
174   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
175   Value operand = op.getOperand();
176   Type opType = operand.getType();
177 
178   Value negOne = createFloatConst(op->getLoc(), opType, -1.0, rewriter);
179   Value fma = b.create<math::FmaOp>(operand, operand, negOne);
180   Value sqrt = b.create<math::SqrtOp>(fma);
181   Value add = b.create<arith::AddFOp>(operand, sqrt);
182   Value res = b.create<math::LogOp>(add);
183   rewriter.replaceOp(op, res);
184   return success();
185 }
186 
187 // atanh(float x) -> log((1 + x) / (1 - x)) / 2
188 static LogicalResult convertAtanhOp(math::AtanhOp op,
189                                     PatternRewriter &rewriter) {
190   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
191   Value operand = op.getOperand();
192   Type opType = operand.getType();
193 
194   Value one = createFloatConst(op->getLoc(), opType, 1.0, rewriter);
195   Value add = b.create<arith::AddFOp>(operand, one);
196   Value neg = b.create<arith::NegFOp>(operand);
197   Value sub = b.create<arith::AddFOp>(neg, one);
198   Value div = b.create<arith::DivFOp>(add, sub);
199   Value log = b.create<math::LogOp>(div);
200   Value half = createFloatConst(op->getLoc(), opType, 0.5, rewriter);
201   Value res = b.create<arith::MulFOp>(log, half);
202   rewriter.replaceOp(op, res);
203   return success();
204 }
205 
206 static LogicalResult convertFmaFOp(math::FmaOp op, PatternRewriter &rewriter) {
207   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
208   Value operandA = op.getOperand(0);
209   Value operandB = op.getOperand(1);
210   Value operandC = op.getOperand(2);
211   Type type = op.getType();
212   Value mult = b.create<arith::MulFOp>(type, operandA, operandB);
213   Value add = b.create<arith::AddFOp>(type, mult, operandC);
214   rewriter.replaceOp(op, add);
215   return success();
216 }
217 
218 // Converts a ceilf() function to the following:
219 // ceilf(float x) ->
220 //      y = (float)(int) x
221 //      if (x > y) then incr = 1 else incr = 0
222 //      y = y + incr   <= replace this op with the ceilf op.
223 static LogicalResult convertCeilOp(math::CeilOp op, PatternRewriter &rewriter) {
224   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
225   Value operand = op.getOperand();
226   Type opType = operand.getType();
227   Value fpFixedConvert = createTruncatedFPValue(operand, b);
228 
229   // Creating constants for later use.
230   Value zero = createFloatConst(op->getLoc(), opType, 0.00, rewriter);
231   Value one = createFloatConst(op->getLoc(), opType, 1.00, rewriter);
232 
233   Value gtCheck = b.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, operand,
234                                           fpFixedConvert);
235   Value incrValue = b.create<arith::SelectOp>(op->getLoc(), gtCheck, one, zero);
236 
237   Value ret = b.create<arith::AddFOp>(opType, fpFixedConvert, incrValue);
238   rewriter.replaceOp(op, ret);
239   return success();
240 }
241 
242 // Convert `math.fpowi` to a series of `arith.mulf` operations.
243 // If the power is negative, we divide one by the result.
244 // If both the base and power are zero, the result is 1.
245 // In the case of non constant power, we convert the operation to `math.powf`.
246 static LogicalResult convertFPowIOp(math::FPowIOp op,
247                                     PatternRewriter &rewriter) {
248   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
249   Value base = op.getOperand(0);
250   Value power = op.getOperand(1);
251   Type baseType = base.getType();
252 
253   auto convertFPowItoPowf = [&]() -> LogicalResult {
254     Value castPowerToFp =
255         rewriter.create<arith::SIToFPOp>(op.getLoc(), baseType, power);
256     Value res = rewriter.create<math::PowFOp>(op.getLoc(), baseType, base,
257                                               castPowerToFp);
258     rewriter.replaceOp(op, res);
259     return success();
260   };
261 
262   Attribute cstAttr;
263   if (!matchPattern(power, m_Constant(&cstAttr)))
264     return convertFPowItoPowf();
265 
266   APInt value;
267   if (!matchPattern(cstAttr, m_ConstantInt(&value)))
268     return convertFPowItoPowf();
269 
270   int64_t powerInt = value.getSExtValue();
271   bool isNegative = powerInt < 0;
272   int64_t absPower = std::abs(powerInt);
273   Value one = createFloatConst(op->getLoc(), baseType, 1.00, rewriter);
274   Value res = createFloatConst(op->getLoc(), baseType, 1.00, rewriter);
275 
276   while (absPower > 0) {
277     if (absPower & 1)
278       res = b.create<arith::MulFOp>(baseType, base, res);
279     absPower >>= 1;
280     base = b.create<arith::MulFOp>(baseType, base, base);
281   }
282 
283   // Make sure not to introduce UB in case of negative power.
284   if (isNegative) {
285     auto &sem = dyn_cast<mlir::FloatType>(getElementTypeOrSelf(baseType))
286                     .getFloatSemantics();
287     Value zero =
288         createFloatConst(op->getLoc(), baseType,
289                          APFloat::getZero(sem, /*Negative=*/false), rewriter);
290     Value negZero =
291         createFloatConst(op->getLoc(), baseType,
292                          APFloat::getZero(sem, /*Negative=*/true), rewriter);
293     Value posInfinity =
294         createFloatConst(op->getLoc(), baseType,
295                          APFloat::getInf(sem, /*Negative=*/false), rewriter);
296     Value negInfinity =
297         createFloatConst(op->getLoc(), baseType,
298                          APFloat::getInf(sem, /*Negative=*/true), rewriter);
299     Value zeroEqCheck =
300         b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, res, zero);
301     Value negZeroEqCheck =
302         b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, res, negZero);
303     res = b.create<arith::DivFOp>(baseType, one, res);
304     res =
305         b.create<arith::SelectOp>(op->getLoc(), zeroEqCheck, posInfinity, res);
306     res = b.create<arith::SelectOp>(op->getLoc(), negZeroEqCheck, negInfinity,
307                                     res);
308   }
309 
310   rewriter.replaceOp(op, res);
311   return success();
312 }
313 
314 // Converts Powf(float a, float b) (meaning a^b) to exp^(b * ln(a))
315 // Restricting a >= 0
316 static LogicalResult convertPowfOp(math::PowFOp op, PatternRewriter &rewriter) {
317   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
318   Value operandA = op.getOperand(0);
319   Value operandB = op.getOperand(1);
320   Type opType = operandA.getType();
321   Value zero = createFloatConst(op->getLoc(), opType, 0.00, rewriter);
322   Value one = createFloatConst(op->getLoc(), opType, 1.00, rewriter);
323 
324   Value logA = b.create<math::LogOp>(opType, operandA);
325   Value mult = b.create<arith::MulFOp>(opType, operandB, logA);
326   Value expResult = b.create<math::ExpOp>(opType, mult);
327 
328   // First, we select between the exp value and the adjusted value for odd
329   // powers of negatives. Then, we ensure that one is produced if `b` is zero.
330   // This corresponds to `libm` behavior, even for `0^0`. Without this check,
331   // `exp(0 * ln(0)) = exp(0 *-inf) = exp(-nan) = -nan`.
332   Value zeroCheck =
333       b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, operandB, zero);
334   Value finalResult =
335       b.create<arith::SelectOp>(op->getLoc(), zeroCheck, one, expResult);
336   rewriter.replaceOp(op, finalResult);
337   return success();
338 }
339 
340 // exp2f(float x) -> exp(x * ln(2))
341 //   Proof: Let's say 2^x = y
342 //   ln(2^x) = ln(y)
343 //   x * ln(2) = ln(y) => e ^(x*ln(2)) = y
344 static LogicalResult convertExp2fOp(math::Exp2Op op,
345                                     PatternRewriter &rewriter) {
346   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
347   Value operand = op.getOperand();
348   Type opType = operand.getType();
349   Value ln2 = createFloatConst(op->getLoc(), opType, llvm::numbers::ln2, b);
350   Value mult = b.create<arith::MulFOp>(opType, operand, ln2);
351   Value exp = b.create<math::ExpOp>(op->getLoc(), mult);
352   rewriter.replaceOp(op, exp);
353   return success();
354 }
355 
356 static LogicalResult convertRoundOp(math::RoundOp op,
357                                     PatternRewriter &rewriter) {
358   Location loc = op.getLoc();
359   ImplicitLocOpBuilder b(loc, rewriter);
360   Value operand = op.getOperand();
361   Type opType = operand.getType();
362   Type opEType = getElementTypeOrSelf(opType);
363 
364   if (!opEType.isF32()) {
365     return rewriter.notifyMatchFailure(op, "not a round of f32.");
366   }
367 
368   Type i32Ty = b.getI32Type();
369   if (auto shapedTy = dyn_cast<ShapedType>(opType))
370     i32Ty = shapedTy.clone(i32Ty);
371 
372   Value half = createFloatConst(loc, opType, 0.5, b);
373   Value c23 = createIntConst(loc, i32Ty, 23, b);
374   Value c127 = createIntConst(loc, i32Ty, 127, b);
375   Value expMask = createIntConst(loc, i32Ty, (1 << 8) - 1, b);
376 
377   Value incrValue = b.create<math::CopySignOp>(half, operand);
378   Value add = b.create<arith::AddFOp>(opType, operand, incrValue);
379   Value fpFixedConvert = createTruncatedFPValue(add, b);
380 
381   // There are three cases where adding 0.5 to the value and truncating by
382   // converting to an i64 does not result in the correct behavior:
383   //
384   // 1. Special values: +-inf and +-nan
385   //     Casting these special values to i64 has undefined behavior. To identify
386   //     these values, we use the fact that these values are the only float
387   //     values with the maximum possible biased exponent.
388   //
389   // 2. Large values: 2^23 <= |x| <= INT_64_MAX
390   //     Adding 0.5 to a float larger than or equal to 2^23 results in precision
391   //     errors that sometimes round the value up and sometimes round the value
392   //     down. For example:
393   //         8388608.0 + 0.5 = 8388608.0
394   //         8388609.0 + 0.5 = 8388610.0
395   //
396   // 3. Very large values: |x| > INT_64_MAX
397   //     Casting to i64 a value greater than the max i64 value will overflow the
398   //     i64 leading to wrong outputs.
399   //
400   // All three cases satisfy the property `biasedExp >= 23`.
401   Value operandBitcast = b.create<arith::BitcastOp>(i32Ty, operand);
402   Value operandExp = b.create<arith::AndIOp>(
403       b.create<arith::ShRUIOp>(operandBitcast, c23), expMask);
404   Value operandBiasedExp = b.create<arith::SubIOp>(operandExp, c127);
405   Value isSpecialValOrLargeVal =
406       b.create<arith::CmpIOp>(arith::CmpIPredicate::sge, operandBiasedExp, c23);
407 
408   Value result = b.create<arith::SelectOp>(isSpecialValOrLargeVal, operand,
409                                            fpFixedConvert);
410   rewriter.replaceOp(op, result);
411   return success();
412 }
413 
414 // Converts math.ctlz to scf and arith operations. This is done
415 // by performing a binary search on the bits.
416 static LogicalResult convertCtlzOp(math::CountLeadingZerosOp op,
417                                    PatternRewriter &rewriter) {
418   auto operand = op.getOperand();
419   auto operandTy = operand.getType();
420   auto eTy = getElementTypeOrSelf(operandTy);
421   Location loc = op.getLoc();
422 
423   int32_t bitwidth = eTy.getIntOrFloatBitWidth();
424   if (bitwidth > 64)
425     return failure();
426 
427   uint64_t allbits = -1;
428   if (bitwidth < 64) {
429     allbits = allbits >> (64 - bitwidth);
430   }
431 
432   Value x = operand;
433   Value count = createIntConst(loc, operandTy, 0, rewriter);
434   for (int32_t bw = bitwidth; bw > 1; bw = bw / 2) {
435     auto half = bw / 2;
436     auto bits = createIntConst(loc, operandTy, half, rewriter);
437     auto mask = createIntConst(loc, operandTy, allbits >> half, rewriter);
438 
439     Value pred =
440         rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ule, x, mask);
441     Value add = rewriter.create<arith::AddIOp>(loc, count, bits);
442     Value shift = rewriter.create<arith::ShLIOp>(loc, x, bits);
443 
444     x = rewriter.create<arith::SelectOp>(loc, pred, shift, x);
445     count = rewriter.create<arith::SelectOp>(loc, pred, add, count);
446   }
447 
448   Value zero = createIntConst(loc, operandTy, 0, rewriter);
449   Value pred = rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
450                                               operand, zero);
451 
452   Value bwval = createIntConst(loc, operandTy, bitwidth, rewriter);
453   Value sel = rewriter.create<arith::SelectOp>(loc, pred, bwval, count);
454   rewriter.replaceOp(op, sel);
455   return success();
456 }
457 
// Convert `math.roundeven` into `math.round` + arith ops
//
// Outline of the expansion:
//   1. Compute `round = math.round(operand)`.
//   2. From the bit patterns of `operand` and `round`, derive two predicates:
//      whether `round` is NOT an even whole number (or special value), and
//      whether `operand` lies exactly halfway between two integers.
//   3. If both hold, `math.round` rounded the tie to the odd neighbour, so
//      the result is `round - copysign(1.0, operand)`; otherwise it is
//      `round`.
//   4. Copy the operand's sign onto the result.
//
// The bit layout (mantissa/exponent widths, bias) is derived from the operand
// element type, so the constants named after f32 values (c23, c31, c127, ...)
// hold the corresponding values for the actual float type.
//
// NOTE(review): the match-failure message below mentions "f16 or f32", but
// the check itself only requires the element types to be some `FloatType`.
static LogicalResult convertRoundEvenOp(math::RoundEvenOp op,
                                        PatternRewriter &rewriter) {
  Location loc = op.getLoc();
  ImplicitLocOpBuilder b(loc, rewriter);
  auto operand = op.getOperand();
  Type operandTy = operand.getType();
  Type resultTy = op.getType();
  Type operandETy = getElementTypeOrSelf(operandTy);
  Type resultETy = getElementTypeOrSelf(resultTy);

  if (!isa<FloatType>(operandETy) || !isa<FloatType>(resultETy)) {
    return rewriter.notifyMatchFailure(op, "not a roundeven of f16 or f32.");
  }

  // Integer type of the same bit width (and shape) as the float operand, used
  // for bit-level inspection via arith.bitcast.
  Type fTy = operandTy;
  Type iTy = rewriter.getIntegerType(operandETy.getIntOrFloatBitWidth());
  if (auto shapedTy = dyn_cast<ShapedType>(fTy)) {
    iTy = shapedTy.clone(iTy);
  }

  unsigned bitWidth = operandETy.getIntOrFloatBitWidth();
  // The width returned by getFPMantissaWidth includes the integer bit.
  unsigned mantissaWidth =
      llvm::cast<FloatType>(operandETy).getFPMantissaWidth() - 1;
  unsigned exponentWidth = bitWidth - mantissaWidth - 1;

  // The names of the variables correspond to f32.
  // f64: 1 bit sign | 11 bits exponent | 52 bits mantissa.
  // f32: 1 bit sign | 8 bits exponent  | 23 bits mantissa.
  // f16: 1 bit sign | 5 bits exponent  | 10 bits mantissa.
  Value c1Float = createFloatConst(loc, fTy, 1.0, b);
  Value c0 = createIntConst(loc, iTy, 0, b);
  Value c1 = createIntConst(loc, iTy, 1, b);
  Value cNeg1 = createIntConst(loc, iTy, -1, b);
  Value c23 = createIntConst(loc, iTy, mantissaWidth, b);
  Value c31 = createIntConst(loc, iTy, bitWidth - 1, b);
  Value c127 = createIntConst(loc, iTy, (1ull << (exponentWidth - 1)) - 1, b);
  Value c2To22 = createIntConst(loc, iTy, 1ull << (mantissaWidth - 1), b);
  Value c23Mask = createIntConst(loc, iTy, (1ull << mantissaWidth) - 1, b);
  Value expMask = createIntConst(loc, iTy, (1ull << exponentWidth) - 1, b);

  Value operandBitcast = b.create<arith::BitcastOp>(iTy, operand);
  Value round = b.create<math::RoundOp>(operand);
  Value roundBitcast = b.create<arith::BitcastOp>(iTy, round);

  // Get biased exponents for operand and round(operand)
  // (i.e. the exponent field shifted down, masked, and with the bias
  // subtracted).
  Value operandExp = b.create<arith::AndIOp>(
      b.create<arith::ShRUIOp>(operandBitcast, c23), expMask);
  Value operandBiasedExp = b.create<arith::SubIOp>(operandExp, c127);
  Value roundExp = b.create<arith::AndIOp>(
      b.create<arith::ShRUIOp>(roundBitcast, c23), expMask);
  Value roundBiasedExp = b.create<arith::SubIOp>(roundExp, c127);

  auto safeShiftRight = [&](Value x, Value shift) -> Value {
    // Clamp shift to valid range [0, bitwidth - 1] to avoid undefined behavior
    Value clampedShift = b.create<arith::MaxSIOp>(shift, c0);
    clampedShift = b.create<arith::MinSIOp>(clampedShift, c31);
    return b.create<arith::ShRUIOp>(x, clampedShift);
  };

  // Keeps only the mantissa bits that remain after dropping the leftmost
  // `mantissaMaskRightShift` (clamped) bits of the mantissa field.
  auto maskMantissa = [&](Value mantissa,
                          Value mantissaMaskRightShift) -> Value {
    Value shiftedMantissaMask = safeShiftRight(c23Mask, mantissaMaskRightShift);
    return b.create<arith::AndIOp>(mantissa, shiftedMantissaMask);
  };

  // A whole number `x`, such that `|x| != 1`, is even if the mantissa, ignoring
  // the leftmost `clamp(biasedExp - 1, 0, 23)` bits, is zero. Large numbers
  // with `biasedExp > 23` (numbers where there is not enough precision to store
  // decimals) are always even, and they satisfy the even condition trivially
  // since the mantissa without all its bits is zero. The even condition
  // is also true for +-0, since they have `biasedExp = -127` and the entire
  // mantissa is zero. The case of +-1 has to be handled separately. Here
  // we identify these values by noting that +-1 are the only whole numbers with
  // `biasedExp == 0`.
  //
  // The special values +-inf and +-nan also satisfy the same property that
  // whole non-unit even numbers satisfy. In particular, the special values have
  // `biasedExp > 23`, so they get treated as large numbers with no room for
  // decimals, which are always even.
  Value roundBiasedExpEq0 =
      b.create<arith::CmpIOp>(arith::CmpIPredicate::eq, roundBiasedExp, c0);
  Value roundBiasedExpMinus1 = b.create<arith::SubIOp>(roundBiasedExp, c1);
  Value roundMaskedMantissa = maskMantissa(roundBitcast, roundBiasedExpMinus1);
  Value roundIsNotEvenOrSpecialVal = b.create<arith::CmpIOp>(
      arith::CmpIPredicate::ne, roundMaskedMantissa, c0);
  roundIsNotEvenOrSpecialVal =
      b.create<arith::OrIOp>(roundIsNotEvenOrSpecialVal, roundBiasedExpEq0);

  // A value `x` with `0 <= biasedExp < 23`, is halfway between two consecutive
  // integers if the bit at index `biasedExp` starting from the left in the
  // mantissa is 1 and all the bits to the right are zero. Values with
  // `biasedExp >= 23` don't have decimals, so they are never halfway. The
  // values +-0.5 are the only halfway values that have `biasedExp == -1 < 0`,
  // so these are handled separately. In particular, if `biasedExp == -1`, the
  // value is halfway if the entire mantissa is zero.
  Value operandBiasedExpEqNeg1 = b.create<arith::CmpIOp>(
      arith::CmpIPredicate::eq, operandBiasedExp, cNeg1);
  Value expectedOperandMaskedMantissa = b.create<arith::SelectOp>(
      operandBiasedExpEqNeg1, c0, safeShiftRight(c2To22, operandBiasedExp));
  Value operandMaskedMantissa = maskMantissa(operandBitcast, operandBiasedExp);
  Value operandIsHalfway =
      b.create<arith::CmpIOp>(arith::CmpIPredicate::eq, operandMaskedMantissa,
                              expectedOperandMaskedMantissa);
  // Ensure `biasedExp` is in the valid range for half values.
  Value operandBiasedExpGeNeg1 = b.create<arith::CmpIOp>(
      arith::CmpIPredicate::sge, operandBiasedExp, cNeg1);
  Value operandBiasedExpLt23 =
      b.create<arith::CmpIOp>(arith::CmpIPredicate::slt, operandBiasedExp, c23);
  operandIsHalfway =
      b.create<arith::AndIOp>(operandIsHalfway, operandBiasedExpLt23);
  operandIsHalfway =
      b.create<arith::AndIOp>(operandIsHalfway, operandBiasedExpGeNeg1);

  // Adjust rounded operand with `round(operand) - sign(operand)` to correct the
  // case where `round` rounded in the opposite direction of `roundeven`.
  Value sign = b.create<math::CopySignOp>(c1Float, operand);
  Value roundShifted = b.create<arith::SubFOp>(round, sign);
  // If the rounded value is even or a special value, we default to the behavior
  // of `math.round`.
  Value needsShift =
      b.create<arith::AndIOp>(roundIsNotEvenOrSpecialVal, operandIsHalfway);
  Value result = b.create<arith::SelectOp>(needsShift, roundShifted, round);
  // The `x - sign` adjustment does not preserve the sign when we are adjusting
  // the value -1 to -0. So here the sign is copied again to ensure that -0.5 is
  // rounded to -0.0.
  result = b.create<math::CopySignOp>(result, operand);
  rewriter.replaceOp(op, result);
  return success();
}
589 
590 // Convert `math.rsqrt` into `arith.divf` + `math.sqrt`
591 static LogicalResult convertRsqrtOp(math::RsqrtOp op,
592                                     PatternRewriter &rewriter) {
593 
594   auto operand = op.getOperand();
595   auto operandTy = operand.getType();
596   auto eTy = getElementTypeOrSelf(operandTy);
597   if (!isa<FloatType>(eTy))
598     return failure();
599 
600   Location loc = op->getLoc();
601   auto constOneFloat = createFloatConst(loc, operandTy, 1.0, rewriter);
602   auto sqrtOp = rewriter.create<math::SqrtOp>(loc, operand);
603   rewriter.replaceOpWithNewOp<arith::DivFOp>(op, constOneFloat, sqrtOp);
604   return success();
605 }
606 
/// Adds the `convertCtlzOp` rewrite, which expands
/// `math::CountLeadingZerosOp` into arith compare/shift/select ops, to
/// `patterns`.
void mlir::populateExpandCtlzPattern(RewritePatternSet &patterns) {
  patterns.add(convertCtlzOp);
}
610 
/// Adds the `convertSinhOp` rewrite, which expands `math::SinhOp` in terms of
/// `math::ExpOp`, to `patterns`.
void mlir::populateExpandSinhPattern(RewritePatternSet &patterns) {
  patterns.add(convertSinhOp);
}
614 
/// Adds the `convertCoshOp` rewrite, which expands `math::CoshOp` in terms of
/// `math::ExpOp`, to `patterns`.
void mlir::populateExpandCoshPattern(RewritePatternSet &patterns) {
  patterns.add(convertCoshOp);
}
618 
/// Adds the `convertTanOp` rewrite, which expands `math::TanOp` into
/// `math::SinOp`, `math::CosOp` and `arith::DivFOp`, to `patterns`.
void mlir::populateExpandTanPattern(RewritePatternSet &patterns) {
  patterns.add(convertTanOp);
}
622 
/// Adds the `convertTanhOp` rewrite, which expands `math::TanhOp` in terms of
/// `math::ExpOp` and arith ops, to `patterns`.
void mlir::populateExpandTanhPattern(RewritePatternSet &patterns) {
  patterns.add(convertTanhOp);
}
626 
/// Adds the `convertAsinhOp` rewrite, which expands `math::AsinhOp` in terms
/// of `math::FmaOp`, `math::SqrtOp` and `math::LogOp`, to `patterns`.
void mlir::populateExpandAsinhPattern(RewritePatternSet &patterns) {
  patterns.add(convertAsinhOp);
}
630 
/// Adds the `convertAcoshOp` rewrite, which expands `math::AcoshOp` in terms
/// of `math::FmaOp`, `math::SqrtOp` and `math::LogOp`, to `patterns`.
void mlir::populateExpandAcoshPattern(RewritePatternSet &patterns) {
  patterns.add(convertAcoshOp);
}
634 
/// Adds the `convertAtanhOp` rewrite, which expands `math::AtanhOp` in terms
/// of `math::LogOp` and arith ops, to `patterns`.
void mlir::populateExpandAtanhPattern(RewritePatternSet &patterns) {
  patterns.add(convertAtanhOp);
}
638 
/// Adds the `convertFmaFOp` rewrite, which expands `math::FmaOp` into
/// `arith::MulFOp` followed by `arith::AddFOp`, to `patterns`.
void mlir::populateExpandFmaFPattern(RewritePatternSet &patterns) {
  patterns.add(convertFmaFOp);
}
642 
/// Adds the `convertCeilOp` rewrite, which expands `math::CeilOp` into a
/// truncation plus a conditional increment, to `patterns`.
void mlir::populateExpandCeilFPattern(RewritePatternSet &patterns) {
  patterns.add(convertCeilOp);
}
646 
/// Adds the `convertExp2fOp` rewrite, which expands `math::Exp2Op` into
/// `math::ExpOp` of the operand scaled by ln(2), to `patterns`.
void mlir::populateExpandExp2FPattern(RewritePatternSet &patterns) {
  patterns.add(convertExp2fOp);
}
650 
/// Adds the `convertPowfOp` rewrite, which expands `math::PowFOp` in terms of
/// `math::LogOp` and `math::ExpOp`, to `patterns`.
void mlir::populateExpandPowFPattern(RewritePatternSet &patterns) {
  patterns.add(convertPowfOp);
}
654 
/// Adds the `convertFPowIOp` rewrite, which expands `math::FPowIOp` into
/// `arith::MulFOp` chains for constant powers (or `math::PowFOp` otherwise),
/// to `patterns`.
void mlir::populateExpandFPowIPattern(RewritePatternSet &patterns) {
  patterns.add(convertFPowIOp);
}
658 
/// Adds the `convertRoundOp` rewrite, which expands `math::RoundOp` on f32
/// element types, to `patterns`.
void mlir::populateExpandRoundFPattern(RewritePatternSet &patterns) {
  patterns.add(convertRoundOp);
}
662 
/// Adds the `convertRoundEvenOp` rewrite, which expands `math::RoundEvenOp`
/// into `math::RoundOp` plus arith ops, to `patterns`.
void mlir::populateExpandRoundEvenPattern(RewritePatternSet &patterns) {
  patterns.add(convertRoundEvenOp);
}
666 
/// Adds the `convertRsqrtOp` rewrite, which expands `math::RsqrtOp` into
/// `arith::DivFOp` of one by `math::SqrtOp`, to `patterns`.
void mlir::populateExpandRsqrtPattern(RewritePatternSet &patterns) {
  patterns.add(convertRsqrtOp);
}
670