; Transforms/InstSimplify/po2-shift-add-and-to-zero.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=instsimplify -S < %s | FileCheck %s

;; The `and X, (add Y, -1)` pattern comes from an earlier instcombine pass,
;; which converted

;; define i64 @f1() #0 {
;; entry:
;;   %0 = call i64 @llvm.aarch64.sve.cntb(i32 31)
;;   %1 = call i64 @llvm.aarch64.sve.cnth(i32 31)
;;   %rem = urem i64 %0, %1
;;   ret i64 %rem
;; }

;; into

;; define i64 @f1() #0 {
;; entry:
;;   %0 = call i64 @llvm.vscale.i64()
;;   %1 = shl nuw nsw i64 %0, 4
;;   %2 = call i64 @llvm.vscale.i64()
;;   %3 = shl nuw nsw i64 %2, 3
;;   %4 = add nsw i64 %3, -1
;;   %rem = and i64 %1, %4
;;   ret i64 %rem
;; }

;; InstCombine would have folded the original to returning 0 if the vscale
;; calls were the same Value*, but since there are two of them that fold does
;; not apply and the urem is converted to add/and instead. CSE then gets rid
;; of the extra vscale, leaving us with a new pattern to match. This only works
;; because vscale is known to be a power of 2 (assuming there's a defined range
;; for it).

;; Positive case: the function carries attribute group #0, and the expression
;; (vscale << 4) & ((vscale << 3) - 1) is expected to simplify to 0.
define i64 @f1() #0 {
; CHECK-LABEL: define i64 @f1
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret i64 0
;
entry:
  %0 = call i64 @llvm.vscale.i64()
  ;; Value being masked: vscale shifted left by 4.
  %1 = shl i64 %0, 4
  ;; Mask: (vscale shifted left by 3) minus 1.
  %2 = shl i64 %0, 3
  %3 = add i64 %2, -1
  %rem = and i64 %1, %3
  ret i64 %rem
}

;; Make sure it works if the value could also be zero.
;; %x = (0 - %arg) & %arg keeps only the lowest set bit of %arg, so it is
;; either a power of 2 or 0 (when %arg is 0). The operands of the final `and`
;; are in the opposite order from @f1 (mask first), which also exercises the
;; commuted form.
define i64 @test_pow2_or_zero(i64 %arg) {
; CHECK-LABEL: define i64 @test_pow2_or_zero
; CHECK-SAME: (i64 [[ARG:%.*]]) {
; CHECK-NEXT:    ret i64 0
;
  %neg = sub i64 0, %arg
  %x = and i64 %neg, %arg
  ;; Value being masked: %x shifted left by 4.
  %shl1 = shl i64 %x, 4
  ;; Mask: (%x shifted left by 3) minus 1.
  %shl2 = shl i64 %x, 3
  %mask = add i64 %shl2, -1
  %rem = and i64 %mask, %shl1
  ret i64 %rem
}

;; Make sure it doesn't work if the value isn't known to be a power of 2.
;; In this case a vscale without a `vscale_range` attribute on the function.
;; The body is the same instruction sequence as @f1; the only difference is
;; that this function does not reference attribute group #0, so the expected
;; output keeps every instruction.
define i64 @no_pow2() {
; CHECK-LABEL: define i64 @no_pow2() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], -1
; CHECK-NEXT:    [[REM:%.*]] = and i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT:    ret i64 [[REM]]
;
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 4
  %2 = shl i64 %0, 3
  %3 = add i64 %2, -1
  %rem = and i64 %1, %3
  ret i64 %rem
}

;; Make sure it doesn't work if the shift on the -1 side is greater.
;; Compared with @test_pow2_or_zero the shift amounts are swapped: the masked
;; value is %x << 3 and the mask is (%x << 4) - 1, so the single set bit of the
;; masked value can fall inside the mask and the result is not known to be 0.
define i64 @minus_shift_greater(i64 %arg) {
; CHECK-LABEL: define i64 @minus_shift_greater
; CHECK-SAME: (i64 [[ARG:%.*]]) {
; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 3
; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[X]], 4
; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -1
; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
; CHECK-NEXT:    ret i64 [[REM]]
;
  ;; %x is the lowest set bit of %arg (or 0 when %arg is 0).
  %neg = sub i64 0, %arg
  %x = and i64 %neg, %arg
  %shl1 = shl i64 %x, 3
  %shl2 = shl i64 %x, 4
  %mask = add i64 %shl2, -1
  %rem = and i64 %shl1, %mask
  ret i64 %rem
}

;; Make sure it doesn't work if the subtract isn't one.
;; The shift amounts match the folding cases (4 on the masked value, 3 on the
;; mask side), but the mask is built with `add ..., -2` instead of `-1`; the
;; expected output keeps every instruction.
define i64 @sub2(i64 %arg) {
; CHECK-LABEL: define i64 @sub2
; CHECK-SAME: (i64 [[ARG:%.*]]) {
; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 4
; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[X]], 3
; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -2
; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
; CHECK-NEXT:    ret i64 [[REM]]
;
  ;; %x is the lowest set bit of %arg (or 0 when %arg is 0).
  %neg = sub i64 0, %arg
  %x = and i64 %neg, %arg
  %shl1 = shl i64 %x, 4
  %shl2 = shl i64 %x, 3
  ;; The constant here is -2, not -1.
  %mask = add i64 %shl2, -2
  %rem = and i64 %shl1, %mask
  ret i64 %rem
}

;; Make sure it doesn't work with a right shift.
;; The mask side uses `lshr` rather than `shl` (the value is still named
;; %shl2); the add constant is -1 as in the folding cases. The expected output
;; keeps every instruction.
define i64 @rightshift(i64 %arg) {
; CHECK-LABEL: define i64 @rightshift
; CHECK-SAME: (i64 [[ARG:%.*]]) {
; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 4
; CHECK-NEXT:    [[SHL2:%.*]] = lshr i64 [[X]], 3
; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -1
; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
; CHECK-NEXT:    ret i64 [[REM]]
;
  ;; %x is the lowest set bit of %arg (or 0 when %arg is 0).
  %neg = sub i64 0, %arg
  %x = and i64 %neg, %arg
  %shl1 = shl i64 %x, 4
  ;; Logical right shift instead of a left shift.
  %shl2 = lshr i64 %x, 3
  %mask = add i64 %shl2, -1
  %rem = and i64 %shl1, %mask
  ret i64 %rem
}

;; Intrinsic called by @f1 and @no_pow2.
declare i64 @llvm.vscale.i64()

;; Attribute group referenced only by @f1; @no_pow2 deliberately omits it.
attributes #0 = { vscale_range(1,16) }