; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; Check that a vector shift right followed by an ORR is combined into the
; USRA/SSRA shift-right-and-accumulate instructions.

define <4 x i16> @usra_v4i16(<8 x i8> %0) {
; CHECK-LABEL: usra_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.8b, v0.8b, #7
; CHECK-NEXT:    usra v0.4h, v0.4h, #7
; CHECK-NEXT:    ret
  %2 = lshr <8 x i8> %0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %3 = bitcast <8 x i8> %2 to <4 x i16>
  %4 = lshr <4 x i16> %3, <i16 7, i16 7, i16 7, i16 7>
  %5 = or <4 x i16> %4, %3
  ret <4 x i16> %5
}

define <4 x i32> @usra_v4i32(<8 x i16> %0) {
; CHECK-LABEL: usra_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.8h, v0.8h, #15
; CHECK-NEXT:    usra v0.4s, v0.4s, #15
; CHECK-NEXT:    ret
  %2 = lshr <8 x i16> %0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %3 = bitcast <8 x i16> %2 to <4 x i32>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  %5 = or <4 x i32> %4, %3
  ret <4 x i32> %5
}

define <2 x i64> @usra_v2i64(<4 x i32> %0) {
; CHECK-LABEL: usra_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.4s, v0.4s, #31
; CHECK-NEXT:    usra v0.2d, v0.2d, #31
; CHECK-NEXT:    ret
  %2 = lshr <4 x i32> %0, <i32 31, i32 31, i32 31, i32 31>
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  %4 = lshr <2 x i64> %3, <i64 31, i64 31>
  %5 = or <2 x i64> %4, %3
  ret <2 x i64> %5
}

define <1 x i64> @usra_v1i64(<2 x i32> %0) {
; CHECK-LABEL: usra_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.2s, v0.2s, #31
; CHECK-NEXT:    usra d0, d0, #31
; CHECK-NEXT:    ret
  %2 = lshr <2 x i32> %0, <i32 31, i32 31>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = lshr <1 x i64> %3, <i64 31>
  %5 = or <1 x i64> %4, %3
  ret <1 x i64> %5
}

define <4 x i16> @ssra_v4i16(<4 x i16> %0) {
; CHECK-LABEL: ssra_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v1.4h, v0.4h, #15
; CHECK-NEXT:    bic v0.4h, #64, lsl #8
; CHECK-NEXT:    ssra v1.4h, v0.4h, #14
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
  ; Clear the 15th bit, e.g. 0b1111111111111111 -> 0b1011111111111111.
  %2 = and <4 x i16> %0, <i16 49151, i16 49151, i16 49151, i16 49151>
  ; The high 15 bits are zero; only the lowest bit may be set, e.g. 0b1011111111111111 -> 0b0000000000000001.
  %3 = lshr <4 x i16> %0, <i16 15, i16 15, i16 15, i16 15>
  ; The high 15 bits may be one and the lowest bit is zero, e.g. 0b1011111111111111 -> 0b1111111111111110.
  %4 = ashr <4 x i16> %2, <i16 14, i16 14, i16 14, i16 14>
  %5 = or <4 x i16> %3, %4
  ret <4 x i16> %5
}

define <4 x i32> @ssra_v4i32(<4 x i32> %0) {
; CHECK-LABEL: ssra_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v1.4s, v0.4s, #31
; CHECK-NEXT:    bic v0.4s, #64, lsl #24
; CHECK-NEXT:    ssra v1.4s, v0.4s, #30
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  ; Clear the 31st bit.
  %2 = and <4 x i32> %0, <i32 3221225471, i32 3221225471, i32 3221225471, i32 3221225471>
  ; The high 31 bits are zero; only the lowest bit may be set.
  %3 = lshr <4 x i32> %0, <i32 31, i32 31, i32 31, i32 31>
  ; The high 31 bits may be one and the lowest bit is zero.
  %4 = ashr <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
  %5 = or <4 x i32> %4, %3
  ret <4 x i32> %5
}

define <1 x i64> @ssra_v1i64(<2 x i32> %0) {
; CHECK-LABEL: ssra_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr d1, d0, #63
; CHECK-NEXT:    bic v0.2s, #64, lsl #24
; CHECK-NEXT:    ssra d1, d0, #62
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
  %2 = and <2 x i32> %0, <i32 3221225471, i32 3221225471>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = lshr <1 x i64> %3, <i64 63>
  %5 = ashr <1 x i64> %3, <i64 62>
  %6 = or <1 x i64> %4, %5
  ret <1 x i64> %6
}

define <2 x i64> @ssra_v2i64(<4 x i32> %0) {
; CHECK-LABEL: ssra_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v1.2d, v0.2d, #63
; CHECK-NEXT:    bic v0.4s, #64, lsl #24
; CHECK-NEXT:    ssra v1.2d, v0.2d, #62
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %2 = and <4 x i32> %0, <i32 3221225471, i32 3221225471, i32 3221225471, i32 3221225471>
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  %4 = lshr <2 x i64> %3, <i64 63, i64 63>
  %5 = ashr <2 x i64> %3, <i64 62, i64 62>
  %6 = or <2 x i64> %4, %5
  ret <2 x i64> %6
}

; Expected to deduce that the movi produces a vector of known integers
; and turn USHR+ORR into USRA.
define <8 x i16> @usra_with_movi_v8i16(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: usra_with_movi_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.16b, #1
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    usra v0.8h, v0.8h, #7
; CHECK-NEXT:    ret
  %3 = icmp eq <16 x i8> %0, %1
  %4 = zext <16 x i1> %3 to <16 x i8>
  %5 = bitcast <16 x i8> %4 to <8 x i16>
  %6 = lshr <8 x i16> %5, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %7 = or <8 x i16> %6, %5
  ret <8 x i16> %7
}

; Expected to deduce that the movi produces a vector of known integers
; and turn USHR+ORR into USRA.
define <4 x i32> @usra_with_movi_v4i32(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: usra_with_movi_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.16b, #1
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    usra v0.4s, v0.4s, #15
; CHECK-NEXT:    ret
  %3 = icmp eq <16 x i8> %0, %1
  %4 = zext <16 x i1> %3 to <16 x i8>
  %5 = bitcast <16 x i8> %4 to <4 x i32>
  %6 = lshr <4 x i32> %5, <i32 15, i32 15, i32 15, i32 15>
  %7 = or <4 x i32> %6, %5
  ret <4 x i32> %7
}

; Expected to deduce that the movi produces a vector of known integers
; and turn USHR+ORR into USRA.
define <2 x i64> @usra_with_movi_v2i64(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: usra_with_movi_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.16b, #1
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    usra v0.2d, v0.2d, #31
; CHECK-NEXT:    ret
  %3 = icmp eq <16 x i8> %0, %1
  %4 = zext <16 x i1> %3 to <16 x i8>
  %5 = bitcast <16 x i8> %4 to <2 x i64>
  %6 = lshr <2 x i64> %5, <i64 31, i64 31>
  %7 = or <2 x i64> %6, %5
  ret <2 x i64> %7
}

; The disjoint flag guarantees the ORR operands share no set bits, so the
; ORR can be folded directly into a USRA accumulate.
define <4 x i16> @usra_v4i16_disjointor(<4 x i16> %a) {
; CHECK-LABEL: usra_v4i16_disjointor:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra v0.4h, v0.4h, #7
; CHECK-NEXT:    ret
  %s = lshr <4 x i16> %a, <i16 7, i16 7, i16 7, i16 7>
  %o = or disjoint <4 x i16> %s, %a
  ret <4 x i16> %o
}