; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Funnel-shift intrinsic declarations exercised by the tests below.
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.

; i8 has no native rotate: rotl-by-3 expands to (x >> 5) | (x << 3), where
; ubfx extracts the top 3 bits of the byte (bitfield [7:5]) into w8.
define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ubfx w8, w0, #5, #3
; CHECK-NEXT:    orr w0, w8, w0, lsl #3
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

; i64 rotate-left by 3 is emitted as the native rotate-right by 64-3 = 61.
define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK-LABEL: rotl_i64_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror x0, x0, #61
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

; When first 2 operands match, it's a rotate (by variable amount).
; i16 has no rotate instruction: the rotate expands to a left and a right
; shift, both amounts masked to 4 bits (#0xf), with the source zero-extended
; (#0xffff) so the logical right shift sees only the 16 payload bits.
define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w8, w1
; CHECK-NEXT:    and w9, w0, #0xffff
; CHECK-NEXT:    and w10, w1, #0xf
; CHECK-NEXT:    and w8, w8, #0xf
; CHECK-NEXT:    lsl w10, w0, w10
; CHECK-NEXT:    lsr w8, w9, w8
; CHECK-NEXT:    orr w0, w10, w8
; CHECK-NEXT:    ret
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

; rotl(x, z) == rotr(x, -z): a single neg plus the native 32-bit rotate-right.
define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w8, w1
; CHECK-NEXT:    ror w0, w0, w8
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; Same pattern for i64; the negate of the amount is done on the 32-bit
; subregister (w8) before feeding the 64-bit ror.
define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w8, w1
; CHECK-NEXT:    ror x0, x0, x8
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

; No vector rotate instruction: both amounts are masked to 0..31 (movi #31
; + and), then a left ushl and a right shift (ushl by a negated amount) are
; combined with orr.
define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK-LABEL: rotl_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.4s, #31
; CHECK-NEXT:    neg v3.4s, v1.4s
; CHECK-NEXT:    and v3.16b, v3.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    neg v2.4s, v3.4s
; CHECK-NEXT:    ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    ushl v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

; Splat-constant rotate: shl by 3, then usra (shift right #29 and
; accumulate) merges the disjoint halves; add == or since the bit ranges
; cannot overlap.
define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_rotl_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.4s, v0.4s, #3
; CHECK-NEXT:    usra v1.4s, v0.4s, #29
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.
; When first 2 operands match, it's a rotate.

; i8 rotr-by-3 == (x << 5) | (x >> 3): lsl builds the high part, then bfxil
; inserts bitfield [7:3] of the original byte into the low 5 bits.
define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl w8, w0, #5
; CHECK-NEXT:    bfxil w8, w0, #3, #5
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

; i32 rotate-right by a constant maps directly onto ror with an immediate.
define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror w0, w0, #3
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

; Mirror of rotl_i16: right shift by z&15 on the zero-extended value, left
; shift by (-z)&15, combined with orr.
define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w8, w1
; CHECK-NEXT:    and w9, w0, #0xffff
; CHECK-NEXT:    and w10, w1, #0xf
; CHECK-NEXT:    and w8, w8, #0xf
; CHECK-NEXT:    lsr w9, w9, w10
; CHECK-NEXT:    lsl w8, w0, w8
; CHECK-NEXT:    orr w0, w9, w8
; CHECK-NEXT:    ret
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

; Rotate-right matches the hardware direction: a bare ror, no negate needed.
define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror w0, w0, w1
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; Same single-instruction lowering for 64 bits.
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror x0, x0, x1
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.
; Mirror of rotl_v4i32: amounts masked to 0..31, right shift done as ushl by
; the negated masked amount, halves combined with orr.
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK-LABEL: rotr_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.4s, #31
; CHECK-NEXT:    neg v3.4s, v1.4s
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    and v2.16b, v3.16b, v2.16b
; CHECK-NEXT:    neg v1.4s, v1.4s
; CHECK-NEXT:    ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

; Splat-constant rotr by 3 == rotl by 29: shl #29, then usra #3 accumulates
; the (disjoint) low part.
define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.4s, v0.4s, #29
; CHECK-NEXT:    usra v1.4s, v0.4s, #3
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; The funnel-shift amount is taken modulo the bit width, so shifting an i32
; by 32 is the identity: codegen is a bare ret for all four cases below.
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}