; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.

define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    uxtb r1, r0
; CHECK-NEXT:    lsl r0, r0, #3
; CHECK-NEXT:    orr r0, r0, r1, lsr #5
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK-LABEL: rotl_i64_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r2, r0, #3
; CHECK-NEXT:    orr r2, r2, r1, lsr #29
; CHECK-NEXT:    lsl r1, r1, #3
; CHECK-NEXT:    orr r1, r1, r0, lsr #29
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotl_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    and r2, r1, #15
; CHECK-NEXT:    rsb r1, r1, #0
; CHECK-NEXT:    and r1, r1, #15
; CHECK-NEXT:    lsl r2, r0, r2
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    orr r0, r2, r0, lsr r1
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    rsb r1, r1, #0
; CHECK-NEXT:    ror r0, r0, r1
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotl_i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    tst r2, #32
; CHECK-NEXT:    mov r3, r0
; CHECK-NEXT:    and r12, r2, #31
; CHECK-NEXT:    movne r3, r1
; CHECK-NEXT:    movne r1, r0
; CHECK-NEXT:    mov r4, #31
; CHECK-NEXT:    bic r2, r4, r2
; CHECK-NEXT:    lsl lr, r3, r12
; CHECK-NEXT:    lsr r0, r1, #1
; CHECK-NEXT:    lsl r1, r1, r12
; CHECK-NEXT:    lsr r3, r3, #1
; CHECK-NEXT:    orr r0, lr, r0, lsr r2
; CHECK-NEXT:    orr r1, r1, r3, lsr r2
; CHECK-NEXT:    pop {r4, pc}
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}
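
; ARM only has a rotate-right instruction (ror), so the variable rotate-left
; above is selected as a negate (rsb) of the amount followed by ror, and the
; illegal i16 type masks the amount to 4 bits and expands into two shifts plus
; an or. A hand-expanded IR equivalent of the i32 case (an illustrative
; sketch only; the function name is made up and it is not covered by the
; autogenerated assertions):
;
;   define i32 @rotl_i32_expanded(i32 %x, i32 %z) {
;     %amt = and i32 %z, 31        ; left-shift amount, modulo 32
;     %neg = sub i32 0, %z
;     %amt2 = and i32 %neg, 31     ; complementary right-shift amount
;     %hi = shl i32 %x, %amt
;     %lo = lshr i32 %x, %amt2
;     %r = or i32 %hi, %lo
;     ret i32 %r
;   }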
; Vector rotate.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; SCALAR-LABEL: rotl_v4i32:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ldr r12, [sp]
; SCALAR-NEXT:    rsb r12, r12, #0
; SCALAR-NEXT:    ror r0, r0, r12
; SCALAR-NEXT:    ldr r12, [sp, #4]
; SCALAR-NEXT:    rsb r12, r12, #0
; SCALAR-NEXT:    ror r1, r1, r12
; SCALAR-NEXT:    ldr r12, [sp, #8]
; SCALAR-NEXT:    rsb r12, r12, #0
; SCALAR-NEXT:    ror r2, r2, r12
; SCALAR-NEXT:    ldr r12, [sp, #12]
; SCALAR-NEXT:    rsb r12, r12, #0
; SCALAR-NEXT:    ror r3, r3, r12
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: rotl_v4i32:
; NEON:       @ %bb.0:
; NEON-NEXT:    mov r12, sp
; NEON-NEXT:    vld1.64 {d16, d17}, [r12]
; NEON-NEXT:    vmov.i32 q10, #0x1f
; NEON-NEXT:    vneg.s32 q9, q8
; NEON-NEXT:    vmov d23, r2, r3
; NEON-NEXT:    vand q9, q9, q10
; NEON-NEXT:    vand q8, q8, q10
; NEON-NEXT:    vmov d22, r0, r1
; NEON-NEXT:    vneg.s32 q9, q9
; NEON-NEXT:    vshl.u32 q8, q11, q8
; NEON-NEXT:    vshl.u32 q9, q11, q9
; NEON-NEXT:    vorr q8, q8, q9
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
; SCALAR-LABEL: rotl_v4i32_rotl_const_shift:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ror r0, r0, #29
; SCALAR-NEXT:    ror r1, r1, #29
; SCALAR-NEXT:    ror r2, r2, #29
; SCALAR-NEXT:    ror r3, r3, #29
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: rotl_v4i32_rotl_const_shift:
; NEON:       @ %bb.0:
; NEON-NEXT:    vmov d17, r2, r3
; NEON-NEXT:    vmov d16, r0, r1
; NEON-NEXT:    vshr.u32 q9, q8, #29
; NEON-NEXT:    vshl.i32 q8, q8, #3
; NEON-NEXT:    vorr q8, q8, q9
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

; When first 2 operands match, it's a rotate.

define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    uxtb r1, r0
; CHECK-NEXT:    lsr r1, r1, #3
; CHECK-NEXT:    orr r0, r1, r0, lsl #5
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ror r0, r0, #3
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}
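
; A constant rotate needs no negation at all: rotl(x, 3) and rotr(x, 29) are
; the same operation on i32, which is why the constant rotl tests earlier
; print as "ror ..., #29" while the constant rotr test prints "ror ..., #3".
; A sketch of the equivalence in IR (illustration only, not a checked test):
;
;   %a = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 3)   ; rotl by 3
;   %b = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 29)  ; rotr by 29
;   ; %a and %b compute the same value for every %x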
; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotr_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    and r2, r1, #15
; CHECK-NEXT:    rsb r1, r1, #0
; CHECK-NEXT:    and r1, r1, #15
; CHECK-NEXT:    uxth r3, r0
; CHECK-NEXT:    lsr r2, r3, r2
; CHECK-NEXT:    orr r0, r2, r0, lsl r1
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ror r0, r0, r1
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotr_i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    tst r2, #32
; CHECK-NEXT:    mov r3, r1
; CHECK-NEXT:    moveq r3, r0
; CHECK-NEXT:    moveq r0, r1
; CHECK-NEXT:    mov r1, #31
; CHECK-NEXT:    lsl r12, r0, #1
; CHECK-NEXT:    bic r1, r1, r2
; CHECK-NEXT:    and r2, r2, #31
; CHECK-NEXT:    lsl r12, r12, r1
; CHECK-NEXT:    orr r12, r12, r3, lsr r2
; CHECK-NEXT:    lsl r3, r3, #1
; CHECK-NEXT:    lsl r1, r3, r1
; CHECK-NEXT:    orr r1, r1, r0, lsr r2
; CHECK-NEXT:    mov r0, r12
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; SCALAR-LABEL: rotr_v4i32:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ldr r12, [sp]
; SCALAR-NEXT:    ror r0, r0, r12
; SCALAR-NEXT:    ldr r12, [sp, #4]
; SCALAR-NEXT:    ror r1, r1, r12
; SCALAR-NEXT:    ldr r12, [sp, #8]
; SCALAR-NEXT:    ror r2, r2, r12
; SCALAR-NEXT:    ldr r12, [sp, #12]
; SCALAR-NEXT:    ror r3, r3, r12
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: rotr_v4i32:
; NEON:       @ %bb.0:
; NEON-NEXT:    mov r12, sp
; NEON-NEXT:    vld1.64 {d16, d17}, [r12]
; NEON-NEXT:    vmov.i32 q9, #0x1f
; NEON-NEXT:    vneg.s32 q10, q8
; NEON-NEXT:    vand q8, q8, q9
; NEON-NEXT:    vmov d23, r2, r3
; NEON-NEXT:    vand q9, q10, q9
; NEON-NEXT:    vneg.s32 q8, q8
; NEON-NEXT:    vmov d22, r0, r1
; NEON-NEXT:    vshl.u32 q9, q11, q9
; NEON-NEXT:    vshl.u32 q8, q11, q8
; NEON-NEXT:    vorr q8, q8, q9
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}
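
; NEON has no vector rotate, but vshl.u32 shifts left for positive per-lane
; amounts and right for negative ones, so the v4i32 rotates above become two
; vshl instructions combined with a vorr. In IR terms the rotr expansion is
; roughly the following (an illustrative sketch, not a checked test):
;
;   %amt  = and <4 x i32> %z, <i32 31, i32 31, i32 31, i32 31>
;   %neg  = sub <4 x i32> zeroinitializer, %z
;   %amt2 = and <4 x i32> %neg, <i32 31, i32 31, i32 31, i32 31>
;   %lo   = lshr <4 x i32> %x, %amt
;   %hi   = shl <4 x i32> %x, %amt2
;   %f    = or <4 x i32> %lo, %hi
;
; The rotl case is the mirror image: shl by the masked amount, lshr by the
; masked negated amount.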
; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; SCALAR-LABEL: rotr_v4i32_const_shift:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ror r0, r0, #3
; SCALAR-NEXT:    ror r1, r1, #3
; SCALAR-NEXT:    ror r2, r2, #3
; SCALAR-NEXT:    ror r3, r3, #3
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: rotr_v4i32_const_shift:
; NEON:       @ %bb.0:
; NEON-NEXT:    vmov d17, r2, r3
; NEON-NEXT:    vmov d16, r0, r1
; NEON-NEXT:    vshl.i32 q9, q8, #29
; NEON-NEXT:    vshr.u32 q8, q8, #3
; NEON-NEXT:    vorr q8, q8, q9
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Rotating by the bitwidth is a no-op (the shift amount is taken modulo the
; bitwidth), so these should fold to just a return.

define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
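
; For reference, the fold above follows from the LangRef semantics of the
; funnel-shift intrinsics: the shift amount is treated modulo the bitwidth,
; so a rotate by exactly the bitwidth rotates by 32 & 31 == 0 bits and
; returns the input unchanged (sketch, illustration only):
;
;   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)  ; folds to %x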