; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshl_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT: and r1, r2, #15
; CHECK-NEXT: lsl r0, r0, r1
; CHECK-NEXT: lsr r0, r0, #16
; CHECK-NEXT: bx lr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %f
}
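
; For i32 with a variable amount, the expansion visible in the checks below is
; roughly:
;   fshl(x, y, z) = (x << (z & 31)) | ((y >> 1) >> (~z & 31))
; Pre-shifting y right by 1 keeps the second shift amount in [0, 31], so no
; select is needed for the z & 31 == 0 case (where fshl must return x).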
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r3, #31
; CHECK-NEXT: lsr r1, r1, #1
; CHECK-NEXT: bic r3, r3, r2
; CHECK-NEXT: and r2, r2, #31
; CHECK-NEXT: lsl r0, r0, r2
; CHECK-NEXT: orr r0, r0, r1, lsr r3
; CHECK-NEXT: bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
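; i37 gets promoted to a wider legal type, and since 37 is not a power of two
; the shift amount needs a real modulo rather than a mask; that urem by 37
; appears below as the call to __aeabi_uldivmod.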
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshl_i37:
; SCALAR: @ %bb.0:
; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT: mov r8, r0
; SCALAR-NEXT: ldr r0, [sp, #28]
; SCALAR-NEXT: mov r4, r1
; SCALAR-NEXT: mov r5, r3
; SCALAR-NEXT: and r1, r0, #31
; SCALAR-NEXT: ldr r0, [sp, #24]
; SCALAR-NEXT: mov r6, r2
; SCALAR-NEXT: mov r2, #37
; SCALAR-NEXT: mov r3, #0
; SCALAR-NEXT: bl __aeabi_uldivmod
; SCALAR-NEXT: lsl r0, r5, #27
; SCALAR-NEXT: tst r2, #32
; SCALAR-NEXT: orr r0, r0, r6, lsr #5
; SCALAR-NEXT: mov r1, r8
; SCALAR-NEXT: and r3, r2, #31
; SCALAR-NEXT: mov r7, #31
; SCALAR-NEXT: movne r1, r0
; SCALAR-NEXT: lslne r0, r6, #27
; SCALAR-NEXT: bic r2, r7, r2
; SCALAR-NEXT: lsl r5, r1, r3
; SCALAR-NEXT: lsr r0, r0, #1
; SCALAR-NEXT: movne r4, r8
; SCALAR-NEXT: lsr r1, r1, #1
; SCALAR-NEXT: lsl r3, r4, r3
; SCALAR-NEXT: orr r0, r5, r0, lsr r2
; SCALAR-NEXT: orr r1, r3, r1, lsr r2
; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
;
; NEON-LABEL: fshl_i37:
; NEON: @ %bb.0:
; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
; NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
; NEON-NEXT: mov r4, r1
; NEON-NEXT: ldr r1, [sp, #28]
; NEON-NEXT: mov r8, r0
; NEON-NEXT: ldr r0, [sp, #24]
; NEON-NEXT: and r1, r1, #31
; NEON-NEXT: mov r5, r3
; NEON-NEXT: mov r6, r2
; NEON-NEXT: mov r2, #37
; NEON-NEXT: mov r3, #0
; NEON-NEXT: bl __aeabi_uldivmod
; NEON-NEXT: lsl r0, r5, #27
; NEON-NEXT: tst r2, #32
; NEON-NEXT: orr r0, r0, r6, lsr #5
; NEON-NEXT: mov r1, r8
; NEON-NEXT: and r3, r2, #31
; NEON-NEXT: mov r7, #31
; NEON-NEXT: movne r1, r0
; NEON-NEXT: lslne r0, r6, #27
; NEON-NEXT: bic r2, r7, r2
; NEON-NEXT: lsl r5, r1, r3
; NEON-NEXT: lsr r0, r0, #1
; NEON-NEXT: movne r4, r8
; NEON-NEXT: lsr r1, r1, #1
; NEON-NEXT: lsl r3, r4, r3
; NEON-NEXT: orr r0, r5, r0, lsr r2
; NEON-NEXT: orr r1, r3, r1, lsr r2
; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #67
; CHECK-NEXT: bx lr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #128
; CHECK-NEXT: bx lr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #120
; CHECK-NEXT: bx lr
  %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: bx lr
  %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK: @ %bb.0:
; CHECK-NEXT: lsl r0, r0, #9
; CHECK-NEXT: orr r0, r0, r1, lsr #23
; CHECK-NEXT: bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK: @ %bb.0:
; CHECK-NEXT: lsl r0, r0, #9
; CHECK-NEXT: orr r0, r0, r1, lsr #23
; CHECK-NEXT: bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK: @ %bb.0:
; CHECK-NEXT: lsl r1, r3, #9
; CHECK-NEXT: orr r2, r1, r2, lsr #23
; CHECK-NEXT: lsl r0, r0, #9
; CHECK-NEXT: orr r1, r0, r3, lsr #23
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bx lr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #128
; CHECK-NEXT: bx lr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshr_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT: and r1, r2, #15
; CHECK-NEXT: lsr r0, r0, r1
; CHECK-NEXT: bx lr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %f
}
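
; The variable-amount fshr expansion mirrors fshl, as the checks below show:
;   fshr(x, y, z) = ((x << 1) << (~z & 31)) | (y >> (z & 31))
; Here x is pre-shifted left by 1 so the left-shift amount stays in [0, 31]
; and fshr returns y unchanged when z & 31 == 0.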
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r3, #31
; CHECK-NEXT: lsl r0, r0, #1
; CHECK-NEXT: bic r3, r3, r2
; CHECK-NEXT: and r2, r2, #31
; CHECK-NEXT: lsl r0, r0, r3
; CHECK-NEXT: orr r0, r0, r1, lsr r2
; CHECK-NEXT: bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshr_i37:
; SCALAR: @ %bb.0:
; SCALAR-NEXT: .save {r4, r5, r6, r7, r11, lr}
; SCALAR-NEXT: push {r4, r5, r6, r7, r11, lr}
; SCALAR-NEXT: mov r5, r0
; SCALAR-NEXT: ldr r0, [sp, #28]
; SCALAR-NEXT: mov r4, r1
; SCALAR-NEXT: mov r6, r3
; SCALAR-NEXT: and r1, r0, #31
; SCALAR-NEXT: ldr r0, [sp, #24]
; SCALAR-NEXT: mov r7, r2
; SCALAR-NEXT: mov r2, #37
; SCALAR-NEXT: mov r3, #0
; SCALAR-NEXT: bl __aeabi_uldivmod
; SCALAR-NEXT: add r0, r2, #27
; SCALAR-NEXT: lsl r2, r6, #27
; SCALAR-NEXT: orr r2, r2, r7, lsr #5
; SCALAR-NEXT: mov r1, #31
; SCALAR-NEXT: tst r0, #32
; SCALAR-NEXT: mov r3, r5
; SCALAR-NEXT: moveq r3, r2
; SCALAR-NEXT: lsleq r2, r7, #27
; SCALAR-NEXT: bic r1, r1, r0
; SCALAR-NEXT: and r7, r0, #31
; SCALAR-NEXT: lsl r6, r3, #1
; SCALAR-NEXT: moveq r4, r5
; SCALAR-NEXT: lsl r6, r6, r1
; SCALAR-NEXT: orr r0, r6, r2, lsr r7
; SCALAR-NEXT: lsl r2, r4, #1
; SCALAR-NEXT: lsl r1, r2, r1
; SCALAR-NEXT: orr r1, r1, r3, lsr r7
; SCALAR-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; NEON-LABEL: fshr_i37:
; NEON: @ %bb.0:
; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; NEON-NEXT: mov r4, r1
; NEON-NEXT: ldr r1, [sp, #28]
; NEON-NEXT: mov r5, r0
; NEON-NEXT: ldr r0, [sp, #24]
; NEON-NEXT: and r1, r1, #31
; NEON-NEXT: mov r6, r3
; NEON-NEXT: mov r7, r2
; NEON-NEXT: mov r2, #37
; NEON-NEXT: mov r3, #0
; NEON-NEXT: bl __aeabi_uldivmod
; NEON-NEXT: add r0, r2, #27
; NEON-NEXT: lsl r2, r6, #27
; NEON-NEXT: orr r2, r2, r7, lsr #5
; NEON-NEXT: mov r1, #31
; NEON-NEXT: tst r0, #32
; NEON-NEXT: mov r3, r5
; NEON-NEXT: moveq r3, r2
; NEON-NEXT: lsleq r2, r7, #27
; NEON-NEXT: bic r1, r1, r0
; NEON-NEXT: and r7, r0, #31
; NEON-NEXT: lsl r6, r3, #1
; NEON-NEXT: moveq r4, r5
; NEON-NEXT: lsl r6, r6, r1
; NEON-NEXT: orr r0, r6, r2, lsr r7
; NEON-NEXT: lsl r2, r4, #1
; NEON-NEXT: lsl r1, r2, r1
; NEON-NEXT: orr r1, r1, r3, lsr r7
; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #31
; CHECK-NEXT: bx lr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #254
; CHECK-NEXT: bx lr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #225
; CHECK-NEXT: bx lr
  %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #255
; CHECK-NEXT: bx lr
  %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK: @ %bb.0:
; CHECK-NEXT: lsl r0, r0, #23
; CHECK-NEXT: orr r0, r0, r1, lsr #9
; CHECK-NEXT: bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK: @ %bb.0:
; CHECK-NEXT: lsl r0, r0, #23
; CHECK-NEXT: orr r0, r0, r1, lsr #9
; CHECK-NEXT: bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK: @ %bb.0:
; CHECK-NEXT: lsl r2, r0, #23
; CHECK-NEXT: lsl r1, r1, #23
; CHECK-NEXT: orr r2, r2, r3, lsr #9
; CHECK-NEXT: orr r1, r1, r0, lsr #9
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bx lr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, #254
; CHECK-NEXT: bx lr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK: @ %bb.0:
; CHECK-NEXT: bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK: @ %bb.0:
; CHECK-NEXT: bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; SCALAR-LABEL: fshr_v4i32_shift_by_bitwidth:
; SCALAR: @ %bb.0:
; SCALAR-NEXT: ldm sp, {r0, r1, r2, r3}
; SCALAR-NEXT: bx lr
;
; NEON-LABEL: fshr_v4i32_shift_by_bitwidth:
; NEON: @ %bb.0:
; NEON-NEXT: mov r0, sp
; NEON-NEXT: vld1.64 {d16, d17}, [r0]
; NEON-NEXT: vmov r0, r1, d16
; NEON-NEXT: vmov r2, r3, d17
; NEON-NEXT: bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
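
; In the four shift-by-bitwidth tests above, the effective shift amount is
; z % 32 == 0, so fshl returns its first operand and fshr returns its second;
; all that remains is moving the right values into the return registers.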