; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshl_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT:    and r1, r2, #15
; CHECK-NEXT:    lsl r0, r0, r1
; CHECK-NEXT:    lsr r0, r0, #16
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %f
}

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r3, #31
; CHECK-NEXT:    lsr r1, r1, #1
; CHECK-NEXT:    bic r3, r3, r2
; CHECK-NEXT:    and r2, r2, #31
; CHECK-NEXT:    lsl r0, r0, r2
; CHECK-NEXT:    orr r0, r0, r1, lsr r3
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
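
; A sketch of what the generic fshl_i32 expansion above computes, written as
; equivalent IR (an assumed shape for illustration, not necessarily the exact
; DAG the backend builds). Splitting y's right shift into a constant 1 plus a
; variable amount in [0,31] avoids any single shift by the full bit width,
; which IR shifts do not allow:
;   %s   = and i32 %z, 31        ; modulo-32 shift amount ('and r2, r2, #31')
;   %nz  = xor i32 %z, -1
;   %inv = and i32 %nz, 31       ; 31 & ~z ('bic r3, r3, r2')
;   %hi  = shl i32 %x, %s
;   %pre = lshr i32 %y, 1        ; 'lsr r1, r1, #1'
;   %lo  = lshr i32 %pre, %inv
;   %r   = or i32 %hi, %lo       ; == fshl(x, y, z)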

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshl_i37:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
; SCALAR-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
; SCALAR-NEXT:    mov r8, r0
; SCALAR-NEXT:    ldr r0, [sp, #36]
; SCALAR-NEXT:    mov r4, r1
; SCALAR-NEXT:    mov r6, r3
; SCALAR-NEXT:    and r1, r0, #31
; SCALAR-NEXT:    ldr r0, [sp, #32]
; SCALAR-NEXT:    mov r9, r2
; SCALAR-NEXT:    mov r2, #37
; SCALAR-NEXT:    mov r3, #0
; SCALAR-NEXT:    bl __aeabi_uldivmod
; SCALAR-NEXT:    lsl r1, r6, #27
; SCALAR-NEXT:    ands r0, r2, #32
; SCALAR-NEXT:    orr r1, r1, r9, lsr #5
; SCALAR-NEXT:    mov r3, r8
; SCALAR-NEXT:    and r6, r2, #31
; SCALAR-NEXT:    mov r7, #31
; SCALAR-NEXT:    movne r3, r1
; SCALAR-NEXT:    cmp r0, #0
; SCALAR-NEXT:    lslne r1, r9, #27
; SCALAR-NEXT:    bic r2, r7, r2
; SCALAR-NEXT:    movne r4, r8
; SCALAR-NEXT:    lsl r5, r3, r6
; SCALAR-NEXT:    lsr r0, r1, #1
; SCALAR-NEXT:    lsl r1, r4, r6
; SCALAR-NEXT:    lsr r3, r3, #1
; SCALAR-NEXT:    orr r0, r5, r0, lsr r2
; SCALAR-NEXT:    orr r1, r1, r3, lsr r2
; SCALAR-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; NEON-LABEL: fshl_i37:
; NEON:       @ %bb.0:
; NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
; NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
; NEON-NEXT:    mov r4, r1
; NEON-NEXT:    ldr r1, [sp, #28]
; NEON-NEXT:    mov r6, r0
; NEON-NEXT:    ldr r0, [sp, #24]
; NEON-NEXT:    and r1, r1, #31
; NEON-NEXT:    mov r5, r3
; NEON-NEXT:    mov r7, r2
; NEON-NEXT:    mov r2, #37
; NEON-NEXT:    mov r3, #0
; NEON-NEXT:    bl __aeabi_uldivmod
; NEON-NEXT:    mov r0, #31
; NEON-NEXT:    bic r1, r0, r2
; NEON-NEXT:    lsl r0, r5, #27
; NEON-NEXT:    ands r12, r2, #32
; NEON-NEXT:    orr r0, r0, r7, lsr #5
; NEON-NEXT:    mov r5, r6
; NEON-NEXT:    and r2, r2, #31
; NEON-NEXT:    movne r5, r0
; NEON-NEXT:    lslne r0, r7, #27
; NEON-NEXT:    cmp r12, #0
; NEON-NEXT:    lsl r3, r5, r2
; NEON-NEXT:    lsr r0, r0, #1
; NEON-NEXT:    movne r4, r6
; NEON-NEXT:    orr r0, r3, r0, lsr r1
; NEON-NEXT:    lsr r3, r5, #1
; NEON-NEXT:    lsl r2, r4, r2
; NEON-NEXT:    orr r1, r2, r3, lsr r1
; NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}
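
; An informal note on the i37 lowering above: the shift amount must be
; reduced modulo 37, and with a non-power-of-2 width there is no cheap mask,
; so the code calls __aeabi_uldivmod (the AEABI 64-bit unsigned divmod
; helper) to obtain z urem 37. What follows is the usual i64 expansion
; applied to x and to y << 27: pre-shifting y by 64 - 37 = 27 (the
; 'lsl #27' / 'lsr #5' pair) makes fshl.i64(x, y << 27, z urem 37) agree
; with fshl.i37 in the low 37 bits.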

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #67
; CHECK-NEXT:    bx lr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #128
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #120
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r0, r0, r1, lsr #23
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r0, r0, r1, lsr #23
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r1, r3, #9
; CHECK-NEXT:    orr r2, r1, r2, lsr #23
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r1, r0, r3, lsr #23
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #128
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshr_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT:    and r1, r2, #15
; CHECK-NEXT:    lsr r0, r0, r1
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %f
}

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r3, #31
; CHECK-NEXT:    lsl r0, r0, #1
; CHECK-NEXT:    bic r3, r3, r2
; CHECK-NEXT:    and r2, r2, #31
; CHECK-NEXT:    lsl r0, r0, r3
; CHECK-NEXT:    orr r0, r0, r1, lsr r2
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
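
; A sketch of the fshr_i32 expansion above as equivalent IR (again an assumed
; shape for illustration, not necessarily the exact DAG); this time x is
; pre-shifted left by a constant 1 so the variable left shift stays in [0,31]:
;   %s   = and i32 %z, 31        ; modulo-32 shift amount ('and r2, r2, #31')
;   %nz  = xor i32 %z, -1
;   %inv = and i32 %nz, 31       ; 31 & ~z ('bic r3, r3, r2')
;   %pre = shl i32 %x, 1         ; 'lsl r0, r0, #1'
;   %hi  = shl i32 %pre, %inv
;   %lo  = lshr i32 %y, %s
;   %r   = or i32 %hi, %lo       ; == fshr(x, y, z)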

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshr_i37:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT:    push {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT:    mov r8, r0
; SCALAR-NEXT:    ldr r0, [sp, #28]
; SCALAR-NEXT:    mov r4, r1
; SCALAR-NEXT:    mov r5, r3
; SCALAR-NEXT:    and r1, r0, #31
; SCALAR-NEXT:    ldr r0, [sp, #24]
; SCALAR-NEXT:    mov r7, r2
; SCALAR-NEXT:    mov r2, #37
; SCALAR-NEXT:    mov r3, #0
; SCALAR-NEXT:    bl __aeabi_uldivmod
; SCALAR-NEXT:    lsl r3, r5, #27
; SCALAR-NEXT:    add r0, r2, #27
; SCALAR-NEXT:    orr r3, r3, r7, lsr #5
; SCALAR-NEXT:    ands r2, r0, #32
; SCALAR-NEXT:    mov r5, r8
; SCALAR-NEXT:    mov r1, #31
; SCALAR-NEXT:    moveq r5, r3
; SCALAR-NEXT:    lsleq r3, r7, #27
; SCALAR-NEXT:    cmp r2, #0
; SCALAR-NEXT:    bic r1, r1, r0
; SCALAR-NEXT:    moveq r4, r8
; SCALAR-NEXT:    lsl r6, r5, #1
; SCALAR-NEXT:    and r7, r0, #31
; SCALAR-NEXT:    lsl r2, r4, #1
; SCALAR-NEXT:    lsl r6, r6, r1
; SCALAR-NEXT:    lsl r1, r2, r1
; SCALAR-NEXT:    orr r0, r6, r3, lsr r7
; SCALAR-NEXT:    orr r1, r1, r5, lsr r7
; SCALAR-NEXT:    pop {r4, r5, r6, r7, r8, pc}
;
; NEON-LABEL: fshr_i37:
; NEON:       @ %bb.0:
; NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
; NEON-NEXT:    mov r4, r1
; NEON-NEXT:    ldr r1, [sp, #28]
; NEON-NEXT:    mov r8, r0
; NEON-NEXT:    ldr r0, [sp, #24]
; NEON-NEXT:    and r1, r1, #31
; NEON-NEXT:    mov r5, r3
; NEON-NEXT:    mov r7, r2
; NEON-NEXT:    mov r2, #37
; NEON-NEXT:    mov r3, #0
; NEON-NEXT:    bl __aeabi_uldivmod
; NEON-NEXT:    lsl r3, r5, #27
; NEON-NEXT:    add r0, r2, #27
; NEON-NEXT:    orr r3, r3, r7, lsr #5
; NEON-NEXT:    ands r2, r0, #32
; NEON-NEXT:    mov r5, r8
; NEON-NEXT:    mov r1, #31
; NEON-NEXT:    moveq r5, r3
; NEON-NEXT:    lsleq r3, r7, #27
; NEON-NEXT:    cmp r2, #0
; NEON-NEXT:    bic r1, r1, r0
; NEON-NEXT:    moveq r4, r8
; NEON-NEXT:    lsl r6, r5, #1
; NEON-NEXT:    and r7, r0, #31
; NEON-NEXT:    lsl r2, r4, #1
; NEON-NEXT:    lsl r6, r6, r1
; NEON-NEXT:    lsl r1, r2, r1
; NEON-NEXT:    orr r0, r6, r3, lsr r7
; NEON-NEXT:    orr r1, r1, r5, lsr r7
; NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #31
; CHECK-NEXT:    bx lr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #254
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #225
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #255
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #23
; CHECK-NEXT:    orr r0, r0, r1, lsr #9
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #23
; CHECK-NEXT:    orr r0, r0, r1, lsr #9
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r2, r0, #23
; CHECK-NEXT:    lsl r1, r1, #23
; CHECK-NEXT:    orr r2, r2, r3, lsr #9
; CHECK-NEXT:    orr r1, r1, r0, lsr #9
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #254
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; SCALAR-LABEL: fshr_v4i32_shift_by_bitwidth:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ldm sp, {r0, r1, r2, r3}
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: fshr_v4i32_shift_by_bitwidth:
; NEON:       @ %bb.0:
; NEON-NEXT:    mov r0, sp
; NEON-NEXT:    vld1.64 {d16, d17}, [r0]
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
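
; Note on the shift_by_bitwidth tests above: 32 is congruent to 0 modulo the
; element width, so fshl returns x unchanged (already in the return registers,
; hence the bare 'bx lr') and fshr returns y; the SCALAR/NEON difference in
; the vector fshr case is only in how the stack-passed y operand is reloaded.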