; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i128 @llvm.fshl.i128(i128, i128, i128)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: clrlwi 5, 5, 27
; CHECK-NEXT: slw 3, 3, 5
; CHECK-NEXT: subfic 5, 5, 32
; CHECK-NEXT: srw 4, 4, 5
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32-LABEL: fshl_i64:
; CHECK32: # %bb.0:
; CHECK32-NEXT: andi. 7, 8, 32
; CHECK32-NEXT: mr 7, 5
; CHECK32-NEXT: bne 0, .LBB1_2
; CHECK32-NEXT: # %bb.1:
; CHECK32-NEXT: mr 7, 4
; CHECK32-NEXT: .LBB1_2:
; CHECK32-NEXT: clrlwi 8, 8, 27
; CHECK32-NEXT: subfic 9, 8, 32
; CHECK32-NEXT: srw 10, 7, 9
; CHECK32-NEXT: bne 0, .LBB1_4
; CHECK32-NEXT: # %bb.3:
; CHECK32-NEXT: mr 4, 3
; CHECK32-NEXT: .LBB1_4:
; CHECK32-NEXT: slw 3, 4, 8
; CHECK32-NEXT: or 3, 3, 10
; CHECK32-NEXT: bne 0, .LBB1_6
; CHECK32-NEXT: # %bb.5:
; CHECK32-NEXT: mr 6, 5
; CHECK32-NEXT: .LBB1_6:
; CHECK32-NEXT: srw 4, 6, 9
; CHECK32-NEXT: slw 5, 7, 8
; CHECK32-NEXT: or 4, 5, 4
; CHECK32-NEXT: blr
;
; CHECK64-LABEL: fshl_i64:
; CHECK64: # %bb.0:
; CHECK64-NEXT: clrlwi 5, 5, 26
; CHECK64-NEXT: sld 3, 3, 5
; CHECK64-NEXT: subfic 5, 5, 64
; CHECK64-NEXT: srd 4, 4, 5
; CHECK64-NEXT: or 3, 3, 4
; CHECK64-NEXT: blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK32_32-LABEL: fshl_i128:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: stwu 1, -32(1)
; CHECK32_32-NEXT: lwz 12, 52(1)
; CHECK32_32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: andi. 11, 12, 64
; CHECK32_32-NEXT: mcrf 1, 0
; CHECK32_32-NEXT: mr 11, 6
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: bne 0, .LBB2_2
; CHECK32_32-NEXT: # %bb.1:
; CHECK32_32-NEXT: mr 11, 4
; CHECK32_32-NEXT: .LBB2_2:
; CHECK32_32-NEXT: mr 30, 7
; CHECK32_32-NEXT: bne 1, .LBB2_4
; CHECK32_32-NEXT: # %bb.3:
; CHECK32_32-NEXT: mr 30, 5
; CHECK32_32-NEXT: .LBB2_4:
; CHECK32_32-NEXT: andi. 4, 12, 32
; CHECK32_32-NEXT: mr 4, 30
; CHECK32_32-NEXT: beq 0, .LBB2_18
; CHECK32_32-NEXT: # %bb.5:
; CHECK32_32-NEXT: beq 1, .LBB2_19
; CHECK32_32-NEXT: .LBB2_6:
; CHECK32_32-NEXT: beq 0, .LBB2_20
; CHECK32_32-NEXT: .LBB2_7:
; CHECK32_32-NEXT: mr 5, 8
; CHECK32_32-NEXT: beq 1, .LBB2_21
; CHECK32_32-NEXT: .LBB2_8:
; CHECK32_32-NEXT: mr 3, 5
; CHECK32_32-NEXT: beq 0, .LBB2_22
; CHECK32_32-NEXT: .LBB2_9:
; CHECK32_32-NEXT: clrlwi 6, 12, 27
; CHECK32_32-NEXT: bne 1, .LBB2_11
; CHECK32_32-NEXT: .LBB2_10:
; CHECK32_32-NEXT: mr 9, 7
; CHECK32_32-NEXT: .LBB2_11:
; CHECK32_32-NEXT: subfic 7, 6, 32
; CHECK32_32-NEXT: mr 12, 9
; CHECK32_32-NEXT: bne 0, .LBB2_13
; CHECK32_32-NEXT: # %bb.12:
; CHECK32_32-NEXT: mr 12, 5
; CHECK32_32-NEXT: .LBB2_13:
; CHECK32_32-NEXT: srw 5, 4, 7
; CHECK32_32-NEXT: slw 11, 11, 6
; CHECK32_32-NEXT: srw 0, 3, 7
; CHECK32_32-NEXT: slw 4, 4, 6
; CHECK32_32-NEXT: srw 30, 12, 7
; CHECK32_32-NEXT: slw 29, 3, 6
; CHECK32_32-NEXT: bne 1, .LBB2_15
; CHECK32_32-NEXT: # %bb.14:
; CHECK32_32-NEXT: mr 10, 8
; CHECK32_32-NEXT: .LBB2_15:
; CHECK32_32-NEXT: or 3, 11, 5
; CHECK32_32-NEXT: or 4, 4, 0
; CHECK32_32-NEXT: or 5, 29, 30
; CHECK32_32-NEXT: bne 0, .LBB2_17
; CHECK32_32-NEXT: # %bb.16:
; CHECK32_32-NEXT: mr 10, 9
; CHECK32_32-NEXT: .LBB2_17:
; CHECK32_32-NEXT: srw 7, 10, 7
; CHECK32_32-NEXT: slw 6, 12, 6
; CHECK32_32-NEXT: or 6, 6, 7
; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: addi 1, 1, 32
; CHECK32_32-NEXT: blr
; CHECK32_32-NEXT: .LBB2_18:
; CHECK32_32-NEXT: mr 4, 11
; CHECK32_32-NEXT: bne 1, .LBB2_6
; CHECK32_32-NEXT: .LBB2_19:
; CHECK32_32-NEXT: mr 5, 3
; CHECK32_32-NEXT: bne 0, .LBB2_7
; CHECK32_32-NEXT: .LBB2_20:
; CHECK32_32-NEXT: mr 11, 5
; CHECK32_32-NEXT: mr 5, 8
; CHECK32_32-NEXT: bne 1, .LBB2_8
; CHECK32_32-NEXT: .LBB2_21:
; CHECK32_32-NEXT: mr 5, 6
; CHECK32_32-NEXT: mr 3, 5
; CHECK32_32-NEXT: bne 0, .LBB2_9
; CHECK32_32-NEXT: .LBB2_22:
; CHECK32_32-NEXT: mr 3, 30
; CHECK32_32-NEXT: clrlwi 6, 12, 27
; CHECK32_32-NEXT: beq 1, .LBB2_10
; CHECK32_32-NEXT: b .LBB2_11
;
; CHECK32_64-LABEL: fshl_i128:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: stwu 1, -32(1)
; CHECK32_64-NEXT: lwz 12, 52(1)
; CHECK32_64-NEXT: andi. 11, 12, 64
; CHECK32_64-NEXT: stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mcrf 1, 0
; CHECK32_64-NEXT: mr 11, 6
; CHECK32_64-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: bne 0, .LBB2_2
; CHECK32_64-NEXT: # %bb.1:
; CHECK32_64-NEXT: mr 11, 4
; CHECK32_64-NEXT: .LBB2_2:
; CHECK32_64-NEXT: mr 30, 7
; CHECK32_64-NEXT: bne 1, .LBB2_4
; CHECK32_64-NEXT: # %bb.3:
; CHECK32_64-NEXT: mr 30, 5
; CHECK32_64-NEXT: .LBB2_4:
; CHECK32_64-NEXT: andi. 4, 12, 32
; CHECK32_64-NEXT: mr 4, 30
; CHECK32_64-NEXT: beq 0, .LBB2_18
; CHECK32_64-NEXT: # %bb.5:
; CHECK32_64-NEXT: beq 1, .LBB2_19
; CHECK32_64-NEXT: .LBB2_6:
; CHECK32_64-NEXT: beq 0, .LBB2_20
; CHECK32_64-NEXT: .LBB2_7:
; CHECK32_64-NEXT: mr 5, 8
; CHECK32_64-NEXT: beq 1, .LBB2_21
; CHECK32_64-NEXT: .LBB2_8:
; CHECK32_64-NEXT: mr 3, 5
; CHECK32_64-NEXT: beq 0, .LBB2_22
; CHECK32_64-NEXT: .LBB2_9:
; CHECK32_64-NEXT: clrlwi 6, 12, 27
; CHECK32_64-NEXT: bne 1, .LBB2_11
; CHECK32_64-NEXT: .LBB2_10:
; CHECK32_64-NEXT: mr 9, 7
; CHECK32_64-NEXT: .LBB2_11:
; CHECK32_64-NEXT: subfic 7, 6, 32
; CHECK32_64-NEXT: mr 12, 9
; CHECK32_64-NEXT: bne 0, .LBB2_13
; CHECK32_64-NEXT: # %bb.12:
; CHECK32_64-NEXT: mr 12, 5
; CHECK32_64-NEXT: .LBB2_13:
; CHECK32_64-NEXT: srw 5, 4, 7
; CHECK32_64-NEXT: slw 11, 11, 6
; CHECK32_64-NEXT: srw 0, 3, 7
; CHECK32_64-NEXT: slw 4, 4, 6
; CHECK32_64-NEXT: srw 30, 12, 7
; CHECK32_64-NEXT: slw 29, 3, 6
; CHECK32_64-NEXT: bne 1, .LBB2_15
; CHECK32_64-NEXT: # %bb.14:
; CHECK32_64-NEXT: mr 10, 8
; CHECK32_64-NEXT: .LBB2_15:
; CHECK32_64-NEXT: or 3, 11, 5
; CHECK32_64-NEXT: or 4, 4, 0
; CHECK32_64-NEXT: or 5, 29, 30
; CHECK32_64-NEXT: bne 0, .LBB2_17
; CHECK32_64-NEXT: # %bb.16:
; CHECK32_64-NEXT: mr 10, 9
; CHECK32_64-NEXT: .LBB2_17:
; CHECK32_64-NEXT: srw 7, 10, 7
; CHECK32_64-NEXT: slw 6, 12, 6
; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: or 6, 6, 7
; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: addi 1, 1, 32
; CHECK32_64-NEXT: blr
; CHECK32_64-NEXT: .LBB2_18:
; CHECK32_64-NEXT: mr 4, 11
; CHECK32_64-NEXT: bne 1, .LBB2_6
; CHECK32_64-NEXT: .LBB2_19:
; CHECK32_64-NEXT: mr 5, 3
; CHECK32_64-NEXT: bne 0, .LBB2_7
; CHECK32_64-NEXT: .LBB2_20:
; CHECK32_64-NEXT: mr 11, 5
; CHECK32_64-NEXT: mr 5, 8
; CHECK32_64-NEXT: bne 1, .LBB2_8
; CHECK32_64-NEXT: .LBB2_21:
; CHECK32_64-NEXT: mr 5, 6
; CHECK32_64-NEXT: mr 3, 5
; CHECK32_64-NEXT: bne 0, .LBB2_9
; CHECK32_64-NEXT: .LBB2_22:
; CHECK32_64-NEXT: mr 3, 30
; CHECK32_64-NEXT: clrlwi 6, 12, 27
; CHECK32_64-NEXT: beq 1, .LBB2_10
; CHECK32_64-NEXT: b .LBB2_11
;
; CHECK64-LABEL: fshl_i128:
; CHECK64: # %bb.0:
; CHECK64-NEXT: andi. 8, 7, 64
; CHECK64-NEXT: clrlwi 7, 7, 26
; CHECK64-NEXT: subfic 8, 7, 64
; CHECK64-NEXT: iseleq 5, 6, 5
; CHECK64-NEXT: iseleq 6, 3, 6
; CHECK64-NEXT: iseleq 3, 4, 3
; CHECK64-NEXT: srd 5, 5, 8
; CHECK64-NEXT: sld 9, 6, 7
; CHECK64-NEXT: srd 6, 6, 8
; CHECK64-NEXT: sld 3, 3, 7
; CHECK64-NEXT: or 5, 9, 5
; CHECK64-NEXT: or 4, 3, 6
; CHECK64-NEXT: mr 3, 5
; CHECK64-NEXT: blr
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshl_i37:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: mflr 0
; CHECK32_32-NEXT: stwu 1, -32(1)
; CHECK32_32-NEXT: stw 0, 36(1)
; CHECK32_32-NEXT: .cfi_def_cfa_offset 32
; CHECK32_32-NEXT: .cfi_offset lr, 4
; CHECK32_32-NEXT: .cfi_offset r27, -20
; CHECK32_32-NEXT: .cfi_offset r28, -16
; CHECK32_32-NEXT: .cfi_offset r29, -12
; CHECK32_32-NEXT: .cfi_offset r30, -8
; CHECK32_32-NEXT: stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 27, 5
; CHECK32_32-NEXT: stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 28, 3
; CHECK32_32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 29, 4
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 30, 6
; CHECK32_32-NEXT: clrlwi 3, 7, 27
; CHECK32_32-NEXT: mr 4, 8
; CHECK32_32-NEXT: li 5, 0
; CHECK32_32-NEXT: li 6, 37
; CHECK32_32-NEXT: bl __umoddi3
; CHECK32_32-NEXT: rotlwi 5, 30, 27
; CHECK32_32-NEXT: rlwimi 5, 27, 27, 0, 4
; CHECK32_32-NEXT: andi. 3, 4, 32
; CHECK32_32-NEXT: mr 6, 5
; CHECK32_32-NEXT: bne 0, .LBB3_2
; CHECK32_32-NEXT: # %bb.1:
; CHECK32_32-NEXT: mr 6, 29
; CHECK32_32-NEXT: .LBB3_2:
; CHECK32_32-NEXT: clrlwi 4, 4, 27
; CHECK32_32-NEXT: subfic 7, 4, 32
; CHECK32_32-NEXT: srw 3, 6, 7
; CHECK32_32-NEXT: bne 0, .LBB3_4
; CHECK32_32-NEXT: # %bb.3:
; CHECK32_32-NEXT: mr 29, 28
; CHECK32_32-NEXT: .LBB3_4:
; CHECK32_32-NEXT: slw 8, 29, 4
; CHECK32_32-NEXT: or 3, 8, 3
; CHECK32_32-NEXT: beq 0, .LBB3_6
; CHECK32_32-NEXT: # %bb.5:
; CHECK32_32-NEXT: slwi 5, 30, 27
; CHECK32_32-NEXT: .LBB3_6:
; CHECK32_32-NEXT: srw 5, 5, 7
; CHECK32_32-NEXT: slw 4, 6, 4
; CHECK32_32-NEXT: or 4, 4, 5
; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 0, 36(1)
; CHECK32_32-NEXT: addi 1, 1, 32
; CHECK32_32-NEXT: mtlr 0
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: fshl_i37:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: mflr 0
; CHECK32_64-NEXT: stwu 1, -32(1)
; CHECK32_64-NEXT: stw 0, 36(1)
; CHECK32_64-NEXT: .cfi_def_cfa_offset 32
; CHECK32_64-NEXT: .cfi_offset lr, 4
; CHECK32_64-NEXT: .cfi_offset r27, -20
; CHECK32_64-NEXT: .cfi_offset r28, -16
; CHECK32_64-NEXT: .cfi_offset r29, -12
; CHECK32_64-NEXT: .cfi_offset r30, -8
; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 27, 5
; CHECK32_64-NEXT: li 5, 0
; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 28, 3
; CHECK32_64-NEXT: clrlwi 3, 7, 27
; CHECK32_64-NEXT: stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 29, 4
; CHECK32_64-NEXT: mr 4, 8
; CHECK32_64-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 30, 6
; CHECK32_64-NEXT: li 6, 37
; CHECK32_64-NEXT: bl __umoddi3
; CHECK32_64-NEXT: rotlwi 5, 30, 27
; CHECK32_64-NEXT: andi. 3, 4, 32
; CHECK32_64-NEXT: rlwimi 5, 27, 27, 0, 4
; CHECK32_64-NEXT: mr 6, 5
; CHECK32_64-NEXT: bne 0, .LBB3_2
; CHECK32_64-NEXT: # %bb.1:
; CHECK32_64-NEXT: mr 6, 29
; CHECK32_64-NEXT: .LBB3_2:
; CHECK32_64-NEXT: clrlwi 4, 4, 27
; CHECK32_64-NEXT: subfic 7, 4, 32
; CHECK32_64-NEXT: srw 3, 6, 7
; CHECK32_64-NEXT: bne 0, .LBB3_4
; CHECK32_64-NEXT: # %bb.3:
; CHECK32_64-NEXT: mr 29, 28
; CHECK32_64-NEXT: .LBB3_4:
; CHECK32_64-NEXT: slw 8, 29, 4
; CHECK32_64-NEXT: or 3, 8, 3
; CHECK32_64-NEXT: beq 0, .LBB3_6
; CHECK32_64-NEXT: # %bb.5:
; CHECK32_64-NEXT: slwi 5, 30, 27
; CHECK32_64-NEXT: .LBB3_6:
; CHECK32_64-NEXT: srw 5, 5, 7
; CHECK32_64-NEXT: slw 4, 6, 4
; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: or 4, 4, 5
; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: lwz 0, 36(1)
; CHECK32_64-NEXT: addi 1, 1, 32
; CHECK32_64-NEXT: mtlr 0
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: fshl_i37:
; CHECK64: # %bb.0:
; CHECK64-NEXT: lis 7, 1771
; CHECK64-NEXT: clrldi 6, 5, 27
; CHECK64-NEXT: sldi 4, 4, 27
; CHECK64-NEXT: ori 7, 7, 15941
; CHECK64-NEXT: rldic 7, 7, 32, 5
; CHECK64-NEXT: oris 7, 7, 12398
; CHECK64-NEXT: ori 7, 7, 46053
; CHECK64-NEXT: mulhdu 6, 6, 7
; CHECK64-NEXT: mulli 6, 6, 37
; CHECK64-NEXT: sub 5, 5, 6
; CHECK64-NEXT: clrlwi 5, 5, 26
; CHECK64-NEXT: sld 3, 3, 5
; CHECK64-NEXT: subfic 5, 5, 64
; CHECK64-NEXT: srd 4, 4, 5
; CHECK64-NEXT: or 3, 3, 4
; CHECK64-NEXT: blr
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK: # %bb.0:
; CHECK-NEXT: li 3, 67
; CHECK-NEXT: blr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK: # %bb.0:
; CHECK-NEXT: rotlwi 4, 4, 9
; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK: # %bb.0:
; CHECK-NEXT: rotlwi 4, 4, 9
; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshl_i64_const_overshift:
; CHECK32: # %bb.0:
; CHECK32-NEXT: rotlwi 6, 6, 9
; CHECK32-NEXT: rotlwi 3, 5, 9
; CHECK32-NEXT: rlwimi 6, 5, 9, 0, 22
; CHECK32-NEXT: rlwimi 3, 4, 9, 0, 22
; CHECK32-NEXT: mr 4, 6
; CHECK32-NEXT: blr
;
; CHECK64-LABEL: fshl_i64_const_overshift:
; CHECK64: # %bb.0:
; CHECK64-NEXT: rotldi 4, 4, 41
; CHECK64-NEXT: rldimi 4, 3, 41, 0
; CHECK64-NEXT: mr 3, 4
; CHECK64-NEXT: blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK: # %bb.0:
; CHECK-NEXT: li 3, 128
; CHECK-NEXT: blr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: clrlwi 5, 5, 27
; CHECK-NEXT: srw 4, 4, 5
; CHECK-NEXT: subfic 5, 5, 32
; CHECK-NEXT: slw 3, 3, 5
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32-LABEL: fshr_i64:
; CHECK32: # %bb.0:
; CHECK32-NEXT: andi. 7, 8, 32
; CHECK32-NEXT: mr 7, 5
; CHECK32-NEXT: beq 0, .LBB10_2
; CHECK32-NEXT: # %bb.1:
; CHECK32-NEXT: mr 7, 4
; CHECK32-NEXT: .LBB10_2:
; CHECK32-NEXT: clrlwi 8, 8, 27
; CHECK32-NEXT: srw 10, 7, 8
; CHECK32-NEXT: beq 0, .LBB10_4
; CHECK32-NEXT: # %bb.3:
; CHECK32-NEXT: mr 4, 3
; CHECK32-NEXT: .LBB10_4:
; CHECK32-NEXT: subfic 9, 8, 32
; CHECK32-NEXT: slw 3, 4, 9
; CHECK32-NEXT: or 3, 3, 10
; CHECK32-NEXT: beq 0, .LBB10_6
; CHECK32-NEXT: # %bb.5:
; CHECK32-NEXT: mr 6, 5
; CHECK32-NEXT: .LBB10_6:
; CHECK32-NEXT: srw 4, 6, 8
; CHECK32-NEXT: slw 5, 7, 9
; CHECK32-NEXT: or 4, 5, 4
; CHECK32-NEXT: blr
;
; CHECK64-LABEL: fshr_i64:
; CHECK64: # %bb.0:
; CHECK64-NEXT: clrlwi 5, 5, 26
; CHECK64-NEXT: srd 4, 4, 5
; CHECK64-NEXT: subfic 5, 5, 64
; CHECK64-NEXT: sld 3, 3, 5
; CHECK64-NEXT: or 3, 3, 4
; CHECK64-NEXT: blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshr_i37:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: mflr 0
; CHECK32_32-NEXT: stwu 1, -32(1)
; CHECK32_32-NEXT: stw 0, 36(1)
; CHECK32_32-NEXT: .cfi_def_cfa_offset 32
; CHECK32_32-NEXT: .cfi_offset lr, 4
; CHECK32_32-NEXT: .cfi_offset r27, -20
; CHECK32_32-NEXT: .cfi_offset r28, -16
; CHECK32_32-NEXT: .cfi_offset r29, -12
; CHECK32_32-NEXT: .cfi_offset r30, -8
; CHECK32_32-NEXT: stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 27, 5
; CHECK32_32-NEXT: stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 28, 3
; CHECK32_32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 29, 4
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 30, 6
; CHECK32_32-NEXT: clrlwi 3, 7, 27
; CHECK32_32-NEXT: mr 4, 8
; CHECK32_32-NEXT: li 5, 0
; CHECK32_32-NEXT: li 6, 37
; CHECK32_32-NEXT: bl __umoddi3
; CHECK32_32-NEXT: rotlwi 5, 30, 27
; CHECK32_32-NEXT: addi 3, 4, 27
; CHECK32_32-NEXT: andi. 4, 3, 32
; CHECK32_32-NEXT: rlwimi 5, 27, 27, 0, 4
; CHECK32_32-NEXT: mr 4, 5
; CHECK32_32-NEXT: beq 0, .LBB11_2
; CHECK32_32-NEXT: # %bb.1:
; CHECK32_32-NEXT: mr 4, 29
; CHECK32_32-NEXT: .LBB11_2:
; CHECK32_32-NEXT: clrlwi 6, 3, 27
; CHECK32_32-NEXT: srw 3, 4, 6
; CHECK32_32-NEXT: beq 0, .LBB11_4
; CHECK32_32-NEXT: # %bb.3:
; CHECK32_32-NEXT: mr 29, 28
; CHECK32_32-NEXT: .LBB11_4:
; CHECK32_32-NEXT: subfic 7, 6, 32
; CHECK32_32-NEXT: slw 8, 29, 7
; CHECK32_32-NEXT: or 3, 8, 3
; CHECK32_32-NEXT: bne 0, .LBB11_6
; CHECK32_32-NEXT: # %bb.5:
; CHECK32_32-NEXT: slwi 5, 30, 27
; CHECK32_32-NEXT: .LBB11_6:
; CHECK32_32-NEXT: srw 5, 5, 6
; CHECK32_32-NEXT: slw 4, 4, 7
; CHECK32_32-NEXT: or 4, 4, 5
; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 0, 36(1)
; CHECK32_32-NEXT: addi 1, 1, 32
; CHECK32_32-NEXT: mtlr 0
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: fshr_i37:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: mflr 0
; CHECK32_64-NEXT: stwu 1, -32(1)
; CHECK32_64-NEXT: stw 0, 36(1)
; CHECK32_64-NEXT: .cfi_def_cfa_offset 32
; CHECK32_64-NEXT: .cfi_offset lr, 4
; CHECK32_64-NEXT: .cfi_offset r27, -20
; CHECK32_64-NEXT: .cfi_offset r28, -16
; CHECK32_64-NEXT: .cfi_offset r29, -12
; CHECK32_64-NEXT: .cfi_offset r30, -8
; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 27, 5
; CHECK32_64-NEXT: li 5, 0
; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 28, 3
; CHECK32_64-NEXT: clrlwi 3, 7, 27
; CHECK32_64-NEXT: stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 29, 4
; CHECK32_64-NEXT: mr 4, 8
; CHECK32_64-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 30, 6
; CHECK32_64-NEXT: li 6, 37
; CHECK32_64-NEXT: bl __umoddi3
; CHECK32_64-NEXT: rotlwi 5, 30, 27
; CHECK32_64-NEXT: addi 3, 4, 27
; CHECK32_64-NEXT: andi. 4, 3, 32
; CHECK32_64-NEXT: rlwimi 5, 27, 27, 0, 4
; CHECK32_64-NEXT: mr 4, 5
; CHECK32_64-NEXT: beq 0, .LBB11_2
; CHECK32_64-NEXT: # %bb.1:
; CHECK32_64-NEXT: mr 4, 29
; CHECK32_64-NEXT: .LBB11_2:
; CHECK32_64-NEXT: clrlwi 6, 3, 27
; CHECK32_64-NEXT: srw 3, 4, 6
; CHECK32_64-NEXT: beq 0, .LBB11_4
; CHECK32_64-NEXT: # %bb.3:
; CHECK32_64-NEXT: mr 29, 28
; CHECK32_64-NEXT: .LBB11_4:
; CHECK32_64-NEXT: subfic 7, 6, 32
; CHECK32_64-NEXT: slw 8, 29, 7
; CHECK32_64-NEXT: or 3, 8, 3
; CHECK32_64-NEXT: bne 0, .LBB11_6
; CHECK32_64-NEXT: # %bb.5:
; CHECK32_64-NEXT: slwi 5, 30, 27
; CHECK32_64-NEXT: .LBB11_6:
; CHECK32_64-NEXT: srw 5, 5, 6
; CHECK32_64-NEXT: slw 4, 4, 7
; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: or 4, 4, 5
; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT: lwz 0, 36(1)
; CHECK32_64-NEXT: addi 1, 1, 32
; CHECK32_64-NEXT: mtlr 0
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: fshr_i37:
; CHECK64: # %bb.0:
; CHECK64-NEXT: lis 7, 1771
; CHECK64-NEXT: clrldi 6, 5, 27
; CHECK64-NEXT: sldi 4, 4, 27
; CHECK64-NEXT: ori 7, 7, 15941
; CHECK64-NEXT: rldic 7, 7, 32, 5
; CHECK64-NEXT: oris 7, 7, 12398
; CHECK64-NEXT: ori 7, 7, 46053
; CHECK64-NEXT: mulhdu 6, 6, 7
; CHECK64-NEXT: mulli 6, 6, 37
; CHECK64-NEXT: sub 5, 5, 6
; CHECK64-NEXT: addi 5, 5, 27
; CHECK64-NEXT: clrlwi 5, 5, 26
; CHECK64-NEXT: srd 4, 4, 5
; CHECK64-NEXT: subfic 5, 5, 64
; CHECK64-NEXT: sld 3, 3, 5
; CHECK64-NEXT: or 3, 3, 4
; CHECK64-NEXT: blr
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK: # %bb.0:
; CHECK-NEXT: li 3, 31
; CHECK-NEXT: blr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK: # %bb.0:
; CHECK-NEXT: rotlwi 4, 4, 23
; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK: # %bb.0:
; CHECK-NEXT: rotlwi 4, 4, 23
; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshr_i64_const_overshift:
; CHECK32: # %bb.0:
; CHECK32-NEXT: rotlwi 6, 4, 23
; CHECK32-NEXT: rotlwi 5, 5, 23
; CHECK32-NEXT: rlwimi 6, 3, 23, 0, 8
; CHECK32-NEXT: rlwimi 5, 4, 23, 0, 8
; CHECK32-NEXT: mr 3, 6
; CHECK32-NEXT: mr 4, 5
; CHECK32-NEXT: blr
;
; CHECK64-LABEL: fshr_i64_const_overshift:
; CHECK64: # %bb.0:
; CHECK64-NEXT: rotldi 4, 4, 23
; CHECK64-NEXT: rldimi 4, 3, 23, 0
; CHECK64-NEXT: mr 3, 4
; CHECK64-NEXT: blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK: # %bb.0:
; CHECK-NEXT: li 3, 254
; CHECK-NEXT: blr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK32_32-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: mr 6, 10
; CHECK32_32-NEXT: mr 5, 9
; CHECK32_32-NEXT: mr 4, 8
; CHECK32_32-NEXT: mr 3, 7
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: vmr 2, 3
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK64: # %bb.0:
; CHECK64-NEXT: vmr 2, 3
; CHECK64-NEXT: blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}