; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

; Funnel-shift intrinsics under test; with matching first two operands they
; are rotates, and these tests pin the expected PowerPC rotate lowering.

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.

; i8 rotate-left by constant 3: no sub-word rotate instruction exists, so the
; lowering uses a word rotate (rotlwi) plus a rotate-and-insert (rlwimi).
define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 27
; CHECK-NEXT:    rlwimi 4, 3, 3, 0, 28
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

; i64 rotate-left by constant 3: 32-bit targets synthesize it from per-half
; word rotates plus cross-half inserts; 64-bit targets use a single rotldi.
define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK32-LABEL: rotl_i64_const_shift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 5, 4, 3
; CHECK32-NEXT:    rotlwi 6, 3, 3
; CHECK32-NEXT:    rlwimi 5, 3, 3, 0, 28
; CHECK32-NEXT:    rlwimi 6, 4, 3, 0, 28
; CHECK32-NEXT:    mr 3, 5
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 3, 3, 3
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

; i16 rotate-left by variable amount: expanded as (x << (z & 15)) | (zext16(x)
; >> (-z & 15)) using slw/srw, since there is no sub-word rotate instruction.
define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotl_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrlwi 6, 4, 28
; CHECK-NEXT:    neg 4, 4
; CHECK-NEXT:    clrlwi 5, 3, 16
; CHECK-NEXT:    clrlwi 4, 4, 28
; CHECK-NEXT:    slw 3, 3, 6
; CHECK-NEXT:    srw 4, 5, 4
; CHECK-NEXT:    or 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

; i32 rotate-left by variable amount maps directly to a single rotlw.
define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; i64 rotate-left by variable amount: 32-bit targets branch on bit 32 of the
; amount to pick the half ordering, then combine shifted halves; 64-bit
; targets use a single rotld.
define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK32-LABEL: rotl_i64:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    andi. 5, 6, 32
; CHECK32-NEXT:    mr 5, 3
; CHECK32-NEXT:    bne 0, .LBB4_2
; CHECK32-NEXT:  # %bb.1:
; CHECK32-NEXT:    mr 5, 4
; CHECK32-NEXT:  .LBB4_2:
; CHECK32-NEXT:    clrlwi 6, 6, 27
; CHECK32-NEXT:    subfic 8, 6, 32
; CHECK32-NEXT:    srw 7, 5, 8
; CHECK32-NEXT:    bne 0, .LBB4_4
; CHECK32-NEXT:  # %bb.3:
; CHECK32-NEXT:    mr 4, 3
; CHECK32-NEXT:  .LBB4_4:
; CHECK32-NEXT:    slw 3, 4, 6
; CHECK32-NEXT:    srw 4, 4, 8
; CHECK32-NEXT:    slw 5, 5, 6
; CHECK32-NEXT:    or 3, 3, 7
; CHECK32-NEXT:    or 4, 5, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

; v4i32 rotate-left by variable amounts: without Altivec (CHECK32_32) each
; lane lives in a GPR and gets its own rotlw; with Altivec (CHECK32_64,
; CHECK64) a single vrlw handles all lanes.
define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotl_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

; Splat-of-3 amount: scalarized rotlwi per lane without Altivec; otherwise the
; amount is materialized with vspltisw and fed to vrlw.
define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotl_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 3
; CHECK32_32-NEXT:    rotlwi 4, 4, 3
; CHECK32_32-NEXT:    rotlwi 5, 5, 3
; CHECK32_32-NEXT:    rotlwi 6, 6, 3
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

; i8 rotate-right by constant 3 == rotate-left by 5; same rotlwi + rlwimi
; expansion as the rotl case, with complemented amounts.
define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 29
; CHECK-NEXT:    rlwimi 4, 3, 5, 0, 26
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

; i32 rotate-right by constant 3 folds to a single rotate-left by 29.
define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 3, 3, 29
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

; i16 rotate-right by variable amount: mirror of rotl_i16 with srw/slw roles
; swapped (shift right by z, shift left by -z).
define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotr_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrlwi 6, 4, 28
; CHECK-NEXT:    neg 4, 4
; CHECK-NEXT:    clrlwi 5, 3, 16
; CHECK-NEXT:    clrlwi 4, 4, 28
; CHECK-NEXT:    srw 5, 5, 6
; CHECK-NEXT:    slw 3, 3, 4
; CHECK-NEXT:    or 3, 5, 3
; CHECK-NEXT:    blr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

; i32 rotate-right by variable amount: negate the amount and use rotlw.
define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    neg 4, 4
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; i64 rotate-right by variable amount: 32-bit targets branch on bit 32 of the
; amount (inverted sense vs. rotl_i64) and combine shifted halves; 64-bit
; targets negate the amount and use rotld.
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK32-LABEL: rotr_i64:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    andi. 5, 6, 32
; CHECK32-NEXT:    mr 5, 3
; CHECK32-NEXT:    beq 0, .LBB11_2
; CHECK32-NEXT:  # %bb.1:
; CHECK32-NEXT:    mr 5, 4
; CHECK32-NEXT:  .LBB11_2:
; CHECK32-NEXT:    clrlwi 7, 6, 27
; CHECK32-NEXT:    srw 6, 5, 7
; CHECK32-NEXT:    beq 0, .LBB11_4
; CHECK32-NEXT:  # %bb.3:
; CHECK32-NEXT:    mr 4, 3
; CHECK32-NEXT:  .LBB11_4:
; CHECK32-NEXT:    subfic 3, 7, 32
; CHECK32-NEXT:    srw 7, 4, 7
; CHECK32-NEXT:    slw 4, 4, 3
; CHECK32-NEXT:    slw 5, 5, 3
; CHECK32-NEXT:    or 3, 4, 6
; CHECK32-NEXT:    or 4, 5, 7
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 4, 4
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

; v4i32 rotate-right by variable amounts: per-lane neg + rotlw without
; Altivec; with Altivec the amounts are negated via vsubuwm from zero
; (zero made by vxor or xxlxor) and fed to vrlw.
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotr_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    neg 7, 7
; CHECK32_32-NEXT:    neg 8, 8
; CHECK32_32-NEXT:    neg 9, 9
; CHECK32_32-NEXT:    neg 10, 10
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vxor 4, 4, 4
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    xxlxor 36, 36, 36
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

; v4i32 rotate-right by splat 3 == rotate-left by 29: per-lane rotlwi without
; Altivec; with Altivec the amount 29 cannot be a single vspltisw immediate
; (range is -16..15), so it is built as 13 - (-16) with two splats + vsubuwm.
define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotr_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 29
; CHECK32_32-NEXT:    rotlwi 4, 4, 29
; CHECK32_32-NEXT:    rotlwi 5, 5, 29
; CHECK32_32-NEXT:    rotlwi 6, 6, 29
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, -16
; CHECK32_64-NEXT:    vspltisw 4, 13
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, -16
; CHECK64-NEXT:    vspltisw 4, 13
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Rotate by an amount equal to the bit width folds to a no-op: the only
; emitted instruction in each case below is the return (blr).

define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}