; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
; RUN: opt < %s -passes=instcombine -use-constant-int-for-fixed-length-splat -S | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"

; Canonicalize rotate by constant to funnel shift intrinsics.
; This should help cost modeling for vectorization, inlining, etc.
; If a target does not have a rotate instruction, the expansion will
; be exactly these same 3 basic ops (shl/lshr/or).

define i32 @rotl_i32_constant(i32 %x) {
; CHECK-LABEL: @rotl_i32_constant(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 11)
; CHECK-NEXT:    ret i32 [[R]]
;
  %shl = shl i32 %x, 11
  %shr = lshr i32 %x, 21
  %r = or i32 %shr, %shl
  ret i32 %r
}

define i42 @rotr_i42_constant(i42 %x) {
; CHECK-LABEL: @rotr_i42_constant(
; CHECK-NEXT:    [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[X:%.*]], i42 [[X]], i42 31)
; CHECK-NEXT:    ret i42 [[R]]
;
  %shl = shl i42 %x, 31
  %shr = lshr i42 %x, 11
  %r = or i42 %shr, %shl
  ret i42 %r
}

define i8 @rotr_i8_constant_commute(i8 %x) {
; CHECK-LABEL: @rotr_i8_constant_commute(
; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 5)
; CHECK-NEXT:    ret i8 [[R]]
;
  %shl = shl i8 %x, 5
  %shr = lshr i8 %x, 3
  %r = or i8 %shl, %shr
  ret i8 %r
}

define i88 @rotl_i88_constant_commute(i88 %x) {
; CHECK-LABEL: @rotl_i88_constant_commute(
; CHECK-NEXT:    [[R:%.*]] = call i88 @llvm.fshl.i88(i88 [[X:%.*]], i88 [[X]], i88 44)
; CHECK-NEXT:    ret i88 [[R]]
;
  %shl = shl i88 %x, 44
  %shr = lshr i88 %x, 44
  %r = or i88 %shl, %shr
  ret i88 %r
}

; Vector types are allowed.

define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
; CHECK-LABEL: @rotl_v2i16_constant_splat(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
; CHECK-NEXT:    ret <2 x i16> [[R]]
;
  %shl = shl <2 x i16> %x, <i16 1, i16 1>
  %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  %r = or <2 x i16> %shl, %shr
  ret <2 x i16> %r
}

define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) {
; CHECK-LABEL: @rotl_v2i16_constant_splat_poison0(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
; CHECK-NEXT:    ret <2 x i16> [[R]]
;
  %shl = shl <2 x i16> %x, <i16 poison, i16 1>
  %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  %r = or <2 x i16> %shl, %shr
  ret <2 x i16> %r
}

define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) {
; CHECK-LABEL: @rotl_v2i16_constant_splat_poison1(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
; CHECK-NEXT:    ret <2 x i16> [[R]]
;
  %shl = shl <2 x i16> %x, <i16 1, i16 1>
  %shr = lshr <2 x i16> %x, <i16 15, i16 poison>
  %r = or <2 x i16> %shl, %shr
  ret <2 x i16> %r
}

; Non-power-of-2 vector types are allowed.
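; For example, in the <2 x i17> tests below, the left-shift amount of 12
; pairs with a right-shift amount of 5 because 12 + 5 == 17.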

define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
; CHECK-LABEL: @rotr_v2i17_constant_splat(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
; CHECK-NEXT:    ret <2 x i17> [[R]]
;
  %shl = shl <2 x i17> %x, <i17 12, i17 12>
  %shr = lshr <2 x i17> %x, <i17 5, i17 5>
  %r = or <2 x i17> %shr, %shl
  ret <2 x i17> %r
}

define <2 x i17> @rotr_v2i17_constant_splat_poison0(<2 x i17> %x) {
; CHECK-LABEL: @rotr_v2i17_constant_splat_poison0(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
; CHECK-NEXT:    ret <2 x i17> [[R]]
;
  %shl = shl <2 x i17> %x, <i17 12, i17 poison>
  %shr = lshr <2 x i17> %x, <i17 poison, i17 5>
  %r = or <2 x i17> %shr, %shl
  ret <2 x i17> %r
}

define <2 x i17> @rotr_v2i17_constant_splat_poison1(<2 x i17> %x) {
; CHECK-LABEL: @rotr_v2i17_constant_splat_poison1(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
; CHECK-NEXT:    ret <2 x i17> [[R]]
;
  %shl = shl <2 x i17> %x, <i17 12, i17 poison>
  %shr = lshr <2 x i17> %x, <i17 5, i17 poison>
  %r = or <2 x i17> %shr, %shl
  ret <2 x i17> %r
}

; Allow arbitrary shift constants.
; Support poison elements.

define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 19>)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %shl = shl <2 x i32> %x, <i32 17, i32 19>
  %shr = lshr <2 x i32> %x, <i32 15, i32 13>
  %r = or <2 x i32> %shl, %shr
  ret <2 x i32> %r
}

define <2 x i32> @rotr_v2i32_constant_nonsplat_poison0(<2 x i32> %x) {
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_poison0(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 poison, i32 19>)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %shl = shl <2 x i32> %x, <i32 poison, i32 19>
  %shr = lshr <2 x i32> %x, <i32 15, i32 13>
  %r = or <2 x i32> %shl, %shr
  ret <2 x i32> %r
}

define <2 x i32> @rotr_v2i32_constant_nonsplat_poison1(<2 x i32> %x) {
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_poison1(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 0>)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %shl = shl <2 x i32> %x, <i32 17, i32 19>
  %shr = lshr <2 x i32> %x, <i32 15, i32 poison>
  %r = or <2 x i32> %shl, %shr
  ret <2 x i32> %r
}

define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) {
; CHECK-LABEL: @rotl_v2i36_constant_nonsplat(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i36> @llvm.fshl.v2i36(<2 x i36> [[X:%.*]], <2 x i36> [[X]], <2 x i36> <i36 21, i36 11>)
; CHECK-NEXT:    ret <2 x i36> [[R]]
;
  %shl = shl <2 x i36> %x, <i36 21, i36 11>
  %shr = lshr <2 x i36> %x, <i36 15, i36 25>
  %r = or <2 x i36> %shl, %shr
  ret <2 x i36> %r
}

define <3 x i36> @rotl_v3i36_constant_nonsplat_poison0(<3 x i36> %x) {
; CHECK-LABEL: @rotl_v3i36_constant_nonsplat_poison0(
; CHECK-NEXT:    [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 poison>)
; CHECK-NEXT:    ret <3 x i36> [[R]]
;
  %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 poison>
  %shr = lshr <3 x i36> %x, <i36 15, i36 25, i36 poison>
  %r = or <3 x i36> %shl, %shr
  ret <3 x i36> %r
}
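
; In all of the constant tests, the left and right shift amounts sum to the
; element bitwidth (e.g. 17 + 15 == 19 + 13 == 32 above, treating poison
; elements as matching anything), which is what makes the shl/lshr/or triple
; a rotate.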

; The most basic rotate by variable - no guards for UB due to oversized shifts.
; This cannot be canonicalized to funnel shift target-independently. The safe
; expansion includes masking for the shift amount that is not included here,
; so it could be more expensive.

define i32 @rotl_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @rotl_i32(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %y
  %shr = lshr i32 %x, %sub
  %r = or i32 %shr, %shl
  ret i32 %r
}

; Non-power-of-2 types should follow the same reasoning. Left/right is determined by the subtract.

define i37 @rotr_i37(i37 %x, i37 %y) {
; CHECK-LABEL: @rotr_i37(
; CHECK-NEXT:    [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i37 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i37 [[R]]
;
  %sub = sub i37 37, %y
  %shl = shl i37 %x, %sub
  %shr = lshr i37 %x, %y
  %r = or i37 %shr, %shl
  ret i37 %r
}

; Commute 'or' operands.

define i8 @rotr_i8_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @rotr_i8_commute(
; CHECK-NEXT:    [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[SHL]], [[SHR]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %sub = sub i8 8, %y
  %shl = shl i8 %x, %sub
  %shr = lshr i8 %x, %y
  %r = or i8 %shl, %shr
  ret i8 %r
}

; Vector types should follow the same rules.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @rotl_v4i32(
; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint <4 x i32> [[SHL]], [[SHR]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
  %shl = shl <4 x i32> %x, %y
  %shr = lshr <4 x i32> %x, %sub
  %r = or <4 x i32> %shl, %shr
  ret <4 x i32> %r
}

; Non-power-of-2 vector types should follow the same rules.

define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
; CHECK-LABEL: @rotr_v3i42(
; CHECK-NEXT:    [[SUB:%.*]] = sub <3 x i42> splat (i42 42), [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint <3 x i42> [[SHR]], [[SHL]]
; CHECK-NEXT:    ret <3 x i42> [[R]]
;
  %sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
  %shl = shl <3 x i42> %x, %sub
  %shr = lshr <3 x i42> %x, %y
  %r = or <3 x i42> %shr, %shl
  ret <3 x i42> %r
}

; This is the canonical pattern for a UB-safe rotate-by-variable with power-of-2-size scalar type.
; The backend expansion of funnel shift for targets that don't have a rotate instruction should
; match the original IR, so it is always good to canonicalize to the intrinsics for this pattern.
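; In C, the UB-safe form is roughly:
;   unsigned rotl32(unsigned x, unsigned y) { return (x << (y & 31)) | (x >> (-y & 31)); }
; Both shift amounts are masked into range, and (-y & 31) is the complementary amount.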

define i32 @rotl_safe_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @rotl_safe_i32(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %negy = sub i32 0, %y
  %ymask = and i32 %y, 31
  %negymask = and i32 %negy, 31
  %shl = shl i32 %x, %ymask
  %shr = lshr i32 %x, %negymask
  %r = or i32 %shr, %shl
  ret i32 %r
}

; Extra uses don't change anything.

define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, ptr %p) {
; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
; CHECK-NEXT:    [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
; CHECK-NEXT:    store i16 [[NEGYMASK]], ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
; CHECK-NEXT:    ret i16 [[R]]
;
  %negy = sub i16 0, %y
  %ymask = and i16 %y, 15
  %negymask = and i16 %negy, 15
  store i16 %negymask, ptr %p
  %shl = shl i16 %x, %ymask
  %shr = lshr i16 %x, %negymask
  %r = or i16 %shl, %shr
  ret i16 %r
}

; Left/right is determined by the negation.

define i64 @rotr_safe_i64(i64 %x, i64 %y) {
; CHECK-LABEL: @rotr_safe_i64(
; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
; CHECK-NEXT:    ret i64 [[R]]
;
  %negy = sub i64 0, %y
  %ymask = and i64 %y, 63
  %negymask = and i64 %negy, 63
  %shl = shl i64 %x, %negymask
  %shr = lshr i64 %x, %ymask
  %r = or i64 %shr, %shl
  ret i64 %r
}

; Extra uses don't change anything.

define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, ptr %p) {
; CHECK-LABEL: @rotr_safe_i8_commute_extra_use(
; CHECK-NEXT:    [[NEGY:%.*]] = sub i8 0, [[Y:%.*]]
; CHECK-NEXT:    [[YMASK:%.*]] = and i8 [[Y]], 7
; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i8 [[NEGY]], 7
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[NEGYMASK]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[YMASK]]
; CHECK-NEXT:    store i8 [[SHR]], ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %negy = sub i8 0, %y
  %ymask = and i8 %y, 7
  %negymask = and i8 %negy, 7
  %shl = shl i8 %x, %negymask
  %shr = lshr i8 %x, %ymask
  store i8 %shr, ptr %p
  %r = or i8 %shl, %shr
  ret i8 %r
}

; Vectors follow the same rules.

define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @rotl_safe_v2i32(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %negy = sub <2 x i32> zeroinitializer, %y
  %ymask = and <2 x i32> %y, <i32 31, i32 31>
  %negymask = and <2 x i32> %negy, <i32 31, i32 31>
  %shl = shl <2 x i32> %x, %ymask
  %shr = lshr <2 x i32> %x, %negymask
  %r = or <2 x i32> %shr, %shl
  ret <2 x i32> %r
}

; Vectors follow the same rules.

define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
; CHECK-LABEL: @rotr_safe_v3i16(
; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
; CHECK-NEXT:    ret <3 x i16> [[R]]
;
  %negy = sub <3 x i16> zeroinitializer, %y
  %ymask = and <3 x i16> %y, <i16 15, i16 15, i16 15>
  %negymask = and <3 x i16> %negy, <i16 15, i16 15, i16 15>
  %shl = shl <3 x i16> %x, %negymask
  %shr = lshr <3 x i16> %x, %ymask
  %r = or <3 x i16> %shr, %shl
  ret <3 x i16> %r
}

; These are optionally UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
; See PR34046, PR16726, and PR39624 for motivating examples:
; https://bugs.llvm.org/show_bug.cgi?id=34046
; https://bugs.llvm.org/show_bug.cgi?id=16726
; https://bugs.llvm.org/show_bug.cgi?id=39624
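
; A narrowed rotate typically starts out in C as something like:
;   unsigned short rot16(unsigned short v, unsigned s) {
;     return (unsigned short)(((unsigned)v << (s & 15)) | ((unsigned)v >> (16 - (s & 15))));
;   }
; where the value and the shift amount were promoted to 32 bits before the shifts.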

define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
; CHECK-LABEL: @rotate_left_16bit(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i16
; CHECK-NEXT:    [[CONV2:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[CONV2]]
;
  %and = and i32 %shift, 15
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %and
  %sub = sub i32 16, %and
  %shr = lshr i32 %conv, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i16
  ret i16 %conv2
}

; Commute the 'or' operands and try a vector type.

define <2 x i16> @rotate_left_commute_16bit_vec(<2 x i16> %v, <2 x i32> %shift) {
; CHECK-LABEL: @rotate_left_commute_16bit_vec(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[SHIFT:%.*]] to <2 x i16>
; CHECK-NEXT:    [[CONV2:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[V:%.*]], <2 x i16> [[V]], <2 x i16> [[TMP1]])
; CHECK-NEXT:    ret <2 x i16> [[CONV2]]
;
  %and = and <2 x i32> %shift, <i32 15, i32 15>
  %conv = zext <2 x i16> %v to <2 x i32>
  %shl = shl <2 x i32> %conv, %and
  %sub = sub <2 x i32> <i32 16, i32 16>, %and
  %shr = lshr <2 x i32> %conv, %sub
  %or = or <2 x i32> %shl, %shr
  %conv2 = trunc <2 x i32> %or to <2 x i16>
  ret <2 x i16> %conv2
}

; Change the size, rotation direction (the subtract is on the left-shift), and mask op.

define i8 @rotate_right_8bit(i8 %v, i3 %shift) {
; CHECK-LABEL: @rotate_right_8bit(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i3 [[SHIFT:%.*]] to i8
; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = zext i3 %shift to i32
  %conv = zext i8 %v to i32
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %conv, %sub
  %or = or i32 %shl, %shr
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}
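
; Note that the i3 shift amount above can never exceed 7, so it needs no mask
; at all; it is simply zero-extended for the funnel-shift call.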

; The right-shifted value does not need to be a zexted value; here it is masked.
; The shift mask could be less than the bitwidth, but this is still ok.

define i8 @rotate_right_commute_8bit_unmasked_shl(i32 %v, i32 %shift) {
; CHECK-LABEL: @rotate_right_commute_8bit_unmasked_shl(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = and i32 %shift, 3
  %conv = and i32 %v, 255
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %conv, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}

; The left-shifted value does not need to be masked at all.

define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
; CHECK-LABEL: @rotate_right_commute_8bit(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = and i32 %shift, 3
  %conv = and i32 %v, 255
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %v, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}

; If the original source does not mask the shift amount,
; we still do the transform by adding masks to make it safe.

define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotate8_not_safe(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
; CHECK-NEXT:    ret i8 [[RET]]
;
  %conv = zext i8 %v to i32
  %sub = sub i32 8, %shamt
  %shr = lshr i32 %conv, %sub
  %shl = shl i32 %conv, %shamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

; A non-power-of-2 destination type can't be masked as above.

define i9 @rotate9_not_safe(i9 %v, i32 %shamt) {
; CHECK-LABEL: @rotate9_not_safe(
; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i32
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 9, [[SHAMT:%.*]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[SUB]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[SHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i9
; CHECK-NEXT:    ret i9 [[RET]]
;
  %conv = zext i9 %v to i32
  %sub = sub i32 9, %shamt
  %shr = lshr i32 %conv, %sub
  %shl = shl i32 %conv, %shamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i9
  ret i9 %ret
}
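
; Truncating the unmasked amount works for i8 because trunc reduces the value
; modulo a power of 2 and the intrinsic applies its own modulo-8; there is no
; analogous modulo-9 reduction for the i9 case above.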

; We should narrow (v << (s & 15)) | (v >> (-s & 15))
; when both v and s have been promoted.

define i16 @rotateleft_16_neg_mask(i16 %v, i16 %shamt) {
; CHECK-LABEL: @rotateleft_16_neg_mask(
; CHECK-NEXT:    [[OR:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i16 [[OR]]
;
  %neg = sub i16 0, %shamt
  %lshamt = and i16 %shamt, 15
  %lshamtconv = zext i16 %lshamt to i32
  %rshamt = and i16 %neg, 15
  %rshamtconv = zext i16 %rshamt to i32
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %lshamtconv
  %shr = lshr i32 %conv, %rshamtconv
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i16 @rotateleft_16_neg_mask_commute(i16 %v, i16 %shamt) {
; CHECK-LABEL: @rotateleft_16_neg_mask_commute(
; CHECK-NEXT:    [[OR:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i16 [[OR]]
;
  %neg = sub i16 0, %shamt
  %lshamt = and i16 %shamt, 15
  %lshamtconv = zext i16 %lshamt to i32
  %rshamt = and i16 %neg, 15
  %rshamtconv = zext i16 %rshamt to i32
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %lshamtconv
  %shr = lshr i32 %conv, %rshamtconv
  %or = or i32 %shl, %shr
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i8 @rotateright_8_neg_mask(i8 %v, i8 %shamt) {
; CHECK-LABEL: @rotateright_8_neg_mask(
; CHECK-NEXT:    [[OR:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i8 [[OR]]
;
  %neg = sub i8 0, %shamt
  %rshamt = and i8 %shamt, 7
  %rshamtconv = zext i8 %rshamt to i32
  %lshamt = and i8 %neg, 7
  %lshamtconv = zext i8 %lshamt to i32
  %conv = zext i8 %v to i32
  %shl = shl i32 %conv, %lshamtconv
  %shr = lshr i32 %conv, %rshamtconv
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

define i8 @rotateright_8_neg_mask_commute(i8 %v, i8 %shamt) {
; CHECK-LABEL: @rotateright_8_neg_mask_commute(
; CHECK-NEXT:    [[OR:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i8 [[OR]]
;
  %neg = sub i8 0, %shamt
  %rshamt = and i8 %shamt, 7
  %rshamtconv = zext i8 %rshamt to i32
  %lshamt = and i8 %neg, 7
  %lshamtconv = zext i8 %lshamt to i32
  %conv = zext i8 %v to i32
  %shl = shl i32 %conv, %lshamtconv
  %shr = lshr i32 %conv, %rshamtconv
  %or = or i32 %shl, %shr
  %ret = trunc i32 %or to i8
  ret i8 %ret
}
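
; These all rely on the identity (-s) & (bw - 1) == (bw - (s & (bw - 1))) & (bw - 1),
; so the masked negation is the complementary shift amount in the opposite direction.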

; The shift amount may already be in the wide type,
; so we need to truncate it going into the rotate pattern.

define i16 @rotateright_16_neg_mask_wide_amount(i16 %v, i32 %shamt) {
; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[RET]]
;
  %neg = sub i32 0, %shamt
  %rshamt = and i32 %shamt, 15
  %lshamt = and i32 %neg, 15
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %lshamt
  %shr = lshr i32 %conv, %rshamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i16 @rotateright_16_neg_mask_wide_amount_commute(i16 %v, i32 %shamt) {
; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount_commute(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[RET]]
;
  %neg = sub i32 0, %shamt
  %rshamt = and i32 %shamt, 15
  %lshamt = and i32 %neg, 15
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %lshamt
  %shr = lshr i32 %conv, %rshamt
  %or = or i32 %shl, %shr
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i64 @rotateright_64_zext_neg_mask_amount(i64 %0, i32 %1) {
; CHECK-LABEL: @rotateright_64_zext_neg_mask_amount(
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP1:%.*]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP3]])
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %3 = and i32 %1, 63
  %4 = zext i32 %3 to i64
  %5 = lshr i64 %0, %4
  %6 = sub nsw i32 0, %1
  %7 = and i32 %6, 63
  %8 = zext i32 %7 to i64
  %9 = shl i64 %0, %8
  %10 = or i64 %5, %9
  ret i64 %10
}

define i8 @rotateleft_8_neg_mask_wide_amount(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
; CHECK-NEXT:    ret i8 [[RET]]
;
  %neg = sub i32 0, %shamt
  %lshamt = and i32 %shamt, 7
  %rshamt = and i32 %neg, 7
  %conv = zext i8 %v to i32
  %shl = shl i32 %conv, %lshamt
  %shr = lshr i32 %conv, %rshamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

define i8 @rotateleft_8_neg_mask_wide_amount_commute(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount_commute(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
; CHECK-NEXT:    ret i8 [[RET]]
;
  %neg = sub i32 0, %shamt
  %lshamt = and i32 %shamt, 7
  %rshamt = and i32 %neg, 7
  %conv = zext i8 %v to i32
  %shl = shl i32 %conv, %lshamt
  %shr = lshr i32 %conv, %rshamt
  %or = or i32 %shl, %shr
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

define i64 @rotateleft_64_zext_neg_mask_amount(i64 %0, i32 %1) {
; CHECK-LABEL: @rotateleft_64_zext_neg_mask_amount(
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP1:%.*]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP3]])
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %3 = and i32 %1, 63
  %4 = zext i32 %3 to i64
  %5 = shl i64 %0, %4
  %6 = sub nsw i32 0, %1
  %7 = and i32 %6, 63
  %8 = zext i32 %7 to i64
  %9 = lshr i64 %0, %8
  %10 = or i64 %5, %9
  ret i64 %10
}
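
; The 64-bit zext cases correspond roughly to C such as:
;   unsigned long long rotr64(unsigned long long x, unsigned n) {
;     return (x >> (n & 63)) | (x << (-n & 63));
;   }
; where the masked 32-bit amount is widened to 64 bits before each shift.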

; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.

define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
; CHECK-LABEL: @rotateleft_9_neg_mask_wide_amount_commute(
; CHECK-NEXT:    [[NEG:%.*]] = sub i33 0, [[SHAMT:%.*]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i33 [[SHAMT]], 8
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i33 [[NEG]], 8
; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i33
; CHECK-NEXT:    [[SHL:%.*]] = shl nuw nsw i33 [[CONV]], [[LSHAMT]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i33 [[CONV]], [[RSHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or i33 [[SHL]], [[SHR]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i33 [[OR]] to i9
; CHECK-NEXT:    ret i9 [[RET]]
;
  %neg = sub i33 0, %shamt
  %lshamt = and i33 %shamt, 8
  %rshamt = and i33 %neg, 8
  %conv = zext i9 %v to i33
  %shl = shl i33 %conv, %lshamt
  %shr = lshr i33 %conv, %rshamt
  %or = or i33 %shl, %shr
  %ret = trunc i33 %or to i9
  ret i9 %ret
}

; Fold or(shl(v,x),lshr(v,bw-x)) iff x < bw

define i64 @rotl_sub_mask(i64 %0, i64 %1) {
; CHECK-LABEL: @rotl_sub_mask(
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %3 = and i64 %1, 63
  %4 = shl i64 %0, %3
  %5 = sub nuw nsw i64 64, %3
  %6 = lshr i64 %0, %5
  %7 = or i64 %6, %4
  ret i64 %7
}

; Fold or(lshr(v,x),shl(v,bw-x)) iff x < bw

define i64 @rotr_sub_mask(i64 %0, i64 %1) {
; CHECK-LABEL: @rotr_sub_mask(
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %3 = and i64 %1, 63
  %4 = lshr i64 %0, %3
  %5 = sub nuw nsw i64 64, %3
  %6 = shl i64 %0, %5
  %7 = or i64 %6, %4
  ret i64 %7
}

define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: @rotr_sub_mask_vector(
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %3 = and <2 x i64> %1, <i64 63, i64 63>
  %4 = lshr <2 x i64> %0, %3
  %5 = sub nuw nsw <2 x i64> <i64 64, i64 64>, %3
  %6 = shl <2 x i64> %0, %5
  %7 = or <2 x i64> %6, %4
  ret <2 x i64> %7
}

; Convert select pattern to masked shift that ends in 'or'.

define i32 @rotr_select(i32 %x, i32 %shamt) {
; CHECK-LABEL: @rotr_select(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %cmp = icmp eq i32 %shamt, 0
  %sub = sub i32 32, %shamt
  %shr = lshr i32 %x, %shamt
  %shl = shl i32 %x, %sub
  %or = or i32 %shr, %shl
  %r = select i1 %cmp, i32 %x, i32 %or
  ret i32 %r
}

; Convert select pattern to masked shift that ends in 'or'.

define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
; CHECK-LABEL: @rotr_select_commute(
; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i8 [[R]]
;
  %cmp = icmp eq i8 %shamt, 0
  %sub = sub i8 8, %shamt
  %shr = lshr i8 %x, %shamt
  %shl = shl i8 %x, %sub
  %or = or i8 %shl, %shr
  %r = select i1 %cmp, i8 %x, i8 %or
  ret i8 %r
}
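
; The select-based tests correspond to a guarded C rotate such as:
;   unsigned rotr32(unsigned x, unsigned s) { return s == 0 ? x : (x >> s) | (x << (32 - s)); }
; where the compare guards the out-of-range shift when s == 0.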

; Convert select pattern to masked shift that ends in 'or'.

define i16 @rotl_select(i16 %x, i16 %shamt) {
; CHECK-LABEL: @rotl_select(
; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i16 [[R]]
;
  %cmp = icmp eq i16 %shamt, 0
  %sub = sub i16 16, %shamt
  %shr = lshr i16 %x, %sub
  %shl = shl i16 %x, %shamt
  %or = or i16 %shr, %shl
  %r = select i1 %cmp, i16 %x, i16 %or
  ret i16 %r
}

; Convert select pattern to masked shift that ends in 'or'.

define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
; CHECK-LABEL: @rotl_select_commute(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[R]]
;
  %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
  %sub = sub <2 x i64> <i64 64, i64 64>, %shamt
  %shr = lshr <2 x i64> %x, %sub
  %shl = shl <2 x i64> %x, %shamt
  %or = or <2 x i64> %shl, %shr
  %r = select <2 x i1> %cmp, <2 x i64> %x, <2 x i64> %or
  ret <2 x i64> %r
}

; Negative test - the transform is only valid with power-of-2 types.

define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) {
; CHECK-LABEL: @rotl_select_weird_type(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i24 [[SHAMT:%.*]], 0
; CHECK-NEXT:    [[SUB:%.*]] = sub i24 24, [[SHAMT]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or disjoint i24 [[SHL]], [[SHR]]
; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
; CHECK-NEXT:    ret i24 [[R]]
;
  %cmp = icmp eq i24 %shamt, 0
  %sub = sub i24 24, %shamt
  %shr = lshr i24 %x, %sub
  %shl = shl i24 %x, %shamt
  %or = or i24 %shl, %shr
  %r = select i1 %cmp, i24 %x, i24 %or
  ret i24 %r
}

define i32 @rotl_select_zext_shamt(i32 %x, i8 %y) {
; CHECK-LABEL: @rotl_select_zext_shamt(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[TMP1]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %rem = and i8 %y, 31
  %cmp = icmp eq i8 %rem, 0
  %sh_prom = zext i8 %rem to i32
  %sub = sub nuw nsw i8 32, %rem
  %sh_prom1 = zext i8 %sub to i32
  %shr = lshr i32 %x, %sh_prom1
  %shl = shl i32 %x, %sh_prom
  %or = or i32 %shl, %shr
  %r = select i1 %cmp, i32 %x, i32 %or
  ret i32 %r
}

define i64 @rotr_select_zext_shamt(i64 %x, i32 %y) {
; CHECK-LABEL: @rotr_select_zext_shamt(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[Y:%.*]] to i64
; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[TMP1]])
; CHECK-NEXT:    ret i64 [[R]]
;
  %rem = and i32 %y, 63
  %cmp = icmp eq i32 %rem, 0
  %sh_prom = zext i32 %rem to i64
  %shr = lshr i64 %x, %sh_prom
  %sub = sub nuw nsw i32 64, %rem
  %sh_prom1 = zext i32 %sub to i64
  %shl = shl i64 %x, %sh_prom1
  %or = or i64 %shl, %shr
  %r = select i1 %cmp, i64 %x, i64 %or
  ret i64 %r
}
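
; The zext variants above presumably come from C where the shift amount has a
; narrower type, e.g.:
;   unsigned rotl32(unsigned x, unsigned char y) {
;     unsigned char r = y & 31;
;     return r ? (x << r) | (x >> (32 - r)) : x;
;   }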

; Test that the transform doesn't crash when there's an "or" with a ConstantExpr operand.

@external_global = external global i8

define i32 @rotl_constant_expr(i32 %shamt) {
; CHECK-LABEL: @rotl_constant_expr(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 ptrtoint (ptr @external_global to i32), [[SHAMT:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 ptrtoint (ptr @external_global to i32), 11
; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %shr = lshr i32 ptrtoint (ptr @external_global to i32), %shamt
  %shl = shl i32 ptrtoint (ptr @external_global to i32), 11
  %r = or i32 %shr, %shl
  ret i32 %r
}

; PR20750 - https://bugs.llvm.org/show_bug.cgi?id=20750
; This IR corresponds to C source where the shift amount is a smaller type than the rotated value:
; unsigned int rotate32_doubleand1(unsigned int v, unsigned char r) { r = r & 31; return (v << r) | (v >> (((32 - r)) & 31)); }

define i32 @rotateleft32_doubleand1(i32 %v, i8 %r) {
; CHECK-LABEL: @rotateleft32_doubleand1(
; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[R:%.*]] to i32
; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
; CHECK-NEXT:    ret i32 [[OR]]
;
  %m = and i8 %r, 31
  %z = zext i8 %m to i32
  %neg = sub nsw i32 0, %z
  %and2 = and i32 %neg, 31
  %shl = shl i32 %v, %z
  %shr = lshr i32 %v, %and2
  %or = or i32 %shr, %shl
  ret i32 %or
}

define i32 @rotateright32_doubleand1(i32 %v, i16 %r) {
; CHECK-LABEL: @rotateright32_doubleand1(
; CHECK-NEXT:    [[Z:%.*]] = zext i16 [[R:%.*]] to i32
; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
; CHECK-NEXT:    ret i32 [[OR]]
;
  %m = and i16 %r, 31
  %z = zext i16 %m to i32
  %neg = sub nsw i32 0, %z
  %and2 = and i32 %neg, 31
  %shl = shl i32 %v, %and2
  %shr = lshr i32 %v, %z
  %or = or i32 %shr, %shl
  ret i32 %or
}
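
; By analogy with the C source above, the right-rotate version is roughly:
; unsigned int rotateright32_doubleand1(unsigned int v, unsigned short r) { r = r & 31; return (v >> r) | (v << (-r & 31)); }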

; TODO: This should be a rotate (funnel-shift).

define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %shamt) {
; CHECK-LABEL: @unmasked_shlop_unmasked_shift_amount(
; CHECK-NEXT:    [[MASKX:%.*]] = and i32 [[X:%.*]], 255
; CHECK-NEXT:    [[T4:%.*]] = sub i32 8, [[SHAMT:%.*]]
; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[X]], [[T4]]
; CHECK-NEXT:    [[T6:%.*]] = lshr i32 [[MASKX]], [[SHAMT]]
; CHECK-NEXT:    [[T7:%.*]] = or i32 [[T5]], [[T6]]
; CHECK-NEXT:    [[T8:%.*]] = trunc i32 [[T7]] to i8
; CHECK-NEXT:    ret i8 [[T8]]
;
  %maskx = and i32 %x, 255
  %t4 = sub i32 8, %shamt
  %t5 = shl i32 %x, %t4
  %t6 = lshr i32 %maskx, %shamt
  %t7 = or i32 %t5, %t6
  %t8 = trunc i32 %t7 to i8
  ret i8 %t8
}

define i16 @check_rotate_masked_16bit(i8 %shamt, i32 %cond) {
; CHECK-LABEL: @check_rotate_masked_16bit(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[SHAMT:%.*]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[COND:%.*]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 1
; CHECK-NEXT:    [[TRUNC:%.*]] = call i16 @llvm.fshr.i16(i16 [[TMP3]], i16 [[TMP3]], i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[TRUNC]]
;
  %maskx = and i32 %cond, 1
  %masky = and i8 %shamt, 15
  %z = zext i8 %masky to i32
  %shr = lshr i32 %maskx, %z
  %sub = sub i8 0, %shamt
  %maskw = and i8 %sub, 15
  %z2 = zext i8 %maskw to i32
  %shl = shl nuw nsw i32 %maskx, %z2
  %or = or i32 %shr, %shl
  %trunc = trunc i32 %or to i16
  ret i16 %trunc
}

define i32 @rotl_i32_add(i32 %x, i32 %y) {
; CHECK-LABEL: @rotl_i32_add(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %y
  %shr = lshr i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}

define i32 @rotr_i32_add(i32 %x, i32 %y) {
; CHECK-LABEL: @rotr_i32_add(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = shl i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %y
  %shl = lshr i32 %x, %y
  %shr = shl i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}

define i32 @fshr_i32_add(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @fshr_i32_add(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Z:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Z]]
; CHECK-NEXT:    [[SHR:%.*]] = shl i32 [[Y:%.*]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %z
  %shl = lshr i32 %x, %z
  %shr = shl i32 %y, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}

define i32 @fshl_i32_add(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @fshl_i32_add(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Z:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[Y:%.*]], [[Z]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %z
  %shl = shl i32 %y, %z
  %shr = lshr i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}
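
; The 'add' patterns above and below fold to 'or disjoint' because the shifted
; values share no set bits: the shl zeroes the low bits while the lshr by a
; complementary (or larger) amount produces only low bits. If the two shift
; amounts sum to less than the bitwidth (see @not_rotl_i32_add_less), the bit
; ranges overlap and the fold does not apply.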

define i32 @rotl_i32_add_greater(i32 %x, i32 %y) {
; CHECK-LABEL: @rotl_i32_add_greater(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 33, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 33, %y
  %shl = shl i32 %x, %y
  %shr = lshr i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}

define i32 @rotr_i32_add_greater(i32 %x, i32 %y) {
; CHECK-LABEL: @rotr_i32_add_greater(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 34, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = shl i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 34, %y
  %shl = lshr i32 %x, %y
  %shr = shl i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}

define i32 @not_rotl_i32_add_less(i32 %x, i32 %y) {
; CHECK-LABEL: @not_rotl_i32_add_less(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 31, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = add i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 31, %y
  %shl = shl i32 %x, %y
  %shr = lshr i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}