; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; Try to eliminate binops and shuffles when the shuffle is a select in disguise,
; i.e. every result lane is taken from the same lane of one of the two inputs:
; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806

; Lanes 1 and 3 come straight from %v, so the expected constant holds the add
; identity (0) in those lanes.
define <4 x i32> @add(<4 x i32> %v) {
; CHECK-LABEL: @add(
; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Wrapping flags are propagated to the folded binop when possible.

define <4 x i32> @add_nuw_nsw(<4 x i32> %v) {
; CHECK-LABEL: @add_nuw_nsw(
; CHECK-NEXT:    [[S:%.*]] = add nuw nsw <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; An undef mask lane shows up as an undef lane in the folded constant.
define <4 x i32> @add_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @add_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 undef, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %s
}

; Poison flags must be dropped or undef must be replaced with safe constant.
define <4 x i32> @add_nuw_nsw_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @add_nuw_nsw_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 undef, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

; Constant operand 0 (LHS) could work for some non-commutative binops?
; For now the sub and the shuffle are both expected to remain; only the unused
; lanes of the constant become poison.

define <4 x i32> @sub(<4 x i32> %v) {
; CHECK-LABEL: @sub(
; CHECK-NEXT:    [[B:%.*]] = sub <4 x i32> <i32 poison, i32 poison, i32 poison, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
; Preserve flags when possible. It's not safe to propagate poison-generating flags with undef constants.
define <4 x i32> @mul(<4 x i32> %v) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:    [[S:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 12, i32 1, i32 14>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = mul nsw nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Shifts: lanes taken from %v use a shift amount of 0.

define <4 x i32> @shl(<4 x i32> %v) {
; CHECK-LABEL: @shl(
; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @shl_nsw(<4 x i32> %v) {
; CHECK-LABEL: @shl_nsw(
; CHECK-NEXT:    [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; For shifts, an undef mask lane is expected to become shift amount 0 rather
; than undef.

define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @shl_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @shl_nuw_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
; CHECK-LABEL: @lshr_constant_op0(
; CHECK-NEXT:    [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
; CHECK-LABEL: @lshr_exact_constant_op0(
; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; lshr counterpart of @shl_undef_mask_elt. This test previously used shl, which
; made it an exact duplicate of that test and left plain lshr with an undef
; mask lane uncovered.

define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @lshr_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @lshr_exact_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %s
}

; The constant is the value being shifted here, so the binop is not folded;
; the shuffle is only expected to be commuted so that lane 0 selects from its
; first operand.

define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
; CHECK-LABEL: @lshr_constant_op1(
; CHECK-NEXT:    [[B:%.*]] = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Try weird types.

define <3 x i32> @ashr(<3 x i32> %v) {
; CHECK-LABEL: @ashr(
; CHECK-NEXT:    [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13>
; CHECK-NEXT:    ret <3 x i32> [[S]]
;
  %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13>
  %s = shufflevector <3 x i32> %b, <3 x i32> %v, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %s
}

; The 'and' identity is all-ones (-1).
define <3 x i42> @and(<3 x i42> %v) {
; CHECK-LABEL: @and(
; CHECK-NEXT:    [[S:%.*]] = and <3 x i42> [[V:%.*]], <i42 -1, i42 12, i42 undef>
; CHECK-NEXT:    ret <3 x i42> [[S]]
;
  %b = and <3 x i42> %v, <i42 11, i42 12, i42 13>
  %s = shufflevector <3 x i42> %v, <3 x i42> %b, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %s
}

; It doesn't matter if the intermediate op has extra uses.

declare void @use_v4i32(<4 x i32>)

define <4 x i32> @or(<4 x i32> %v) {
; CHECK-LABEL: @or(
; CHECK-NEXT:    [[B:%.*]] = or <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
; CHECK-NEXT:    [[S:%.*]] = or <4 x i32> [[V]], <i32 0, i32 0, i32 13, i32 14>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[B]])
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = or <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %b)
  ret <4 x i32> %s
}

define <4 x i32> @xor(<4 x i32> %v) {
; CHECK-LABEL: @xor(
; CHECK-NEXT:    [[S:%.*]] = xor <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 0, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = xor <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %s
}

; udiv with the constant as the dividend: no fold is expected.

define <4 x i32> @udiv(<4 x i32> %v) {
; CHECK-LABEL: @udiv(
; CHECK-NEXT:    [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_exact(<4 x i32> %v) {
; CHECK-LABEL: @udiv_exact(
; CHECK-NEXT:    [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @udiv_undef_mask_elt(
; CHECK-NEXT:    [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 poison, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @udiv_exact_undef_mask_elt(
; CHECK-NEXT:    [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 poison, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

; sdiv with a constant divisor: lanes taken from %v divide by 1.

define <4 x i32> @sdiv(<4 x i32> %v) {
; CHECK-LABEL: @sdiv(
; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

define <4 x i32> @sdiv_exact(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_exact(
; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

; Div/rem need special handling if the shuffle has undef elements.

define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @urem(<4 x i32> %v) {
; CHECK-LABEL: @urem(
; CHECK-NEXT:    [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @urem_undef_mask_elt(
; CHECK-NEXT:    [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @srem(<4 x i32> %v) {
; CHECK-LABEL: @srem(
; CHECK-NEXT:    [[B:%.*]] = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

; Try FP ops/types.
; Without any guarantee about NaN inputs, the fadd is expected to stay behind
; the shuffle; only the unused constant lanes become poison.
define <4 x float> @fadd_maybe_nan(<4 x float> %v) {
; CHECK-LABEL: @fadd_maybe_nan(
; CHECK-NEXT:    [[B:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float poison, float poison>
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %s
}

; With a nofpclass(nan) argument the fold happens; lanes taken from %v add -0.0.
define <4 x float> @fadd(<4 x float> nofpclass(nan) %v) {
; CHECK-LABEL: @fadd(
; CHECK-NEXT:    [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %s
}

; Constant on the left of fsub: no fold is expected.
define <4 x double> @fsub(<4 x double> %v) {
; CHECK-LABEL: @fsub(
; CHECK-NEXT:    [[B:%.*]] = fsub <4 x double> <double poison, double poison, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[S]]
;
  %b = fsub <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; Propagate any FMF.
define <4 x float> @fmul(<4 x float> nofpclass(nan) %v) {
; CHECK-LABEL: @fmul(
; CHECK-NEXT:    [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %b = fmul nnan ninf <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %s
}

; Constant numerator: the fdiv and the shuffle are both expected to remain.
define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
; CHECK-LABEL: @fdiv_constant_op0(
; CHECK-NEXT:    [[B:%.*]] = fdiv fast <4 x double> <double poison, double poison, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[S]]
;
  %b = fdiv fast <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; Constant denominator: the lane taken from %v divides by 1.0.
define <4 x double> @fdiv_constant_op1(<4 x double> nofpclass(nan) %v) {
; CHECK-LABEL: @fdiv_constant_op1(
; CHECK-NEXT:    [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
; CHECK-NEXT:    ret <4 x double> [[S]]
;
  %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0>
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

define <4 x double> @frem(<4 x double> %v) {
; CHECK-LABEL: @frem(
; CHECK-NEXT:    [[B:%.*]] = frem <4 x double> <double 4.100000e+01, double 4.200000e+01, double poison, double poison>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[S]]
;
  %b = frem <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %b, <4 x double> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; Tests where both operands of the shuffle are binops with the same opcode.
; The expected result is one binop whose constant is the shuffle of the two
; original constants.

define <4 x i32> @add_add(<4 x i32> %v0) {
; CHECK-LABEL: @add_add(
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @add_add_nsw(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_nsw(
; CHECK-NEXT:    [[T3:%.*]] = add nsw <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_nsw_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %t3
}

; Constant operand 0 (LHS) also works.

define <4 x i32> @sub_sub(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub(
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_sub_nuw(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_nuw(
; CHECK-NEXT:    [[T3:%.*]] = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; Here the nuw flag is expected to be dropped and the undef lane kept.

define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.

define <4 x i32> @mul_mul(<4 x i32> %v0) {
; CHECK-LABEL: @mul_mul(
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Preserve flags when possible.
define <4 x i32> @shl_shl(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl(
; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Both shifts carry nuw, so the merged shift keeps it.
define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_nuw(
; CHECK-NEXT:    [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef must be replaced by safe constant.
; The undef mask lanes are expected to become shift amount 0.

define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef must be replaced by safe constant.
define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Can't propagate the flag here.
; Only one of the two lshr ops is 'exact', so the merged lshr has no flag.

define <4 x i32> @lshr_lshr(<4 x i32> %v0) {
; CHECK-LABEL: @lshr_lshr(
; CHECK-NEXT:    [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Try weird types.

define <3 x i32> @ashr_ashr(<3 x i32> %v0) {
; CHECK-LABEL: @ashr_ashr(
; CHECK-NEXT:    [[T3:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 4, i32 2, i32 3>
; CHECK-NEXT:    ret <3 x i32> [[T3]]
;
  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
  %t2 = ashr <3 x i32> %v0, <i32 4, i32 5, i32 6>
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %t3
}

define <3 x i42> @and_and(<3 x i42> %v0) {
; CHECK-LABEL: @and_and(
; CHECK-NEXT:    [[T3:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 5, i42 undef>
; CHECK-NEXT:    ret <3 x i42> [[T3]]
;
  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
  %t2 = and <3 x i42> %v0, <i42 4, i42 5, i42 6>
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %t3
}

; It doesn't matter if the intermediate ops have extra uses.
; %t1 has another user, so it stays; %t2 is no longer needed after the fold.
define <4 x i32> @or_or(<4 x i32> %v0) {
; CHECK-LABEL: @or_or(
; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[T3:%.*]] = or <4 x i32> [[V0]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = or <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t1)
  ret <4 x i32> %t3
}

define <4 x i32> @xor_xor(<4 x i32> %v0) {
; CHECK-LABEL: @xor_xor(
; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT:    [[T3:%.*]] = xor <4 x i32> [[V0]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = xor <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

; Both inputs have extra users here; a third udiv is expected in place of the shuffle.
define <4 x i32> @udiv_udiv(<4 x i32> %v0) {
; CHECK-LABEL: @udiv_udiv(
; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
; CHECK-NEXT:    [[T3:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0]]
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  call void @use_v4i32(<4 x i32> %t1)
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

; Div/rem need special handling if the shuffle has undef elements.

define <4 x i32> @sdiv_sdiv(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv(
; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_sdiv_exact(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_exact(
; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

; Undef mask lanes are expected to become a divisor of 1 (lane 0 already divides by 1).
define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

define <4 x i32> @urem_urem(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem(
; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

; This is folded by using a safe constant.
; The undef lane of the dividend constant is expected to become 0.

define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 0>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

define <4 x i32> @srem_srem(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem(
; CHECK-NEXT:    [[T3:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 7, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

; This is folded by using a safe constant.
define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem_undef_mask_elt(
; CHECK-NEXT:    [[T3:%.*]] = srem <4 x i32> <i32 1, i32 0, i32 7, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Try FP ops/types.

define <4 x float> @fadd_fadd(<4 x float> %v0) {
; CHECK-LABEL: @fadd_fadd(
; CHECK-NEXT:    [[T3:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[T3]]
;
  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fadd <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %t3
}

; The undef mask lane becomes an undef lane of the merged constant.
define <4 x double> @fsub_fsub(<4 x double> %v0) {
; CHECK-LABEL: @fsub_fsub(
; CHECK-NEXT:    [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Intersect any FMF.
; Both fmuls carry 'nnan ninf' and the mask has no undef lanes; the expected
; fmul keeps 'nnan ninf'.
define <4 x float> @fmul_fmul(<4 x float> %v0) {
; CHECK-LABEL: @fmul_fmul(
; CHECK-NEXT: [[T3:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT: ret <4 x float> [[T3]]
;
  %t1 = fmul nnan ninf <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fmul nnan ninf <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %t3
}

; The inputs are 'fast' and 'nnan arcp', and mask lane 0 is undef. The expected
; fdiv carries only 'arcp' and has undef in lane 0 of its constant.
; NOTE(review): 'nnan' is common to both inputs yet absent from the expected
; output - presumably dropped because of the undef mask lane; confirm against
; the InstCombine select-shuffle fold.
define <4 x double> @fdiv_fdiv(<4 x double> %v0) {
; CHECK-LABEL: @fdiv_fdiv(
; CHECK-NEXT: [[T3:%.*]] = fdiv arcp <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = fdiv fast <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fdiv nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; The variable operand must be either the first operand or second operand in both binops.
; Negative test: %t1 has the constant as operand 0 while %t2 has it as
; operand 1. Both frems and the shuffle are expected to remain; only the
; constant lanes not selected by the mask become poison.
define <4 x double> @frem_frem(<4 x double> %v0) {
; CHECK-LABEL: @frem_frem(
; CHECK-NEXT: [[T1:%.*]] = frem <4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double poison>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = frem <4 x double> [[V0]], <double poison, double poison, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = frem <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = frem <4 x double> %v0, <double 5.0, double 6.0, double 7.0, double 8.0>
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; The two binops use different variables (%v0, %v1). The expected output
; shuffles the variables with the original mask and then performs a single add
; with the blended constant <1, 6, 3, 8>.
define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @add_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Constant operand 0 (LHS) also works.
; Two-variable sub with the constant as operand 0: the variables are shuffled
; first and one sub with the blended constant <1, 2, 3, 8> is expected.
define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both subs are 'nsw' and the mask has no undef lanes; the expected sub keeps
; 'nsw'.
define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Mask lane 0 is undef: the expected variable shuffle has poison in that lane
; and the expected constant has undef in that lane.
define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; Both subs are 'nsw' and mask lane 0 is undef: the expected sub has an undef
; constant lane and no 'nsw'.
define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; In these two-variable tests the shuffle is not eliminated: it is moved onto
; the variable operands, and a constant element in an undef mask lane can be
; replaced with undef.

; No undef mask lanes and no flags: shuffle of the variables followed by one
; mul with the blended constant <1, 6, 3, 8>.
define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both muls are 'nuw' and the mask has no undef lanes; the expected mul keeps
; 'nuw'.
define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Mask lane 1 is undef: poison in the expected variable shuffle, undef in the
; expected constant.
define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 poison, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; Here the expected mul has an undef constant lane and no 'nuw'.

define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 poison, i32 2, i32 7>
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Preserve flags when possible.
; Two-variable shl with constant shift amounts: shuffle of the variables, then
; one shl with the blended shift amounts <1, 6, 3, 4>.
define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Both shifts are 'nsw' and the mask has no undef lanes; the expected shl
; keeps 'nsw'.
define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef is replaced by safe constant.
; Mask lanes 0 and 3 are undef; the expected shift amount is 0 in those lanes.

define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 poison, i32 5, i32 2, i32 poison>
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef is replaced by safe constant.
; Same as the unflagged variant, but both shifts are 'nsw'. The undef lanes get
; shift amount 0 and the expected shl still carries 'nsw'.
define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 poison, i32 5, i32 2, i32 poison>
; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Can't propagate the flag here.
; Only %t2 is 'exact', so the expected lshr has no 'exact'. The constant is
; operand 0 and the expected shuffle of the variables is commuted (%v1 first).

define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both shifts are 'exact'; the expected lshr keeps 'exact'.
define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[T3:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
; The variables are the shift amounts here, so the binops are expected to
; remain; only the mask's undef lane is printed as poison.

define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 poison, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.

define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 poison, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Try weird types.
; 3-element vectors. The mask takes lane 0 from %t2 and lanes 1-2 from %t1; the
; expected shuffle of the variables is commuted (%v1 first) and the blended
; shift amounts are <4, 2, 3>.
define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
; CHECK-LABEL: @ashr_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32> <i32 0, i32 4, i32 5>
; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[TMP1]], <i32 4, i32 2, i32 3>
; CHECK-NEXT: ret <3 x i32> [[T3]]
;
  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
  %t2 = ashr <3 x i32> %v1, <i32 4, i32 5, i32 6>
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 0, i32 4, i32 1>
  ret <3 x i32> %t3
}

; Non-power-of-2 element width (i42) with an undef mask lane (lane 2): poison
; in the expected variable shuffle, undef in the expected constant.
define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
; CHECK-LABEL: @and_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> <i32 0, i32 4, i32 poison>
; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[TMP1]], <i42 1, i42 5, i42 undef>
; CHECK-NEXT: ret <3 x i42> [[T3]]
;
  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
  %t2 = and <3 x i42> %v1, <i42 4, i42 5, i42 6>
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %t3
}

; It doesn't matter if only one intermediate op has extra uses.
; %t1 has an extra use (the call) and is expected to remain; %t2 has none. The
; fold is still expected: shuffle of the variables, then one 'or' with the
; blended constant <5, 6, 3, 4>.
define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @or_2_vars(
; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  call void @use_v4i32(<4 x i32> %t1)
  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Same as above with an undef mask lane (lane 3): poison in the expected
; variable shuffle, undef in the expected constant.
define <4 x i32> @or_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @or_2_vars_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 undef>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  call void @use_v4i32(<4 x i32> %t1)
  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; But we don't transform if both intermediate values have extra uses.
; Negative test: %t1 and %t2 are each passed to @use_v4i32, and the expected
; output is the input unchanged.
define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @xor_2_vars(
; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  call void @use_v4i32(<4 x i32> %t1)
  %t2 = xor <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  call void @use_v4i32(<4 x i32> %t2)
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Div/rem need special handling if the shuffle has undef elements.

; No undef mask lanes: the variables (the divisors) are shuffled first and one
; udiv with the blended dividend constant <5, 2, 3, 8> is expected.
define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both udivs are 'exact'; the expected udiv keeps 'exact'.
define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[T3:%.*]] = udiv exact <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This could be transformed using a safe constant.
; Currently a negative test: mask lane 0 is undef and both udivs are expected
; to remain.

define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This could be transformed using a safe constant.

define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If the shuffle has no undefs, it's safe to shuffle the variables first.
; The constant is the divisor (operand 1). Shuffle of the variables, then one
; sdiv with the blended divisor <1, 2, 7, 4>.
define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Both sdivs are 'exact'; the expected sdiv keeps 'exact'.
define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Div by undef is UB. Undef is replaced by safe constant.
; Mask lane 3 is undef; the expected divisor is 1 in that lane.

define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; Div by undef is UB. Undef is replaced by safe constant.
; The expected sdiv still carries 'exact'.

define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; If the shuffle has no undefs, it's safe to shuffle the variables first.

define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @urem_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

; Negative test: unlike urem_2_vars, the mask has an undef lane (lane 1) and
; the variables are the divisors. Both srems are expected to remain.
define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @srem_2_vars(
; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 poison, i32 6, i32 3>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Try FP ops/types.

define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fadd_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[T3:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT: ret <4 x float> [[T3]]
;
  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fadd <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %t3
}

; Mask lane 0 is undef: poison in the expected variable shuffle, undef in the
; expected constant.
define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fsub_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Intersect any FMF.
; Both fmuls carry 'reassoc nsz' and the mask has no undef lanes; the expected
; fmul keeps 'reassoc nsz'.
define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fmul_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[T3:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT: ret <4 x float> [[T3]]
;
  %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fmul reassoc nsz <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %t3
}

; The inputs are 'nnan ninf' and 'nnan arcp', and mask lane 0 is undef. The
; expected frem carries no fast-math flags.
; NOTE(review): 'nnan' is common to both inputs yet absent from the expected
; output - presumably dropped because of the undef mask lane; confirm against
; the InstCombine select-shuffle fold.
define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @frem_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[T3:%.*]] = frem <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = frem nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; The variable operand must be either the first operand or second operand in both binops.
; Negative test: %t1 has the constant as operand 0 while %t2 has it as
; operand 1. Both fdivs and the shuffle are expected to remain; only the
; constant lanes not selected by the mask become poison.
define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fdiv_2_vars(
; CHECK-NEXT: [[T1:%.*]] = fdiv <4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double poison>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]], <double poison, double poison, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = fdiv <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fdiv <4 x double> %v1, <double 5.0, double 6.0, double 7.0, double 8.0>
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Shift-left with constant shift amount can be converted to mul to enable the fold.
; Lanes 0-1 come from the shl (shift amounts 5 and 6), which appear as the
; multipliers 32 and 64 in the expected constant. Both inputs are 'nuw' and the
; expected mul keeps 'nuw'.

define <4 x i32> @mul_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_shl(
; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
; Lanes 2-3 come from the shl (shift amounts 3 and 4), which appear as the
; multipliers 8 and 16. Mask lane 1 is undef and becomes an undef constant
; lane. The expected mul has no 'nsw'.
define <4 x i32> @shl_mul(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul(
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Demanded elements + simplification can remove the mul alone, but that's not the best case.
; Only lane 0 is taken from the mul, and its multiplier there is 1; the
; expected output is a single shl with shift amount 0 in lane 0.

define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_is_nop_shl(
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i32> %t3
}

; Negative test: shift amount (operand 1) must be constant.
; Here the shl has the constant as operand 0 and %v0 as the shift amount; both
; binops and a (commuted) shuffle are expected to remain.

define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = mul <4 x i32> [[V0]], <i32 5, i32 6, i32 poison, i32 poison>
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with 2 variable inputs.
; Two-variable mul/shl mix: the variables are shuffled (commuted, %v1 first)
; and one 'mul nuw' with the constant <32, 64, 3, 4> is expected, where 32 and
; 64 correspond to the shift amounts 5 and 6.
define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_shl_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 32, i32 64, i32 3, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Two-variable shl/mul mix with an undef mask lane (lane 1): poison in the
; expected variable shuffle, undef in the expected constant, and no 'nsw' on
; the expected mul.
define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_mul_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 poison, i32 6, i32 7>
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 5, i32 undef, i32 8, i32 16>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Negate can be converted to mul to enable the fold.
; The negate ('sub 0, %x') supplies only lane 2; it is expected as a multiply
; by -1 merged into the 'mul' constant. The poison constant lanes of each input
; are the lanes the shuffle does not read from that input.

define <4 x i32> @mul_neg(<4 x i32> %x) {
; CHECK-LABEL: @mul_neg(
; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[X:%.*]], <i32 257, i32 -3, i32 -1, i32 -9>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %m = mul <4 x i32> %x, <i32 257, i32 -3, i32 poison, i32 -9>
  %n = sub <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, %x
  %r = shufflevector <4 x i32> %m, <4 x i32> %n, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %r
}

; Negate as shuffle operand 0, using a 3-element vector of i79.
; 'nsw' is on both inputs and is kept on the expected 'mul'.

define <3 x i79> @neg_mul(<3 x i79> %x) {
; CHECK-LABEL: @neg_mul(
; CHECK-NEXT:    [[R:%.*]] = mul nsw <3 x i79> [[X:%.*]], <i79 -1, i79 -3, i79 -1>
; CHECK-NEXT:    ret <3 x i79> [[R]]
;
  %n = sub nsw <3 x i79> <i79 0, i79 poison, i79 0>, %x
  %m = mul nsw <3 x i79> %x, <i79 poison, i79 -3, i79 poison>
  %r = shufflevector <3 x i79> %n, <3 x i79> %m, <3 x i32> <i32 0, i32 4, i32 2>
  ret <3 x i79> %r
}

; Two variables: lanes 1-2 come from the negate of %y, lanes 0 and 3 from the
; 'mul' of %x. The inputs carry different flags ('nuw' vs 'nsw') and the
; expected 'mul' carries none.

define <4 x i32> @mul_neg_2_vars(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @mul_neg_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[TMP1]], <i32 42, i32 -1, i32 -1, i32 6>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %m = mul nuw <4 x i32> %x, <i32 42, i32 poison, i32 poison, i32 6>
  %n = sub nsw <4 x i32> <i32 poison, i32 0, i32 0, i32 poison>, %y
  %r = shufflevector <4 x i32> %m, <4 x i32> %n, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i32> %r
}

; Negate as shuffle operand 0 with two variables. 'nsw' is common to both
; inputs and is kept; 'nuw' is only on the 'mul' and is not.

define <4 x i32> @neg_mul_2_vars(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @neg_mul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[R:%.*]] = mul nsw <4 x i32> [[TMP1]], <i32 -1, i32 42, i32 -1, i32 6>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %n = sub nsw <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>, %y
  %m = mul nuw nsw <4 x i32> %x, <i32 poison, i32 42, i32 poison, i32 6>
  %r = shufflevector <4 x i32> %n, <4 x i32> %m, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %r
}

; Or with constant can be converted to add to enable the fold.
; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
; TODO: The 'or' constant is limited to a splat.
; Lanes 0-1 come from the 'or' with 31 and are expected as an add of 31;
; lanes 2-3 keep the original 'add' constants.

define <4 x i32> @add_or(<4 x i32> %v) {
; CHECK-LABEL: @add_or(
; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5)
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ; clear the bottom bits
  %t1 = add <4 x i32> %v0, <i32 65534, i32 65535, i32 65536, i32 65537> ; this can't be converted to 'or'
  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> ; set the bottom bits
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Same fold without any masking instruction: the 'disjoint' flag on the 'or'
; is the only indication that the 'or' can be treated as an 'add'.

define <4 x i32> @add_or_disjoint(<4 x i32> %v) {
; CHECK-LABEL: @add_or_disjoint(
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 65536, i32 65537>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v, <i32 65534, i32 65535, i32 65536, i32 65537>
  %t2 = or disjoint <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with 'or' as operand 0 of the shuffle.
; 'lshr' by 3 clears bits 7-5, so the 'or' with 192 (bits 7 and 6) cannot
; overlap any set bit and is expected as an add. The constant 192 is shown as
; -64 in the expected output (same i8 bit pattern). Lanes 0-1 come from the
; 'add', lanes 2-3 from the 'or'; the expected 'add' carries 'nuw nsw'.

define <4 x i8> @or_add(<4 x i8> %v) {
; CHECK-LABEL: @or_add(
; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3)
; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
; CHECK-NEXT:    ret <4 x i8> [[T3]]
;
  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3> ; clear the top bits
  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i8> %t3
}

; Negative test: not all 'or' insts can be converted to 'add'.
; 'lshr' by 1 clears only bit 7, so bit 6 of %v0 may overlap the 'or' constant
; 192. Both binops and a (commuted) shuffle remain in the expected output, with
; the constant lanes the shuffle does not read replaced by poison.

define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
; CHECK-LABEL: @or_add_not_enough_masking(
; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 1)
; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]], <i8 poison, i8 poison, i8 -64, i8 -64>
; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 poison, i8 poison>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i8> [[T3]]
;
  %v0 = lshr <4 x i8> %v, <i8 1, i8 1, i8 1, i8 1> ; clear not enough top bits
  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i8> %t3
}

; Try with 2 variable inputs.
; The 'add' uses %v1 while the 'or' uses the shifted %v. The expected output
; selects lanes 0-1 from the shifted value and lanes 2-3 from %v1 with one
; shuffle, then applies a single 'add' with the blended constant.

define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
; CHECK-LABEL: @add_or_2_vars(
; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5)
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ; clear the bottom bits
  %t1 = add <4 x i32> %v1, <i32 65534, i32 65535, i32 65536, i32 65537> ; this can't be converted to 'or'
  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> ; set the bottom bits
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; 'or' as shuffle operand 0 with two variables: lanes 0-1 come from the 'add'
; of %v1, lanes 2-3 from the 'or' of the shifted %v (192 is shown as -64).
; The expected 'add' carries 'nuw nsw'.

define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
; CHECK-LABEL: @or_add_2_vars(
; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3)
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64>
; CHECK-NEXT:    ret <4 x i8> [[T3]]
;
  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3> ; clear the top bits
  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v1, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i8> %t3
}

; The undef operand is used to simplify the shuffle mask, but don't assert that too soon.
; The shuffle is expected to remain. Its undef operand is rewritten as a
; constant vector that is poison only in lane 2, the one lane of operand 1
; that the mask does not read.

define <4 x i32> @PR41419(<4 x i32> %v) {
; CHECK-LABEL: @PR41419(
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> <i32 undef, i32 undef, i32 poison, i32 undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; The shuffle masks in the next 4 tests are identical to make it easier
; to see that we are choosing the correct elements in the new shuffle.
; Here %s1 is <x0, y1, x2, x3, y4> and %s2 reads lanes 1-2 from it, giving
; <x0, y1, x2, x3, x4>: a single select of %x and %y.

define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute0(
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 4>
; CHECK-NEXT:    ret <5 x i4> [[S2]]
;
  %s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
  %s2 = shufflevector <5 x i4> %x, <5 x i4> %s1, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
  ret <5 x i4> %s2
}

; Inner operands swapped: %s1 is <y0, x1, y2, y3, x4> and %s2 reads lanes 1-2
; from it, giving <x0, x1, y2, x3, x4>.

define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute1(
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 7, i32 3, i32 4>
; CHECK-NEXT:    ret <5 x i4> [[S2]]
;
  %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
  %s2 = shufflevector <5 x i4> %x, <5 x i4> %s1, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
  ret <5 x i4> %s2
}

; Outer operands swapped: %s1 is <x0, y1, x2, x3, y4> and %s2 reads lanes 0, 3
; and 4 from it, giving <x0, x1, x2, x3, y4>.

define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute2(
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9>
; CHECK-NEXT:    ret <5 x i4> [[S2]]
;
  %s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
  ret <5 x i4> %s2
}

; Both inner and outer operands swapped: %s1 is <y0, x1, y2, y3, x4> and %s2
; reads lanes 0, 3 and 4 from it, giving <y0, x1, x2, y3, x4>.

define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute3(
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 9>
; CHECK-NEXT:    ret <5 x i4> [[S2]]
;
  %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
  ret <5 x i4> %s2
}

; Same as the previous test with a poison mask element in each shuffle.
; Lane 2 is poison in the outer mask; lane 3 reads %s1 lane 3, which is poison
; in the inner mask. Both lanes are poison in the expected mask.

define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute3_poison_mask_elts(
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 poison, i32 poison, i32 9>
; CHECK-NEXT:    ret <5 x i4> [[S2]]
;
  %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 poison, i32 9>
  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 poison, i32 3, i32 4>
  ret <5 x i4> %s2
}

; negative test - need shared operand
; The inner shuffle uses %y and %z, the outer one adds %x, so two shuffles
; remain. Only the inner mask changes: lanes 1-2, which the outer shuffle does
; not read from %s1, become poison.

define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %z) {
; CHECK-LABEL: @sel_not_common_op_commute3(
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[Z:%.*]], <5 x i32> <i32 0, i32 poison, i32 poison, i32 3, i32 9>
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
; CHECK-NEXT:    ret <5 x i4> [[S2]]
;
  %s1 = shufflevector <5 x i4> %y, <5 x i4> %z, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
  ret <5 x i4> %s2
}

; negative test - need "select" shuffle, no lane changes
; The outer mask moves %s1 lane 1 into lane 0, so it is not a select and two
; shuffles remain. Only the inner mask changes: lanes 0 and 2, which the outer
; shuffle does not read from %s1, become poison.

define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @not_sel_common_op(
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 poison, i32 6, i32 poison, i32 3, i32 9>
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 1, i32 6, i32 7, i32 3, i32 4>
; CHECK-NEXT:    ret <5 x i4> [[S2]]
;
  %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 1, i32 6, i32 7, i32 3, i32 4>
  ret <5 x i4> %s2
}

; extra use is ok
; %s1 is also passed to @use_v4i32 (not defined in this part of the file), so
; it is kept unchanged; %s2 is still expected as a select directly of %y and %x.

define <4 x i32> @sel_common_op_extra_use(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sel_common_op_extra_use(
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[S1]])
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S2]]
;
  %s1 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  call void @use_v4i32(<4 x i32> %s1)
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s2
}

; Every defined lane of %s2 is an element of %x in its original position
; (lane 0 via %s1, lanes 2-3 directly) and lane 1 is undef in the outer mask.
; %y is not needed: a single-source shuffle of %x is expected.

define <4 x float> @identity_mask(<4 x float>%x, <4 x float> %y) {
; CHECK-LABEL: @identity_mask(
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[S2]]
;
  %s1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
  %s2 = shufflevector <4 x float> %s1, <4 x float> %x, <4 x i32> <i32 0, i32 undef, i32 6, i32 7>
  ret <4 x float> %s2
}