; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s

; Basic tests for the sandbox vectorizer's "bottom-up-vec" pass. Each function
; stores to two (or more) adjacent addresses; the expected output shows how the
; stores and the instructions feeding them are rewritten.

; Two adjacent float loads feeding two adjacent float stores: expected to become
; one <2 x float> load and one <2 x float> store.
define void @store_load(ptr %ptr) {
; CHECK-LABEL: define void @store_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}


; Same chain with a cast in the middle: the float loads, the fpext and the
; double stores are all expected in 2-wide vector form.
define void @store_fpext_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fpext_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[PTRD0:%.*]] = getelementptr double, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[VCAST:%.*]] = fpext <2 x float> [[VECL]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[VCAST]], ptr [[PTRD0]], align 8
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ptrd0 = getelementptr double, ptr %ptr, i32 0
  %ptrd1 = getelementptr double, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %fpext0 = fpext float %ld0 to double
  %fpext1 = fpext float %ld1 to double
  store double %fpext0, ptr %ptrd0
  store double %fpext1, ptr %ptrd1
  ret void
}

; Compare + zext chain: both load pairs, the fcmp and the zext are expected in
; 2-wide vector form. Note the %ldB loads precede the %ldA loads in the input.
define void @store_fcmp_zext_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fcmp_zext_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[PTRB0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[VCMP:%.*]] = fcmp ogt <2 x float> [[VECL]], [[VECL1]]
; CHECK-NEXT:    [[VCAST:%.*]] = zext <2 x i1> [[VCMP]] to <2 x i32>
; CHECK-NEXT:    store <2 x i32> [[VCAST]], ptr [[PTRB0]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ptrb0 = getelementptr i32, ptr %ptr, i32 0
  %ptrb1 = getelementptr i32, ptr %ptr, i32 1
  %ldB0 = load float, ptr %ptr0
  %ldB1 = load float, ptr %ptr1
  %ldA0 = load float, ptr %ptr0
  %ldA1 = load float, ptr %ptr1
  %fcmp0 = fcmp ogt float %ldA0, %ldB0
  %fcmp1 = fcmp ogt float %ldA1, %ldB1
  %zext0 = zext i1 %fcmp0 to i32
  %zext1 = zext i1 %fcmp1 to i32
  store i32 %zext0, ptr %ptrb0
  store i32 %zext1, ptr %ptrb1
  ret void
}

; Binary operator: two separate load pairs from the same addresses feed the
; fadds; two vector loads and one vector fadd are expected.
define void @store_fadd_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fadd_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[VEC:%.*]] = fadd <2 x float> [[VECL]], [[VECL1]]
; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ldA0 = load float, ptr %ptr0
  %ldA1 = load float, ptr %ptr1
  %ldB0 = load float, ptr %ptr0
  %ldB1 = load float, ptr %ptr1
  %fadd0 = fadd float %ldA0, %ldB0
  %fadd1 = fadd float %ldA1, %ldB1
  store float %fadd0, ptr %ptr0
  store float %fadd1, ptr %ptr1
  ret void
}

; Unary operator: the fneg pair is expected as a single <2 x float> fneg.
define void @store_fneg_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fneg_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[VEC:%.*]] = fneg <2 x float> [[VECL]]
; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %fneg0 = fneg float %ld0
  %fneg1 = fneg float %ld1
  store float %fneg0, ptr %ptr0
  store float %fneg1, ptr %ptr1
  ret void
}

; The scalar loads have users besides the stores (%user and the return value),
; so the expected output keeps both scalar loads next to the new vector load.
define float @scalars_with_external_uses_not_dead(ptr %ptr) {
; CHECK-LABEL: define float @scalars_with_external_uses_not_dead(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
; CHECK-NEXT:    [[USER:%.*]] = fneg float [[LD1]]
; CHECK-NEXT:    ret float [[LD0]]
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  %user = fneg float %ld1
  ret float %ld0
}

; %ld1 is loaded from the unrelated pointer %ptr2, so the loads stay scalar and
; the stored value is expected to be assembled with insertelement.
define void @pack_scalars(ptr %ptr, ptr %ptr2) {
; CHECK-LABEL: define void @pack_scalars(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
; CHECK-NEXT:    [[PACK:%.*]] = insertelement <2 x float> poison, float [[LD0]], i32 0
; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LD1]], i32 1
; CHECK-NEXT:    store <2 x float> [[PACK1]], ptr [[PTR0]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr2
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}

; A call sits between the two stores; the expected output is the input
; unchanged (nothing is vectorized).
; NOTE(review): both stores write %ld1 to %ptr1 and %ld0 is unused. Confirm
; this is intentional rather than a copy of the second store.
declare void @foo()
define void @cant_vectorize_seeds(ptr %ptr) {
; CHECK-LABEL: define void @cant_vectorize_seeds(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
; CHECK-NEXT:    store float [[LD1]], ptr [[PTR1]], align 4
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    store float [[LD1]], ptr [[PTR1]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld1, ptr %ptr1
  call void @foo() ; This call blocks scheduling of the store seeds.
  store float %ld1, ptr %ptr1
  ret void
}

; A <2 x float> store followed by an adjacent float store: the expected output
; packs both values into a <3 x float> and emits one store. The 3-wide result is
; presumably why the RUN line passes -sbvec-allow-non-pow2 (confirm).
define void @pack_vectors(ptr %ptr, ptr %ptr2) {
; CHECK-LABEL: define void @pack_vectors(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr <2 x float>, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[LD0:%.*]] = load <2 x float>, ptr [[PTR0]], align 8
; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
; CHECK-NEXT:    [[VPACK:%.*]] = extractelement <2 x float> [[LD0]], i32 0
; CHECK-NEXT:    [[VPACK1:%.*]] = insertelement <3 x float> poison, float [[VPACK]], i32 0
; CHECK-NEXT:    [[VPACK2:%.*]] = extractelement <2 x float> [[LD0]], i32 1
; CHECK-NEXT:    [[VPACK3:%.*]] = insertelement <3 x float> [[VPACK1]], float [[VPACK2]], i32 1
; CHECK-NEXT:    [[PACK:%.*]] = insertelement <3 x float> [[VPACK3]], float [[LD1]], i32 2
; CHECK-NEXT:    store <3 x float> [[PACK]], ptr [[PTR0]], align 8
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr <2 x float>, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 2
  %ld0 = load <2 x float>, ptr %ptr0
  %ld1 = load float, ptr %ptr2
  store <2 x float> %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}

; Each fsub uses the same load for both operands; a single vector load is
; expected to feed both operands of the vector fsub.
define void @diamond(ptr %ptr) {
; CHECK-LABEL: define void @diamond(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VECL]]
; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %sub0 = fsub float %ld0, %ld0
  %sub1 = fsub float %ld1, %ld1
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}

; The second fsub operands are the same two loads in swapped lane order; the
; expected output reuses the vector load through a <1, 0> shufflevector.
define void @diamondWithShuffle(ptr %ptr) {
; CHECK-LABEL: define void @diamondWithShuffle(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[VSHUF:%.*]] = shufflevector <2 x float> [[VECL]], <2 x float> [[VECL]], <2 x i32> <i32 1, i32 0>
; CHECK-NEXT:    [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VSHUF]]
; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %sub0 = fsub float %ld0, %ld1
  %sub1 = fsub float %ld1, %ld0
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}

; The second fsub operands are %ldX (an unrelated scalar load) and %ld0; the
; expected output builds that operand from the scalar %ldX and lane 0 extracted
; from the vector load.
define void @diamondMultiInput(ptr %ptr, ptr %ptrX) {
; CHECK-LABEL: define void @diamondMultiInput(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]]) {
; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT:    [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
; CHECK-NEXT:    [[VINS:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0
; CHECK-NEXT:    [[VEXT:%.*]] = extractelement <2 x float> [[VECL]], i32 0
; CHECK-NEXT:    [[VINS1:%.*]] = insertelement <2 x float> [[VINS]], float [[VEXT]], i32 1
; CHECK-NEXT:    [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VINS1]]
; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT:    ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1

  %ldX = load float, ptr %ptrX

  %sub0 = fsub float %ld0, %ldX
  %sub1 = fsub float %ld1, %ld0
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}

; Two store pairs share zexts whose operands are constants; the expected output
; contains no zext/or instructions and stores zeroinitializer vectors instead.
define void @diamondWithConstantVector(ptr %ptr) {
; CHECK-LABEL: define void @diamondWithConstantVector(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:    [[GEPA0:%.*]] = getelementptr i32, ptr [[PTR]], i64 0
; CHECK-NEXT:    [[GEPB0:%.*]] = getelementptr i32, ptr [[PTR]], i64 10
; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[GEPA0]], align 4
; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[GEPB0]], align 4
; CHECK-NEXT:    ret void
;
  %gepA0 = getelementptr i32, ptr %ptr, i64 0
  %gepA1 = getelementptr i32, ptr %ptr, i64 1

  %gepB0 = getelementptr i32, ptr %ptr, i64 10
  %gepB1 = getelementptr i32, ptr %ptr, i64 11

  %zext0 = zext i16 0 to i32
  %zext1 = zext i16 0 to i32

  store i32 %zext0, ptr %gepA0
  store i32 %zext1, ptr %gepA1

  %orB0 = or i32 0, %zext0
  %orB1 = or i32 0, %zext1
  store i32 %orB0, ptr %gepB0
  store i32 %orB1, ptr %gepB1
  ret void
}