; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s

; Exercises the SLP vectorizer on code that mixes vectorizable pointer
; stores with allocas, llvm.stacksave and llvm.stackrestore.  Each function
; ends with a pair of adjacent pointer stores to %b that form the
; vectorization seed.

; Declared readonly and nounwind, but not willreturn.
declare i64 @may_inf_loop_ro() nounwind readonly

; Base case without allocas or stacksave
define void @basecase(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @basecase(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    store ptr null, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = load ptr, ptr %a
  ; This store sits between the two loads that are expected to be combined.
  store ptr zeroinitializer, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Using two allocas and a buildvector
define void @allocas(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @allocas(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  ; %add1 also has a scalar use outside the vectorized stores.
  store ptr %add1, ptr %a
  %v2 = alloca i8

  %add2 = getelementptr i8, ptr %v2, i32 1

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Allocas cannot be speculated above a potentially non-returning call.
; The assertions expect the scalar code to be left unchanged.
define void @allocas_speculation(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @allocas_speculation(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
; CHECK-NEXT:    store ptr [[ADD2]], ptr [[B2]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  store ptr %add1, ptr %a
  ; The call separates the two allocas.
  call i64 @may_inf_loop_ro()
  %v2 = alloca i8

  %add2 = getelementptr i8, ptr %v2, i32 1

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; We must be careful not to lift the inalloca alloc above the stacksave here.
; We used to miscompile this example before adding explicit dependency handling
; for stacksave.
define void @stacksave(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
; CHECK-NEXT:    store ptr [[ADD2]], ptr [[B2]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  store ptr %add1, ptr %a

  ; %v2 is allocated between the stacksave and the matching stackrestore.
  %stack = call ptr @llvm.stacksave()
  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readnone
  call void @llvm.stackrestore(ptr %stack)

  %add2 = getelementptr i8, ptr %v2, i32 1

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Variant of @stacksave in which the store to %a comes after the stacksave
; and the call to @use is readonly rather than readnone.  The assertions
; expect the stores to %b to be vectorized, with the inalloca alloca still
; placed after the stacksave.
define void @stacksave2(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave2(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1

  %stack = call ptr @llvm.stacksave()
  store ptr %add1, ptr %a
  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readonly
  call void @llvm.stackrestore(ptr %stack)

  %add2 = getelementptr i8, ptr %v2, i32 1

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Both allocas follow the stacksave, and both GEPs are computed only after
; the stackrestore.  The assertions expect the vectorized GEP and store to
; be emitted after the stackrestore, with the allocas left where they are.
define void @stacksave3(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave3(
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %stack = call ptr @llvm.stacksave()
  %v1 = alloca i8

  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readnone
  call void @llvm.stackrestore(ptr %stack)

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Here we have an alloca which needs to stay under the stacksave, but is not
; directly part of the vectorization tree.  Instead, the stacksave is
; encountered during dependency scanning via the memory chain.
define void @stacksave4(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave4(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = load ptr, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; %x is not an operand of any of the loads, GEPs or stores being combined.
  %stack = call ptr @llvm.stacksave()
  %x = alloca inalloca i8
  call void @use(ptr inalloca(i8) %x) readnone
  call void @llvm.stackrestore(ptr %stack)

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; NOTE(review): the input IR and the expected output of this function match
; @stacksave4 exactly in this file; confirm whether a distinct variation was
; intended.
define void @stacksave5(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave5(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = load ptr, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  %stack = call ptr @llvm.stacksave()
  %x = alloca inalloca i8
  call void @use(ptr inalloca(i8) %x) readnone
  call void @llvm.stackrestore(ptr %stack)

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Reordering the second alloca above the stackrestore while
; leaving the write to it below would introduce a write-after-free
; bug.
define void @stackrestore1(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stackrestore1(
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    store i8 0, ptr [[V1]], align 1
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    store i8 0, ptr [[V2]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %stack = call ptr @llvm.stacksave()
  %v1 = alloca i8
  store i8 0, ptr %v1
  call void @llvm.stackrestore(ptr %stack)
  ; The second alloca and the write to it both follow the stackrestore.
  %v2 = alloca i8
  store i8 0, ptr %v2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

declare void @use(ptr inalloca(i8))
declare ptr @llvm.stacksave()
declare void @llvm.stackrestore(ptr)

; The next set are reduced from previous regressions.
declare ptr @wibble(ptr)
declare void @quux(ptr inalloca(i32))

; Eight consecutive pointer stores into %var12 (elements 0-7) that follow a
; stacksave / inalloca alloca / stackrestore region.  The assertions expect
; two <4 x ptr> stores, both emitted after the stackrestore.
define void @ham() #1 {
; CHECK-LABEL: @ham(
; CHECK-NEXT:    [[VAR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR3:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR4:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR5:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x ptr], align 8
; CHECK-NEXT:    [[VAR15:%.*]] = call ptr @wibble(ptr [[VAR2]])
; CHECK-NEXT:    [[VAR16:%.*]] = call ptr @wibble(ptr [[VAR3]])
; CHECK-NEXT:    [[VAR17:%.*]] = call ptr @wibble(ptr [[VAR4]])
; CHECK-NEXT:    [[VAR23:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[VAR24:%.*]] = alloca inalloca i32, align 4
; CHECK-NEXT:    call void @quux(ptr inalloca(i32) [[VAR24]])
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[VAR23]])
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    store <4 x ptr> [[TMP2]], ptr [[VAR12]], align 8
; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x ptr> [[TMP4]], ptr [[VAR36]], align 8
; CHECK-NEXT:    ret void
;
  %var2 = alloca i8
  %var3 = alloca i8
  %var4 = alloca i8
  %var5 = alloca i8
  %var12 = alloca [12 x ptr]
  %var15 = call ptr @wibble(ptr %var2)
  %var16 = call ptr @wibble(ptr %var3)
  %var17 = call ptr @wibble(ptr %var4)
  %var23 = call ptr @llvm.stacksave()
  %var24 = alloca inalloca i32
  call void @quux(ptr inalloca(i32) %var24)
  call void @llvm.stackrestore(ptr %var23)
  ; Elements 0-4 receive %var4; elements 5-7 receive %var5.
  store ptr %var4, ptr %var12
  %var33 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 1
  store ptr %var4, ptr %var33
  %var34 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 2
  store ptr %var4, ptr %var34
  %var35 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 3
  store ptr %var4, ptr %var35
  %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
  store ptr %var4, ptr %var36
  %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
  store ptr %var5, ptr %var37
  %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
  store ptr %var5, ptr %var38
  %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
  store ptr %var5, ptr %var39
  ret void
}

; Four consecutive pointer stores into elements 4-7 of %var12, with no
; stacksave involved.  The assertions expect a single <4 x ptr> store.
define void @spam() #1 {
; CHECK-LABEL: @spam(
; CHECK-NEXT:    [[VAR4:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR5:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x ptr], align 8
; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x ptr> [[TMP3]], ptr [[VAR36]], align 8
; CHECK-NEXT:    ret void
;
  %var4 = alloca i8
  %var5 = alloca i8
  %var12 = alloca [12 x ptr]
  %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
  store ptr %var4, ptr %var36
  %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
  store ptr %var5, ptr %var37
  %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
  store ptr %var5, ptr %var38
  %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
  store ptr %var5, ptr %var39
  ret void
}

; NOTE(review): attribute group #0 is not referenced by anything visible in
; this file; only #1 is used (by @ham and @spam).
attributes #0 = { nofree nosync nounwind willreturn }
attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" }