; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s

; Helper declarations used by the tests below:
;  - @may_inf_loop_ro is nounwind and readonly but not willreturn.
;  - @may_inf_loop_rw is nounwind only (not referenced by the tests visible
;    in this part of the file).
;  - @may_throw is willreturn but not nounwind.
declare i64 @may_inf_loop_ro() nounwind readonly
declare i64 @may_inf_loop_rw() nounwind
declare i64 @may_throw() willreturn

; Base case with no interesting control dependencies
define void @test_no_control(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test_no_control(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2

  %c1 = load i64, ptr %c
  %ca2 = getelementptr i64, ptr %c, i32 1
  %c2 = load i64, ptr %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; The lane-1 addend is the result of a call to @may_inf_loop_ro issued after
; both loads from %a. The expected output vectorizes the %a loads, the adds
; and the stores, and builds the second add operand from the scalar %c load
; and the call result via insertelement.
define void @test1(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2

  %c1 = load i64, ptr %c
  %c2 = call i64 @may_inf_loop_ro()
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; Same as test1, but the %c load and the call come before the loads from %a.
; The expected output is the same as for test1.
define void @test2(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %c1 = load i64, ptr %c
  %c2 = call i64 @may_inf_loop_ro()

  %v1 = load i64, ptr %a
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2

  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; Lane 0 (loads and add) is fully computed before lane 1; the call sits
; between lane 1's load from %a2 and its add.
define void @test3(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  %c1 = load i64, ptr %c
  %add1 = add i64 %v1, %c1

  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2
  %c2 = call i64 @may_inf_loop_ro()
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; Like test3, but the call precedes lane 1's load from %a2. The expected
; output places the <2 x i64> load from %a after the call.
define void @test4(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  %c1 = load i64, ptr %c
  %add1 = add i64 %v1, %c1

  %c2 = call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; Lane 1 (load, call, add) is computed before lane 0. In the expected output
; the call precedes the scalar %c load.
define void @test5(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2
  %c2 = call i64 @may_inf_loop_ro()
  %add2 = add i64 %v2, %c2

  %v1 = load i64, ptr %a
  %c1 = load i64, ptr %c
  %add1 = add i64 %v1, %c1

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; The call's result is unused; it sits between the two loads from %a. The
; expected output vectorizes both load pairs and places the vector loads after
; the call.
define void @test6(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2

  %c1 = load i64, ptr %c
  %ca2 = getelementptr i64, ptr %c, i32 1
  %c2 = load i64, ptr %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; In this case, we can't vectorize the load pair because there's no valid
; scheduling point which respects both memory and control dependence. If
; we scheduled the second load before the store holding the first one in place,
; we'd have hoisted a potentially faulting load above a potentially infinite
; call and thus have introduced a possible fault which didn't previously exist
; in the program.
; The expected output keeps %v1 above the store and %v2 below the call as
; scalar loads; only the %c load pair, the adds and the final stores are
; vectorized.
define void @test7(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  store i64 0, ptr %a
  call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2

  %c1 = load i64, ptr %c
  %ca2 = getelementptr i64, ptr %c, i32 1
  %c2 = load i64, ptr %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; Same as test7, but with a throwing call
; (@may_throw is declared without nounwind; this call site is marked readonly).
define void @test8(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_throw() #[[ATTR4:[0-9]+]]
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  store i64 0, ptr %a
  call i64 @may_throw() readonly
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2

  %c1 = load i64, ptr %c
  %ca2 = getelementptr i64, ptr %c, i32 1
  %c2 = load i64, ptr %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; Same as test8, but with a readwrite maythrow call
; (the call site carries no readonly attribute).
define void @test9(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_throw()
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  store i64 0, ptr %a
  call i64 @may_throw()
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2

  %c1 = load i64, ptr %c
  %ca2 = getelementptr i64, ptr %c, i32 1
  %c2 = load i64, ptr %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; A variant of test7 which shows the same problem with a non-load instruction
; (a udiv). In the expected output both udivs stay scalar and the second one
; remains below the call.
define void @test10(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A]], i32 1
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
; CHECK-NEXT: [[U1:%.*]] = udiv i64 200, [[V1]]
; CHECK-NEXT: store i64 [[U1]], ptr [[A]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[U2:%.*]] = udiv i64 200, [[V2]]
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[U2]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
;
  %v1 = load i64, ptr %a
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2

  %u1 = udiv i64 200, %v1
  store i64 %u1, ptr %a
  call i64 @may_inf_loop_ro()
  %u2 = udiv i64 200, %v2

  %c1 = load i64, ptr %c
  %ca2 = getelementptr i64, ptr %c, i32 1
  %c2 = load i64, ptr %ca2
  %add1 = add i64 %u1, %c1
  %add2 = add i64 %u2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}

; Variant of test10 with block-invariant operands to the udivs
; FIXME: This is wrong, we're hoisting a faulting udiv above an infinite loop.
; NOTE(review): the expected output below keeps the second udiv (%u2) after
; the call to @may_inf_loop_ro, so no hoisting is visible in these assertions.
; The FIXME preceding this function may be stale - confirm before removing it.
define void @test11(i64 %x, i64 %y, ptr %b, ptr %c) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[U1:%.*]] = udiv i64 200, [[X:%.*]]
; CHECK-NEXT: store i64 [[U1]], ptr [[B:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT: [[U2:%.*]] = udiv i64 200, [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[U2]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]]
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B]], align 8
; CHECK-NEXT: ret void
;
  %u1 = udiv i64 200, %x
  store i64 %u1, ptr %b
  call i64 @may_inf_loop_ro()
  %u2 = udiv i64 200, %y

  %c1 = load i64, ptr %c
  %ca2 = getelementptr i64, ptr %c, i32 1
  %c2 = load i64, ptr %ca2
  %add1 = add i64 %u1, %c1
  %add2 = add i64 %u2, %c2

  store i64 %add1, ptr %b
  %b2 = getelementptr i64, ptr %b, i32 1
  store i64 %add2, ptr %b2
  ret void
}