; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer,instcombine -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s

; Every function below is already vector code that has been cut into narrow
; sub-vectors which are then stored to consecutive addresses. The run line
; enables -slp-revec, raises the maximum register size to 1024 bits and uses a
; negative cost threshold (presumably so the wide form is never rejected as
; unprofitable -- confirm against the pass options). instcombine runs
; afterwards, so the expected output is the cleaned-up wide form.

; test1: an <8 x i32> load is split into two <4 x i32> halves, each half is
; zero-extended, and the results are split again into four <2 x i64> pieces
; that are stored in their original order at %out + 0, 2, 4, 6 (i64 units).
; Expected output: one wide load, one wide zext, one wide store.
define void @test1(ptr %in, ptr %out) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
; CHECK-NEXT:    store <8 x i64> [[TMP1]], ptr [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i32>, ptr %in, align 1
  %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = zext <4 x i32> %1 to <4 x i64>
  %4 = zext <4 x i32> %2 to <4 x i64>
  %5 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  %6 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
  %7 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  %8 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
  %9 = getelementptr inbounds i64, ptr %out, i64 0
  %10 = getelementptr inbounds i64, ptr %out, i64 2
  %11 = getelementptr inbounds i64, ptr %out, i64 4
  %12 = getelementptr inbounds i64, ptr %out, i64 6
  store <2 x i64> %5, ptr %9, align 8
  store <2 x i64> %6, ptr %10, align 8
  store <2 x i64> %7, ptr %11, align 8
  store <2 x i64> %8, ptr %12, align 8
  ret void
}

; test2: same shape as test1, except that the first two <2 x i64> pieces are
; extracted in swapped order (elements 2,3 go to %out + 0 and elements 0,1 go
; to %out + 2). Expected output: the wide load/zext/store plus a single
; <8 x i64> shuffle that encodes the swap.
define void @test2(ptr %in, ptr %out) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i64> [[TMP2]], ptr [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i32>, ptr %in, align 1
  %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = zext <4 x i32> %1 to <4 x i64>
  %4 = zext <4 x i32> %2 to <4 x i64>
  %5 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
  %6 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  %7 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  %8 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
  %9 = getelementptr inbounds i64, ptr %out, i64 0
  %10 = getelementptr inbounds i64, ptr %out, i64 2
  %11 = getelementptr inbounds i64, ptr %out, i64 4
  %12 = getelementptr inbounds i64, ptr %out, i64 6
  store <2 x i64> %5, ptr %9, align 8
  store <2 x i64> %6, ptr %10, align 8
  store <2 x i64> %7, ptr %11, align 8
  store <2 x i64> %8, ptr %12, align 8
  ret void
}

; test3: the four <4 x i32> quarters of a <16 x i32> argument are extracted in
; reverse order (12..15, 8..11, 4..7, 0..3) and stored consecutively.
; Expected output: one <16 x i32> shuffle with that reversed-quarters mask and
; one wide store.
define void @test3(<16 x i32> %0, ptr %out) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %3 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %4 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = getelementptr inbounds i32, ptr %out, i64 0
  %6 = getelementptr inbounds i32, ptr %out, i64 4
  %7 = getelementptr inbounds i32, ptr %out, i64 8
  %8 = getelementptr inbounds i32, ptr %out, i64 12
  store <4 x i32> %1, ptr %5, align 4
  store <4 x i32> %2, ptr %6, align 4
  store <4 x i32> %3, ptr %7, align 4
  store <4 x i32> %4, ptr %8, align 4
  ret void
}

; test4: the two <4 x i32> halves of one <8 x i32> load are each stored twice
; (low, high, low, high), so the four stores reuse only two distinct values.
; Expected output: a single widening shuffle from <8 x i32> to <16 x i32> that
; repeats elements 0..7, followed by one wide store.
define void @test4(ptr %in, ptr %out) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i32>, ptr %in, align 4
  %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = getelementptr inbounds i32, ptr %out, i64 0
  %4 = getelementptr inbounds i32, ptr %out, i64 4
  %5 = getelementptr inbounds i32, ptr %out, i64 8
  %6 = getelementptr inbounds i32, ptr %out, i64 12
  store <4 x i32> %1, ptr %3, align 4
  store <4 x i32> %2, ptr %4, align 4
  store <4 x i32> %1, ptr %5, align 4
  store <4 x i32> %2, ptr %6, align 4
  ret void
}

; test5: both shuffles take a constant zeroinitializer as their source
; operand. Expected output: a single <8 x i32> zeroinitializer store with no
; shuffle left.
define void @test5(ptr %out) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %0 = shufflevector <8 x i32> zeroinitializer, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = shufflevector <8 x i32> zeroinitializer, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %2 = getelementptr inbounds i32, ptr %out, i64 0
  %3 = getelementptr inbounds i32, ptr %out, i64 4
  store <4 x i32> %0, ptr %2, align 4
  store <4 x i32> %1, ptr %3, align 4
  ret void
}

; test6: three <4 x float> loads from %in0 (%load0, %load1, %load2) are reused
; cyclically as the left operand of twelve fmuls. The right operands come from
; <16 x i8> loads of %in1 that are split in half, zero-extended to i16, split
; in half again and converted with uitofp. The first eight products are stored
; to %in2 + 0..112 bytes; the remaining four are computed after a third i8
; load and stored to %in2 + 128..176 bytes.
; Expected output: one <32 x float> fmul/store and one <16 x float> fmul/store,
; with the float operand of each rebuilt by shuffles from an <8 x float> load
; and the <4 x float> load at offset 32.
define void @test6(ptr %in0, ptr %in1, ptr %in2) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[IN0:%.*]], i64 32
; CHECK-NEXT:    [[LOAD2:%.*]] = load <4 x float>, ptr [[GEP1]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[IN0]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[IN1:%.*]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <32 x i8> [[TMP1]] to <32 x float>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]]
; CHECK-NEXT:    store <32 x float> [[TMP4]], ptr [[IN2:%.*]], align 16
; CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN1]], i64 32
; CHECK-NEXT:    [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2]], i64 128
; CHECK-NEXT:    [[TMP6:%.*]] = uitofp <16 x i8> [[LOAD5]] to <16 x float>
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP13:%.*]] = fmul <16 x float> [[TMP12]], [[TMP6]]
; CHECK-NEXT:    store <16 x float> [[TMP13]], ptr [[GEP11]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %gep0 = getelementptr inbounds i8, ptr %in0, i64 16
  %gep1 = getelementptr inbounds i8, ptr %in0, i64 32
  %load0 = load <4 x float>, ptr %in0, align 16
  %load1 = load <4 x float>, ptr %gep0, align 16
  %load2 = load <4 x float>, ptr %gep1, align 16
  %gep2 = getelementptr inbounds i8, ptr %in1, i64 16
  %load3 = load <16 x i8>, ptr %in1, align 1
  %load4 = load <16 x i8>, ptr %gep2, align 1
  %shufflevector0 = shufflevector <16 x i8> %load3, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shufflevector1 = shufflevector <16 x i8> %load3, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shufflevector2 = shufflevector <16 x i8> %load4, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shufflevector3 = shufflevector <16 x i8> %load4, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %zext0 = zext <8 x i8> %shufflevector0 to <8 x i16>
  %zext1 = zext <8 x i8> %shufflevector1 to <8 x i16>
  %zext2 = zext <8 x i8> %shufflevector2 to <8 x i16>
  %zext3 = zext <8 x i8> %shufflevector3 to <8 x i16>
  %shufflevector4 = shufflevector <8 x i16> %zext0, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shufflevector5 = shufflevector <8 x i16> %zext0, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shufflevector6 = shufflevector <8 x i16> %zext1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shufflevector7 = shufflevector <8 x i16> %zext1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shufflevector8 = shufflevector <8 x i16> %zext2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shufflevector9 = shufflevector <8 x i16> %zext2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shufflevector10 = shufflevector <8 x i16> %zext3, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shufflevector11 = shufflevector <8 x i16> %zext3, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %uitofp0 = uitofp nneg <4 x i16> %shufflevector4 to <4 x float>
  %uitofp1 = uitofp nneg <4 x i16> %shufflevector5 to <4 x float>
  %uitofp2 = uitofp nneg <4 x i16> %shufflevector6 to <4 x float>
  %uitofp3 = uitofp nneg <4 x i16> %shufflevector7 to <4 x float>
  %uitofp4 = uitofp nneg <4 x i16> %shufflevector8 to <4 x float>
  %uitofp5 = uitofp nneg <4 x i16> %shufflevector9 to <4 x float>
  %uitofp6 = uitofp nneg <4 x i16> %shufflevector10 to <4 x float>
  %uitofp7 = uitofp nneg <4 x i16> %shufflevector11 to <4 x float>
  %fmul0 = fmul <4 x float> %load0, %uitofp0
  %fmul1 = fmul <4 x float> %load1, %uitofp1
  %fmul2 = fmul <4 x float> %load2, %uitofp2
  %fmul3 = fmul <4 x float> %load0, %uitofp3
  %fmul4 = fmul <4 x float> %load1, %uitofp4
  %fmul5 = fmul <4 x float> %load2, %uitofp5
  %fmul6 = fmul <4 x float> %load0, %uitofp6
  %fmul7 = fmul <4 x float> %load1, %uitofp7
  %gep3 = getelementptr inbounds i8, ptr %in2, i64 16
  %gep4 = getelementptr inbounds i8, ptr %in2, i64 32
  %gep5 = getelementptr inbounds i8, ptr %in2, i64 48
  %gep6 = getelementptr inbounds i8, ptr %in2, i64 64
  %gep7 = getelementptr inbounds i8, ptr %in2, i64 80
  %gep8 = getelementptr inbounds i8, ptr %in2, i64 96
  %gep9 = getelementptr inbounds i8, ptr %in2, i64 112
  store <4 x float> %fmul0, ptr %in2, align 16
  store <4 x float> %fmul1, ptr %gep3, align 16
  store <4 x float> %fmul2, ptr %gep4, align 16
  store <4 x float> %fmul3, ptr %gep5, align 16
  store <4 x float> %fmul4, ptr %gep6, align 16
  store <4 x float> %fmul5, ptr %gep7, align 16
  store <4 x float> %fmul6, ptr %gep8, align 16
  store <4 x float> %fmul7, ptr %gep9, align 16
  %gep10 = getelementptr inbounds i8, ptr %in1, i64 32
  %load5 = load <16 x i8>, ptr %gep10, align 1
  %shufflevector12 = shufflevector <16 x i8> %load5, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shufflevector13 = shufflevector <16 x i8> %load5, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %zext4 = zext <8 x i8> %shufflevector12 to <8 x i16>
  %zext5 = zext <8 x i8> %shufflevector13 to <8 x i16>
  %shufflevector14 = shufflevector <8 x i16> %zext4, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shufflevector15 = shufflevector <8 x i16> %zext4, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shufflevector16 = shufflevector <8 x i16> %zext5, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shufflevector17 = shufflevector <8 x i16> %zext5, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %uitofp8 = uitofp nneg <4 x i16> %shufflevector14 to <4 x float>
  %uitofp9 = uitofp nneg <4 x i16> %shufflevector15 to <4 x float>
  %uitofp10 = uitofp nneg <4 x i16> %shufflevector16 to <4 x float>
  %uitofp11 = uitofp nneg <4 x i16> %shufflevector17 to <4 x float>
  %fmul8 = fmul <4 x float> %load2, %uitofp8
  %fmul9 = fmul <4 x float> %load0, %uitofp9
  %fmul10 = fmul <4 x float> %load1, %uitofp10
  %fmul11 = fmul <4 x float> %load2, %uitofp11
  %gep11 = getelementptr inbounds i8, ptr %in2, i64 128
  %gep12 = getelementptr inbounds i8, ptr %in2, i64 144
  %gep13 = getelementptr inbounds i8, ptr %in2, i64 160
  %gep14 = getelementptr inbounds i8, ptr %in2, i64 176
  store <4 x float> %fmul8, ptr %gep11, align 16
  store <4 x float> %fmul9, ptr %gep12, align 16
  store <4 x float> %fmul10, ptr %gep13, align 16
  store <4 x float> %fmul11, ptr %gep14, align 16
  ret void
}