; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
declare float @llvm.maximum.f32(float, float)
declare float @llvm.minimum.f32(float, float)
declare double @llvm.maximum.f64(double, double)
declare double @llvm.minimum.f64(double, double)

@srcA64 = common global [8 x double] zeroinitializer, align 64
@srcB64 = common global [8 x double] zeroinitializer, align 64
@srcC64 = common global [8 x double] zeroinitializer, align 64
@srcA32 = common global [16 x float] zeroinitializer, align 64
@srcB32 = common global [16 x float] zeroinitializer, align 64
@srcC32 = common global [16 x float] zeroinitializer, align 64
@dst64 = common global [8 x double] zeroinitializer, align 64
@dst32 = common global [16 x float] zeroinitializer, align 64

define void @fmaximum_2f64() {
; SSE-LABEL: define void @fmaximum_2f64() {
; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
; SSE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; SSE-NEXT: store <2 x double> [[TMP3]], ptr @dst64, align 8
; SSE-NEXT: ret void
;
; AVX-LABEL: define void @fmaximum_2f64
; AVX-SAME: () #[[ATTR1:[0-9]+]] {
; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
; AVX-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
; AVX-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; AVX-NEXT: store <2 x double> [[TMP3]], ptr @dst64, align 8
; AVX-NEXT: ret void
;
  %a0 = load double, ptr @srcA64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
  %b0 = load double, ptr @srcB64, align 8
  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
  %fmaximum0 = call double @llvm.maximum.f64(double %a0, double %b0)
  %fmaximum1 = call double @llvm.maximum.f64(double %a1, double %b1)
  store double %fmaximum0, ptr @dst64, align 8
  store double %fmaximum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @fmaximum_4f64() {
; SSE-LABEL: define void @fmaximum_4f64() {
; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
; SSE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; SSE-NEXT: store <2 x double> [[TMP3]], ptr @dst64, align 8
; SSE-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
; SSE-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
; SSE-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
; SSE-NEXT: store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
; SSE-NEXT: ret void
;
; AVX-LABEL: define void @fmaximum_4f64
; AVX-SAME: () #[[ATTR1]] {
; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 8
; AVX-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 8
; AVX-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.maximum.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
; AVX-NEXT: store <4 x double> [[TMP3]], ptr @dst64, align 8
; AVX-NEXT: ret void
;
  %a0 = load double, ptr @srcA64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 8
  %b0 = load double, ptr @srcB64, align 8
  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 8
  %fmaximum0 = call double @llvm.maximum.f64(double %a0, double %b0)
  %fmaximum1 = call double @llvm.maximum.f64(double %a1, double %b1)
  %fmaximum2 = call double @llvm.maximum.f64(double %a2, double %b2)
  %fmaximum3 = call double @llvm.maximum.f64(double %a3, double %b3)
  store double %fmaximum0, ptr @dst64, align 8
  store double %fmaximum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %fmaximum2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
  store double %fmaximum3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @fmaximum_8f64() {
; SSE-LABEL: define void @fmaximum_8f64() {
; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 4
; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 4
; SSE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; SSE-NEXT: store <2 x double> [[TMP3]], ptr @dst64, align 4
; SSE-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
; SSE-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
; SSE-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
; SSE-NEXT: store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
; SSE-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]])
; SSE-NEXT: store <2 x double> [[TMP9]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
; SSE-NEXT: [[TMP11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
; SSE-NEXT: [[TMP12:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP10]], <2 x double> [[TMP11]])
; SSE-NEXT: store <2 x double> [[TMP12]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
; SSE-NEXT: ret void
;
; AVX-LABEL: define void @fmaximum_8f64
; AVX-SAME: () #[[ATTR1]] {
; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 4
; AVX-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 4
; AVX-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.maximum.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
; AVX-NEXT: store <4 x double> [[TMP3]], ptr @dst64, align 4
; AVX-NEXT: [[TMP4:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
; AVX-NEXT: [[TMP5:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
; AVX-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.maximum.v4f64(<4 x double> [[TMP4]], <4 x double> [[TMP5]])
; AVX-NEXT: store <4 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
; AVX-NEXT: ret void
;
  %a0 = load double, ptr @srcA64, align 4
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 4
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 4
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 5), align 4
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 7), align 4
  %b0 = load double, ptr @srcB64, align 4
  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 4
  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 4
  %b4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
  %b5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 5), align 4
  %b6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
  %b7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 7), align 4
  %fmaximum0 = call double @llvm.maximum.f64(double %a0, double %b0)
  %fmaximum1 = call double @llvm.maximum.f64(double %a1, double %b1)
  %fmaximum2 = call double @llvm.maximum.f64(double %a2, double %b2)
  %fmaximum3 = call double @llvm.maximum.f64(double %a3, double %b3)
  %fmaximum4 = call double @llvm.maximum.f64(double %a4, double %b4)
  %fmaximum5 = call double @llvm.maximum.f64(double %a5, double %b5)
  %fmaximum6 = call double @llvm.maximum.f64(double %a6, double %b6)
  %fmaximum7 = call double @llvm.maximum.f64(double %a7, double %b7)
  store double %fmaximum0, ptr @dst64, align 4
  store double %fmaximum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
  store double %fmaximum2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
  store double %fmaximum3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
  store double %fmaximum4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
  store double %fmaximum5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
  store double %fmaximum6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
  store double %fmaximum7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
  ret void
}

define double @reduction_v2f64(ptr %p) {
; SSE-LABEL: define double @reduction_v2f64
; SSE-SAME: (ptr [[P:%.*]]) {
; SSE-NEXT: [[G1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 1
; SSE-NEXT: [[T0:%.*]] = load double, ptr [[P]], align 4
; SSE-NEXT: [[T1:%.*]] = load double, ptr [[G1]], align 4
; SSE-NEXT: [[M1:%.*]] = tail call double @llvm.maximum.f64(double [[T1]], double [[T0]])
; SSE-NEXT: ret double [[M1]]
;
; AVX-LABEL: define double @reduction_v2f64
; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; AVX-NEXT: [[G1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 1
; AVX-NEXT: [[T0:%.*]] = load double, ptr [[P]], align 4
; AVX-NEXT: [[T1:%.*]] = load double, ptr [[G1]], align 4
; AVX-NEXT: [[M1:%.*]] = tail call double @llvm.maximum.f64(double [[T1]], double [[T0]])
; AVX-NEXT: ret double [[M1]]
;
  %g1 = getelementptr inbounds double, ptr %p, i64 1
  %t0 = load double, ptr %p, align 4
  %t1 = load double, ptr %g1, align 4
  %m1 = tail call double @llvm.maximum.f64(double %t1, double %t0)
  ret double %m1
}

define float @reduction_v4f32(ptr %p) {
; SSE-LABEL: define float @reduction_v4f32
; SSE-SAME: (ptr [[P:%.*]]) {
; SSE-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P]], align 4
; SSE-NEXT: [[TMP2:%.*]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[TMP1]])
; SSE-NEXT: ret float [[TMP2]]
;
; AVX-LABEL: define float @reduction_v4f32
; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; AVX-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P]], align 4
; AVX-NEXT: [[TMP2:%.*]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[TMP1]])
; AVX-NEXT: ret float [[TMP2]]
;
  %g1 = getelementptr inbounds float, ptr %p, i64 1
  %g2 = getelementptr inbounds float, ptr %p, i64 2
  %g3 = getelementptr inbounds float, ptr %p, i64 3
  %t0 = load float, ptr %p, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %m1 = tail call float @llvm.maximum.f32(float %t1, float %t0)
  %m2 = tail call float @llvm.maximum.f32(float %t2, float %m1)
  %m3 = tail call float @llvm.maximum.f32(float %t3, float %m2)
  ret float %m3
}

define double @reduction_v4f64_fminimum(ptr %p) {
; SSE-LABEL: define double @reduction_v4f64_fminimum
; SSE-SAME: (ptr [[P:%.*]]) {
; SSE-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[P]], align 4
; SSE-NEXT: [[TMP2:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[TMP1]])
; SSE-NEXT: ret double [[TMP2]]
;
; AVX-LABEL: define double @reduction_v4f64_fminimum
; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[P]], align 4
; AVX-NEXT: [[TMP2:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[TMP1]])
; AVX-NEXT: ret double [[TMP2]]
;
  %g1 = getelementptr inbounds double, ptr %p, i64 1
  %g2 = getelementptr inbounds double, ptr %p, i64 2
  %g3 = getelementptr inbounds double, ptr %p, i64 3
  %t0 = load double, ptr %p, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %m1 = tail call double @llvm.minimum.f64(double %t1, double %t0)
  %m2 = tail call double @llvm.minimum.f64(double %t2, double %m1)
  %m3 = tail call double @llvm.minimum.f64(double %t3, double %m2)
  ret double %m3
}

define float @reduction_v8f32_fminimum(ptr %p) {
; SSE-LABEL: define float @reduction_v8f32_fminimum
; SSE-SAME: (ptr [[P:%.*]]) {
; SSE-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 4
; SSE-NEXT: [[TMP2:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP1]])
; SSE-NEXT: ret float [[TMP2]]
;
; AVX-LABEL: define float @reduction_v8f32_fminimum
; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 4
; AVX-NEXT: [[TMP2:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP1]])
; AVX-NEXT: ret float [[TMP2]]
;
  %g1 = getelementptr inbounds float, ptr %p, i64 1
  %g2 = getelementptr inbounds float, ptr %p, i64 2
  %g3 = getelementptr inbounds float, ptr %p, i64 3
  %g4 = getelementptr inbounds float, ptr %p, i64 4
  %g5 = getelementptr inbounds float, ptr %p, i64 5
  %g6 = getelementptr inbounds float, ptr %p, i64 6
  %g7 = getelementptr inbounds float, ptr %p, i64 7
  %t0 = load float, ptr %p, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %t4 = load float, ptr %g4, align 4
  %t5 = load float, ptr %g5, align 4
  %t6 = load float, ptr %g6, align 4
  %t7 = load float, ptr %g7, align 4
  %m1 = tail call float @llvm.minimum.f32(float %t1, float %t0)
  %m2 = tail call float @llvm.minimum.f32(float %t2, float %m1)
  %m3 = tail call float @llvm.minimum.f32(float %t3, float %m2)
  %m4 = tail call float @llvm.minimum.f32(float %t4, float %m3)
  %m5 = tail call float @llvm.minimum.f32(float %m4, float %t6)
  %m6 = tail call float @llvm.minimum.f32(float %m5, float %t5)
  %m7 = tail call float @llvm.minimum.f32(float %m6, float %t7)
  ret float %m7
}