; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z15 -passes=slp-vectorizer %s -S -o - \
; RUN: | FileCheck %s

; Test vectorization and reassociation of fmin/fmax operations. Vectorization
; is more profitable if the loads are also vectorizable.

; Four doubles at consecutive offsets (0..3): the scalar loads are folded into
; a single <4 x double> load and the minnum chain becomes one
; @llvm.vector.reduce.fmin call.
define double @fmin_double_4_nums_seq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmin_double_4_nums_seq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[TMP1]])
; CHECK-NEXT:    ret double [[TMP2]]
;
  %g1 = getelementptr inbounds double, ptr %x, i64 1
  %g2 = getelementptr inbounds double, ptr %x, i64 2
  %g3 = getelementptr inbounds double, ptr %x, i64 3
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
  ret double %m3
}

; Sixteen doubles at non-consecutive (stride-2) offsets: the scalar loads
; remain, but the minnum chain is still reassociated into an insertelement
; sequence feeding one @llvm.vector.reduce.fmin call.
define double @fmin_double_16_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmin_double_16_nums_nonseq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6
; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10
; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12
; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14
; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16
; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18
; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20
; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22
; CHECK-NEXT:    [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24
; CHECK-NEXT:    [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26
; CHECK-NEXT:    [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28
; CHECK-NEXT:    [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30
; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load double, ptr [[G2]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load double, ptr [[G3]], align 4
; CHECK-NEXT:    [[T4:%.*]] = load double, ptr [[G4]], align 4
; CHECK-NEXT:    [[T5:%.*]] = load double, ptr [[G5]], align 4
; CHECK-NEXT:    [[T6:%.*]] = load double, ptr [[G6]], align 4
; CHECK-NEXT:    [[T7:%.*]] = load double, ptr [[G7]], align 4
; CHECK-NEXT:    [[T8:%.*]] = load double, ptr [[G8]], align 4
; CHECK-NEXT:    [[T9:%.*]] = load double, ptr [[G9]], align 4
; CHECK-NEXT:    [[T10:%.*]] = load double, ptr [[G10]], align 4
; CHECK-NEXT:    [[T11:%.*]] = load double, ptr [[G11]], align 4
; CHECK-NEXT:    [[T12:%.*]] = load double, ptr [[G12]], align 4
; CHECK-NEXT:    [[T13:%.*]] = load double, ptr [[G13]], align 4
; CHECK-NEXT:    [[T14:%.*]] = load double, ptr [[G14]], align 4
; CHECK-NEXT:    [[T15:%.*]] = load double, ptr [[G15]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13
; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15
; CHECK-NEXT:    [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> [[TMP16]])
; CHECK-NEXT:    ret double [[TMP17]]
;
  %g1 = getelementptr inbounds double, ptr %x, i64 2
  %g2 = getelementptr inbounds double, ptr %x, i64 4
  %g3 = getelementptr inbounds double, ptr %x, i64 6
  %g4 = getelementptr inbounds double, ptr %x, i64 8
  %g5 = getelementptr inbounds double, ptr %x, i64 10
  %g6 = getelementptr inbounds double, ptr %x, i64 12
  %g7 = getelementptr inbounds double, ptr %x, i64 14
  %g8 = getelementptr inbounds double, ptr %x, i64 16
  %g9 = getelementptr inbounds double, ptr %x, i64 18
  %g10 = getelementptr inbounds double, ptr %x, i64 20
  %g11 = getelementptr inbounds double, ptr %x, i64 22
  %g12 = getelementptr inbounds double, ptr %x, i64 24
  %g13 = getelementptr inbounds double, ptr %x, i64 26
  %g14 = getelementptr inbounds double, ptr %x, i64 28
  %g15 = getelementptr inbounds double, ptr %x, i64 30
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %t4 = load double, ptr %g4, align 4
  %t5 = load double, ptr %g5, align 4
  %t6 = load double, ptr %g6, align 4
  %t7 = load double, ptr %g7, align 4
  %t8 = load double, ptr %g8, align 4
  %t9 = load double, ptr %g9, align 4
  %t10 = load double, ptr %g10, align 4
  %t11 = load double, ptr %g11, align 4
  %t12 = load double, ptr %g12, align 4
  %t13 = load double, ptr %g13, align 4
  %t14 = load double, ptr %g14, align 4
  %t15 = load double, ptr %g15, align 4
  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
  %m4 = tail call fast double @llvm.minnum.f64(double %t4, double %m3)
  %m5 = tail call fast double @llvm.minnum.f64(double %t5, double %m4)
  %m6 = tail call fast double @llvm.minnum.f64(double %t6, double %m5)
  %m7 = tail call fast double @llvm.minnum.f64(double %t7, double %m6)
  %m8 = tail call fast double @llvm.minnum.f64(double %t8, double %m7)
  %m9 = tail call fast double @llvm.minnum.f64(double %t9, double %m8)
  %m10 = tail call fast double @llvm.minnum.f64(double %t10, double %m9)
  %m11 = tail call fast double @llvm.minnum.f64(double %t11, double %m10)
  %m12 = tail call fast double @llvm.minnum.f64(double %t12, double %m11)
  %m13 = tail call fast double @llvm.minnum.f64(double %t13, double %m12)
  %m14 = tail call fast double @llvm.minnum.f64(double %t14, double %m13)
  %m15 = tail call fast double @llvm.minnum.f64(double %t15, double %m14)
  ret double %m15
}

; Twelve floats at non-consecutive (stride-2) offsets: non-power-of-two
; element count, reduced via insertelement into <12 x float> and one
; @llvm.vector.reduce.fmin call.
define float @fmin_float_12_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define float @fmin_float_12_nums_nonseq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6
; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8
; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10
; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12
; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14
; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16
; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18
; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20
; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22
; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[G1]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[G2]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[G3]], align 4
; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[G4]], align 4
; CHECK-NEXT:    [[T5:%.*]] = load float, ptr [[G5]], align 4
; CHECK-NEXT:    [[T6:%.*]] = load float, ptr [[G6]], align 4
; CHECK-NEXT:    [[T7:%.*]] = load float, ptr [[G7]], align 4
; CHECK-NEXT:    [[T8:%.*]] = load float, ptr [[G8]], align 4
; CHECK-NEXT:    [[T9:%.*]] = load float, ptr [[G9]], align 4
; CHECK-NEXT:    [[T10:%.*]] = load float, ptr [[G10]], align 4
; CHECK-NEXT:    [[T11:%.*]] = load float, ptr [[G11]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.v12f32(<12 x float> [[TMP12]])
; CHECK-NEXT:    ret float [[TMP13]]
;
  %g1 = getelementptr inbounds float, ptr %x, i64 2
  %g2 = getelementptr inbounds float, ptr %x, i64 4
  %g3 = getelementptr inbounds float, ptr %x, i64 6
  %g4 = getelementptr inbounds float, ptr %x, i64 8
  %g5 = getelementptr inbounds float, ptr %x, i64 10
  %g6 = getelementptr inbounds float, ptr %x, i64 12
  %g7 = getelementptr inbounds float, ptr %x, i64 14
  %g8 = getelementptr inbounds float, ptr %x, i64 16
  %g9 = getelementptr inbounds float, ptr %x, i64 18
  %g10 = getelementptr inbounds float, ptr %x, i64 20
  %g11 = getelementptr inbounds float, ptr %x, i64 22
  %t0 = load float, ptr %x, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %t4 = load float, ptr %g4, align 4
  %t5 = load float, ptr %g5, align 4
  %t6 = load float, ptr %g6, align 4
  %t7 = load float, ptr %g7, align 4
  %t8 = load float, ptr %g8, align 4
  %t9 = load float, ptr %g9, align 4
  %t10 = load float, ptr %g10, align 4
  %t11 = load float, ptr %g11, align 4
  %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0)
  %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1)
  %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2)
  %m4 = tail call fast float @llvm.minnum.f32(float %t4, float %m3)
  %m5 = tail call fast float @llvm.minnum.f32(float %t5, float %m4)
  %m6 = tail call fast float @llvm.minnum.f32(float %t6, float %m5)
  %m7 = tail call fast float @llvm.minnum.f32(float %t7, float %m6)
  %m8 = tail call fast float @llvm.minnum.f32(float %t8, float %m7)
  %m9 = tail call fast float @llvm.minnum.f32(float %t9, float %m8)
  %m10 = tail call fast float @llvm.minnum.f32(float %t10, float %m9)
  %m11 = tail call fast float @llvm.minnum.f32(float %t11, float %m10)
  ret float %m11
}

; Same as @fmin_double_4_nums_seq, but reducing with maxnum: one <4 x double>
; load feeding @llvm.vector.reduce.fmax.
define double @fmax_double_4_nums_seq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmax_double_4_nums_seq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[TMP1]])
; CHECK-NEXT:    ret double [[TMP2]]
;
  %g1 = getelementptr inbounds double, ptr %x, i64 1
  %g2 = getelementptr inbounds double, ptr %x, i64 2
  %g3 = getelementptr inbounds double, ptr %x, i64 3
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
  ret double %m3
}

; Same as @fmin_double_16_nums_nonseq, but reducing with maxnum:
; scalar loads plus insertelement into <16 x double> and one
; @llvm.vector.reduce.fmax call.
define double @fmax_double_16_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmax_double_16_nums_nonseq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6
; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10
; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12
; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14
; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16
; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18
; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20
; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22
; CHECK-NEXT:    [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24
; CHECK-NEXT:    [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26
; CHECK-NEXT:    [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28
; CHECK-NEXT:    [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30
; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load double, ptr [[G2]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load double, ptr [[G3]], align 4
; CHECK-NEXT:    [[T4:%.*]] = load double, ptr [[G4]], align 4
; CHECK-NEXT:    [[T5:%.*]] = load double, ptr [[G5]], align 4
; CHECK-NEXT:    [[T6:%.*]] = load double, ptr [[G6]], align 4
; CHECK-NEXT:    [[T7:%.*]] = load double, ptr [[G7]], align 4
; CHECK-NEXT:    [[T8:%.*]] = load double, ptr [[G8]], align 4
; CHECK-NEXT:    [[T9:%.*]] = load double, ptr [[G9]], align 4
; CHECK-NEXT:    [[T10:%.*]] = load double, ptr [[G10]], align 4
; CHECK-NEXT:    [[T11:%.*]] = load double, ptr [[G11]], align 4
; CHECK-NEXT:    [[T12:%.*]] = load double, ptr [[G12]], align 4
; CHECK-NEXT:    [[T13:%.*]] = load double, ptr [[G13]], align 4
; CHECK-NEXT:    [[T14:%.*]] = load double, ptr [[G14]], align 4
; CHECK-NEXT:    [[T15:%.*]] = load double, ptr [[G15]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13
; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15
; CHECK-NEXT:    [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> [[TMP16]])
; CHECK-NEXT:    ret double [[TMP17]]
;
  %g1 = getelementptr inbounds double, ptr %x, i64 2
  %g2 = getelementptr inbounds double, ptr %x, i64 4
  %g3 = getelementptr inbounds double, ptr %x, i64 6
  %g4 = getelementptr inbounds double, ptr %x, i64 8
  %g5 = getelementptr inbounds double, ptr %x, i64 10
  %g6 = getelementptr inbounds double, ptr %x, i64 12
  %g7 = getelementptr inbounds double, ptr %x, i64 14
  %g8 = getelementptr inbounds double, ptr %x, i64 16
  %g9 = getelementptr inbounds double, ptr %x, i64 18
  %g10 = getelementptr inbounds double, ptr %x, i64 20
  %g11 = getelementptr inbounds double, ptr %x, i64 22
  %g12 = getelementptr inbounds double, ptr %x, i64 24
  %g13 = getelementptr inbounds double, ptr %x, i64 26
  %g14 = getelementptr inbounds double, ptr %x, i64 28
  %g15 = getelementptr inbounds double, ptr %x, i64 30
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %t4 = load double, ptr %g4, align 4
  %t5 = load double, ptr %g5, align 4
  %t6 = load double, ptr %g6, align 4
  %t7 = load double, ptr %g7, align 4
  %t8 = load double, ptr %g8, align 4
  %t9 = load double, ptr %g9, align 4
  %t10 = load double, ptr %g10, align 4
  %t11 = load double, ptr %g11, align 4
  %t12 = load double, ptr %g12, align 4
  %t13 = load double, ptr %g13, align 4
  %t14 = load double, ptr %g14, align 4
  %t15 = load double, ptr %g15, align 4
  %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
  %m4 = tail call fast double @llvm.maxnum.f64(double %t4, double %m3)
  %m5 = tail call fast double @llvm.maxnum.f64(double %t5, double %m4)
  %m6 = tail call fast double @llvm.maxnum.f64(double %t6, double %m5)
  %m7 = tail call fast double @llvm.maxnum.f64(double %t7, double %m6)
  %m8 = tail call fast double @llvm.maxnum.f64(double %t8, double %m7)
  %m9 = tail call fast double @llvm.maxnum.f64(double %t9, double %m8)
  %m10 = tail call fast double @llvm.maxnum.f64(double %t10, double %m9)
  %m11 = tail call fast double @llvm.maxnum.f64(double %t11, double %m10)
  %m12 = tail call fast double @llvm.maxnum.f64(double %t12, double %m11)
  %m13 = tail call fast double @llvm.maxnum.f64(double %t13, double %m12)
  %m14 = tail call fast double @llvm.maxnum.f64(double %t14, double %m13)
  %m15 = tail call fast double @llvm.maxnum.f64(double %t15, double %m14)
  ret double %m15
}

; Same as @fmin_float_12_nums_nonseq, but reducing with maxnum: non-power-of-two
; <12 x float> built with insertelement and reduced by @llvm.vector.reduce.fmax.
define float @fmax_float_12_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define float @fmax_float_12_nums_nonseq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6
; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8
; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10
; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12
; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14
; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16
; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18
; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20
; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22
; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[G1]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[G2]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[G3]], align 4
; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[G4]], align 4
; CHECK-NEXT:    [[T5:%.*]] = load float, ptr [[G5]], align 4
; CHECK-NEXT:    [[T6:%.*]] = load float, ptr [[G6]], align 4
; CHECK-NEXT:    [[T7:%.*]] = load float, ptr [[G7]], align 4
; CHECK-NEXT:    [[T8:%.*]] = load float, ptr [[G8]], align 4
; CHECK-NEXT:    [[T9:%.*]] = load float, ptr [[G9]], align 4
; CHECK-NEXT:    [[T10:%.*]] = load float, ptr [[G10]], align 4
; CHECK-NEXT:    [[T11:%.*]] = load float, ptr [[G11]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmax.v12f32(<12 x float> [[TMP12]])
; CHECK-NEXT:    ret float [[TMP13]]
;
  %g1 = getelementptr inbounds float, ptr %x, i64 2
  %g2 = getelementptr inbounds float, ptr %x, i64 4
  %g3 = getelementptr inbounds float, ptr %x, i64 6
  %g4 = getelementptr inbounds float, ptr %x, i64 8
  %g5 = getelementptr inbounds float, ptr %x, i64 10
  %g6 = getelementptr inbounds float, ptr %x, i64 12
  %g7 = getelementptr inbounds float, ptr %x, i64 14
  %g8 = getelementptr inbounds float, ptr %x, i64 16
  %g9 = getelementptr inbounds float, ptr %x, i64 18
  %g10 = getelementptr inbounds float, ptr %x, i64 20
  %g11 = getelementptr inbounds float, ptr %x, i64 22
  %t0 = load float, ptr %x, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %t4 = load float, ptr %g4, align 4
  %t5 = load float, ptr %g5, align 4
  %t6 = load float, ptr %g6, align 4
  %t7 = load float, ptr %g7, align 4
  %t8 = load float, ptr %g8, align 4
  %t9 = load float, ptr %g9, align 4
  %t10 = load float, ptr %g10, align 4
  %t11 = load float, ptr %g11, align 4
  %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0)
  %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1)
  %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2)
  %m4 = tail call fast float @llvm.maxnum.f32(float %t4, float %m3)
  %m5 = tail call fast float @llvm.maxnum.f32(float %t5, float %m4)
  %m6 = tail call fast float @llvm.maxnum.f32(float %t6, float %m5)
  %m7 = tail call fast float @llvm.maxnum.f32(float %t7, float %m6)
  %m8 = tail call fast float @llvm.maxnum.f32(float %t8, float %m7)
  %m9 = tail call fast float @llvm.maxnum.f32(float %t9, float %m8)
  %m10 = tail call fast float @llvm.maxnum.f32(float %t10, float %m9)
  %m11 = tail call fast float @llvm.maxnum.f32(float %t11, float %m10)
  ret float %m11
}

declare float @llvm.minnum.f32(float, float)
declare double @llvm.minnum.f64(double, double)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)