; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -slp-threshold=-10 | FileCheck %s --check-prefix=THRESHOLD

@n = external local_unnamed_addr global i32, align 4
@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@res = external local_unnamed_addr global float, align 4

define float @baz() {
; CHECK-LABEL: @baz(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 2.000000e+00
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
; CHECK-NEXT:    store float [[OP_RDX]], ptr @res, align 4
; CHECK-NEXT:    ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @baz(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
; THRESHOLD-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1
; THRESHOLD-NEXT:    [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], splat (float 2.000000e+00)
; THRESHOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
; THRESHOLD-NEXT:    store float [[OP_RDX]], ptr @res, align 4
; THRESHOLD-NEXT:    ret float [[OP_RDX]]
;
entry:
  %0 = load i32, ptr @n, align 4
  %mul = mul nsw i32 %0, 3
  %conv = sitofp i32 %mul to float
  %1 = load float, ptr @arr, align 16
  %2 = load float, ptr @arr1, align 16
  %mul4 = fmul fast float %2, %1
  %add = fadd fast float %mul4, %conv
  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
  %mul4.1 = fmul fast float %4, %3
  %add.1 = fadd fast float %mul4.1, %add
  %5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
  %mul4.2 = fmul fast float %6, %5
  %add.2 = fadd fast float %mul4.2, %add.1
  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
  %8 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
  %mul4.3 = fmul fast float %8, %7
  %add.3 = fadd fast float %mul4.3, %add.2
  %add7 = fadd fast float %add.3, %conv
  %add19 = fadd fast float %mul4, %add7
  %add19.1 = fadd fast float %mul4.1, %add19
  %add19.2 = fadd fast float %mul4.2, %add19.1
  %add19.3 = fadd fast float %mul4.3, %add19.2
  store float %add19.3, ptr @res, align 4
  ret float %add19.3
}
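; bazz: the same multiply-add pattern widened to eight elements, with two
; scalar feed-ins (%conv and %conv6); both run lines are expected to reduce
; via @llvm.vector.reduce.fadd.v8f32 and fold the scalars in afterwards.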
define float @bazz() {
; CHECK-LABEL: @bazz(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @arr1, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
; CHECK-NEXT:    store float [[OP_RDX1]], ptr @res, align 4
; CHECK-NEXT:    ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @bazz(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; THRESHOLD-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @arr, align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
; THRESHOLD-NEXT:    store float [[OP_RDX1]], ptr @res, align 4
; THRESHOLD-NEXT:    ret float [[OP_RDX1]]
;
entry:
  %0 = load i32, ptr @n, align 4
  %mul = mul nsw i32 %0, 3
  %conv = sitofp i32 %mul to float
  %1 = load float, ptr @arr, align 16
  %2 = load float, ptr @arr1, align 16
  %mul4 = fmul fast float %2, %1
  %add = fadd fast float %mul4, %conv
  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
  %mul4.1 = fmul fast float %4, %3
  %add.1 = fadd fast float %mul4.1, %add
  %5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
  %mul4.2 = fmul fast float %6, %5
  %add.2 = fadd fast float %mul4.2, %add.1
  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
  %8 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
  %mul4.3 = fmul fast float %8, %7
  %add.3 = fadd fast float %mul4.3, %add.2
  %mul5 = shl nsw i32 %0, 2
  %conv6 = sitofp i32 %mul5 to float
  %add7 = fadd fast float %add.3, %conv6
  %9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 4), align 16
  %10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 4), align 16
  %mul18 = fmul fast float %10, %9
  %add19 = fadd fast float %mul18, %add7
  %11 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 5), align 4
  %12 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 5), align 4
  %mul18.1 = fmul fast float %12, %11
  %add19.1 = fadd fast float %mul18.1, %add19
  %13 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 6), align 8
  %14 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 6), align 8
  %mul18.2 = fmul fast float %14, %13
  %add19.2 = fadd fast float %mul18.2, %add19.1
  %15 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 7), align 4
  %16 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 7), align 4
  %mul18.3 = fmul fast float %16, %15
  %add19.3 = fadd fast float %mul18.3, %add19.2
  store float %add19.3, ptr @res, align 4
  ret float %add19.3
}
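; bazzz: a 4-wide dot product whose sum is multiplied (not added) by %conv,
; so the scalar is expected to stay outside the reduction in both run lines.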
define float @bazzz() {
; CHECK-LABEL: @bazzz(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; CHECK-NEXT:    store float [[TMP5]], ptr @res, align 4
; CHECK-NEXT:    ret float [[TMP5]]
;
; THRESHOLD-LABEL: @bazzz(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; THRESHOLD-NEXT:    store float [[TMP5]], ptr @res, align 4
; THRESHOLD-NEXT:    ret float [[TMP5]]
;
entry:
  %0 = load i32, ptr @n, align 4
  %conv = sitofp i32 %0 to float
  %1 = load float, ptr @arr, align 16
  %2 = load float, ptr @arr1, align 16
  %mul = fmul fast float %2, %1
  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
  %mul.1 = fmul fast float %4, %3
  %5 = fadd fast float %mul.1, %mul
  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
  %mul.2 = fmul fast float %7, %6
  %8 = fadd fast float %mul.2, %5
  %9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
  %10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
  %mul.3 = fmul fast float %10, %9
  %11 = fadd fast float %mul.3, %8
  %12 = fmul fast float %conv, %11
  store float %12, ptr @res, align 4
  ret float %12
}
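; foo: the same reduction as bazzz, but the result is converted back to i32
; and stored to @n instead of @res.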
define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
; CHECK-NEXT:    store i32 [[CONV4]], ptr @n, align 4
; CHECK-NEXT:    ret i32 [[CONV4]]
;
; THRESHOLD-LABEL: @foo(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
; THRESHOLD-NEXT:    store i32 [[CONV4]], ptr @n, align 4
; THRESHOLD-NEXT:    ret i32 [[CONV4]]
;
entry:
  %0 = load i32, ptr @n, align 4
  %conv = sitofp i32 %0 to float
  %1 = load float, ptr @arr, align 16
  %2 = load float, ptr @arr1, align 16
  %mul = fmul fast float %2, %1
  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
  %mul.1 = fmul fast float %4, %3
  %5 = fadd fast float %mul.1, %mul
  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
  %mul.2 = fmul fast float %7, %6
  %8 = fadd fast float %mul.2, %5
  %9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
  %10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
  %mul.3 = fmul fast float %10, %9
  %11 = fadd fast float %mul.3, %8
  %12 = fmul fast float %conv, %11
  %conv4 = fptosi float %12 to i32
  store i32 %conv4, ptr @n, align 4
  ret i32 %conv4
}

; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
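; bar: a max chain built from fcmp+select; only the first two products are
; vectorized and the max reduction itself stays scalar (see the FIXME above).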
define float @bar() {
; CHECK-LABEL: @bar(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr @arr1, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
; CHECK-NEXT:    [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
; CHECK-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
; CHECK-NEXT:    [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
; CHECK-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
; CHECK-NEXT:    store float [[MAX_0_MUL3_2]], ptr @res, align 4
; CHECK-NEXT:    ret float [[MAX_0_MUL3_2]]
;
; THRESHOLD-LABEL: @bar(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr @arr, align 16
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; THRESHOLD-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESHOLD-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESHOLD-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
; THRESHOLD-NEXT:    [[TMP5:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
; THRESHOLD-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
; THRESHOLD-NEXT:    [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
; THRESHOLD-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
; THRESHOLD-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
; THRESHOLD-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
; THRESHOLD-NEXT:    [[TMP8:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
; THRESHOLD-NEXT:    [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; THRESHOLD-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
; THRESHOLD-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
; THRESHOLD-NEXT:    store float [[MAX_0_MUL3_2]], ptr @res, align 4
; THRESHOLD-NEXT:    ret float [[MAX_0_MUL3_2]]
;
entry:
  %0 = load float, ptr @arr, align 16
  %1 = load float, ptr @arr1, align 16
  %mul = fmul fast float %1, %0
  %2 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
  %mul3 = fmul fast float %3, %2
  %cmp4 = fcmp fast ogt float %mul, %mul3
  %max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
  %5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
  %mul3.1 = fmul fast float %5, %4
  %cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
  %max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
  %mul3.2 = fmul fast float %7, %6
  %cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
  %max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
  store float %max.0.mul3.2, ptr @res, align 4
  ret float %max.0.mul3.2
}
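; f: a straight 48-element sum (roughly: for (i = 0; i < 48; ++i) s += x[i]);
; expected to collapse into a single <48 x float> load feeding one
; @llvm.vector.reduce.fadd.v48f32 call.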
define float @f(ptr nocapture readonly %x) {
; CHECK-LABEL: @f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <48 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v48f32(float 0.000000e+00, <48 x float> [[TMP0]])
; CHECK-NEXT:    ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @f(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <48 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v48f32(float 0.000000e+00, <48 x float> [[TMP0]])
; THRESHOLD-NEXT:    ret float [[OP_RDX]]
;
  entry:
  %0 = load float, ptr %x, align 4
  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
  %1 = load float, ptr %arrayidx.1, align 4
  %add.1 = fadd fast float %1, %0
  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
  %2 = load float, ptr %arrayidx.2, align 4
  %add.2 = fadd fast float %2, %add.1
  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
  %3 = load float, ptr %arrayidx.3, align 4
  %add.3 = fadd fast float %3, %add.2
  %arrayidx.4 = getelementptr inbounds float, ptr %x, i64 4
  %4 = load float, ptr %arrayidx.4, align 4
  %add.4 = fadd fast float %4, %add.3
  %arrayidx.5 = getelementptr inbounds float, ptr %x, i64 5
  %5 = load float, ptr %arrayidx.5, align 4
  %add.5 = fadd fast float %5, %add.4
  %arrayidx.6 = getelementptr inbounds float, ptr %x, i64 6
  %6 = load float, ptr %arrayidx.6, align 4
  %add.6 = fadd fast float %6, %add.5
  %arrayidx.7 = getelementptr inbounds float, ptr %x, i64 7
  %7 = load float, ptr %arrayidx.7, align 4
  %add.7 = fadd fast float %7, %add.6
  %arrayidx.8 = getelementptr inbounds float, ptr %x, i64 8
  %8 = load float, ptr %arrayidx.8, align 4
  %add.8 = fadd fast float %8, %add.7
  %arrayidx.9 = getelementptr inbounds float, ptr %x, i64 9
  %9 = load float, ptr %arrayidx.9, align 4
  %add.9 = fadd fast float %9, %add.8
  %arrayidx.10 = getelementptr inbounds float, ptr %x, i64 10
  %10 = load float, ptr %arrayidx.10, align 4
  %add.10 = fadd fast float %10, %add.9
  %arrayidx.11 = getelementptr inbounds float, ptr %x, i64 11
  %11 = load float, ptr %arrayidx.11, align 4
  %add.11 = fadd fast float %11, %add.10
  %arrayidx.12 = getelementptr inbounds float, ptr %x, i64 12
  %12 = load float, ptr %arrayidx.12, align 4
  %add.12 = fadd fast float %12, %add.11
  %arrayidx.13 = getelementptr inbounds float, ptr %x, i64 13
  %13 = load float, ptr %arrayidx.13, align 4
  %add.13 = fadd fast float %13, %add.12
  %arrayidx.14 = getelementptr inbounds float, ptr %x, i64 14
  %14 = load float, ptr %arrayidx.14, align 4
  %add.14 = fadd fast float %14, %add.13
  %arrayidx.15 = getelementptr inbounds float, ptr %x, i64 15
  %15 = load float, ptr %arrayidx.15, align 4
  %add.15 = fadd fast float %15, %add.14
  %arrayidx.16 = getelementptr inbounds float, ptr %x, i64 16
  %16 = load float, ptr %arrayidx.16, align 4
  %add.16 = fadd fast float %16, %add.15
  %arrayidx.17 = getelementptr inbounds float, ptr %x, i64 17
  %17 = load float, ptr %arrayidx.17, align 4
  %add.17 = fadd fast float %17, %add.16
  %arrayidx.18 = getelementptr inbounds float, ptr %x, i64 18
  %18 = load float, ptr %arrayidx.18, align 4
  %add.18 = fadd fast float %18, %add.17
  %arrayidx.19 = getelementptr inbounds float, ptr %x, i64 19
  %19 = load float, ptr %arrayidx.19, align 4
  %add.19 = fadd fast float %19, %add.18
  %arrayidx.20 = getelementptr inbounds float, ptr %x, i64 20
  %20 = load float, ptr %arrayidx.20, align 4
  %add.20 = fadd fast float %20, %add.19
  %arrayidx.21 = getelementptr inbounds float, ptr %x, i64 21
  %21 = load float, ptr %arrayidx.21, align 4
  %add.21 = fadd fast float %21, %add.20
  %arrayidx.22 = getelementptr inbounds float, ptr %x, i64 22
  %22 = load float, ptr %arrayidx.22, align 4
  %add.22 = fadd fast float %22, %add.21
  %arrayidx.23 = getelementptr inbounds float, ptr %x, i64 23
  %23 = load float, ptr %arrayidx.23, align 4
  %add.23 = fadd fast float %23, %add.22
  %arrayidx.24 = getelementptr inbounds float, ptr %x, i64 24
  %24 = load float, ptr %arrayidx.24, align 4
  %add.24 = fadd fast float %24, %add.23
  %arrayidx.25 = getelementptr inbounds float, ptr %x, i64 25
  %25 = load float, ptr %arrayidx.25, align 4
  %add.25 = fadd fast float %25, %add.24
  %arrayidx.26 = getelementptr inbounds float, ptr %x, i64 26
  %26 = load float, ptr %arrayidx.26, align 4
  %add.26 = fadd fast float %26, %add.25
  %arrayidx.27 = getelementptr inbounds float, ptr %x, i64 27
  %27 = load float, ptr %arrayidx.27, align 4
  %add.27 = fadd fast float %27, %add.26
  %arrayidx.28 = getelementptr inbounds float, ptr %x, i64 28
  %28 = load float, ptr %arrayidx.28, align 4
  %add.28 = fadd fast float %28, %add.27
  %arrayidx.29 = getelementptr inbounds float, ptr %x, i64 29
  %29 = load float, ptr %arrayidx.29, align 4
  %add.29 = fadd fast float %29, %add.28
  %arrayidx.30 = getelementptr inbounds float, ptr %x, i64 30
  %30 = load float, ptr %arrayidx.30, align 4
  %add.30 = fadd fast float %30, %add.29
  %arrayidx.31 = getelementptr inbounds float, ptr %x, i64 31
  %31 = load float, ptr %arrayidx.31, align 4
  %add.31 = fadd fast float %31, %add.30
  %arrayidx.32 = getelementptr inbounds float, ptr %x, i64 32
  %32 = load float, ptr %arrayidx.32, align 4
  %add.32 = fadd fast float %32, %add.31
  %arrayidx.33 = getelementptr inbounds float, ptr %x, i64 33
  %33 = load float, ptr %arrayidx.33, align 4
  %add.33 = fadd fast float %33, %add.32
  %arrayidx.34 = getelementptr inbounds float, ptr %x, i64 34
  %34 = load float, ptr %arrayidx.34, align 4
  %add.34 = fadd fast float %34, %add.33
  %arrayidx.35 = getelementptr inbounds float, ptr %x, i64 35
  %35 = load float, ptr %arrayidx.35, align 4
  %add.35 = fadd fast float %35, %add.34
  %arrayidx.36 = getelementptr inbounds float, ptr %x, i64 36
  %36 = load float, ptr %arrayidx.36, align 4
  %add.36 = fadd fast float %36, %add.35
  %arrayidx.37 = getelementptr inbounds float, ptr %x, i64 37
  %37 = load float, ptr %arrayidx.37, align 4
  %add.37 = fadd fast float %37, %add.36
  %arrayidx.38 = getelementptr inbounds float, ptr %x, i64 38
  %38 = load float, ptr %arrayidx.38, align 4
  %add.38 = fadd fast float %38, %add.37
  %arrayidx.39 = getelementptr inbounds float, ptr %x, i64 39
  %39 = load float, ptr %arrayidx.39, align 4
  %add.39 = fadd fast float %39, %add.38
  %arrayidx.40 = getelementptr inbounds float, ptr %x, i64 40
  %40 = load float, ptr %arrayidx.40, align 4
  %add.40 = fadd fast float %40, %add.39
  %arrayidx.41 = getelementptr inbounds float, ptr %x, i64 41
  %41 = load float, ptr %arrayidx.41, align 4
  %add.41 = fadd fast float %41, %add.40
  %arrayidx.42 = getelementptr inbounds float, ptr %x, i64 42
  %42 = load float, ptr %arrayidx.42, align 4
  %add.42 = fadd fast float %42, %add.41
  %arrayidx.43 = getelementptr inbounds float, ptr %x, i64 43
  %43 = load float, ptr %arrayidx.43, align 4
  %add.43 = fadd fast float %43, %add.42
  %arrayidx.44 = getelementptr inbounds float, ptr %x, i64 44
  %44 = load float, ptr %arrayidx.44, align 4
  %add.44 = fadd fast float %44, %add.43
  %arrayidx.45 = getelementptr inbounds float, ptr %x, i64 45
  %45 = load float, ptr %arrayidx.45, align 4
  %add.45 = fadd fast float %45, %add.44
  %arrayidx.46 = getelementptr inbounds float, ptr %x, i64 46
  %46 = load float, ptr %arrayidx.46, align 4
  %add.46 = fadd fast float %46, %add.45
  %arrayidx.47 = getelementptr inbounds float, ptr %x, i64 47
  %47 = load float, ptr %arrayidx.47, align 4
  %add.47 = fadd fast float %47, %add.46
  ret float %add.47
}
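; f1: the same chain over 32 elements with one extra scalar operand (%conv)
; added at the head (roughly: s = a % b; for (i = 0; i < 32; ++i) s += x[i]);
; the scalar is expected to be peeled off and added after the reduction.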
define float @f1(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @f1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[REM]] to float
; CHECK-NEXT:    [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[CONV]]
; CHECK-NEXT:    ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @f1(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[REM]] to float
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[CONV]]
; THRESHOLD-NEXT:    ret float [[OP_RDX]]
;
  entry:
  %rem = srem i32 %a, %b
  %conv = sitofp i32 %rem to float
  %0 = load float, ptr %x, align 4
  %add = fadd fast float %0, %conv
  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
  %1 = load float, ptr %arrayidx.1, align 4
  %add.1 = fadd fast float %1, %add
  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
  %2 = load float, ptr %arrayidx.2, align 4
  %add.2 = fadd fast float %2, %add.1
  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
  %3 = load float, ptr %arrayidx.3, align 4
  %add.3 = fadd fast float %3, %add.2
  %arrayidx.4 = getelementptr inbounds float, ptr %x, i64 4
  %4 = load float, ptr %arrayidx.4, align 4
  %add.4 = fadd fast float %4, %add.3
  %arrayidx.5 = getelementptr inbounds float, ptr %x, i64 5
  %5 = load float, ptr %arrayidx.5, align 4
  %add.5 = fadd fast float %5, %add.4
  %arrayidx.6 = getelementptr inbounds float, ptr %x, i64 6
  %6 = load float, ptr %arrayidx.6, align 4
  %add.6 = fadd fast float %6, %add.5
  %arrayidx.7 = getelementptr inbounds float, ptr %x, i64 7
  %7 = load float, ptr %arrayidx.7, align 4
  %add.7 = fadd fast float %7, %add.6
  %arrayidx.8 = getelementptr inbounds float, ptr %x, i64 8
  %8 = load float, ptr %arrayidx.8, align 4
  %add.8 = fadd fast float %8, %add.7
  %arrayidx.9 = getelementptr inbounds float, ptr %x, i64 9
  %9 = load float, ptr %arrayidx.9, align 4
  %add.9 = fadd fast float %9, %add.8
  %arrayidx.10 = getelementptr inbounds float, ptr %x, i64 10
  %10 = load float, ptr %arrayidx.10, align 4
  %add.10 = fadd fast float %10, %add.9
  %arrayidx.11 = getelementptr inbounds float, ptr %x, i64 11
  %11 = load float, ptr %arrayidx.11, align 4
  %add.11 = fadd fast float %11, %add.10
  %arrayidx.12 = getelementptr inbounds float, ptr %x, i64 12
  %12 = load float, ptr %arrayidx.12, align 4
  %add.12 = fadd fast float %12, %add.11
  %arrayidx.13 = getelementptr inbounds float, ptr %x, i64 13
  %13 = load float, ptr %arrayidx.13, align 4
  %add.13 = fadd fast float %13, %add.12
  %arrayidx.14 = getelementptr inbounds float, ptr %x, i64 14
  %14 = load float, ptr %arrayidx.14, align 4
  %add.14 = fadd fast float %14, %add.13
  %arrayidx.15 = getelementptr inbounds float, ptr %x, i64 15
  %15 = load float, ptr %arrayidx.15, align 4
  %add.15 = fadd fast float %15, %add.14
  %arrayidx.16 = getelementptr inbounds float, ptr %x, i64 16
  %16 = load float, ptr %arrayidx.16, align 4
  %add.16 = fadd fast float %16, %add.15
  %arrayidx.17 = getelementptr inbounds float, ptr %x, i64 17
  %17 = load float, ptr %arrayidx.17, align 4
  %add.17 = fadd fast float %17, %add.16
  %arrayidx.18 = getelementptr inbounds float, ptr %x, i64 18
  %18 = load float, ptr %arrayidx.18, align 4
  %add.18 = fadd fast float %18, %add.17
  %arrayidx.19 = getelementptr inbounds float, ptr %x, i64 19
  %19 = load float, ptr %arrayidx.19, align 4
  %add.19 = fadd fast float %19, %add.18
  %arrayidx.20 = getelementptr inbounds float, ptr %x, i64 20
  %20 = load float, ptr %arrayidx.20, align 4
  %add.20 = fadd fast float %20, %add.19
  %arrayidx.21 = getelementptr inbounds float, ptr %x, i64 21
  %21 = load float, ptr %arrayidx.21, align 4
  %add.21 = fadd fast float %21, %add.20
  %arrayidx.22 = getelementptr inbounds float, ptr %x, i64 22
  %22 = load float, ptr %arrayidx.22, align 4
  %add.22 = fadd fast float %22, %add.21
  %arrayidx.23 = getelementptr inbounds float, ptr %x, i64 23
  %23 = load float, ptr %arrayidx.23, align 4
  %add.23 = fadd fast float %23, %add.22
  %arrayidx.24 = getelementptr inbounds float, ptr %x, i64 24
  %24 = load float, ptr %arrayidx.24, align 4
  %add.24 = fadd fast float %24, %add.23
  %arrayidx.25 = getelementptr inbounds float, ptr %x, i64 25
  %25 = load float, ptr %arrayidx.25, align 4
  %add.25 = fadd fast float %25, %add.24
  %arrayidx.26 = getelementptr inbounds float, ptr %x, i64 26
  %26 = load float, ptr %arrayidx.26, align 4
  %add.26 = fadd fast float %26, %add.25
  %arrayidx.27 = getelementptr inbounds float, ptr %x, i64 27
  %27 = load float, ptr %arrayidx.27, align 4
  %add.27 = fadd fast float %27, %add.26
  %arrayidx.28 = getelementptr inbounds float, ptr %x, i64 28
  %28 = load float, ptr %arrayidx.28, align 4
  %add.28 = fadd fast float %28, %add.27
  %arrayidx.29 = getelementptr inbounds float, ptr %x, i64 29
  %29 = load float, ptr %arrayidx.29, align 4
  %add.29 = fadd fast float %29, %add.28
  %arrayidx.30 = getelementptr inbounds float, ptr %x, i64 30
  %30 = load float, ptr %arrayidx.30, align 4
  %add.30 = fadd fast float %30, %add.29
  %arrayidx.31 = getelementptr inbounds float, ptr %x, i64 31
  %31 = load float, ptr %arrayidx.31, align 4
  %add.31 = fadd fast float %31, %add.30
  ret float %add.31
}
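; loadadd31: 30 loads starting at x[1]; the non-power-of-two count is expected
; to split into <24 x float> and <4 x float> reductions plus two scalar tails.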
define float @loadadd31(ptr nocapture readonly %x) {
; CHECK-LABEL: @loadadd31(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load <24 x float>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
; CHECK-NEXT:    [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP0]])
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
; CHECK-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
; CHECK-NEXT:    [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]]
; CHECK-NEXT:    ret float [[OP_RDX3]]
;
; THRESHOLD-LABEL: @loadadd31(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <24 x float>, ptr [[ARRAYIDX]], align 4
; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
; THRESHOLD-NEXT:    [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
; THRESHOLD-NEXT:    [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP0]])
; THRESHOLD-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
; THRESHOLD-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
; THRESHOLD-NEXT:    [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]]
; THRESHOLD-NEXT:    ret float [[OP_RDX3]]
;
  entry:
  %arrayidx = getelementptr inbounds float, ptr %x, i64 1
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 2
  %1 = load float, ptr %arrayidx.1, align 4
  %add.1 = fadd fast float %1, %0
  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 3
  %2 = load float, ptr %arrayidx.2, align 4
  %add.2 = fadd fast float %2, %add.1
  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 4
  %3 = load float, ptr %arrayidx.3, align 4
  %add.3 = fadd fast float %3, %add.2
  %arrayidx.4 = getelementptr inbounds float, ptr %x, i64 5
  %4 = load float, ptr %arrayidx.4, align 4
  %add.4 = fadd fast float %4, %add.3
  %arrayidx.5 = getelementptr inbounds float, ptr %x, i64 6
  %5 = load float, ptr %arrayidx.5, align 4
  %add.5 = fadd fast float %5, %add.4
  %arrayidx.6 = getelementptr inbounds float, ptr %x, i64 7
  %6 = load float, ptr %arrayidx.6, align 4
  %add.6 = fadd fast float %6, %add.5
  %arrayidx.7 = getelementptr inbounds float, ptr %x, i64 8
  %7 = load float, ptr %arrayidx.7, align 4
  %add.7 = fadd fast float %7, %add.6
  %arrayidx.8 = getelementptr inbounds float, ptr %x, i64 9
  %8 = load float, ptr %arrayidx.8, align 4
  %add.8 = fadd fast float %8, %add.7
  %arrayidx.9 = getelementptr inbounds float, ptr %x, i64 10
  %9 = load float, ptr %arrayidx.9, align 4
  %add.9 = fadd fast float %9, %add.8
  %arrayidx.10 = getelementptr inbounds float, ptr %x, i64 11
  %10 = load float, ptr %arrayidx.10, align 4
  %add.10 = fadd fast float %10, %add.9
  %arrayidx.11 = getelementptr inbounds float, ptr %x, i64 12
  %11 = load float, ptr %arrayidx.11, align 4
  %add.11 = fadd fast float %11, %add.10
  %arrayidx.12 = getelementptr inbounds float, ptr %x, i64 13
  %12 = load float, ptr %arrayidx.12, align 4
  %add.12 = fadd fast float %12, %add.11
  %arrayidx.13 = getelementptr inbounds float, ptr %x, i64 14
  %13 = load float, ptr %arrayidx.13, align 4
  %add.13 = fadd fast float %13, %add.12
  %arrayidx.14 = getelementptr inbounds float, ptr %x, i64 15
  %14 = load float, ptr %arrayidx.14, align 4
  %add.14 = fadd fast float %14, %add.13
  %arrayidx.15 = getelementptr inbounds float, ptr %x, i64 16
  %15 = load float, ptr %arrayidx.15, align 4
  %add.15 = fadd fast float %15, %add.14
  %arrayidx.16 = getelementptr inbounds float, ptr %x, i64 17
  %16 = load float, ptr %arrayidx.16, align 4
  %add.16 = fadd fast float %16, %add.15
  %arrayidx.17 = getelementptr inbounds float, ptr %x, i64 18
  %17 = load float, ptr %arrayidx.17, align 4
  %add.17 = fadd fast float %17, %add.16
  %arrayidx.18 = getelementptr inbounds float, ptr %x, i64 19
  %18 = load float, ptr %arrayidx.18, align 4
  %add.18 = fadd fast float %18, %add.17
  %arrayidx.19 = getelementptr inbounds float, ptr %x, i64 20
  %19 = load float, ptr %arrayidx.19, align 4
  %add.19 = fadd fast float %19, %add.18
  %arrayidx.20 = getelementptr inbounds float, ptr %x, i64 21
  %20 = load float, ptr %arrayidx.20, align 4
  %add.20 = fadd fast float %20, %add.19
  %arrayidx.21 = getelementptr inbounds float, ptr %x, i64 22
  %21 = load float, ptr %arrayidx.21, align 4
  %add.21 = fadd fast float %21, %add.20
  %arrayidx.22 = getelementptr inbounds float, ptr %x, i64 23
  %22 = load float, ptr %arrayidx.22, align 4
  %add.22 = fadd fast float %22, %add.21
  %arrayidx.23 = getelementptr inbounds float, ptr %x, i64 24
  %23 = load float, ptr %arrayidx.23, align 4
  %add.23 = fadd fast float %23, %add.22
  %arrayidx.24 = getelementptr inbounds float, ptr %x, i64 25
  %24 = load float, ptr %arrayidx.24, align 4
  %add.24 = fadd fast float %24, %add.23
  %arrayidx.25 = getelementptr inbounds float, ptr %x, i64 26
  %25 = load float, ptr %arrayidx.25, align 4
  %add.25 = fadd fast float %25, %add.24
  %arrayidx.26 = getelementptr inbounds float, ptr %x, i64 27
  %26 = load float, ptr %arrayidx.26, align 4
  %add.26 = fadd fast float %26, %add.25
  %arrayidx.27 = getelementptr inbounds float, ptr %x, i64 28
  %27 = load float, ptr %arrayidx.27, align 4
  %add.27 = fadd fast float %27, %add.26
  %arrayidx.28 = getelementptr inbounds float, ptr %x, i64 29
  %28 = load float, ptr %arrayidx.28, align 4
  %add.28 = fadd fast float %28, %add.27
  %arrayidx.29 = getelementptr inbounds float, ptr %x, i64 30
  %29 = load float, ptr %arrayidx.29, align 4
  %add.29 = fadd fast float %29, %add.28
  ret float %add.29
}

define float @extra_args(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; CHECK-NEXT:    ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @extra_args(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; THRESHOLD-NEXT:    ret float [[OP_RDX1]]
;
  entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, ptr %x, align 4
  %add = fadd fast float %conv, 3.000000e+00
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
  %1 = load float, ptr %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %add5 = fadd fast float %add4, %conv
  %arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
  %2 = load float, ptr %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add5
  %arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
  %3 = load float, ptr %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.1
  %arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
  %4 = load float, ptr %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
  %5 = load float, ptr %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add4.3
  %arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
  %6 = load float, ptr %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
  %7 = load float, ptr %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}
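; extra_args_same_several_times: like extra_args, but 5.0 and %conv each feed
; the chain twice; the constants (3.0 + 5.0 + 5.0) are expected to fold to
; 13.0 and the two %conv uses to a single fmul by 2.0.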
define float @extra_args_same_several_times(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args_same_several_times(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP1]], 1.300000e+01
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP2]]
; CHECK-NEXT:    ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @extra_args_same_several_times(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP1]], 1.300000e+01
; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP2]]
; THRESHOLD-NEXT:    ret float [[OP_RDX1]]
;
  entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, ptr %x, align 4
  %add = fadd fast float %conv, 3.000000e+00
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
  %1 = load float, ptr %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %add41 = fadd fast float %add4, 5.000000e+00
  %add5 = fadd fast float %add41, %conv
  %arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
  %2 = load float, ptr %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add5
  %add4.11 = fadd fast float %add4.1, 5.000000e+00
  %arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
  %3 = load float, ptr %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.11
  %arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
  %4 = load float, ptr %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
  %5 = load float, ptr %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add4.3
  %arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
  %6 = load float, ptr %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
  %7 = load float, ptr %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}
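; extra_args_no_replace: the extra operands (%conv twice, %convc, 3.0) enter
; at different points of the chain and must still be added exactly once each
; after the reduction.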
define float @extra_args_no_replace(ptr nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @extra_args_no_replace(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; CHECK-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]]
; CHECK-NEXT:    ret float [[OP_RDX2]]
;
; THRESHOLD-LABEL: @extra_args_no_replace(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; THRESHOLD-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]]
; THRESHOLD-NEXT:    ret float [[OP_RDX2]]
;
  entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, ptr %x, align 4
  %convc = sitofp i32 %c to float
  %addc = fadd fast float %convc, 3.000000e+00
  %add = fadd fast float %conv, %addc
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
  %1 = load float, ptr %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
  %2 = load float, ptr %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add4
  %arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
  %3 = load float, ptr %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.1
  %arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
  %4 = load float, ptr %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %add5 = fadd fast float %add4.3, %conv
  %arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
  %5 = load float, ptr %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add5
  %arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
  %6 = load float, ptr %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
  %7 = load float, ptr %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}
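; extra_args_no_fast: one fadd in the chain (%add4.1) lacks the fast flag,
; which is expected to block the reduction; the whole chain stays scalar.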
define float @extra_args_no_fast(ptr %x, float %a, float %b) {
; CHECK-LABEL: @extra_args_no_fast(
; CHECK-NEXT:    [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
; CHECK-NEXT:    ret float [[ADD5]]
;
; THRESHOLD-LABEL: @extra_args_no_fast(
; THRESHOLD-NEXT:    [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
; THRESHOLD-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
; THRESHOLD-NEXT:    [[T1:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
; THRESHOLD-NEXT:    [[T2:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
; THRESHOLD-NEXT:    [[T3:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
; THRESHOLD-NEXT:    ret float [[ADD5]]
;
  %addc = fadd fast float %b, 3.0
  %add = fadd fast float %a, %addc
  %arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
  %arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
  %arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
  %t0 = load float, ptr %x, align 4
  %t1 = load float, ptr %arrayidx3, align 4
  %t2 = load float, ptr %arrayidx3.1, align 4
  %t3 = load float, ptr %arrayidx3.2, align 4
  %add1 = fadd fast float %t0, %add
  %add4 = fadd fast float %t1, %add1
  %add4.1 = fadd float %t2, %add4 ; this is not a reduction candidate
  %add4.2 = fadd fast float %t3, %add4.1
  %add5 = fadd fast float %add4.2, %a
  ret float %add5
}
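; wobble: an integer add reduction over sext'd icmp results; the extra
; operands %x4 and %arg are expected to be added after the
; @llvm.vector.reduce.add.v4i32 call.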
define i32 @wobble(i32 %arg, i32 %bar) {
; CHECK-LABEL: @wobble(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[X4:%.*]] = xor i32 [[ARG]], [[BAR]]
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP7]], [[X4]]
; CHECK-NEXT:    [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[ARG]]
; CHECK-NEXT:    ret i32 [[OP_RDX1]]
;
; THRESHOLD-LABEL: @wobble(
; THRESHOLD-NEXT:  bb:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
; THRESHOLD-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; THRESHOLD-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
; THRESHOLD-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; THRESHOLD-NEXT:    [[TMP4:%.*]] = xor <4 x i32> [[TMP1]], [[TMP3]]
; THRESHOLD-NEXT:    [[X4:%.*]] = xor i32 [[ARG]], [[BAR]]
; THRESHOLD-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], zeroinitializer
; THRESHOLD-NEXT:    [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
; THRESHOLD-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP7]], [[X4]]
; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[ARG]]
; THRESHOLD-NEXT:    ret i32 [[OP_RDX1]]
;
  bb:
  %x1 = xor i32 %arg, %bar
  %i1 = icmp eq i32 %x1, 0
  %s1 = sext i1 %i1 to i32
  %x2 = xor i32 %arg, %bar
  %i2 = icmp eq i32 %x2, 0
  %s2 = sext i1 %i2 to i32
  %x3 = xor i32 %arg, %bar
  %i3 = icmp eq i32 %x3, 0
  %s3 = sext i1 %i3 to i32
  %x4 = xor i32 %arg, %bar
  %i4 = icmp eq i32 %x4, 0
  %s4 = sext i1 %i4 to i32
  %r1 = add nuw i32 %arg, %s1
  %r2 = add nsw i32 %r1, %s2
  %r3 = add nsw i32 %r2, %s3
  %r4 = add nsw i32 %r3, %s4
  %r5 = add nsw i32 %r4, %x4
  ret i32 %r5
}