; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer,dce -slp-threshold=-999 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

; Each function below applies one scalar intrinsic to consecutive elements of
; %a/%b and stores the results to consecutive elements of %c. The assertions
; record whether the scalar calls are merged into a single vector intrinsic
; call. The "_neg" variants use a scalar operand that differs between lanes
; and are expected to stay scalar.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

declare double @llvm.fabs.f64(double) nounwind readnone

; Two lanes of fabs(a[i] * b[i]) on double; expected to become one
; <2 x double> llvm.fabs call.
define void @vec_fabs_f64(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @vec_fabs_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP4]])
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[C:%.*]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load double, ptr %a, align 8
  %i1 = load double, ptr %b, align 8
  %mul = fmul double %i0, %i1
  %call = tail call double @llvm.fabs.f64(double %mul) nounwind readnone
  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
  %i3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
  %i4 = load double, ptr %arrayidx4, align 8
  %mul5 = fmul double %i3, %i4
  %call5 = tail call double @llvm.fabs.f64(double %mul5) nounwind readnone
  store double %call, ptr %c, align 8
  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
  store double %call5, ptr %arrayidx5, align 8
  ret void
}

declare float @llvm.copysign.f32(float, float) nounwind readnone

; Four lanes of copysign(a[i], b[i]) on float, with each store interleaved
; between the loads of the next lane (%c is noalias); expected to become one
; <4 x float> llvm.copysign call.
define void @vec_copysign_f32(ptr %a, ptr %b, ptr noalias %c) {
; CHECK-LABEL: @vec_copysign_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP3]])
; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %0 = load float, ptr %a, align 4
  %1 = load float, ptr %b, align 4
  %call0 = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
  store float %call0, ptr %c, align 4

  %ix2 = getelementptr inbounds float, ptr %a, i64 1
  %2 = load float, ptr %ix2, align 4
  %ix3 = getelementptr inbounds float, ptr %b, i64 1
  %3 = load float, ptr %ix3, align 4
  %call1 = tail call float @llvm.copysign.f32(float %2, float %3) nounwind readnone
  %c1 = getelementptr inbounds float, ptr %c, i64 1
  store float %call1, ptr %c1, align 4

  %ix4 = getelementptr inbounds float, ptr %a, i64 2
  %4 = load float, ptr %ix4, align 4
  %ix5 = getelementptr inbounds float, ptr %b, i64 2
  %5 = load float, ptr %ix5, align 4
  %call2 = tail call float @llvm.copysign.f32(float %4, float %5) nounwind readnone
  %c2 = getelementptr inbounds float, ptr %c, i64 2
  store float %call2, ptr %c2, align 4

  %ix6 = getelementptr inbounds float, ptr %a, i64 3
  %6 = load float, ptr %ix6, align 4
  %ix7 = getelementptr inbounds float, ptr %b, i64 3
  %7 = load float, ptr %ix7, align 4
  %call3 = tail call float @llvm.copysign.f32(float %6, float %7) nounwind readnone
  %c3 = getelementptr inbounds float, ptr %c, i64 3
  store float %call3, ptr %c3, align 4

  ret void
}

declare i32 @llvm.bswap.i32(i32) nounwind readnone

; Four lanes of bswap(a[i] + b[i]) on i32; expected to become one
; <4 x i32> llvm.bswap call.
define void @vec_bswap_i32(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @vec_bswap_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, ptr %a, align 4
  %i1 = load i32, ptr %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.bswap.i32(i32 %add1) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
  %i2 = load i32, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
  %i3 = load i32, ptr %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.bswap.i32(i32 %add2) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
  %i4 = load i32, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
  %i5 = load i32, ptr %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.bswap.i32(i32 %add3) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
  %i6 = load i32, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
  %i7 = load i32, ptr %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.bswap.i32(i32 %add4) nounwind readnone

  store i32 %call1, ptr %c, align 4
  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
  store i32 %call2, ptr %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
  store i32 %call3, ptr %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
  store i32 %call4, ptr %arrayidx10, align 4
  ret void
}

declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone

; Four lanes of ctlz(a[i] + b[i], true) on i32. The i1 operand is the same
; constant in every lane; expected to become one <4 x i32> llvm.ctlz call.
define void @vec_ctlz_i32(ptr %a, ptr %b, ptr %c, i1) {
; CHECK-LABEL: @vec_ctlz_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP5]], i1 true)
; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, ptr %a, align 4
  %i1 = load i32, ptr %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
  %i2 = load i32, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
  %i3 = load i32, ptr %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
  %i4 = load i32, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
  %i5 = load i32, ptr %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
  %i6 = load i32, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
  %i7 = load i32, ptr %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone

  store i32 %call1, ptr %c, align 4
  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
  store i32 %call2, ptr %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
  store i32 %call3, ptr %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
  store i32 %call4, ptr %arrayidx10, align 4
  ret void
}

; Negative test: same shape as @vec_ctlz_i32, but the i1 operand alternates
; true/false between lanes. The four scalar llvm.ctlz calls are expected to
; remain unvectorized.
define void @vec_ctlz_i32_neg(ptr %a, ptr %b, ptr %c, i1) {
; CHECK-LABEL: @vec_ctlz_i32_neg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I0:%.*]] = load i32, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[I1:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD1]], i1 true) #[[ATTR3:[0-9]+]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
; CHECK-NEXT:    [[I3:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD2]], i1 false) #[[ATTR3]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
; CHECK-NEXT:    [[I5:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD3]], i1 true) #[[ATTR3]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 3
; CHECK-NEXT:    [[I6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 3
; CHECK-NEXT:    [[I7:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
; CHECK-NEXT:    [[CALL4:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD4]], i1 false) #[[ATTR3]]
; CHECK-NEXT:    store i32 [[CALL1]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 1
; CHECK-NEXT:    store i32 [[CALL2]], ptr [[ARRAYIDX8]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 2
; CHECK-NEXT:    store i32 [[CALL3]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 3
; CHECK-NEXT:    store i32 [[CALL4]], ptr [[ARRAYIDX10]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, ptr %a, align 4
  %i1 = load i32, ptr %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
  %i2 = load i32, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
  %i3 = load i32, ptr %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
  %i4 = load i32, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
  %i5 = load i32, ptr %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
  %i6 = load i32, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
  %i7 = load i32, ptr %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone

  store i32 %call1, ptr %c, align 4
  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
  store i32 %call2, ptr %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
  store i32 %call3, ptr %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
  store i32 %call4, ptr %arrayidx10, align 4
  ret void
}


declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone

; Four lanes of cttz(a[i] + b[i], true) on i32. The i1 operand is the same
; constant in every lane; expected to become one <4 x i32> llvm.cttz call.
define void @vec_cttz_i32(ptr %a, ptr %b, ptr %c, i1) {
; CHECK-LABEL: @vec_cttz_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP5]], i1 true)
; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, ptr %a, align 4
  %i1 = load i32, ptr %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
  %i2 = load i32, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
  %i3 = load i32, ptr %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
  %i4 = load i32, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
  %i5 = load i32, ptr %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
  %i6 = load i32, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
  %i7 = load i32, ptr %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone

  store i32 %call1, ptr %c, align 4
  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
  store i32 %call2, ptr %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
  store i32 %call3, ptr %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
  store i32 %call4, ptr %arrayidx10, align 4
  ret void
}

; Negative test: same shape as @vec_cttz_i32, but the i1 operand alternates
; true/false between lanes. The four scalar llvm.cttz calls are expected to
; remain unvectorized.
define void @vec_cttz_i32_neg(ptr %a, ptr %b, ptr %c, i1) {
; CHECK-LABEL: @vec_cttz_i32_neg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I0:%.*]] = load i32, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[I1:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD1]], i1 true) #[[ATTR3]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
; CHECK-NEXT:    [[I3:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD2]], i1 false) #[[ATTR3]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
; CHECK-NEXT:    [[I5:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD3]], i1 true) #[[ATTR3]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 3
; CHECK-NEXT:    [[I6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 3
; CHECK-NEXT:    [[I7:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
; CHECK-NEXT:    [[CALL4:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD4]], i1 false) #[[ATTR3]]
; CHECK-NEXT:    store i32 [[CALL1]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 1
; CHECK-NEXT:    store i32 [[CALL2]], ptr [[ARRAYIDX8]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 2
; CHECK-NEXT:    store i32 [[CALL3]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 3
; CHECK-NEXT:    store i32 [[CALL4]], ptr [[ARRAYIDX10]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, ptr %a, align 4
  %i1 = load i32, ptr %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
  %i2 = load i32, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
  %i3 = load i32, ptr %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
  %i4 = load i32, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
  %i5 = load i32, ptr %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
  %i6 = load i32, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
  %i7 = load i32, ptr %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone

  store i32 %call1, ptr %c, align 4
  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
  store i32 %call2, ptr %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
  store i32 %call3, ptr %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
  store i32 %call4, ptr %arrayidx10, align 4
  ret void
}


declare float @llvm.powi.f32.i32(float, i32)

; Four lanes of powi(a[i] + b[i], %P) on float. Every lane uses the same
; exponent %P; expected to become one <4 x float> llvm.powi call that keeps
; the exponent as a scalar i32.
define void @vec_powi_f32(ptr %a, ptr %b, ptr %c, i32 %P) {
; CHECK-LABEL: @vec_powi_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP4]], i32 [[P:%.*]])
; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load float, ptr %a, align 4
  %i1 = load float, ptr %b, align 4
  %add1 = fadd float %i0, %i1
  %call1 = tail call float @llvm.powi.f32.i32(float %add1,i32 %P) nounwind readnone

  %arrayidx2 = getelementptr inbounds float, ptr %a, i32 1
  %i2 = load float, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, ptr %b, i32 1
  %i3 = load float, ptr %arrayidx3, align 4
  %add2 = fadd float %i2, %i3
  %call2 = tail call float @llvm.powi.f32.i32(float %add2,i32 %P) nounwind readnone

  %arrayidx4 = getelementptr inbounds float, ptr %a, i32 2
  %i4 = load float, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, ptr %b, i32 2
  %i5 = load float, ptr %arrayidx5, align 4
  %add3 = fadd float %i4, %i5
  %call3 = tail call float @llvm.powi.f32.i32(float %add3,i32 %P) nounwind readnone

  %arrayidx6 = getelementptr inbounds float, ptr %a, i32 3
  %i6 = load float, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, ptr %b, i32 3
  %i7 = load float, ptr %arrayidx7, align 4
  %add4 = fadd float %i6, %i7
  %call4 = tail call float @llvm.powi.f32.i32(float %add4,i32 %P) nounwind readnone

  store float %call1, ptr %c, align 4
  %arrayidx8 = getelementptr inbounds float, ptr %c, i32 1
  store float %call2, ptr %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds float, ptr %c, i32 2
  store float %call3, ptr %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds float, ptr %c, i32 3
  store float %call4, ptr %arrayidx10, align 4
  ret void
}


; Negative test: same shape as @vec_powi_f32, but the exponent alternates
; between %P and %Q across lanes. The four scalar llvm.powi calls are expected
; to remain unvectorized.
define void @vec_powi_f32_neg(ptr %a, ptr %b, ptr %c, i32 %P, i32 %Q) {
; CHECK-LABEL: @vec_powi_f32_neg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I0:%.*]] = load float, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[I1:%.*]] = load float, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[I0]], [[I1]]
; CHECK-NEXT:    [[CALL1:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD1]], i32 [[P:%.*]]) #[[ATTR3]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
; CHECK-NEXT:    [[I2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 1
; CHECK-NEXT:    [[I3:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[I2]], [[I3]]
; CHECK-NEXT:    [[CALL2:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD2]], i32 [[Q:%.*]]) #[[ATTR3]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[I4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
; CHECK-NEXT:    [[I5:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[I4]], [[I5]]
; CHECK-NEXT:    [[CALL3:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD3]], i32 [[P]]) #[[ATTR3]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT:    [[I6:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3
; CHECK-NEXT:    [[I7:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[I6]], [[I7]]
; CHECK-NEXT:    [[CALL4:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD4]], i32 [[Q]]) #[[ATTR3]]
; CHECK-NEXT:    store float [[CALL1]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[C]], i32 1
; CHECK-NEXT:    store float [[CALL2]], ptr [[ARRAYIDX8]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[C]], i32 2
; CHECK-NEXT:    store float [[CALL3]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[C]], i32 3
; CHECK-NEXT:    store float [[CALL4]], ptr [[ARRAYIDX10]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load float, ptr %a, align 4
  %i1 = load float, ptr %b, align 4
  %add1 = fadd float %i0, %i1
  %call1 = tail call float @llvm.powi.f32.i32(float %add1,i32 %P) nounwind readnone

  %arrayidx2 = getelementptr inbounds float, ptr %a, i32 1
  %i2 = load float, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, ptr %b, i32 1
  %i3 = load float, ptr %arrayidx3, align 4
  %add2 = fadd float %i2, %i3
  %call2 = tail call float @llvm.powi.f32.i32(float %add2,i32 %Q) nounwind readnone

  %arrayidx4 = getelementptr inbounds float, ptr %a, i32 2
  %i4 = load float, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, ptr %b, i32 2
  %i5 = load float, ptr %arrayidx5, align 4
  %add3 = fadd float %i4, %i5
  %call3 = tail call float @llvm.powi.f32.i32(float %add3,i32 %P) nounwind readnone

  %arrayidx6 = getelementptr inbounds float, ptr %a, i32 3
  %i6 = load float, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, ptr %b, i32 3
  %i7 = load float, ptr %arrayidx7, align 4
  %add4 = fadd float %i6, %i7
  %call4 = tail call float @llvm.powi.f32.i32(float %add4,i32 %Q) nounwind readnone

  store float %call1, ptr %c, align 4
  %arrayidx8 = getelementptr inbounds float, ptr %c, i32 1
  store float %call2, ptr %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds float, ptr %c, i32 2
  store float %call3, ptr %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds float, ptr %c, i32 3
  store float %call4, ptr %arrayidx10, align 4
  ret void
}