; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=inject-tli-mappings,slp-vectorizer -vector-library=Accelerate -S %s | FileCheck %s
; RUN: opt -passes=inject-tli-mappings,slp-vectorizer -S %s | FileCheck --check-prefix NOACCELERATE %s

; Each test below loads a <4 x float>, applies the same scalar math call to
; every lane, and rebuilds the vector with insertelement. The first invocation
; above runs the SLP vectorizer with the Accelerate vector library enabled; the
; second runs it without any vector library.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios14.0.0"

declare float @llvm.sin.f32(float)

; Accelerate provides sin() for <4 x float>
; With Accelerate the four llvm.sin.f32 calls become one @vsinf call; without
; it only lanes 2-3 are combined into llvm.sin.v2f32.
define <4 x float> @int_sin_4x(ptr %a) {
; CHECK-LABEL: @int_sin_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_sin_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.sin.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; ceilf is turned into llvm.ceil.v4f32 under both configurations.
declare float @ceilf(float) readonly nounwind willreturn

define <4 x float> @ceil_4x(ptr %a) {
; CHECK-LABEL: @ceil_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @ceil_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @ceilf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @ceilf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @ceilf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @ceilf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; fabsf is turned into llvm.fabs.v4f32 under both configurations.
declare float @fabsf(float) readonly nounwind willreturn

define <4 x float> @fabs_4x(ptr %a) {
; CHECK-LABEL: @fabs_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @fabs_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @fabsf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @fabsf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @fabsf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @fabsf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; The llvm.fabs.f32 intrinsic form is widened to llvm.fabs.v4f32 under both
; configurations.
declare float @llvm.fabs.f32(float)

define <4 x float> @int_fabs_4x(ptr %a) {
; CHECK-LABEL: @int_fabs_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_fabs_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; floorf is turned into llvm.floor.v4f32 under both configurations.
declare float @floorf(float) readonly nounwind willreturn

define <4 x float> @floor_4x(ptr %a) {
; CHECK-LABEL: @floor_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @floor_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @floorf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @floorf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @floorf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @floorf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; sqrtf is turned into llvm.sqrt.v4f32 under both configurations.
declare float @sqrtf(float) readonly nounwind willreturn

define <4 x float> @sqrt_4x(ptr %a) {
; CHECK-LABEL: @sqrt_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @sqrt_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sqrtf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sqrtf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sqrtf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sqrtf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the expf calls become one @vexpf call; without it only
; lanes 2-3 are combined into llvm.exp.v2f32.
declare float @expf(float) readonly nounwind willreturn

define <4 x float> @exp_4x(ptr %a) {
; CHECK-LABEL: @exp_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @exp_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @expf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @expf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @expf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @expf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the expm1f calls become one @vexpm1f call; without it the
; code stays fully scalar.
declare float @expm1f(float) readonly nounwind willreturn

define <4 x float> @expm1_4x(ptr %a) {
; CHECK-LABEL: @expm1_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpm1f(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @expm1_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @expm1f(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @expm1f(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @expm1f(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @expm1f(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @expm1f(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @expm1f(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @expm1f(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the logf calls become one @vlogf call; without it only
; lanes 2-3 are combined into llvm.log.v2f32.
declare float @logf(float) readonly nounwind willreturn

define <4 x float> @log_4x(ptr %a) {
; CHECK-LABEL: @log_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @log_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @logf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @logf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @logf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @logf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the log1pf calls become one @vlog1pf call; without it the
; code stays fully scalar.
declare float @log1pf(float) readonly nounwind willreturn

define <4 x float> @log1p_4x(ptr %a) {
; CHECK-LABEL: @log1p_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlog1pf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @log1p_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @log1pf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @log1pf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @log1pf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @log1pf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @log1pf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @log1pf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @log1pf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; The log10pf calls are left scalar under both configurations.
; NOTE(review): presumably no vector mapping exists for the name log10pf, so
; this acts as a negative test - confirm that log10f was not the intended callee.
declare float @log10pf(float) readonly nounwind willreturn

define <4 x float> @log10p_4x(ptr %a) {
; CHECK-LABEL: @log10p_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; NOACCELERATE-LABEL: @log10p_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @log10pf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @log10pf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @log10pf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @log10pf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the logbf calls become one @vlogbf call; without it the
; code stays fully scalar.
declare float @logbf(float) readonly nounwind willreturn

define <4 x float> @logb_4x(ptr %a) {
; CHECK-LABEL: @logb_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogbf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @logb_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @logbf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @logbf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @logbf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @logbf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @logbf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @logbf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @logbf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the sinf calls become one @vsinf call; without it only
; lanes 2-3 are combined into llvm.sin.v2f32.
declare float @sinf(float) readonly nounwind willreturn

define <4 x float> @sin_4x(ptr %a) {
; CHECK-LABEL: @sin_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @sin_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sinf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sinf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sinf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sinf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the cosf calls become one @vcosf call; without it only
; lanes 2-3 are combined into llvm.cos.v2f32.
declare float @cosf(float) readonly nounwind willreturn

define <4 x float> @cos_4x(ptr %a) {
; CHECK-LABEL: @cos_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @cos_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @cosf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @cosf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @cosf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @cosf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the tanf calls become one @vtanf call; without it only
; lanes 2-3 are combined into llvm.tan.v2f32.
declare float @tanf(float) readonly nounwind willreturn

define <4 x float> @tan_4x(ptr %a) {
; CHECK-LABEL: @tan_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @tan_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @tanf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @tanf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @tanf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @tanf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; With Accelerate the asinf calls become one @vasinf call; without it only
; lanes 2-3 are combined into llvm.asin.v2f32.
declare float @asinf(float) readonly nounwind willreturn

define <4 x float> @asin_4x(ptr %a) {
; CHECK-LABEL: @asin_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @asin_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @asinf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @asinf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @asinf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @asinf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; The llvm.asin.f32 intrinsic form: with Accelerate the calls become one
; @vasinf call; without it only lanes 2-3 are combined into llvm.asin.v2f32.
define <4 x float> @int_asin_4x(ptr %a) {
; CHECK-LABEL: @int_asin_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_asin_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.asin.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @acosf(float) readonly nounwind willreturn

define <4 x float> @acos_4x(ptr %a) {
; CHECK-LABEL: @acos_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @acos_4x(
;
NOACCELERATE-NEXT: entry: 660; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 661; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 662; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]]) 663; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 664; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 665; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]]) 666; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 667; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> 668; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]]) 669; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 670; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 671; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] 672; 673entry: 674 %0 = load <4 x float>, ptr %a, align 16 675 %vecext = extractelement <4 x float> %0, i32 0 676 %1 = tail call fast float @acosf(float %vecext) 677 %vecins = insertelement <4 x float> undef, float %1, i32 0 678 %vecext.1 = extractelement <4 x float> %0, i32 1 679 %2 = tail call fast float @acosf(float %vecext.1) 680 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 681 %vecext.2 = extractelement <4 x float> %0, i32 2 682 %3 = tail call fast float @acosf(float %vecext.2) 683 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 684 %vecext.3 = extractelement <4 x float> %0, i32 3 685 %4 = tail call fast float @acosf(float %vecext.3) 686 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 687 ret <4 x float> %vecins.3 688} 
; int_acos_4x: applies the @llvm.acos.f32 intrinsic to each lane of a loaded
; <4 x float> through an extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vacosf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; intrinsic calls and only lanes 2-3 are vectorized, through @llvm.acos.v2f32.
define <4 x float> @int_acos_4x(ptr %a) {
; CHECK-LABEL: @int_acos_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_acos_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.acos.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.acos.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.acos.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.acos.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; atan_4x: applies libm @atanf to each lane of a loaded <4 x float> through an
; extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vatanf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; @atanf calls and only lanes 2-3 are vectorized, through @llvm.atan.v2f32.
declare float @atanf(float) readonly nounwind willreturn
define <4 x float> @atan_4x(ptr %a) {
; CHECK-LABEL: @atan_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @atan_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @atanf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @atanf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @atanf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @atanf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; int_atan_4x: applies the @llvm.atan.f32 intrinsic to each lane of a loaded
; <4 x float> through an extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vatanf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; intrinsic calls and only lanes 2-3 are vectorized, through @llvm.atan.v2f32.
define <4 x float> @int_atan_4x(ptr %a) {
; CHECK-LABEL: @int_atan_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_atan_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.atan.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.atan.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.atan.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.atan.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; atan2_4x: two-operand case. Lane i of the result is libm @atan2f applied to
; lane i of the vector loaded from %a and lane i of the vector loaded from %b.
; Expected with -vector-library=Accelerate: a single @vatan2f call taking both
; loaded vectors. Expected without a vector library: lanes 0-1 keep their
; scalar @atan2f calls and only lanes 2-3 are vectorized, through
; @llvm.atan2.v2f32.
declare float @atan2f(float,float) readonly nounwind willreturn
define <4 x float> @atan2_4x(ptr %a, ptr %b) {
; CHECK-LABEL: @atan2_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @atan2_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atan2f(float [[VECEXT]], float [[VECEXTB]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %bb = load <4 x float>, ptr %b, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %vecextb = extractelement <4 x float> %bb, i32 0
  %1 = tail call fast float @atan2f(float %vecext, float %vecextb)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %vecextb.1 = extractelement <4 x float> %bb, i32 1
  %2 = tail call fast float @atan2f(float %vecext.1, float %vecextb.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %vecextb.2 = extractelement <4 x float> %bb, i32 2
  %3 = tail call fast float @atan2f(float %vecext.2, float %vecextb.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %vecextb.3 = extractelement <4 x float> %bb, i32 3
  %4 = tail call fast float @atan2f(float %vecext.3, float %vecextb.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; int_atan2_4x: two-operand case using the @llvm.atan2.f32 intrinsic. Lane i of
; the result is the intrinsic applied to lane i of the vector loaded from %a
; and lane i of the vector loaded from %b.
; Expected with -vector-library=Accelerate: a single @vatan2f call taking both
; loaded vectors. Expected without a vector library: lanes 0-1 keep their
; scalar intrinsic calls and only lanes 2-3 are vectorized, through
; @llvm.atan2.v2f32.
define <4 x float> @int_atan2_4x(ptr %a, ptr %b) {
; CHECK-LABEL: @int_atan2_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_atan2_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT]], float [[VECEXTB]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %bb = load <4 x float>, ptr %b, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %vecextb = extractelement <4 x float> %bb, i32 0
  %1 = tail call fast float @llvm.atan2.f32(float %vecext, float %vecextb)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %vecextb.1 = extractelement <4 x float> %bb, i32 1
  %2 = tail call fast float @llvm.atan2.f32(float %vecext.1, float %vecextb.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %vecextb.2 = extractelement <4 x float> %bb, i32 2
  %3 = tail call fast float @llvm.atan2.f32(float %vecext.2, float %vecextb.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %vecextb.3 = extractelement <4 x float> %bb, i32 3
  %4 = tail call fast float @llvm.atan2.f32(float %vecext.3, float %vecextb.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; sinh_4x: applies libm @sinhf to each lane of a loaded <4 x float> through an
; extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vsinhf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; @sinhf calls and only lanes 2-3 are vectorized, through @llvm.sinh.v2f32.
declare float @sinhf(float) readonly nounwind willreturn
define <4 x float> @sinh_4x(ptr %a) {
; CHECK-LABEL: @sinh_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @sinh_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sinhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sinhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sinhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sinhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; int_sinh_4x: applies the @llvm.sinh.f32 intrinsic to each lane of a loaded
; <4 x float> through an extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vsinhf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; intrinsic calls and only lanes 2-3 are vectorized, through @llvm.sinh.v2f32.
define <4 x float> @int_sinh_4x(ptr %a) {
; CHECK-LABEL: @int_sinh_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_sinh_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.sinh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; cosh_4x: applies libm @coshf to each lane of a loaded <4 x float> through an
; extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vcoshf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; @coshf calls and only lanes 2-3 are vectorized, through @llvm.cosh.v2f32.
declare float @coshf(float) readonly nounwind willreturn
define <4 x float> @cosh_4x(ptr %a) {
; CHECK-LABEL: @cosh_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @cosh_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @coshf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @coshf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @coshf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @coshf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; int_cosh_4x: applies the @llvm.cosh.f32 intrinsic to each lane of a loaded
; <4 x float> through an extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vcoshf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; intrinsic calls and only lanes 2-3 are vectorized, through @llvm.cosh.v2f32.
define <4 x float> @int_cosh_4x(ptr %a) {
; CHECK-LABEL: @int_cosh_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_cosh_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; tanh_4x: applies libm @tanhf to each lane of a loaded <4 x float> through an
; extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vtanhf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; @tanhf calls and only lanes 2-3 are vectorized, through @llvm.tanh.v2f32.
declare float @tanhf(float) readonly nounwind willreturn
define <4 x float> @tanh_4x(ptr %a) {
; CHECK-LABEL: @tanh_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @tanh_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @tanhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @tanhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @tanhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @tanhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; int_tanh_4x: applies the @llvm.tanh.f32 intrinsic to each lane of a loaded
; <4 x float> through an extractelement / call / insertelement chain.
; Expected with -vector-library=Accelerate: a single @vtanhf call on the whole
; vector. Expected without a vector library: lanes 0-1 keep their scalar
; intrinsic calls and only lanes 2-3 are vectorized, through @llvm.tanh.v2f32.
define <4 x float> @int_tanh_4x(ptr %a) {
; CHECK-LABEL: @int_tanh_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_tanh_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.tanh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
declare float @asinhf(float) readonly nounwind willreturn
define <4 x float> @asinh_4x(ptr %a) {
; CHECK-LABEL: @asinh_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vasinhf(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @asinh_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
1158 %0 = load <4 x float>, ptr %a, align 16 1159 %vecext = extractelement <4 x float> %0, i32 0 1160 %1 = tail call fast float @asinhf(float %vecext) 1161 %vecins = insertelement <4 x float> undef, float %1, i32 0 1162 %vecext.1 = extractelement <4 x float> %0, i32 1 1163 %2 = tail call fast float @asinhf(float %vecext.1) 1164 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1165 %vecext.2 = extractelement <4 x float> %0, i32 2 1166 %3 = tail call fast float @asinhf(float %vecext.2) 1167 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1168 %vecext.3 = extractelement <4 x float> %0, i32 3 1169 %4 = tail call fast float @asinhf(float %vecext.3) 1170 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1171 ret <4 x float> %vecins.3 1172} 1173declare float @acoshf(float) readonly nounwind willreturn 1174define <4 x float> @acosh_4x(ptr %a) { 1175; CHECK-LABEL: @acosh_4x( 1176; CHECK-NEXT: entry: 1177; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 1178; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vacoshf(<4 x float> [[TMP0]]) 1179; CHECK-NEXT: ret <4 x float> [[TMP1]] 1180; 1181; NOACCELERATE-LABEL: @acosh_4x( 1182; NOACCELERATE-NEXT: entry: 1183; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 1184; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1185; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]]) 1186; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1187; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1188; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]]) 1189; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1190; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1191; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call 
fast float @acoshf(float [[VECEXT_2]]) 1192; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1193; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1194; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]]) 1195; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1196; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] 1197; 1198entry: 1199 %0 = load <4 x float>, ptr %a, align 16 1200 %vecext = extractelement <4 x float> %0, i32 0 1201 %1 = tail call fast float @acoshf(float %vecext) 1202 %vecins = insertelement <4 x float> undef, float %1, i32 0 1203 %vecext.1 = extractelement <4 x float> %0, i32 1 1204 %2 = tail call fast float @acoshf(float %vecext.1) 1205 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1206 %vecext.2 = extractelement <4 x float> %0, i32 2 1207 %3 = tail call fast float @acoshf(float %vecext.2) 1208 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1209 %vecext.3 = extractelement <4 x float> %0, i32 3 1210 %4 = tail call fast float @acoshf(float %vecext.3) 1211 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1212 ret <4 x float> %vecins.3 1213} 1214declare float @atanhf(float) readonly nounwind willreturn 1215define <4 x float> @atanh_4x(ptr %a) { 1216; CHECK-LABEL: @atanh_4x( 1217; CHECK-NEXT: entry: 1218; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 1219; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatanhf(<4 x float> [[TMP0]]) 1220; CHECK-NEXT: ret <4 x float> [[TMP1]] 1221; 1222; NOACCELERATE-LABEL: @atanh_4x( 1223; NOACCELERATE-NEXT: entry: 1224; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 1225; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1226; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]]) 1227; 
NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1228; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1229; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]]) 1230; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1231; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1232; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]]) 1233; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1234; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1235; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]]) 1236; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1237; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] 1238; 1239entry: 1240 %0 = load <4 x float>, ptr %a, align 16 1241 %vecext = extractelement <4 x float> %0, i32 0 1242 %1 = tail call fast float @atanhf(float %vecext) 1243 %vecins = insertelement <4 x float> undef, float %1, i32 0 1244 %vecext.1 = extractelement <4 x float> %0, i32 1 1245 %2 = tail call fast float @atanhf(float %vecext.1) 1246 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1247 %vecext.2 = extractelement <4 x float> %0, i32 2 1248 %3 = tail call fast float @atanhf(float %vecext.2) 1249 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1250 %vecext.3 = extractelement <4 x float> %0, i32 3 1251 %4 = tail call fast float @atanhf(float %vecext.3) 1252 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1253 ret <4 x float> %vecins.3 1254} 1255 1256; Accelerate *does not* provide sin() for <2 x float>. 
; Two-lane case: both RUN configurations are expected to leave the two scalar
; @llvm.sin.f32 calls in place. With -vector-library=Accelerate the calls
; additionally carry an attribute group reference.
define <2 x float> @sin_2x(ptr %a) {
; CHECK-LABEL: @sin_2x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) #[[ATTR2]]
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
;
; NOACCELERATE-LABEL: @sin_2x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
;
entry:
  %0 = load <2 x float>, ptr %a, align 16
  %vecext = extractelement <2 x float> %0, i32 0
  %1 = tail call fast float @llvm.sin.f32(float %vecext)
  %vecins = insertelement <2 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <2 x float> %0, i32 1
  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
  %vecins.1 = insertelement <2 x float> %vecins, float %2, i32 1
  ret <2 x float> %vecins.1
}


declare float @llvm.cos.f32(float)

; Accelerate provides cos() for <4 x float>
; With -vector-library=Accelerate the four scalar calls are expected to become
; a single call to @vcosf. Without a vector library, lanes 0 and 1 are expected
; to stay scalar while lanes 2 and 3 are combined into one @llvm.cos.v2f32 call.
define <4 x float> @int_cos_4x(ptr %a) {
; CHECK-LABEL: @int_cos_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_cos_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]])
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.cos.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.cos.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.cos.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Accelerate *does not* provide cos() for <2 x float>.
; Both RUN configurations are expected to leave the two scalar @llvm.cos.f32
; calls in place.
define <2 x float> @cos_2x(ptr %a) {
; CHECK-LABEL: @cos_2x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) #[[ATTR3]]
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
;
; NOACCELERATE-LABEL: @cos_2x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
;
entry:
  %0 = load <2 x float>, ptr %a, align 16
  %vecext = extractelement <2 x float> %0, i32 0
  %1 = tail call fast float @llvm.cos.f32(float %vecext)
  %vecins = insertelement <2 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <2 x float> %0, i32 1
  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
  %vecins.1 = insertelement <2 x float> %vecins, float %2, i32 1
  ret <2 x float> %vecins.1
}
