; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
; RUN: | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: | FileCheck %s --check-prefix=DEFAULT

; Two configurations are exercised. The first opt invocation passes explicit
; -riscv-v-vector-bits-min=-1 and -riscv-v-slp-max-vf=0 overrides and is
; verified with the default prefix; the second omits them and is verified with
; the prefix named by --check-prefix.
;
; Every function below follows the same shape: load a <4 x float>, extract each
; lane, call a scalar math routine on it, and rebuild the vector with
; insertelement. Each routine is tested twice, once through its libm-style
; declaration and once through the matching llvm.* intrinsic.

declare float @fabsf(float) readonly nounwind willreturn

; fabs via @fabsf: both prefixes expect a single <4 x float> @llvm.fabs.v4f32
; call.
define <4 x float> @fabs_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @fabs_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @fabs_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @fabsf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @fabsf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @fabsf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @fabsf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.fabs.f32(float)

; fabs via the scalar intrinsic: same expectation as fabs_4x, a single
; <4 x float> @llvm.fabs.v4f32 call under both prefixes.
define <4 x float> @int_fabs_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_fabs_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @sqrtf(float) readonly nounwind willreturn

; sqrt via @sqrtf: both prefixes expect a single <4 x float> @llvm.sqrt.v4f32
; call.
define <4 x float> @sqrt_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @sqrt_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @sqrt_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sqrtf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sqrtf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sqrtf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sqrtf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.sqrt.f32(float)

; sqrt via the scalar intrinsic: same expectation as sqrt_4x, a single
; <4 x float> @llvm.sqrt.v4f32 call under both prefixes.
define <4 x float> @int_sqrt_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_sqrt_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.sqrt.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @expf(float) readonly nounwind willreturn

; We cannot vectorize exp across all four lanes since RISC-V has no such
; instruction. The expected output keeps lanes 0 and 1 as scalar @expf calls and
; only combines lanes 2 and 3 into a <2 x float> @llvm.exp.v2f32 call.
define <4 x float> @exp_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @exp_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @exp_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @expf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @expf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @expf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @expf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.exp.f32(float)

; We cannot vectorize exp across all four lanes since RISC-V has no such
; instruction. The expected output keeps lanes 0 and 1 as scalar @llvm.exp.f32
; calls and only combines lanes 2 and 3 into a <2 x float> @llvm.exp.v2f32 call.
define <4 x float> @int_exp_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_exp_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @int_exp_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.exp.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.exp.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.exp.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.exp.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @logf(float) readonly nounwind willreturn

; We cannot vectorize log across all four lanes since RISC-V has no such
; instruction. The expected output keeps lanes 0 and 1 as scalar @logf calls and
; only combines lanes 2 and 3 into a <2 x float> @llvm.log.v2f32 call.
; log_4x: applies the libm-style @logf to every lane extracted from a loaded
; <4 x float> and reassembles the results with insertelement.
define <4 x float> @log_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @log_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @log_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @logf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @logf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @logf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @logf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.log.f32(float)

; We cannot vectorize log across all four lanes since RISC-V has no such
; instruction. The expected output keeps lanes 0 and 1 as scalar @llvm.log.f32
; calls and only combines lanes 2 and 3 into a <2 x float> @llvm.log.v2f32 call.
; int_log_4x: applies the scalar @llvm.log.f32 intrinsic to every lane extracted
; from a loaded <4 x float> and reassembles the results with insertelement.
define <4 x float> @int_log_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_log_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @int_log_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.log.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.log.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.log.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.log.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @sinf(float) readonly nounwind willreturn

; We cannot vectorize sin across all four lanes since RISC-V has no such
; instruction. The expected output keeps lanes 0 and 1 as scalar @sinf calls and
; only combines lanes 2 and 3 into a <2 x float> @llvm.sin.v2f32 call.
; sin_4x: applies the libm-style @sinf to every lane extracted from a loaded
; <4 x float> and reassembles the results with insertelement.
define <4 x float> @sin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @sin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @sin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sinf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sinf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sinf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sinf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.sin.f32(float)

; We cannot vectorize sin across all four lanes since RISC-V has no such
; instruction. The expected output keeps lanes 0 and 1 as scalar @llvm.sin.f32
; calls and only combines lanes 2 and 3 into a <2 x float> @llvm.sin.v2f32 call.
; int_sin_4x: applies the scalar @llvm.sin.f32 intrinsic to every lane extracted
; from a loaded <4 x float> and reassembles the results with insertelement.
define <4 x float> @int_sin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_sin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @int_sin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.sin.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @asinf(float) readonly nounwind willreturn

; We cannot vectorize asin since RISC-V has no such instruction; all four
; scalar @asinf calls are expected to remain under both prefixes.
; asin_4x: applies the libm-style @asinf to every lane extracted from a loaded
; <4 x float> and reassembles the results with insertelement.
define <4 x float> @asin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @asin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @asin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @asinf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @asinf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @asinf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @asinf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.asin.f32(float)

; We cannot vectorize asin since RISC-V has no such instruction; all four
; scalar @llvm.asin.f32 calls are expected to remain under both prefixes.
; int_asin_4x: applies the scalar @llvm.asin.f32 intrinsic to every lane
; extracted from a loaded <4 x float> and reassembles the results with
; insertelement.
define <4 x float> @int_asin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_asin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_asin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.asin.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @coshf(float) readonly nounwind willreturn

; We cannot vectorize cosh since RISC-V has no such instruction.
; Library-call variant: four scalar @coshf calls feed an insertelement chain.
; The expectations for both RUN configurations keep the calls scalar.
define <4 x float> @cosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @cosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @cosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  ; Load all four lanes at once, then process each lane with the same
  ; extract -> scalar call -> insert sequence.
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @coshf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @coshf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @coshf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @coshf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Scalar callee used by the next test.
declare float @llvm.cosh.f32(float)

; We cannot vectorize cosh since RISCV has no such instruction.
; Intrinsic-call variant: four scalar @llvm.cosh.f32 calls feed an
; insertelement chain. The expectations for both RUN configurations keep the
; calls scalar.
define <4 x float> @int_cosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_cosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  ; Load all four lanes at once, then process each lane with the same
  ; extract -> scalar call -> insert sequence.
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Scalar callee used by the next test.
declare float @atanhf(float) readonly nounwind willreturn

; We cannot vectorize atanh since RISCV has no such instruction.
; Library-call variant: four scalar @atanhf calls feed an insertelement chain.
; The expectations for both RUN configurations keep the calls scalar.
define <4 x float> @atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  ; Load all four lanes at once, then process each lane with the same
  ; extract -> scalar call -> insert sequence.
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @atanhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @atanhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @atanhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @atanhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Scalar callee used by the next test.
declare float @llvm.atanh.f32(float)

; We cannot vectorize atanh since RISCV has no such instruction.
; Intrinsic-style variant: four scalar @llvm.atanh.f32 calls feed an
; insertelement chain. The expectations for both RUN configurations keep the
; calls scalar.
define <4 x float> @int_atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  ; Load all four lanes at once, then process each lane with the same
  ; extract -> scalar call -> insert sequence.
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.atanh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}