1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \ 3; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \ 4; RUN: | FileCheck %s 5; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \ 6; RUN: | FileCheck %s --check-prefix=DEFAULT 7 8declare float @fabsf(float) readonly nounwind willreturn 9 10define <4 x float> @fabs_4x(ptr %a) { 11; CHECK-LABEL: define <4 x float> @fabs_4x 12; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { 13; CHECK-NEXT: entry: 14; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 15; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 16; CHECK-NEXT: ret <4 x float> [[TMP1]] 17; 18; DEFAULT-LABEL: define <4 x float> @fabs_4x 19; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { 20; DEFAULT-NEXT: entry: 21; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 22; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 23; DEFAULT-NEXT: ret <4 x float> [[TMP1]] 24; 25entry: 26 %0 = load <4 x float>, ptr %a, align 16 27 %vecext = extractelement <4 x float> %0, i32 0 28 %1 = tail call fast float @fabsf(float %vecext) 29 %vecins = insertelement <4 x float> undef, float %1, i32 0 30 %vecext.1 = extractelement <4 x float> %0, i32 1 31 %2 = tail call fast float @fabsf(float %vecext.1) 32 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 33 %vecext.2 = extractelement <4 x float> %0, i32 2 34 %3 = tail call fast float @fabsf(float %vecext.2) 35 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 36 %vecext.3 = extractelement <4 x float> %0, i32 3 37 %4 = tail call fast float @fabsf(float %vecext.3) 38 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 39 ret <4 x float> %vecins.3 40} 41 42declare float @llvm.fabs.f32(float) 43 44define <4 x float> @int_fabs_4x(ptr %a) { 45; 
CHECK-LABEL: define <4 x float> @int_fabs_4x 46; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 47; CHECK-NEXT: entry: 48; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 49; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 50; CHECK-NEXT: ret <4 x float> [[TMP1]] 51; 52; DEFAULT-LABEL: define <4 x float> @int_fabs_4x 53; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 54; DEFAULT-NEXT: entry: 55; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 56; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 57; DEFAULT-NEXT: ret <4 x float> [[TMP1]] 58; 59entry: 60 %0 = load <4 x float>, ptr %a, align 16 61 %vecext = extractelement <4 x float> %0, i32 0 62 %1 = tail call fast float @llvm.fabs.f32(float %vecext) 63 %vecins = insertelement <4 x float> undef, float %1, i32 0 64 %vecext.1 = extractelement <4 x float> %0, i32 1 65 %2 = tail call fast float @llvm.fabs.f32(float %vecext.1) 66 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 67 %vecext.2 = extractelement <4 x float> %0, i32 2 68 %3 = tail call fast float @llvm.fabs.f32(float %vecext.2) 69 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 70 %vecext.3 = extractelement <4 x float> %0, i32 3 71 %4 = tail call fast float @llvm.fabs.f32(float %vecext.3) 72 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 73 ret <4 x float> %vecins.3 74} 75 76declare float @sqrtf(float) readonly nounwind willreturn 77 78define <4 x float> @sqrt_4x(ptr %a) { 79; CHECK-LABEL: define <4 x float> @sqrt_4x 80; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 81; CHECK-NEXT: entry: 82; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 83; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 84; CHECK-NEXT: ret <4 x float> [[TMP1]] 85; 86; DEFAULT-LABEL: define <4 x float> @sqrt_4x 87; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 88; DEFAULT-NEXT: entry: 89; 
DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 90; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 91; DEFAULT-NEXT: ret <4 x float> [[TMP1]] 92; 93entry: 94 %0 = load <4 x float>, ptr %a, align 16 95 %vecext = extractelement <4 x float> %0, i32 0 96 %1 = tail call fast float @sqrtf(float %vecext) 97 %vecins = insertelement <4 x float> undef, float %1, i32 0 98 %vecext.1 = extractelement <4 x float> %0, i32 1 99 %2 = tail call fast float @sqrtf(float %vecext.1) 100 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 101 %vecext.2 = extractelement <4 x float> %0, i32 2 102 %3 = tail call fast float @sqrtf(float %vecext.2) 103 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 104 %vecext.3 = extractelement <4 x float> %0, i32 3 105 %4 = tail call fast float @sqrtf(float %vecext.3) 106 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 107 ret <4 x float> %vecins.3 108} 109 110declare float @llvm.sqrt.f32(float) 111 112define <4 x float> @int_sqrt_4x(ptr %a) { 113; CHECK-LABEL: define <4 x float> @int_sqrt_4x 114; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 115; CHECK-NEXT: entry: 116; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 117; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 118; CHECK-NEXT: ret <4 x float> [[TMP1]] 119; 120; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x 121; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 122; DEFAULT-NEXT: entry: 123; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 124; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 125; DEFAULT-NEXT: ret <4 x float> [[TMP1]] 126; 127entry: 128 %0 = load <4 x float>, ptr %a, align 16 129 %vecext = extractelement <4 x float> %0, i32 0 130 %1 = tail call fast float @llvm.sqrt.f32(float %vecext) 131 %vecins = insertelement <4 x float> undef, float %1, i32 0 132 %vecext.1 = 
extractelement <4 x float> %0, i32 1 133 %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1) 134 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 135 %vecext.2 = extractelement <4 x float> %0, i32 2 136 %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2) 137 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 138 %vecext.3 = extractelement <4 x float> %0, i32 3 139 %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3) 140 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 141 ret <4 x float> %vecins.3 142} 143 144declare float @expf(float) readonly nounwind willreturn 145 146; We cannot vectorize exp since RISCV has no such instruction. 147define <4 x float> @exp_4x(ptr %a) { 148; CHECK-LABEL: define <4 x float> @exp_4x 149; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 150; CHECK-NEXT: entry: 151; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 152; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 153; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]]) 154; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 155; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 156; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) 157; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 158; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 159; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) 160; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 161; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 162; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) 163; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 164; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 165; 166; 
DEFAULT-LABEL: define <4 x float> @exp_4x 167; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 168; DEFAULT-NEXT: entry: 169; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 170; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 171; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]]) 172; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 173; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 174; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) 175; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 176; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 177; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) 178; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 179; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 180; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) 181; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 182; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 183; 184entry: 185 %0 = load <4 x float>, ptr %a, align 16 186 %vecext = extractelement <4 x float> %0, i32 0 187 %1 = tail call fast float @expf(float %vecext) 188 %vecins = insertelement <4 x float> undef, float %1, i32 0 189 %vecext.1 = extractelement <4 x float> %0, i32 1 190 %2 = tail call fast float @expf(float %vecext.1) 191 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 192 %vecext.2 = extractelement <4 x float> %0, i32 2 193 %3 = tail call fast float @expf(float %vecext.2) 194 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 195 %vecext.3 = extractelement <4 x float> %0, i32 3 196 %4 = tail call fast float @expf(float %vecext.3) 197 %vecins.3 = insertelement <4 x float> %vecins.2, 
float %4, i32 3 198 ret <4 x float> %vecins.3 199} 200 201declare float @llvm.exp.f32(float) 202 203; We cannot vectorize exp since RISCV has no such instruction. 204define <4 x float> @int_exp_4x(ptr %a) { 205; CHECK-LABEL: define <4 x float> @int_exp_4x 206; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 207; CHECK-NEXT: entry: 208; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 209; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 210; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]]) 211; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 212; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 213; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) 214; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 215; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 216; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) 217; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 218; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 219; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) 220; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 221; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 222; 223; DEFAULT-LABEL: define <4 x float> @int_exp_4x 224; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 225; DEFAULT-NEXT: entry: 226; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 227; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 228; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]]) 229; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 230; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 
x float> [[TMP0]], i32 1 231; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) 232; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 233; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 234; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) 235; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 236; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 237; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) 238; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 239; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 240; 241entry: 242 %0 = load <4 x float>, ptr %a, align 16 243 %vecext = extractelement <4 x float> %0, i32 0 244 %1 = tail call fast float @llvm.exp.f32(float %vecext) 245 %vecins = insertelement <4 x float> undef, float %1, i32 0 246 %vecext.1 = extractelement <4 x float> %0, i32 1 247 %2 = tail call fast float @llvm.exp.f32(float %vecext.1) 248 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 249 %vecext.2 = extractelement <4 x float> %0, i32 2 250 %3 = tail call fast float @llvm.exp.f32(float %vecext.2) 251 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 252 %vecext.3 = extractelement <4 x float> %0, i32 3 253 %4 = tail call fast float @llvm.exp.f32(float %vecext.3) 254 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 255 ret <4 x float> %vecins.3 256} 257 258declare float @logf(float) readonly nounwind willreturn 259 260; We cannot vectorize log since RISCV has no such instruction. 
261define <4 x float> @log_4x(ptr %a) { 262; CHECK-LABEL: define <4 x float> @log_4x 263; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 264; CHECK-NEXT: entry: 265; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 266; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 267; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]]) 268; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 269; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 270; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) 271; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 272; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 273; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) 274; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 275; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 276; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) 277; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 278; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 279; 280; DEFAULT-LABEL: define <4 x float> @log_4x 281; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 282; DEFAULT-NEXT: entry: 283; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 284; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 285; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]]) 286; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 287; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 288; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) 289; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 290; 
DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 291; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) 292; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 293; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 294; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) 295; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 296; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 297; 298entry: 299 %0 = load <4 x float>, ptr %a, align 16 300 %vecext = extractelement <4 x float> %0, i32 0 301 %1 = tail call fast float @logf(float %vecext) 302 %vecins = insertelement <4 x float> undef, float %1, i32 0 303 %vecext.1 = extractelement <4 x float> %0, i32 1 304 %2 = tail call fast float @logf(float %vecext.1) 305 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 306 %vecext.2 = extractelement <4 x float> %0, i32 2 307 %3 = tail call fast float @logf(float %vecext.2) 308 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 309 %vecext.3 = extractelement <4 x float> %0, i32 3 310 %4 = tail call fast float @logf(float %vecext.3) 311 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 312 ret <4 x float> %vecins.3 313} 314 315declare float @llvm.log.f32(float) 316 317; We cannot vectorize log since RISCV has no such instruction. 
318define <4 x float> @int_log_4x(ptr %a) { 319; CHECK-LABEL: define <4 x float> @int_log_4x 320; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 321; CHECK-NEXT: entry: 322; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 323; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 324; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]]) 325; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 326; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 327; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) 328; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 329; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 330; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) 331; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 332; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 333; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) 334; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 335; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 336; 337; DEFAULT-LABEL: define <4 x float> @int_log_4x 338; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 339; DEFAULT-NEXT: entry: 340; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 341; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 342; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]]) 343; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 344; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 345; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) 346; DEFAULT-NEXT: [[VECINS_1:%.*]] = 
insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 347; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 348; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) 349; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 350; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 351; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) 352; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 353; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 354; 355entry: 356 %0 = load <4 x float>, ptr %a, align 16 357 %vecext = extractelement <4 x float> %0, i32 0 358 %1 = tail call fast float @llvm.log.f32(float %vecext) 359 %vecins = insertelement <4 x float> undef, float %1, i32 0 360 %vecext.1 = extractelement <4 x float> %0, i32 1 361 %2 = tail call fast float @llvm.log.f32(float %vecext.1) 362 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 363 %vecext.2 = extractelement <4 x float> %0, i32 2 364 %3 = tail call fast float @llvm.log.f32(float %vecext.2) 365 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 366 %vecext.3 = extractelement <4 x float> %0, i32 3 367 %4 = tail call fast float @llvm.log.f32(float %vecext.3) 368 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 369 ret <4 x float> %vecins.3 370} 371 372declare float @sinf(float) readonly nounwind willreturn 373 374; We cannot vectorize sin since RISCV has no such instruction. 
375define <4 x float> @sin_4x(ptr %a) { 376; CHECK-LABEL: define <4 x float> @sin_4x 377; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 378; CHECK-NEXT: entry: 379; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 380; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 381; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]]) 382; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 383; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 384; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) 385; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 386; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 387; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) 388; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 389; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 390; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) 391; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 392; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 393; 394; DEFAULT-LABEL: define <4 x float> @sin_4x 395; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 396; DEFAULT-NEXT: entry: 397; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 398; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 399; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]]) 400; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 401; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 402; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) 403; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 404; 
DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 405; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) 406; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 407; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 408; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) 409; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 410; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 411; 412entry: 413 %0 = load <4 x float>, ptr %a, align 16 414 %vecext = extractelement <4 x float> %0, i32 0 415 %1 = tail call fast float @sinf(float %vecext) 416 %vecins = insertelement <4 x float> undef, float %1, i32 0 417 %vecext.1 = extractelement <4 x float> %0, i32 1 418 %2 = tail call fast float @sinf(float %vecext.1) 419 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 420 %vecext.2 = extractelement <4 x float> %0, i32 2 421 %3 = tail call fast float @sinf(float %vecext.2) 422 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 423 %vecext.3 = extractelement <4 x float> %0, i32 3 424 %4 = tail call fast float @sinf(float %vecext.3) 425 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 426 ret <4 x float> %vecins.3 427} 428 429declare float @llvm.sin.f32(float) 430 431; We cannot vectorize sin since RISCV has no such instruction. 
432define <4 x float> @int_sin_4x(ptr %a) { 433; CHECK-LABEL: define <4 x float> @int_sin_4x 434; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 435; CHECK-NEXT: entry: 436; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 437; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 438; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) 439; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 440; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 441; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) 442; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 443; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 444; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) 445; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 446; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 447; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) 448; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 449; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 450; 451; DEFAULT-LABEL: define <4 x float> @int_sin_4x 452; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 453; DEFAULT-NEXT: entry: 454; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 455; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 456; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) 457; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 458; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 459; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) 460; DEFAULT-NEXT: [[VECINS_1:%.*]] = 
insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 461; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 462; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) 463; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 464; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 465; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) 466; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 467; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 468; 469entry: 470 %0 = load <4 x float>, ptr %a, align 16 471 %vecext = extractelement <4 x float> %0, i32 0 472 %1 = tail call fast float @llvm.sin.f32(float %vecext) 473 %vecins = insertelement <4 x float> undef, float %1, i32 0 474 %vecext.1 = extractelement <4 x float> %0, i32 1 475 %2 = tail call fast float @llvm.sin.f32(float %vecext.1) 476 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 477 %vecext.2 = extractelement <4 x float> %0, i32 2 478 %3 = tail call fast float @llvm.sin.f32(float %vecext.2) 479 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 480 %vecext.3 = extractelement <4 x float> %0, i32 3 481 %4 = tail call fast float @llvm.sin.f32(float %vecext.3) 482 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 483 ret <4 x float> %vecins.3 484} 485 486declare float @asinf(float) readonly nounwind willreturn 487 488; We cannot vectorize asin since RISCV has no such instruction. 
489define <4 x float> @asin_4x(ptr %a) { 490; CHECK-LABEL: define <4 x float> @asin_4x 491; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 492; CHECK-NEXT: entry: 493; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 494; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 495; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]]) 496; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 497; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 498; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]]) 499; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 500; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 501; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]]) 502; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 503; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 504; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]]) 505; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 506; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 507; 508; DEFAULT-LABEL: define <4 x float> @asin_4x 509; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 510; DEFAULT-NEXT: entry: 511; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 512; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 513; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]]) 514; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 515; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 516; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]]) 517; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 
518; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 519; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]]) 520; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 521; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 522; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]]) 523; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 524; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 525; 526entry: 527 %0 = load <4 x float>, ptr %a, align 16 528 %vecext = extractelement <4 x float> %0, i32 0 529 %1 = tail call fast float @asinf(float %vecext) 530 %vecins = insertelement <4 x float> undef, float %1, i32 0 531 %vecext.1 = extractelement <4 x float> %0, i32 1 532 %2 = tail call fast float @asinf(float %vecext.1) 533 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 534 %vecext.2 = extractelement <4 x float> %0, i32 2 535 %3 = tail call fast float @asinf(float %vecext.2) 536 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 537 %vecext.3 = extractelement <4 x float> %0, i32 3 538 %4 = tail call fast float @asinf(float %vecext.3) 539 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 540 ret <4 x float> %vecins.3 541} 542 543declare float @llvm.asin.f32(float) 544 545; We cannot vectorize asin since RISCV has no such instruction. 
546define <4 x float> @int_asin_4x(ptr %a) { 547; CHECK-LABEL: define <4 x float> @int_asin_4x 548; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 549; CHECK-NEXT: entry: 550; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 551; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 552; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]]) 553; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 554; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 555; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]]) 556; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 557; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 558; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]]) 559; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 560; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 561; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]]) 562; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 563; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 564; 565; DEFAULT-LABEL: define <4 x float> @int_asin_4x 566; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 567; DEFAULT-NEXT: entry: 568; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 569; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 570; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]]) 571; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 572; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 573; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]]) 574; DEFAULT-NEXT: [[VECINS_1:%.*]] = 
insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 575; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 576; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]]) 577; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 578; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 579; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]]) 580; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 581; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 582; 583entry: 584 %0 = load <4 x float>, ptr %a, align 16 585 %vecext = extractelement <4 x float> %0, i32 0 586 %1 = tail call fast float @llvm.asin.f32(float %vecext) 587 %vecins = insertelement <4 x float> undef, float %1, i32 0 588 %vecext.1 = extractelement <4 x float> %0, i32 1 589 %2 = tail call fast float @llvm.asin.f32(float %vecext.1) 590 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 591 %vecext.2 = extractelement <4 x float> %0, i32 2 592 %3 = tail call fast float @llvm.asin.f32(float %vecext.2) 593 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 594 %vecext.3 = extractelement <4 x float> %0, i32 3 595 %4 = tail call fast float @llvm.asin.f32(float %vecext.3) 596 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 597 ret <4 x float> %vecins.3 598} 599 600declare float @cosf(float) readonly nounwind willreturn 601 602; We cannot vectorize cos since RISCV has no such instruction. 
; Four scalar calls to @cosf, one per lane of a single <4 x float> load, whose
; results are re-inserted lane by lane. Both opt invocations are expected to
; leave the calls scalar.
define <4 x float> @cos_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @cos_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @cos_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @cosf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @cosf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @cosf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @cosf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.cos.f32(float)

; We cannot vectorize cos since RISCV has no such instruction.
; Four scalar @llvm.cos.f32 intrinsic calls, one per lane of a single
; <4 x float> load, whose results are re-inserted lane by lane. Both opt
; invocations are expected to leave the calls scalar.
define <4 x float> @int_cos_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_cos_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_cos_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.cos.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.cos.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.cos.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @acosf(float) readonly nounwind willreturn

; We cannot vectorize acos since RISCV has no such instruction.
; Four scalar calls to @acosf, one per lane of a single <4 x float> load, whose
; results are re-inserted lane by lane. Both opt invocations are expected to
; leave the calls scalar.
define <4 x float> @acos_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @acos_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @acos_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @acosf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @acosf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @acosf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @acosf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.acos.f32(float)

; We cannot vectorize acos since RISCV has no such instruction.
; Four scalar @llvm.acos.f32 intrinsic calls, one per lane of a single
; <4 x float> load, whose results are re-inserted lane by lane. Both opt
; invocations are expected to leave the calls scalar.
define <4 x float> @int_acos_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_acos_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_acos_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.acos.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.acos.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.acos.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.acos.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @tanf(float) readonly nounwind willreturn

; We cannot vectorize tan since RISCV has no such instruction.
; Four scalar calls to @tanf, one per lane of a single <4 x float> load, whose
; results are re-inserted lane by lane. Both opt invocations are expected to
; leave the calls scalar.
define <4 x float> @tan_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @tan_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @tan_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @tanf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @tanf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @tanf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @tanf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.tan.f32(float)

; We cannot vectorize tan since RISCV has no such instruction.
; Four scalar @llvm.tan.f32 intrinsic calls, one per lane of a single
; <4 x float> load, whose results are re-inserted lane by lane. Both opt
; invocations are expected to leave the calls scalar.
define <4 x float> @int_tan_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_tan_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_tan_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.tan.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.tan.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.tan.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.tan.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @atanf(float) readonly nounwind willreturn

; We cannot vectorize atan since RISCV has no such instruction.
; Four scalar calls to @atanf, one per lane of a single <4 x float> load, whose
; results are re-inserted lane by lane. Both opt invocations are expected to
; leave the calls scalar.
define <4 x float> @atan_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @atan_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @atan_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @atanf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @atanf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @atanf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @atanf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.atan.f32(float)

; We cannot vectorize atan since RISCV has no such instruction.
; Four scalar @llvm.atan.f32 intrinsic calls, one per lane of a single
; <4 x float> load, whose results are re-inserted lane by lane. Both opt
; invocations are expected to leave the calls scalar.
define <4 x float> @int_atan_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_atan_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_atan_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.atan.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.atan.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.atan.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.atan.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @sinhf(float) readonly nounwind willreturn

; We cannot vectorize sinh since RISCV has no such instruction.
; Four scalar calls to @sinhf, one per lane of a single <4 x float> load, whose
; results are re-inserted lane by lane. Both opt invocations are expected to
; leave the calls scalar.
define <4 x float> @sinh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @sinh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @sinh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sinhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sinhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sinhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sinhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.sinh.f32(float)

; We cannot vectorize sinh since RISCV has no such instruction.
; @int_sinh_4x is the @llvm.sinh.f32 counterpart of the per-lane pattern. Each
; of the four lanes loaded from %a is extracted, passed to the callee, and
; inserted back into the result vector. The expectations for both RUN lines
; keep the four scalar calls as they are.
define <4 x float> @int_sinh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_sinh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_sinh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.sinh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @asinh_4x below.
declare float @asinhf(float) readonly nounwind willreturn

; We cannot vectorize asinh since RISC-V has no such instruction.
; @asinh_4x applies the scalar callee @asinhf to each of the four lanes of the
; <4 x float> loaded from %a and reassembles the results with insertelement.
; The expectations for both RUN lines keep all four scalar calls, so this
; function pins the "left unvectorized" behavior.
define <4 x float> @asinh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @asinh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @asinh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @asinhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @asinhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @asinhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @asinhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @int_asinh_4x below.
declare float @llvm.asinh.f32(float)

; We cannot vectorize asinh since RISC-V has no such instruction.
; @int_asinh_4x is the @llvm.asinh.f32 counterpart of the per-lane pattern.
; Each of the four lanes loaded from %a is extracted, passed to the callee, and
; inserted back into the result vector. The expectations for both RUN lines
; keep the four scalar calls as they are.
define <4 x float> @int_asinh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_asinh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_asinh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.asinh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.asinh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.asinh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.asinh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @cosh_4x below.
declare float @coshf(float) readonly nounwind willreturn

; We cannot vectorize cosh since RISC-V has no such instruction.
; @cosh_4x applies the scalar callee @coshf to each of the four lanes of the
; <4 x float> loaded from %a and reassembles the results with insertelement.
; The expectations for both RUN lines keep all four scalar calls, so this
; function pins the "left unvectorized" behavior.
define <4 x float> @cosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @cosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @cosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @coshf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @coshf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @coshf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @coshf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @int_cosh_4x below.
declare float @llvm.cosh.f32(float)

; We cannot vectorize cosh since RISC-V has no such instruction.
; @int_cosh_4x is the @llvm.cosh.f32 counterpart of the per-lane pattern. Each
; of the four lanes loaded from %a is extracted, passed to the callee, and
; inserted back into the result vector. The expectations for both RUN lines
; keep the four scalar calls as they are.
define <4 x float> @int_cosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_cosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @acosh_4x below.
declare float @acoshf(float) readonly nounwind willreturn

; We cannot vectorize acosh since RISC-V has no such instruction.
; @acosh_4x applies the scalar callee @acoshf to each of the four lanes of the
; <4 x float> loaded from %a and reassembles the results with insertelement.
; The expectations for both RUN lines keep all four scalar calls, so this
; function pins the "left unvectorized" behavior.
define <4 x float> @acosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @acosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @acosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @acoshf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @acoshf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @acoshf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @acoshf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @int_acosh_4x below.
declare float @llvm.acosh.f32(float)

; We cannot vectorize acosh since RISC-V has no such instruction.
; @int_acosh_4x is the @llvm.acosh.f32 counterpart of the per-lane pattern.
; Each of the four lanes loaded from %a is extracted, passed to the callee, and
; inserted back into the result vector. The expectations for both RUN lines
; keep the four scalar calls as they are.
define <4 x float> @int_acosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_acosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_acosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.acosh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.acosh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.acosh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.acosh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @tanh_4x below.
declare float @tanhf(float) readonly nounwind willreturn

; We cannot vectorize tanh since RISC-V has no such instruction.
; @tanh_4x applies the scalar callee @tanhf to each of the four lanes of the
; <4 x float> loaded from %a and reassembles the results with insertelement.
; The expectations for both RUN lines keep all four scalar calls, so this
; function pins the "left unvectorized" behavior.
define <4 x float> @tanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @tanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @tanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @tanhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @tanhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @tanhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @tanhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @int_tanh_4x below.
declare float @llvm.tanh.f32(float)

; We cannot vectorize tanh since RISC-V has no such instruction.
; @int_tanh_4x is the @llvm.tanh.f32 counterpart of the per-lane pattern. Each
; of the four lanes loaded from %a is extracted, passed to the callee, and
; inserted back into the result vector. The expectations for both RUN lines
; keep the four scalar calls as they are.
define <4 x float> @int_tanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_tanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_tanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.tanh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Callee used by @atanh_4x below.
declare float @atanhf(float) readonly nounwind willreturn

; We cannot vectorize atanh since RISC-V has no such instruction.
; Calls the libm-style @atanhf on each lane of a loaded <4 x float> and
; re-packs the results with insertelement. Both check prefixes below expect
; the four scalar calls to remain in the output unchanged.
define <4 x float> @atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @atanhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @atanhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @atanhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @atanhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.atanh.f32(float)

; We cannot vectorize atanh since RISC-V has no such instruction.
; Same shape as the libm-call test, but calling @llvm.atanh.f32 instead:
; one scalar call per lane of a loaded <4 x float>, re-packed with
; insertelement. Both check prefixes below expect the four scalar calls to
; remain in the output unchanged.
define <4 x float> @int_atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.atanh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}