; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
; RUN: | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: | FileCheck %s --check-prefix=DEFAULT

; Every function below loads a <4 x float>, applies one scalar math call to
; each lane and rebuilds the vector with insertelement. The two run
; configurations are expected to produce the same output for each function.

declare float @fabsf(float) readonly nounwind willreturn

; fabsf libcall on each lane; the four scalar calls are expected to become a
; single @llvm.fabs.v4f32 call on the loaded vector.
define <4 x float> @fabs_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @fabs_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @fabs_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @fabsf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @fabsf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @fabsf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @fabsf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.fabs.f32(float)

; Same pattern through the scalar @llvm.fabs.f32 intrinsic; a single
; @llvm.fabs.v4f32 call is expected.
define <4 x float> @int_fabs_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_fabs_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @sqrtf(float) readonly nounwind willreturn

; sqrtf libcall on each lane; a single @llvm.sqrt.v4f32 call is expected.
define <4 x float> @sqrt_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @sqrt_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @sqrt_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sqrtf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sqrtf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sqrtf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sqrtf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.sqrt.f32(float)

; Same pattern through the scalar @llvm.sqrt.f32 intrinsic; a single
; @llvm.sqrt.v4f32 call is expected.
define <4 x float> @int_sqrt_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_sqrt_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.sqrt.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @expf(float) readonly nounwind willreturn

; We cannot vectorize exp since RISC-V has no such instruction.
; The four scalar expf calls are expected to be left untouched.
define <4 x float> @exp_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @exp_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @exp_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @expf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @expf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @expf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @expf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.exp.f32(float)

; We cannot vectorize exp since RISC-V has no such instruction.
; The four scalar @llvm.exp.f32 calls are expected to be left untouched.
define <4 x float> @int_exp_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_exp_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_exp_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.exp.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.exp.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.exp.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.exp.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @logf(float) readonly nounwind willreturn

; We cannot vectorize log since RISC-V has no such instruction.
; logf libcall on each lane; the four scalar calls are expected to be left
; untouched under both run configurations.
define <4 x float> @log_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @log_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @log_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @logf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @logf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @logf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @logf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.log.f32(float)

; We cannot vectorize log since RISC-V has no such instruction.
; @llvm.log.f32 intrinsic on each lane; the four scalar calls are expected to
; be left untouched under both run configurations.
define <4 x float> @int_log_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_log_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_log_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.log.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.log.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.log.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.log.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @sinf(float) readonly nounwind willreturn

; We cannot vectorize sin since RISC-V has no such instruction.
; sinf libcall on each lane; the four scalar calls are expected to be left
; untouched under both run configurations.
define <4 x float> @sin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @sin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @sin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sinf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sinf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sinf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sinf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.sin.f32(float)

; We cannot vectorize sin since RISC-V has no such instruction.
; @llvm.sin.f32 intrinsic on each lane; the four scalar calls are expected to
; be left untouched under both run configurations.
define <4 x float> @int_sin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_sin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_sin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.sin.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @asinf(float) readonly nounwind willreturn

; We cannot vectorize asin since RISC-V has no such instruction.
; asinf libcall on each lane; the four scalar calls are expected to be left
; untouched under both run configurations.
define <4 x float> @asin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @asin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @asin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @asinf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @asinf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @asinf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @asinf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.asin.f32(float)

; We cannot vectorize asin since RISC-V has no such instruction.
; @llvm.asin.f32 intrinsic on each lane; the four scalar calls are expected to
; be left untouched under both run configurations.
define <4 x float> @int_asin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_asin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_asin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.asin.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @coshf(float) readonly nounwind willreturn

; We cannot vectorize cosh since RISC-V has no such instruction.
; @cosh_4x loads one <4 x float> from %a, applies the scalar @coshf call
; (with the `fast` flag) to each of its four lanes, and rebuilds the result
; vector with an insertelement chain. Under both FileCheck prefixes the
; expected output still contains the four scalar calls, i.e. no vector call is
; formed.
define <4 x float> @cosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @cosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @cosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @coshf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @coshf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @coshf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @coshf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Scalar callee used by @int_cosh_4x.
declare float @llvm.cosh.f32(float)

; We cannot vectorize cosh since RISC-V has no such instruction.
; @int_cosh_4x loads one <4 x float> from %a, applies the scalar
; @llvm.cosh.f32 call (with the `fast` flag) to each of its four lanes, and
; rebuilds the result vector with an insertelement chain. Under both FileCheck
; prefixes the expected output still contains the four scalar calls, i.e. no
; vector call is formed.
define <4 x float> @int_cosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_cosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Scalar callee used by @atanh_4x.
declare float @atanhf(float) readonly nounwind willreturn

; We cannot vectorize atanh since RISC-V has no such instruction.
; @atanh_4x loads one <4 x float> from %a, applies the scalar @atanhf call
; (with the `fast` flag) to each of its four lanes, and rebuilds the result
; vector with an insertelement chain. Under both FileCheck prefixes the
; expected output still contains the four scalar calls, i.e. no vector call is
; formed.
define <4 x float> @atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @atanhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @atanhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @atanhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @atanhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Scalar callee used by @int_atanh_4x.
declare float @llvm.atanh.f32(float)

; We cannot vectorize atanh since RISC-V has no such instruction.
; @int_atanh_4x loads one <4 x float> from %a, applies the scalar
; @llvm.atanh.f32 call (with the `fast` flag) to each of its four lanes, and
; rebuilds the result vector with an insertelement chain. Under both FileCheck
; prefixes the expected output still contains the four scalar calls, i.e. no
; vector call is formed.
define <4 x float> @int_atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.atanh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}