1013235a2SBen Shi; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 2013235a2SBen Shi; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \ 3013235a2SBen Shi; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \ 4013235a2SBen Shi; RUN: | FileCheck %s 5013235a2SBen Shi; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \ 6013235a2SBen Shi; RUN: | FileCheck %s --check-prefix=DEFAULT 7013235a2SBen Shi 8013235a2SBen Shideclare float @fabsf(float) readonly nounwind willreturn 9013235a2SBen Shi 10013235a2SBen Shidefine <4 x float> @fabs_4x(ptr %a) { 11013235a2SBen Shi; CHECK-LABEL: define <4 x float> @fabs_4x 12013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { 13013235a2SBen Shi; CHECK-NEXT: entry: 14013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 15013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 16013235a2SBen Shi; CHECK-NEXT: ret <4 x float> [[TMP1]] 17013235a2SBen Shi; 18013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @fabs_4x 19013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { 20013235a2SBen Shi; DEFAULT-NEXT: entry: 21013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 227f26c27eSPhilip Reames; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 237f26c27eSPhilip Reames; DEFAULT-NEXT: ret <4 x float> [[TMP1]] 24013235a2SBen Shi; 25013235a2SBen Shientry: 26013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 27013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 28013235a2SBen Shi %1 = tail call fast float @fabsf(float %vecext) 29013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 30013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 31013235a2SBen Shi %2 = tail call fast float @fabsf(float %vecext.1) 32013235a2SBen Shi %vecins.1 
= insertelement <4 x float> %vecins, float %2, i32 1 33013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 34013235a2SBen Shi %3 = tail call fast float @fabsf(float %vecext.2) 35013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 36013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 37013235a2SBen Shi %4 = tail call fast float @fabsf(float %vecext.3) 38013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 39013235a2SBen Shi ret <4 x float> %vecins.3 40013235a2SBen Shi} 41013235a2SBen Shi 42013235a2SBen Shideclare float @llvm.fabs.f32(float) 43013235a2SBen Shi 44013235a2SBen Shidefine <4 x float> @int_fabs_4x(ptr %a) { 45013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_fabs_4x 46013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 47013235a2SBen Shi; CHECK-NEXT: entry: 48013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 49013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 50013235a2SBen Shi; CHECK-NEXT: ret <4 x float> [[TMP1]] 51013235a2SBen Shi; 52013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_fabs_4x 53013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 54013235a2SBen Shi; DEFAULT-NEXT: entry: 55013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 567f26c27eSPhilip Reames; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 577f26c27eSPhilip Reames; DEFAULT-NEXT: ret <4 x float> [[TMP1]] 58013235a2SBen Shi; 59013235a2SBen Shientry: 60013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 61013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 62013235a2SBen Shi %1 = tail call fast float @llvm.fabs.f32(float %vecext) 63013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 64013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 65013235a2SBen Shi %2 
= tail call fast float @llvm.fabs.f32(float %vecext.1) 66013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 67013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 68013235a2SBen Shi %3 = tail call fast float @llvm.fabs.f32(float %vecext.2) 69013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 70013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 71013235a2SBen Shi %4 = tail call fast float @llvm.fabs.f32(float %vecext.3) 72013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 73013235a2SBen Shi ret <4 x float> %vecins.3 74013235a2SBen Shi} 75013235a2SBen Shi 76013235a2SBen Shideclare float @sqrtf(float) readonly nounwind willreturn 77013235a2SBen Shi 78013235a2SBen Shidefine <4 x float> @sqrt_4x(ptr %a) { 79013235a2SBen Shi; CHECK-LABEL: define <4 x float> @sqrt_4x 80013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 81013235a2SBen Shi; CHECK-NEXT: entry: 82013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 83013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 84013235a2SBen Shi; CHECK-NEXT: ret <4 x float> [[TMP1]] 85013235a2SBen Shi; 86013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @sqrt_4x 87013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 88013235a2SBen Shi; DEFAULT-NEXT: entry: 89013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 907f26c27eSPhilip Reames; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 917f26c27eSPhilip Reames; DEFAULT-NEXT: ret <4 x float> [[TMP1]] 92013235a2SBen Shi; 93013235a2SBen Shientry: 94013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 95013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 96013235a2SBen Shi %1 = tail call fast float @sqrtf(float %vecext) 97013235a2SBen Shi %vecins = insertelement <4 x float> undef, float 
%1, i32 0 98013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 99013235a2SBen Shi %2 = tail call fast float @sqrtf(float %vecext.1) 100013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 101013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 102013235a2SBen Shi %3 = tail call fast float @sqrtf(float %vecext.2) 103013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 104013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 105013235a2SBen Shi %4 = tail call fast float @sqrtf(float %vecext.3) 106013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 107013235a2SBen Shi ret <4 x float> %vecins.3 108013235a2SBen Shi} 109013235a2SBen Shi 110013235a2SBen Shideclare float @llvm.sqrt.f32(float) 111013235a2SBen Shi 112013235a2SBen Shidefine <4 x float> @int_sqrt_4x(ptr %a) { 113013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_sqrt_4x 114013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 115013235a2SBen Shi; CHECK-NEXT: entry: 116013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 117013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 118013235a2SBen Shi; CHECK-NEXT: ret <4 x float> [[TMP1]] 119013235a2SBen Shi; 120013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x 121013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 122013235a2SBen Shi; DEFAULT-NEXT: entry: 123013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1247f26c27eSPhilip Reames; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 1257f26c27eSPhilip Reames; DEFAULT-NEXT: ret <4 x float> [[TMP1]] 126013235a2SBen Shi; 127013235a2SBen Shientry: 128013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 129013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 130013235a2SBen Shi %1 = tail call 
fast float @llvm.sqrt.f32(float %vecext) 131013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 132013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 133013235a2SBen Shi %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1) 134013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 135013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 136013235a2SBen Shi %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2) 137013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 138013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 139013235a2SBen Shi %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3) 140013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 141013235a2SBen Shi ret <4 x float> %vecins.3 142013235a2SBen Shi} 143013235a2SBen Shi 144013235a2SBen Shideclare float @expf(float) readonly nounwind willreturn 145013235a2SBen Shi 146013235a2SBen Shi; We cannot vectorize exp since RISCV has no such instruction.
147013235a2SBen Shidefine <4 x float> @exp_4x(ptr %a) { 148013235a2SBen Shi; CHECK-LABEL: define <4 x float> @exp_4x 149013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 150013235a2SBen Shi; CHECK-NEXT: entry: 151013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 152013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 153013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]]) 154013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 155013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 156013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) 157013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 158d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 159d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) 160d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 161d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 162d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) 163d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 164d70963a7SAlexey Bataev; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 165013235a2SBen Shi; 166013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @exp_4x 167013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 168013235a2SBen Shi; DEFAULT-NEXT: entry: 169013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 170013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 171013235a2SBen 
Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]]) 172013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 173013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 174013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) 175013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 176d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 177d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) 178d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 179d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 180d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) 181d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 182d70963a7SAlexey Bataev; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 183013235a2SBen Shi; 184013235a2SBen Shientry: 185013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 186013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 187013235a2SBen Shi %1 = tail call fast float @expf(float %vecext) 188013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 189013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 190013235a2SBen Shi %2 = tail call fast float @expf(float %vecext.1) 191013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 192013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 193013235a2SBen Shi %3 = tail call fast float @expf(float %vecext.2) 194013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 195013235a2SBen 
Shi %vecext.3 = extractelement <4 x float> %0, i32 3 196013235a2SBen Shi %4 = tail call fast float @expf(float %vecext.3) 197013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 198013235a2SBen Shi ret <4 x float> %vecins.3 199013235a2SBen Shi} 200013235a2SBen Shi 201013235a2SBen Shideclare float @llvm.exp.f32(float) 202013235a2SBen Shi 203013235a2SBen Shi; We cannot vectorize exp since RISCV has no such instruction. 204013235a2SBen Shidefine <4 x float> @int_exp_4x(ptr %a) { 205013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_exp_4x 206013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 207013235a2SBen Shi; CHECK-NEXT: entry: 208013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 209013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 210013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]]) 211013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 212013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 213013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) 214013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 215d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 216d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) 217d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 218d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 219d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) 220d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float>
[[VECINS_2]], float [[TMP4]], i32 3 221d70963a7SAlexey Bataev; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 222013235a2SBen Shi; 223013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_exp_4x 224013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 225013235a2SBen Shi; DEFAULT-NEXT: entry: 226013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 227013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 228013235a2SBen Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]]) 229013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 230013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 231013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) 232013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 233d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 234d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) 235d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 236d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 237d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) 238d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 239d70963a7SAlexey Bataev; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 240013235a2SBen Shi; 241013235a2SBen Shientry: 242013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 243013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 244013235a2SBen Shi %1 = tail call fast float @llvm.exp.f32(float %vecext) 
245013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 246013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 247013235a2SBen Shi %2 = tail call fast float @llvm.exp.f32(float %vecext.1) 248013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 249013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 250013235a2SBen Shi %3 = tail call fast float @llvm.exp.f32(float %vecext.2) 251013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 252013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 253013235a2SBen Shi %4 = tail call fast float @llvm.exp.f32(float %vecext.3) 254013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 255013235a2SBen Shi ret <4 x float> %vecins.3 256013235a2SBen Shi} 257013235a2SBen Shi 258013235a2SBen Shideclare float @logf(float) readonly nounwind willreturn 259013235a2SBen Shi 260013235a2SBen Shi; We cannot vectorize log since RISCV has no such instruction.
261013235a2SBen Shidefine <4 x float> @log_4x(ptr %a) { 262013235a2SBen Shi; CHECK-LABEL: define <4 x float> @log_4x 263013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 264013235a2SBen Shi; CHECK-NEXT: entry: 265013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 266013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 267013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]]) 268013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 269013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 270013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) 271013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 272d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 273d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) 274d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 275d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 276d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) 277d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 278d70963a7SAlexey Bataev; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 279013235a2SBen Shi; 280013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @log_4x 281013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 282013235a2SBen Shi; DEFAULT-NEXT: entry: 283013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 284013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 285013235a2SBen 
Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]]) 286013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 287013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 288013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) 289013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 290d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 291d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) 292d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 293d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 294d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) 295d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 296d70963a7SAlexey Bataev; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 297013235a2SBen Shi; 298013235a2SBen Shientry: 299013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 300013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 301013235a2SBen Shi %1 = tail call fast float @logf(float %vecext) 302013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 303013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 304013235a2SBen Shi %2 = tail call fast float @logf(float %vecext.1) 305013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 306013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 307013235a2SBen Shi %3 = tail call fast float @logf(float %vecext.2) 308013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 309013235a2SBen 
Shi %vecext.3 = extractelement <4 x float> %0, i32 3 310013235a2SBen Shi %4 = tail call fast float @logf(float %vecext.3) 311013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 312013235a2SBen Shi ret <4 x float> %vecins.3 313013235a2SBen Shi} 314013235a2SBen Shi 315013235a2SBen Shideclare float @llvm.log.f32(float) 316013235a2SBen Shi 317013235a2SBen Shi; We cannot vectorize log since RISCV has no such instruction. 318013235a2SBen Shidefine <4 x float> @int_log_4x(ptr %a) { 319013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_log_4x 320013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 321013235a2SBen Shi; CHECK-NEXT: entry: 322013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 323013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 324013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]]) 325013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 326013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 327013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) 328013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 329d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 330d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) 331d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 332d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 333d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) 334d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float>
[[VECINS_2]], float [[TMP4]], i32 3 335d70963a7SAlexey Bataev; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 336013235a2SBen Shi; 337013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_log_4x 338013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 339013235a2SBen Shi; DEFAULT-NEXT: entry: 340013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 341013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 342013235a2SBen Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]]) 343013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 344013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 345013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) 346013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 347d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 348d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) 349d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 350d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 351d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) 352d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 353d70963a7SAlexey Bataev; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 354013235a2SBen Shi; 355013235a2SBen Shientry: 356013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 357013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 358013235a2SBen Shi %1 = tail call fast float @llvm.log.f32(float %vecext) 
359013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 360013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 361013235a2SBen Shi %2 = tail call fast float @llvm.log.f32(float %vecext.1) 362013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 363013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 364013235a2SBen Shi %3 = tail call fast float @llvm.log.f32(float %vecext.2) 365013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 366013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 367013235a2SBen Shi %4 = tail call fast float @llvm.log.f32(float %vecext.3) 368013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 369013235a2SBen Shi ret <4 x float> %vecins.3 370013235a2SBen Shi} 371013235a2SBen Shi 372013235a2SBen Shideclare float @sinf(float) readonly nounwind willreturn 373013235a2SBen Shi 374013235a2SBen Shi; We cannot vectorize sin since RISCV has no such instruction.
375013235a2SBen Shidefine <4 x float> @sin_4x(ptr %a) { 376013235a2SBen Shi; CHECK-LABEL: define <4 x float> @sin_4x 377013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 378013235a2SBen Shi; CHECK-NEXT: entry: 379013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 380013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 381013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]]) 382013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 383013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 384013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) 385013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 386d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 387d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) 388d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 389d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 390d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) 391d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 392d70963a7SAlexey Bataev; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 393013235a2SBen Shi; 394013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @sin_4x 395013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 396013235a2SBen Shi; DEFAULT-NEXT: entry: 397013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 398013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 399013235a2SBen 
Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]]) 400013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 401013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 402013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) 403013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 404d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 405d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) 406d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 407d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 408d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) 409d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 410d70963a7SAlexey Bataev; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 411013235a2SBen Shi; 412013235a2SBen Shientry: 413013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 414013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 415013235a2SBen Shi %1 = tail call fast float @sinf(float %vecext) 416013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 417013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 418013235a2SBen Shi %2 = tail call fast float @sinf(float %vecext.1) 419013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 420013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 421013235a2SBen Shi %3 = tail call fast float @sinf(float %vecext.2) 422013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 423013235a2SBen 
Shi %vecext.3 = extractelement <4 x float> %0, i32 3 424013235a2SBen Shi %4 = tail call fast float @sinf(float %vecext.3) 425013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 426013235a2SBen Shi ret <4 x float> %vecins.3 427013235a2SBen Shi} 428013235a2SBen Shi 429013235a2SBen Shideclare float @llvm.sin.f32(float) 430013235a2SBen Shi 431013235a2SBen Shi; We cannot vectorize sin since RISCV has no such instruction. 432013235a2SBen Shidefine <4 x float> @int_sin_4x(ptr %a) { 433013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_sin_4x 434013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 435013235a2SBen Shi; CHECK-NEXT: entry: 436013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 437013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 438013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) 439013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 440013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 441013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) 442013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 443d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 444d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) 445d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 446d70963a7SAlexey Bataev; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 447d70963a7SAlexey Bataev; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) 448d70963a7SAlexey Bataev; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float>
[[VECINS_2]], float [[TMP4]], i32 3 449d70963a7SAlexey Bataev; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 450013235a2SBen Shi; 451013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_sin_4x 452013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 453013235a2SBen Shi; DEFAULT-NEXT: entry: 454013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 455013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 456013235a2SBen Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) 457013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 458013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 459013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) 460013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 461d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 462d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) 463d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 464d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 465d70963a7SAlexey Bataev; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) 466d70963a7SAlexey Bataev; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 467d70963a7SAlexey Bataev; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 468013235a2SBen Shi; 469013235a2SBen Shientry: 470013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 471013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 472013235a2SBen Shi %1 = tail call fast float @llvm.sin.f32(float %vecext) 
473013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 474013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 475013235a2SBen Shi %2 = tail call fast float @llvm.sin.f32(float %vecext.1) 476013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 477013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 478013235a2SBen Shi %3 = tail call fast float @llvm.sin.f32(float %vecext.2) 479013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 480013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 481013235a2SBen Shi %4 = tail call fast float @llvm.sin.f32(float %vecext.3) 482013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 483013235a2SBen Shi ret <4 x float> %vecins.3 484013235a2SBen Shi} 485013235a2SBen Shi 486013235a2SBen Shideclare float @asinf(float) readonly nounwind willreturn 487013235a2SBen Shi 488013235a2SBen Shi; We cannot vectorize asin since RISCV has no such instruction. 
489013235a2SBen Shidefine <4 x float> @asin_4x(ptr %a) { 490013235a2SBen Shi; CHECK-LABEL: define <4 x float> @asin_4x 491013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 492013235a2SBen Shi; CHECK-NEXT: entry: 493013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 494013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 495013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]]) 496013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 497013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 498013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]]) 499013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 500013235a2SBen Shi; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 501013235a2SBen Shi; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]]) 502013235a2SBen Shi; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 503013235a2SBen Shi; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 504013235a2SBen Shi; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]]) 505013235a2SBen Shi; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 506013235a2SBen Shi; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 507013235a2SBen Shi; 508013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @asin_4x 509013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 510013235a2SBen Shi; DEFAULT-NEXT: entry: 511013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 512013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 513013235a2SBen Shi; DEFAULT-NEXT: [[TMP1:%.*]] = 
tail call fast float @asinf(float [[VECEXT]]) 514013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 515013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 516013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]]) 517013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 518013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 519013235a2SBen Shi; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]]) 520013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 521013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 522013235a2SBen Shi; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]]) 523013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 524013235a2SBen Shi; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 525013235a2SBen Shi; 526013235a2SBen Shientry: 527013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 528013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 529013235a2SBen Shi %1 = tail call fast float @asinf(float %vecext) 530013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 531013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 532013235a2SBen Shi %2 = tail call fast float @asinf(float %vecext.1) 533013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 534013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 535013235a2SBen Shi %3 = tail call fast float @asinf(float %vecext.2) 536013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 537013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 538013235a2SBen 
Shi %4 = tail call fast float @asinf(float %vecext.3) 539013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 540013235a2SBen Shi ret <4 x float> %vecins.3 541013235a2SBen Shi} 542013235a2SBen Shi 543013235a2SBen Shideclare float @llvm.asin.f32(float) 544013235a2SBen Shi 545013235a2SBen Shi; We cannot vectorize asin since RISCV has no such instruction. 546013235a2SBen Shidefine <4 x float> @int_asin_4x(ptr %a) { 547013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_asin_4x 548013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 549013235a2SBen Shi; CHECK-NEXT: entry: 550013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 551013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 552013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]]) 553013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 554013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 555013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]]) 556013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 557013235a2SBen Shi; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 558013235a2SBen Shi; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]]) 559013235a2SBen Shi; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 560013235a2SBen Shi; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 561013235a2SBen Shi; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]]) 562013235a2SBen Shi; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 563013235a2SBen Shi; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 
564013235a2SBen Shi; 565013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_asin_4x 566013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 567013235a2SBen Shi; DEFAULT-NEXT: entry: 568013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 569013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 570013235a2SBen Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]]) 571013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 572013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 573013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]]) 574013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 575013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 576013235a2SBen Shi; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]]) 577013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 578013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 579013235a2SBen Shi; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]]) 580013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 581013235a2SBen Shi; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 582013235a2SBen Shi; 583013235a2SBen Shientry: 584013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 585013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 586013235a2SBen Shi %1 = tail call fast float @llvm.asin.f32(float %vecext) 587013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 588013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, 
i32 1 589013235a2SBen Shi %2 = tail call fast float @llvm.asin.f32(float %vecext.1) 590013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 591013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 592013235a2SBen Shi %3 = tail call fast float @llvm.asin.f32(float %vecext.2) 593013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 594013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 595013235a2SBen Shi %4 = tail call fast float @llvm.asin.f32(float %vecext.3) 596013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 597013235a2SBen Shi ret <4 x float> %vecins.3 598013235a2SBen Shi} 599013235a2SBen Shi 600*ceb613a8SSimon Pilgrimdeclare float @cosf(float) readonly nounwind willreturn 601*ceb613a8SSimon Pilgrim 602*ceb613a8SSimon Pilgrim; We cannot vectorize cos since RISCV has no such instruction. 603*ceb613a8SSimon Pilgrimdefine <4 x float> @cos_4x(ptr %a) { 604*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @cos_4x 605*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 606*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 607*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 608*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 609*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]]) 610*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 611*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 612*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) 613*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 614*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
615*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]]) 616*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 617*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 618*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]]) 619*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 620*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 621*ceb613a8SSimon Pilgrim; 622*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @cos_4x 623*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 624*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 625*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 626*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 627*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]]) 628*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 629*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 630*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) 631*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 632*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 633*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]]) 634*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 635*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> 
[[TMP0]], i32 3 636*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]]) 637*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 638*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 639*ceb613a8SSimon Pilgrim; 640*ceb613a8SSimon Pilgrimentry: 641*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 642*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 643*ceb613a8SSimon Pilgrim %1 = tail call fast float @cosf(float %vecext) 644*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 645*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 646*ceb613a8SSimon Pilgrim %2 = tail call fast float @cosf(float %vecext.1) 647*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 648*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 649*ceb613a8SSimon Pilgrim %3 = tail call fast float @cosf(float %vecext.2) 650*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 651*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 652*ceb613a8SSimon Pilgrim %4 = tail call fast float @cosf(float %vecext.3) 653*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 654*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 655*ceb613a8SSimon Pilgrim} 656*ceb613a8SSimon Pilgrim 657*ceb613a8SSimon Pilgrimdeclare float @llvm.cos.f32(float) 658*ceb613a8SSimon Pilgrim 659*ceb613a8SSimon Pilgrim; We cannot vectorize cos since RISCV has no such instruction. 
660*ceb613a8SSimon Pilgrimdefine <4 x float> @int_cos_4x(ptr %a) { 661*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_cos_4x 662*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 663*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 664*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 665*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 666*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) 667*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 668*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 669*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) 670*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 671*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 672*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]]) 673*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 674*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 675*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]]) 676*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 677*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 678*ceb613a8SSimon Pilgrim; 679*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_cos_4x 680*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 681*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 682*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: 
[[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 683*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 684*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) 685*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 686*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 687*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) 688*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 689*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 690*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]]) 691*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 692*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 693*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]]) 694*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 695*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 696*ceb613a8SSimon Pilgrim; 697*ceb613a8SSimon Pilgrimentry: 698*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 699*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 700*ceb613a8SSimon Pilgrim %1 = tail call fast float @llvm.cos.f32(float %vecext) 701*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 702*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 703*ceb613a8SSimon Pilgrim %2 = tail call fast float @llvm.cos.f32(float %vecext.1) 
704*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 705*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 706*ceb613a8SSimon Pilgrim %3 = tail call fast float @llvm.cos.f32(float %vecext.2) 707*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 708*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 709*ceb613a8SSimon Pilgrim %4 = tail call fast float @llvm.cos.f32(float %vecext.3) 710*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 711*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 712*ceb613a8SSimon Pilgrim} 713*ceb613a8SSimon Pilgrim 714*ceb613a8SSimon Pilgrimdeclare float @acosf(float) readonly nounwind willreturn 715*ceb613a8SSimon Pilgrim 716*ceb613a8SSimon Pilgrim; We cannot vectorize acos since RISCV has no such instruction. 717*ceb613a8SSimon Pilgrimdefine <4 x float> @acos_4x(ptr %a) { 718*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @acos_4x 719*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 720*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 721*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 722*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 723*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]]) 724*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 725*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 726*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]]) 727*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 728*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
729*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]]) 730*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 731*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 732*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]]) 733*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 734*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 735*ceb613a8SSimon Pilgrim; 736*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @acos_4x 737*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 738*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 739*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 740*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 741*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]]) 742*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 743*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 744*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]]) 745*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 746*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 747*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]]) 748*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 749*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x 
float> [[TMP0]], i32 3 750*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]]) 751*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 752*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 753*ceb613a8SSimon Pilgrim; 754*ceb613a8SSimon Pilgrimentry: 755*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 756*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 757*ceb613a8SSimon Pilgrim %1 = tail call fast float @acosf(float %vecext) 758*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 759*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 760*ceb613a8SSimon Pilgrim %2 = tail call fast float @acosf(float %vecext.1) 761*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 762*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 763*ceb613a8SSimon Pilgrim %3 = tail call fast float @acosf(float %vecext.2) 764*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 765*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 766*ceb613a8SSimon Pilgrim %4 = tail call fast float @acosf(float %vecext.3) 767*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 768*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 769*ceb613a8SSimon Pilgrim} 770*ceb613a8SSimon Pilgrim 771*ceb613a8SSimon Pilgrimdeclare float @llvm.acos.f32(float) 772*ceb613a8SSimon Pilgrim 773*ceb613a8SSimon Pilgrim; We cannot vectorize acos since RISCV has no such instruction. 
774*ceb613a8SSimon Pilgrimdefine <4 x float> @int_acos_4x(ptr %a) { 775*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_acos_4x 776*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 777*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 778*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 779*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 780*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]]) 781*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 782*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 783*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]]) 784*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 785*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 786*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]]) 787*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 788*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 789*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]]) 790*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 791*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 792*ceb613a8SSimon Pilgrim; 793*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_acos_4x 794*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 795*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 796*ceb613a8SSimon Pilgrim; 
DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 797*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 798*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]]) 799*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 800*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 801*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]]) 802*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 803*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 804*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]]) 805*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 806*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 807*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]]) 808*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 809*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 810*ceb613a8SSimon Pilgrim; 811*ceb613a8SSimon Pilgrimentry: 812*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 813*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 814*ceb613a8SSimon Pilgrim %1 = tail call fast float @llvm.acos.f32(float %vecext) 815*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 816*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 817*ceb613a8SSimon Pilgrim %2 = tail call fast float 
@llvm.acos.f32(float %vecext.1) 818*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 819*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 820*ceb613a8SSimon Pilgrim %3 = tail call fast float @llvm.acos.f32(float %vecext.2) 821*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 822*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 823*ceb613a8SSimon Pilgrim %4 = tail call fast float @llvm.acos.f32(float %vecext.3) 824*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 825*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 826*ceb613a8SSimon Pilgrim} 827*ceb613a8SSimon Pilgrim 828*ceb613a8SSimon Pilgrimdeclare float @tanf(float) readonly nounwind willreturn 829*ceb613a8SSimon Pilgrim 830*ceb613a8SSimon Pilgrim; We can not vectorized tan since RISCV has no such instruction. 831*ceb613a8SSimon Pilgrimdefine <4 x float> @tan_4x(ptr %a) { 832*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @tan_4x 833*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 834*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 835*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 836*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 837*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]]) 838*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 839*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 840*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]]) 841*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 842*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> 
[[TMP0]], i32 2 843*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]]) 844*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 845*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 846*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]]) 847*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 848*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 849*ceb613a8SSimon Pilgrim; 850*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @tan_4x 851*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 852*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 853*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 854*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 855*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]]) 856*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 857*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 858*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]]) 859*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 860*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 861*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]]) 862*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 863*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement 
<4 x float> [[TMP0]], i32 3 864*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]]) 865*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 866*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 867*ceb613a8SSimon Pilgrim; 868*ceb613a8SSimon Pilgrimentry: 869*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 870*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 871*ceb613a8SSimon Pilgrim %1 = tail call fast float @tanf(float %vecext) 872*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 873*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 874*ceb613a8SSimon Pilgrim %2 = tail call fast float @tanf(float %vecext.1) 875*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 876*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 877*ceb613a8SSimon Pilgrim %3 = tail call fast float @tanf(float %vecext.2) 878*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 879*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 880*ceb613a8SSimon Pilgrim %4 = tail call fast float @tanf(float %vecext.3) 881*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 882*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 883*ceb613a8SSimon Pilgrim} 884*ceb613a8SSimon Pilgrim 885*ceb613a8SSimon Pilgrimdeclare float @llvm.tan.f32(float) 886*ceb613a8SSimon Pilgrim 887*ceb613a8SSimon Pilgrim; We can not vectorized tan since RISCV has no such instruction. 
888*ceb613a8SSimon Pilgrimdefine <4 x float> @int_tan_4x(ptr %a) { 889*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_tan_4x 890*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 891*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 892*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 893*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 894*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]]) 895*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 896*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 897*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]]) 898*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 899*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 900*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]]) 901*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 902*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 903*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]]) 904*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 905*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 906*ceb613a8SSimon Pilgrim; 907*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_tan_4x 908*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 909*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 910*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: 
[[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 911*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 912*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]]) 913*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 914*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 915*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]]) 916*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 917*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 918*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]]) 919*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 920*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 921*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]]) 922*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 923*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 924*ceb613a8SSimon Pilgrim; 925*ceb613a8SSimon Pilgrimentry: 926*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 927*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 928*ceb613a8SSimon Pilgrim %1 = tail call fast float @llvm.tan.f32(float %vecext) 929*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 930*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 931*ceb613a8SSimon Pilgrim %2 = tail call fast float @llvm.tan.f32(float %vecext.1) 
932*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 933*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 934*ceb613a8SSimon Pilgrim %3 = tail call fast float @llvm.tan.f32(float %vecext.2) 935*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 936*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 937*ceb613a8SSimon Pilgrim %4 = tail call fast float @llvm.tan.f32(float %vecext.3) 938*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 939*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 940*ceb613a8SSimon Pilgrim} 941*ceb613a8SSimon Pilgrim 942*ceb613a8SSimon Pilgrimdeclare float @atanf(float) readonly nounwind willreturn 943*ceb613a8SSimon Pilgrim 944*ceb613a8SSimon Pilgrim; We can not vectorized atan since RISCV has no such instruction. 945*ceb613a8SSimon Pilgrimdefine <4 x float> @atan_4x(ptr %a) { 946*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @atan_4x 947*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 948*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 949*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 950*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 951*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]]) 952*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 953*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 954*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]]) 955*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 956*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
957*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]]) 958*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 959*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 960*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]]) 961*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 962*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 963*ceb613a8SSimon Pilgrim; 964*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @atan_4x 965*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 966*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 967*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 968*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 969*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]]) 970*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 971*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 972*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]]) 973*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 974*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 975*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]]) 976*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 977*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x 
float> [[TMP0]], i32 3 978*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]]) 979*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 980*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 981*ceb613a8SSimon Pilgrim; 982*ceb613a8SSimon Pilgrimentry: 983*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 984*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 985*ceb613a8SSimon Pilgrim %1 = tail call fast float @atanf(float %vecext) 986*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 987*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 988*ceb613a8SSimon Pilgrim %2 = tail call fast float @atanf(float %vecext.1) 989*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 990*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 991*ceb613a8SSimon Pilgrim %3 = tail call fast float @atanf(float %vecext.2) 992*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 993*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 994*ceb613a8SSimon Pilgrim %4 = tail call fast float @atanf(float %vecext.3) 995*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 996*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 997*ceb613a8SSimon Pilgrim} 998*ceb613a8SSimon Pilgrim 999*ceb613a8SSimon Pilgrimdeclare float @llvm.atan.f32(float) 1000*ceb613a8SSimon Pilgrim 1001*ceb613a8SSimon Pilgrim; We can not vectorized atan since RISCV has no such instruction. 
1002*ceb613a8SSimon Pilgrimdefine <4 x float> @int_atan_4x(ptr %a) { 1003*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_atan_4x 1004*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1005*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1006*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1007*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1008*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]]) 1009*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1010*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1011*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]]) 1012*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1013*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1014*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]]) 1015*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1016*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1017*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]]) 1018*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1019*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1020*ceb613a8SSimon Pilgrim; 1021*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_atan_4x 1022*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1023*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 
1024*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1025*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1026*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]]) 1027*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1028*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1029*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]]) 1030*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1031*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1032*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]]) 1033*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1034*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1035*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]]) 1036*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1037*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1038*ceb613a8SSimon Pilgrim; 1039*ceb613a8SSimon Pilgrimentry: 1040*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1041*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1042*ceb613a8SSimon Pilgrim %1 = tail call fast float @llvm.atan.f32(float %vecext) 1043*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1044*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 1045*ceb613a8SSimon 
Pilgrim %2 = tail call fast float @llvm.atan.f32(float %vecext.1) 1046*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1047*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1048*ceb613a8SSimon Pilgrim %3 = tail call fast float @llvm.atan.f32(float %vecext.2) 1049*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1050*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1051*ceb613a8SSimon Pilgrim %4 = tail call fast float @llvm.atan.f32(float %vecext.3) 1052*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1053*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1054*ceb613a8SSimon Pilgrim} 1055*ceb613a8SSimon Pilgrim 1056*ceb613a8SSimon Pilgrimdeclare float @sinhf(float) readonly nounwind willreturn 1057*ceb613a8SSimon Pilgrim 1058*ceb613a8SSimon Pilgrim; We can not vectorized sinh since RISCV has no such instruction. 1059*ceb613a8SSimon Pilgrimdefine <4 x float> @sinh_4x(ptr %a) { 1060*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @sinh_4x 1061*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1062*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1063*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1064*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1065*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]]) 1066*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1067*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1068*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]]) 1069*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1070*ceb613a8SSimon 
Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1071*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]]) 1072*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1073*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1074*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]]) 1075*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1076*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1077*ceb613a8SSimon Pilgrim; 1078*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @sinh_4x 1079*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1080*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 1081*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1082*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1083*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]]) 1084*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1085*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1086*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]]) 1087*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1088*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1089*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]]) 1090*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], 
float [[TMP3]], i32 2 1091*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1092*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]]) 1093*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1094*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1095*ceb613a8SSimon Pilgrim; 1096*ceb613a8SSimon Pilgrimentry: 1097*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1098*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1099*ceb613a8SSimon Pilgrim %1 = tail call fast float @sinhf(float %vecext) 1100*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1101*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 1102*ceb613a8SSimon Pilgrim %2 = tail call fast float @sinhf(float %vecext.1) 1103*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1104*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1105*ceb613a8SSimon Pilgrim %3 = tail call fast float @sinhf(float %vecext.2) 1106*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1107*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1108*ceb613a8SSimon Pilgrim %4 = tail call fast float @sinhf(float %vecext.3) 1109*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1110*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1111*ceb613a8SSimon Pilgrim} 1112*ceb613a8SSimon Pilgrim 1113*ceb613a8SSimon Pilgrimdeclare float @llvm.sinh.f32(float) 1114*ceb613a8SSimon Pilgrim 1115*ceb613a8SSimon Pilgrim; We can not vectorized sinh since RISCV has no such instruction. 
1116*ceb613a8SSimon Pilgrimdefine <4 x float> @int_sinh_4x(ptr %a) { 1117*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_sinh_4x 1118*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1119*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1120*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1121*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1122*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]]) 1123*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1124*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1125*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]]) 1126*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1127*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1128*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]]) 1129*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1130*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1131*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]]) 1132*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1133*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1134*ceb613a8SSimon Pilgrim; 1135*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_sinh_4x 1136*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1137*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 
1138*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1139*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1140*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]]) 1141*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1142*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1143*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]]) 1144*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1145*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1146*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]]) 1147*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1148*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1149*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]]) 1150*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1151*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1152*ceb613a8SSimon Pilgrim; 1153*ceb613a8SSimon Pilgrimentry: 1154*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1155*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1156*ceb613a8SSimon Pilgrim %1 = tail call fast float @llvm.sinh.f32(float %vecext) 1157*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1158*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 1159*ceb613a8SSimon 
Pilgrim %2 = tail call fast float @llvm.sinh.f32(float %vecext.1) 1160*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1161*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1162*ceb613a8SSimon Pilgrim %3 = tail call fast float @llvm.sinh.f32(float %vecext.2) 1163*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1164*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1165*ceb613a8SSimon Pilgrim %4 = tail call fast float @llvm.sinh.f32(float %vecext.3) 1166*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1167*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1168*ceb613a8SSimon Pilgrim} 1169*ceb613a8SSimon Pilgrim 1170*ceb613a8SSimon Pilgrimdeclare float @asinhf(float) readonly nounwind willreturn 1171*ceb613a8SSimon Pilgrim 1172*ceb613a8SSimon Pilgrim; We can not vectorized asinh since RISCV has no such instruction. 1173*ceb613a8SSimon Pilgrimdefine <4 x float> @asinh_4x(ptr %a) { 1174*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @asinh_4x 1175*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1176*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1177*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1178*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1179*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]]) 1180*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1181*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1182*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]]) 1183*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 
1184*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1185*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]]) 1186*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1187*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1188*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]]) 1189*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1190*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1191*ceb613a8SSimon Pilgrim; 1192*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @asinh_4x 1193*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1194*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 1195*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1196*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1197*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]]) 1198*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1199*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1200*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]]) 1201*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1202*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1203*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]]) 1204*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x 
float> [[VECINS_1]], float [[TMP3]], i32 2 1205*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1206*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]]) 1207*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1208*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1209*ceb613a8SSimon Pilgrim; 1210*ceb613a8SSimon Pilgrimentry: 1211*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1212*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1213*ceb613a8SSimon Pilgrim %1 = tail call fast float @asinhf(float %vecext) 1214*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1215*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 1216*ceb613a8SSimon Pilgrim %2 = tail call fast float @asinhf(float %vecext.1) 1217*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1218*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1219*ceb613a8SSimon Pilgrim %3 = tail call fast float @asinhf(float %vecext.2) 1220*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1221*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1222*ceb613a8SSimon Pilgrim %4 = tail call fast float @asinhf(float %vecext.3) 1223*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1224*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1225*ceb613a8SSimon Pilgrim} 1226*ceb613a8SSimon Pilgrim 1227*ceb613a8SSimon Pilgrimdeclare float @llvm.asinh.f32(float) 1228*ceb613a8SSimon Pilgrim 1229*ceb613a8SSimon Pilgrim; We cannot vectorize asinh since RISCV has no such instruction. 
1230*ceb613a8SSimon Pilgrimdefine <4 x float> @int_asinh_4x(ptr %a) { 1231*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_asinh_4x 1232*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1233*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1234*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1235*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1236*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]]) 1237*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1238*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1239*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]]) 1240*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1241*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1242*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]]) 1243*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1244*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1245*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]]) 1246*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1247*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1248*ceb613a8SSimon Pilgrim; 1249*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_asinh_4x 1250*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1251*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 
1252*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1253*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1254*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]]) 1255*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1256*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1257*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]]) 1258*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1259*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1260*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]]) 1261*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1262*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1263*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]]) 1264*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1265*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1266*ceb613a8SSimon Pilgrim; 1267*ceb613a8SSimon Pilgrimentry: 1268*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1269*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1270*ceb613a8SSimon Pilgrim %1 = tail call fast float @llvm.asinh.f32(float %vecext) 1271*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1272*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 
1273*ceb613a8SSimon Pilgrim %2 = tail call fast float @llvm.asinh.f32(float %vecext.1) 1274*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1275*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1276*ceb613a8SSimon Pilgrim %3 = tail call fast float @llvm.asinh.f32(float %vecext.2) 1277*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1278*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1279*ceb613a8SSimon Pilgrim %4 = tail call fast float @llvm.asinh.f32(float %vecext.3) 1280*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1281*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1282*ceb613a8SSimon Pilgrim} 1283*ceb613a8SSimon Pilgrim 1284013235a2SBen Shideclare float @coshf(float) readonly nounwind willreturn 1285013235a2SBen Shi 1286013235a2SBen Shi; We cannot vectorize cosh since RISCV has no such instruction. 1287013235a2SBen Shidefine <4 x float> @cosh_4x(ptr %a) { 1288013235a2SBen Shi; CHECK-LABEL: define <4 x float> @cosh_4x 1289013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1290013235a2SBen Shi; CHECK-NEXT: entry: 1291013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1292013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1293013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]]) 1294013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1295013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1296013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]]) 1297013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1298013235a2SBen Shi; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
1299013235a2SBen Shi; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]]) 1300013235a2SBen Shi; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1301013235a2SBen Shi; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1302013235a2SBen Shi; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]]) 1303013235a2SBen Shi; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1304013235a2SBen Shi; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1305013235a2SBen Shi; 1306013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @cosh_4x 1307013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1308013235a2SBen Shi; DEFAULT-NEXT: entry: 1309013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1310013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1311013235a2SBen Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]]) 1312013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1313013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1314013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]]) 1315013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1316013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1317013235a2SBen Shi; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]]) 1318013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1319013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1320013235a2SBen Shi; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]]) 
1321013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1322013235a2SBen Shi; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1323013235a2SBen Shi; 1324013235a2SBen Shientry: 1325013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 1326013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 1327013235a2SBen Shi %1 = tail call fast float @coshf(float %vecext) 1328013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 1329013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 1330013235a2SBen Shi %2 = tail call fast float @coshf(float %vecext.1) 1331013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1332013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 1333013235a2SBen Shi %3 = tail call fast float @coshf(float %vecext.2) 1334013235a2SBen Shi %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1335013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 1336013235a2SBen Shi %4 = tail call fast float @coshf(float %vecext.3) 1337013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1338013235a2SBen Shi ret <4 x float> %vecins.3 1339013235a2SBen Shi} 1340013235a2SBen Shi 1341013235a2SBen Shideclare float @llvm.cosh.f32(float) 1342013235a2SBen Shi 1343013235a2SBen Shi; We cannot vectorize cosh since RISCV has no such instruction. 
1344013235a2SBen Shidefine <4 x float> @int_cosh_4x(ptr %a) { 1345013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_cosh_4x 1346013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1347013235a2SBen Shi; CHECK-NEXT: entry: 1348013235a2SBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1349013235a2SBen Shi; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1350013235a2SBen Shi; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]]) 1351013235a2SBen Shi; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1352013235a2SBen Shi; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1353013235a2SBen Shi; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]]) 1354013235a2SBen Shi; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1355013235a2SBen Shi; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1356013235a2SBen Shi; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]]) 1357013235a2SBen Shi; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1358013235a2SBen Shi; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1359013235a2SBen Shi; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]]) 1360013235a2SBen Shi; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1361013235a2SBen Shi; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1362013235a2SBen Shi; 1363013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_cosh_4x 1364013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1365013235a2SBen Shi; DEFAULT-NEXT: entry: 1366013235a2SBen Shi; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1367013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> 
[[TMP0]], i32 0 1368013235a2SBen Shi; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]]) 1369013235a2SBen Shi; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1370013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1371013235a2SBen Shi; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]]) 1372013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1373013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1374013235a2SBen Shi; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]]) 1375013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1376013235a2SBen Shi; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1377013235a2SBen Shi; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]]) 1378013235a2SBen Shi; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1379013235a2SBen Shi; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1380013235a2SBen Shi; 1381013235a2SBen Shientry: 1382013235a2SBen Shi %0 = load <4 x float>, ptr %a, align 16 1383013235a2SBen Shi %vecext = extractelement <4 x float> %0, i32 0 1384013235a2SBen Shi %1 = tail call fast float @llvm.cosh.f32(float %vecext) 1385013235a2SBen Shi %vecins = insertelement <4 x float> undef, float %1, i32 0 1386013235a2SBen Shi %vecext.1 = extractelement <4 x float> %0, i32 1 1387013235a2SBen Shi %2 = tail call fast float @llvm.cosh.f32(float %vecext.1) 1388013235a2SBen Shi %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1389013235a2SBen Shi %vecext.2 = extractelement <4 x float> %0, i32 2 1390013235a2SBen Shi %3 = tail call fast float @llvm.cosh.f32(float %vecext.2) 1391013235a2SBen Shi 
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1392013235a2SBen Shi %vecext.3 = extractelement <4 x float> %0, i32 3 1393013235a2SBen Shi %4 = tail call fast float @llvm.cosh.f32(float %vecext.3) 1394013235a2SBen Shi %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1395013235a2SBen Shi ret <4 x float> %vecins.3 1396013235a2SBen Shi} 1397013235a2SBen Shi 1398*ceb613a8SSimon Pilgrimdeclare float @acoshf(float) readonly nounwind willreturn 1399*ceb613a8SSimon Pilgrim 1400*ceb613a8SSimon Pilgrim; We cannot vectorize acosh since RISCV has no such instruction. 1401*ceb613a8SSimon Pilgrimdefine <4 x float> @acosh_4x(ptr %a) { 1402*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @acosh_4x 1403*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1404*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1405*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1406*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1407*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]]) 1408*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1409*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1410*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]]) 1411*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1412*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1413*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]]) 1414*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1415*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 
x float> [[TMP0]], i32 3 1416*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]]) 1417*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1418*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1419*ceb613a8SSimon Pilgrim; 1420*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @acosh_4x 1421*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1422*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 1423*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1424*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1425*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]]) 1426*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1427*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1428*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]]) 1429*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1430*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1431*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]]) 1432*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1433*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1434*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]]) 1435*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1436*ceb613a8SSimon Pilgrim; 
DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1437*ceb613a8SSimon Pilgrim; 1438*ceb613a8SSimon Pilgrimentry: 1439*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1440*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1441*ceb613a8SSimon Pilgrim %1 = tail call fast float @acoshf(float %vecext) 1442*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1443*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 1444*ceb613a8SSimon Pilgrim %2 = tail call fast float @acoshf(float %vecext.1) 1445*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1446*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1447*ceb613a8SSimon Pilgrim %3 = tail call fast float @acoshf(float %vecext.2) 1448*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1449*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1450*ceb613a8SSimon Pilgrim %4 = tail call fast float @acoshf(float %vecext.3) 1451*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1452*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1453*ceb613a8SSimon Pilgrim} 1454*ceb613a8SSimon Pilgrim 1455*ceb613a8SSimon Pilgrimdeclare float @llvm.acosh.f32(float) 1456*ceb613a8SSimon Pilgrim 1457*ceb613a8SSimon Pilgrim; We cannot vectorize acosh since RISCV has no such instruction. 
1458*ceb613a8SSimon Pilgrimdefine <4 x float> @int_acosh_4x(ptr %a) { 1459*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_acosh_4x 1460*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1461*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1462*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1463*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1464*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]]) 1465*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1466*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1467*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]]) 1468*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1469*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1470*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]]) 1471*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1472*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1473*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]]) 1474*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1475*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1476*ceb613a8SSimon Pilgrim; 1477*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_acosh_4x 1478*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1479*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 
1480*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1481*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1482*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]]) 1483*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1484*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1485*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]]) 1486*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1487*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1488*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]]) 1489*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1490*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1491*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]]) 1492*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1493*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1494*ceb613a8SSimon Pilgrim; 1495*ceb613a8SSimon Pilgrimentry: 1496*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1497*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1498*ceb613a8SSimon Pilgrim %1 = tail call fast float @llvm.acosh.f32(float %vecext) 1499*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1500*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 
1501*ceb613a8SSimon Pilgrim %2 = tail call fast float @llvm.acosh.f32(float %vecext.1) 1502*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1503*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1504*ceb613a8SSimon Pilgrim %3 = tail call fast float @llvm.acosh.f32(float %vecext.2) 1505*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1506*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1507*ceb613a8SSimon Pilgrim %4 = tail call fast float @llvm.acosh.f32(float %vecext.3) 1508*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1509*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1510*ceb613a8SSimon Pilgrim} 1511*ceb613a8SSimon Pilgrim 1512*ceb613a8SSimon Pilgrimdeclare float @tanhf(float) readonly nounwind willreturn 1513*ceb613a8SSimon Pilgrim 1514*ceb613a8SSimon Pilgrim; We cannot vectorize tanh since RISCV has no such instruction. 
1515*ceb613a8SSimon Pilgrimdefine <4 x float> @tanh_4x(ptr %a) { 1516*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @tanh_4x 1517*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1518*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1519*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1520*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1521*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]]) 1522*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1523*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1524*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]]) 1525*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1526*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1527*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]]) 1528*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1529*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1530*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]]) 1531*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1532*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1533*ceb613a8SSimon Pilgrim; 1534*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @tanh_4x 1535*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1536*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 1537*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = 
load <4 x float>, ptr [[A]], align 16 1538*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1539*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]]) 1540*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1541*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1542*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]]) 1543*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1544*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1545*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]]) 1546*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1547*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1548*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]]) 1549*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1550*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1551*ceb613a8SSimon Pilgrim; 1552*ceb613a8SSimon Pilgrimentry: 1553*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1554*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1555*ceb613a8SSimon Pilgrim %1 = tail call fast float @tanhf(float %vecext) 1556*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1557*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 1558*ceb613a8SSimon Pilgrim %2 = tail call fast float @tanhf(float %vecext.1) 1559*ceb613a8SSimon Pilgrim %vecins.1 = 
insertelement <4 x float> %vecins, float %2, i32 1 1560*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1561*ceb613a8SSimon Pilgrim %3 = tail call fast float @tanhf(float %vecext.2) 1562*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1563*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1564*ceb613a8SSimon Pilgrim %4 = tail call fast float @tanhf(float %vecext.3) 1565*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1566*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1567*ceb613a8SSimon Pilgrim} 1568*ceb613a8SSimon Pilgrim 1569*ceb613a8SSimon Pilgrimdeclare float @llvm.tanh.f32(float) 1570*ceb613a8SSimon Pilgrim 1571*ceb613a8SSimon Pilgrim; We cannot vectorize tanh since RISCV has no such instruction. 1572*ceb613a8SSimon Pilgrimdefine <4 x float> @int_tanh_4x(ptr %a) { 1573*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_tanh_4x 1574*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1575*ceb613a8SSimon Pilgrim; CHECK-NEXT: entry: 1576*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1577*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1578*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]]) 1579*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1580*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1581*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]]) 1582*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1583*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1584*ceb613a8SSimon Pilgrim; CHECK-NEXT: 
[[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]]) 1585*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1586*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 1587*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]]) 1588*ceb613a8SSimon Pilgrim; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1589*ceb613a8SSimon Pilgrim; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 1590*ceb613a8SSimon Pilgrim; 1591*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_tanh_4x 1592*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { 1593*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: entry: 1594*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 1595*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 1596*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]]) 1597*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 1598*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 1599*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]]) 1600*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 1601*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 1602*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]]) 1603*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 1604*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECEXT_3:%.*]] = 
extractelement <4 x float> [[TMP0]], i32 3 1605*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]]) 1606*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 1607*ceb613a8SSimon Pilgrim; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] 1608*ceb613a8SSimon Pilgrim; 1609*ceb613a8SSimon Pilgrimentry: 1610*ceb613a8SSimon Pilgrim %0 = load <4 x float>, ptr %a, align 16 1611*ceb613a8SSimon Pilgrim %vecext = extractelement <4 x float> %0, i32 0 1612*ceb613a8SSimon Pilgrim %1 = tail call fast float @llvm.tanh.f32(float %vecext) 1613*ceb613a8SSimon Pilgrim %vecins = insertelement <4 x float> undef, float %1, i32 0 1614*ceb613a8SSimon Pilgrim %vecext.1 = extractelement <4 x float> %0, i32 1 1615*ceb613a8SSimon Pilgrim %2 = tail call fast float @llvm.tanh.f32(float %vecext.1) 1616*ceb613a8SSimon Pilgrim %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 1617*ceb613a8SSimon Pilgrim %vecext.2 = extractelement <4 x float> %0, i32 2 1618*ceb613a8SSimon Pilgrim %3 = tail call fast float @llvm.tanh.f32(float %vecext.2) 1619*ceb613a8SSimon Pilgrim %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 1620*ceb613a8SSimon Pilgrim %vecext.3 = extractelement <4 x float> %0, i32 3 1621*ceb613a8SSimon Pilgrim %4 = tail call fast float @llvm.tanh.f32(float %vecext.3) 1622*ceb613a8SSimon Pilgrim %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 1623*ceb613a8SSimon Pilgrim ret <4 x float> %vecins.3 1624*ceb613a8SSimon Pilgrim} 1625*ceb613a8SSimon Pilgrim 1626013235a2SBen Shideclare float @atanhf(float) readonly nounwind willreturn 1627013235a2SBen Shi 1628013235a2SBen Shi; We can not vectorized atanh since RISCV has no such instruction. 
; Libm-call form of the test: four scalar fast-math calls to @atanhf, one per
; lane of a single <4 x float> load, rebuilt into a vector with insertelement.
; Both RUN configurations expect the scalar calls to be left in place, so the
; assertions under the two check prefixes are identical.
define <4 x float> @atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @atanhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @atanhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @atanhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @atanhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

declare float @llvm.atanh.f32(float)

; We cannot vectorize atanh since RISC-V has no such instruction.
; This variant uses the same extract/call/insert pattern as @atanh_4x, but
; calls @llvm.atanh.f32 instead of the libm symbol. Both check prefixes again
; expect the four scalar calls to remain unvectorized.
define <4 x float> @int_atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.atanh.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}