; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+f,+d -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

define <2 x float> @vfwadd_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %e = fadd <2 x float> %c, %d
  ret <2 x float> %e
}

define <4 x float> @vfwadd_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %a to <4 x float>
  %d = fpext <4 x half> %b to <4 x float>
  %e = fadd <4 x float> %c, %d
  ret <4 x float> %e
}

define <8 x float> @vfwadd_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %a to <8 x float>
  %d = fpext <8 x half> %b to <8 x float>
  %e = fadd <8 x float> %c, %d
  ret <8 x float> %e
}

define <16 x float> @vfwadd_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %a to <16 x float>
  %d = fpext <16 x half> %b to <16 x float>
  %e = fadd <16 x float> %c, %d
  ret <16 x float> %e
}

define <32 x float> @vfwadd_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %a to <32 x float>
  %d = fpext <32 x half> %b to <32 x float>
  %e = fadd <32 x float> %c, %d
  ret <32 x float> %e
}

define <64 x float> @vfwadd_v64f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwadd.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwadd.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x half>, ptr %x
  %b = load <64 x half>, ptr %y
  %c = fpext <64 x half> %a to <64 x float>
  %d = fpext <64 x half> %b to <64 x float>
  %e = fadd <64 x float> %c, %d
  ret <64 x float> %e
}

define <2 x double> @vfwadd_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %e = fadd <2 x double> %c, %d
  ret <2 x double> %e
}

define <4 x double> @vfwadd_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %a to <4 x double>
  %d = fpext <4 x float> %b to <4 x double>
  %e = fadd <4 x double> %c, %d
  ret <4 x double> %e
}

define <8 x double> @vfwadd_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %a to <8 x double>
  %d = fpext <8 x float> %b to <8 x double>
  %e = fadd <8 x double> %c, %d
  ret <8 x double> %e
}

define <16 x double> @vfwadd_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %a to <16 x double>
  %d = fpext <16 x float> %b to <16 x double>
  %e = fadd <16 x double> %c, %d
  ret <16 x double> %e
}

define <32 x double> @vfwadd_v32f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwadd.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwadd.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x float>, ptr %y
  %c = fpext <32 x float> %a to <32 x double>
  %d = fpext <32 x float> %b to <32 x double>
  %e = fadd <32 x double> %c, %d
  ret <32 x double> %e
}

define <2 x float> @vfwadd_vf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %a to <2 x float>
  %e = fpext <2 x half> %c to <2 x float>
  %f = fadd <2 x float> %d, %e
  ret <2 x float> %f
}

define <4 x float> @vfwadd_vf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %a to <4 x float>
  %e = fpext <4 x half> %c to <4 x float>
  %f = fadd <4 x float> %d, %e
  ret <4 x float> %f
}

define <8 x float> @vfwadd_vf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %a to <8 x float>
  %e = fpext <8 x half> %c to <8 x float>
  %f = fadd <8 x float> %d, %e
  ret <8 x float> %f
}

define <16 x float> @vfwadd_vf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %a to <16 x float>
  %e = fpext <16 x half> %c to <16 x float>
  %f = fadd <16 x float> %d, %e
  ret <16 x float> %f
}

define <32 x float> @vfwadd_vf_v32f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = insertelement <32 x half> poison, half %y, i32 0
  %c = shufflevector <32 x half> %b, <32 x half> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x half> %a to <32 x float>
  %e = fpext <32 x half> %c to <32 x float>
  %f = fadd <32 x float> %d, %e
  ret <32 x float> %f
}

define <2 x double> @vfwadd_vf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %a to <2 x double>
  %e = fpext <2 x float> %c to <2 x double>
  %f = fadd <2 x double> %d, %e
  ret <2 x double> %f
}

define <4 x double> @vfwadd_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %a to <4 x double>
  %e = fpext <4 x float> %c to <4 x double>
  %f = fadd <4 x double> %d, %e
  ret <4 x double> %f
}

define <8 x double> @vfwadd_vf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %a to <8 x double>
  %e = fpext <8 x float> %c to <8 x double>
  %f = fadd <8 x double> %d, %e
  ret <8 x double> %f
}

define <16 x double> @vfwadd_vf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %a to <16 x double>
  %e = fpext <16 x float> %c to <16 x double>
  %f = fadd <16 x double> %d, %e
  ret <16 x double> %f
}

define <32 x double> @vfwadd_vf_v32f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v24, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwadd.vf v16, v8, fa0
; CHECK-NEXT:    vfwadd.vf v8, v24, fa0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = insertelement <32 x float> poison, float %y, i32 0
  %c = shufflevector <32 x float> %b, <32 x float> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x float> %a to <32 x double>
  %e = fpext <32 x float> %c to <32 x double>
  %f = fadd <32 x double> %d, %e
  ret <32 x double> %f
}

define <2 x float> @vfwadd_wv_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %b to <2 x float>
  %d = fadd <2 x float> %c, %a
  ret <2 x float> %d
}

define <4 x float> @vfwadd_wv_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %b to <4 x float>
  %d = fadd <4 x float> %c, %a
  ret <4 x float> %d
}

define <8 x float> @vfwadd_wv_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v10
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %b to <8 x float>
  %d = fadd <8 x float> %c, %a
  ret <8 x float> %d
}

define <16 x float> @vfwadd_wv_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v12, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v12
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %b to <16 x float>
  %d = fadd <16 x float> %c, %a
  ret <16 x float> %d
}

define <32 x float> @vfwadd_wv_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v16
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %b to <32 x float>
  %d = fadd <32 x float> %c, %a
  ret <32 x float> %d
}

define <2 x double> @vfwadd_wv_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %b to <2 x double>
  %d = fadd <2 x double> %c, %a
  ret <2 x double> %d
}

define <4 x double> @vfwadd_wv_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v10
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %b to <4 x double>
  %d = fadd <4 x double> %c, %a
  ret <4 x double> %d
}

define <8 x double> @vfwadd_wv_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v12, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v12
; CHECK-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %b to <8 x double>
  %d = fadd <8 x double> %c, %a
  ret <8 x double> %d
}

define <16 x double> @vfwadd_wv_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v16, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v16
; CHECK-NEXT:    ret
  %a = load <16 x double>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %b to <16 x double>
  %d = fadd <16 x double> %c, %a
  ret <16 x double> %d
}

define <2 x float> @vfwadd_wf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %c to <2 x float>
  %e = fadd <2 x float> %d, %a
  ret <2 x float> %e
}

define <4 x float> @vfwadd_wf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %c to <4 x float>
  %e = fadd <4 x float> %d, %a
  ret <4 x float> %e
}

define <8 x float> @vfwadd_wf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %c to <8 x float>
  %e = fadd <8 x float> %d, %a
  ret <8 x float> %e
}

define <16 x float> @vfwadd_wf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %c to <16 x float>
  %e = fadd <16 x float> %d, %a
  ret <16 x float> %e
}

define <2 x double> @vfwadd_wf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %c to <2 x double>
  %e = fadd <2 x double> %d, %a
  ret <2 x double> %e
}

define <4 x double> @vfwadd_wf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %c to <4 x double>
  %e = fadd <4 x double> %d, %a
  ret <4 x double> %e
}

define <8 x double> @vfwadd_wf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %c to <8 x double>
  %e = fadd <8 x double> %d, %a
  ret <8 x double> %e
}

define <16 x double> @vfwadd_wf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <16 x double>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %c to <16 x double>
  %e = fadd <16 x double> %d, %a
  ret <16 x double> %e
}

define <2 x float> @vfwadd_vf2_v2f32(<2 x half> %x, half %y) {
; CHECK-LABEL: vfwadd_vf2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %a = fpext <2 x half> %x to <2 x float>
  %b = fpext half %y to float
  %c = insertelement <2 x float> poison, float %b, i32 0
  %d = shufflevector <2 x float> %c, <2 x float> poison, <2 x i32> zeroinitializer
  %e = fadd <2 x float> %a, %d
  ret <2 x float> %e
}

define <2 x float> @vfwadd_wf2_v2f32(<2 x float> %x, half %y) {
; CHECK-LABEL: vfwadd_wf2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %b = fpext half %y to float
  %c = insertelement <2 x float> poison, float %b, i32 0
  %d = shufflevector <2 x float> %c, <2 x float> poison, <2 x i32> zeroinitializer
  %e = fadd <2 x float> %x, %d
  ret <2 x float> %e
}