; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+f,+d -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
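
; These tests check that a pair of fpext operands feeding an fmul is combined
; into a single widening multiply (vfwmul.vv), and that a splatted scalar
; operand folds into the vfwmul.vf form.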

define <2 x float> @vfwmul_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %e = fmul <2 x float> %c, %d
  ret <2 x float> %e
}

define <4 x float> @vfwmul_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %a to <4 x float>
  %d = fpext <4 x half> %b to <4 x float>
  %e = fmul <4 x float> %c, %d
  ret <4 x float> %e
}

define <8 x float> @vfwmul_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %a to <8 x float>
  %d = fpext <8 x half> %b to <8 x float>
  %e = fmul <8 x float> %c, %d
  ret <8 x float> %e
}

define <16 x float> @vfwmul_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %a to <16 x float>
  %d = fpext <16 x half> %b to <16 x float>
  %e = fmul <16 x float> %c, %d
  ret <16 x float> %e
}

define <32 x float> @vfwmul_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %a to <32 x float>
  %d = fpext <32 x half> %b to <32 x float>
  %e = fmul <32 x float> %c, %d
  ret <32 x float> %e
}
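
; A 64-element widening multiply does not fit in a single LMUL=8 operation, so
; the inputs are split in half with vslidedown and vector registers are
; spilled around the second vfwmul.vv.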
define <64 x float> @vfwmul_v64f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x half>, ptr %x
  %b = load <64 x half>, ptr %y
  %c = fpext <64 x half> %a to <64 x float>
  %d = fpext <64 x half> %b to <64 x float>
  %e = fmul <64 x float> %c, %d
  ret <64 x float> %e
}

define <2 x double> @vfwmul_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %e = fmul <2 x double> %c, %d
  ret <2 x double> %e
}

define <4 x double> @vfwmul_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %a to <4 x double>
  %d = fpext <4 x float> %b to <4 x double>
  %e = fmul <4 x double> %c, %d
  ret <4 x double> %e
}

define <8 x double> @vfwmul_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %a to <8 x double>
  %d = fpext <8 x float> %b to <8 x double>
  %e = fmul <8 x double> %c, %d
  ret <8 x double> %e
}

define <16 x double> @vfwmul_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %a to <16 x double>
  %d = fpext <16 x float> %b to <16 x double>
  %e = fmul <16 x double> %c, %d
  ret <16 x double> %e
}

define <32 x double> @vfwmul_v32f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x float>, ptr %y
  %c = fpext <32 x float> %a to <32 x double>
  %d = fpext <32 x float> %b to <32 x double>
  %e = fmul <32 x double> %c, %d
  ret <32 x double> %e
}
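
; Check that a splat of the scalar operand is folded into vfwmul.vf.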
define <2 x float> @vfwmul_vf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %a to <2 x float>
  %e = fpext <2 x half> %c to <2 x float>
  %f = fmul <2 x float> %d, %e
  ret <2 x float> %f
}

define <4 x float> @vfwmul_vf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %a to <4 x float>
  %e = fpext <4 x half> %c to <4 x float>
  %f = fmul <4 x float> %d, %e
  ret <4 x float> %f
}

define <8 x float> @vfwmul_vf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %a to <8 x float>
  %e = fpext <8 x half> %c to <8 x float>
  %f = fmul <8 x float> %d, %e
  ret <8 x float> %f
}

define <16 x float> @vfwmul_vf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %a to <16 x float>
  %e = fpext <16 x half> %c to <16 x float>
  %f = fmul <16 x float> %d, %e
  ret <16 x float> %f
}

define <32 x float> @vfwmul_vf_v32f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = insertelement <32 x half> poison, half %y, i32 0
  %c = shufflevector <32 x half> %b, <32 x half> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x half> %a to <32 x float>
  %e = fpext <32 x half> %c to <32 x float>
  %f = fmul <32 x float> %d, %e
  ret <32 x float> %f
}

define <2 x double> @vfwmul_vf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %a to <2 x double>
  %e = fpext <2 x float> %c to <2 x double>
  %f = fmul <2 x double> %d, %e
  ret <2 x double> %f
}

define <4 x double> @vfwmul_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %a to <4 x double>
  %e = fpext <4 x float> %c to <4 x double>
  %f = fmul <4 x double> %d, %e
  ret <4 x double> %f
}

define <8 x double> @vfwmul_vf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %a to <8 x double>
  %e = fpext <8 x float> %c to <8 x double>
  %f = fmul <8 x double> %d, %e
  ret <8 x double> %f
}

define <16 x double> @vfwmul_vf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %a to <16 x double>
  %e = fpext <16 x float> %c to <16 x double>
  %f = fmul <16 x double> %d, %e
  ret <16 x double> %f
}

define <32 x double> @vfwmul_vf_v32f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v24, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
; CHECK-NEXT:    vfwmul.vf v8, v24, fa0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = insertelement <32 x float> poison, float %y, i32 0
  %c = shufflevector <32 x float> %b, <32 x float> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x float> %a to <32 x double>
  %e = fpext <32 x float> %c to <32 x double>
  %f = fmul <32 x double> %d, %e
  ret <32 x double> %f
}
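
; Check that vfwmul.vv is formed even when both fmul operands are the same
; fpext, and that an f16-to-f64 extension (two steps) goes through vfwcvt
; before the widening multiply.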
define <2 x float> @vfwmul_squared_v2f16_v2f32(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f16_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = fpext <2 x half> %a to <2 x float>
  %c = fmul <2 x float> %b, %b
  ret <2 x float> %c
}

define <2 x double> @vfwmul_squared_v2f32_v2f64(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f32_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = fpext <2 x float> %a to <2 x double>
  %c = fmul <2 x double> %b, %b
  ret <2 x double> %c
}

define <2 x double> @vfwmul_squared_v2f16_v2f64(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f16_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = fpext <2 x half> %a to <2 x double>
  %c = fmul <2 x double> %b, %b
  ret <2 x double> %c
}

define <2 x float> @vfwmul_vf2_v2f32(<2 x half> %x, half %y) {
; CHECK-LABEL: vfwmul_vf2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %a = fpext <2 x half> %x to <2 x float>
  %b = fpext half %y to float
  %c = insertelement <2 x float> poison, float %b, i32 0
  %d = shufflevector <2 x float> %c, <2 x float> poison, <2 x i32> zeroinitializer
  %e = fmul <2 x float> %a, %d
  ret <2 x float> %e
}