; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <2 x i16> @vwmulu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <4 x i16> @vwmulu_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = zext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmulu_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = zext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmulu_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = zext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmulu_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmulu_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = zext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmulu_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = zext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmulu_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = zext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmulu_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmulu_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = zext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmulu_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = zext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmulu_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = zext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmulu_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = zext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmulu_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = zext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmulu_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = zext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}

define <128 x i16> @vwmulu_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vwmulu.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = zext <128 x i8> %a to <128 x i16>
  %d = zext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmulu_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vwmulu.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = zext <64 x i16> %a to <64 x i32>
  %d = zext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmulu_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vwmulu.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = zext <32 x i32> %a to <32 x i64>
  %d = zext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}

define <2 x i32> @vwmulu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vwmulu.vv v10, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i32>
  %d = zext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmulu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vwmulu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i8> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmulu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vzext.vf4 v11, v8
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <2 x i16> @vwmulu_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmulu_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmulu_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmulu_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulu_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmulu_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmulu_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmulu_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmulu_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmulu_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmulu_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmulu_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmulu_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = zext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmulu_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmulu_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}

define <8 x i16> @vwmulu_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmulu_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v8i16_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vzext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lhu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lw a0, 0(a1)
; CHECK-NEXT:    vzext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lb a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lb a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lh a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lh a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lw a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = zext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i64(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    ld a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}