; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
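
; Check that a mul of one sign-extended and one zero-extended operand selects
; vwmulsu. vwmulsu.vv vd, vs2, vs1 multiplies signed(vs2) by unsigned(vs1),
; so the zero-extended operand must land in the vs1 (or, for .vx, rs1)
; position regardless of its order in the IR.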

define <2 x i16> @vwmulsu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <2 x i16> @vwmulsu_v2i16_swap(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i16_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <4 x i16> @vwmulsu_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmulsu_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmulsu_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmulsu_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmulsu_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmulsu_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmulsu_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmulsu_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmulsu_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmulsu_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmulsu_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmulsu_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmulsu_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmulsu_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}
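
; For the following widenings the sources already need a full LMUL=8 register
; group, so the widened result does not fit in one group; codegen splits the
; multiply in half with vslidedown and spills one partial result around the
; second vwmulsu.vv.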
define <128 x i16> @vwmulsu_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = zext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmulsu_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = zext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmulsu_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = zext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}
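
; When the two sources have different element widths, the narrower source is
; first extended to the wider source type (vzext/vsext) and only the final
; 2x widening is done by vwmulsu.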
define <2 x i32> @vwmulsu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vsext.vf2 v11, v9
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmulsu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vwmulsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmulsu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vsext.vf4 v11, v8
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}
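
; Splat sources should fold into vwmulsu.vx. The scalar rs1 is the unsigned
; operand, so this only applies when the splat is the zero-extended side; the
; _swap variants splat the sign-extended scalar with vmv.v.x and use
; vwmulsu.vv instead.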
define <2 x i16> @vwmulsu_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <2 x i16> @vwmulsu_vx_v2i16_swap(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i16_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmulsu_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmulsu_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmulsu_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulsu_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmulsu_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmulsu_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmulsu_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmulsu_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmulsu_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmulsu_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmulsu_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmulsu_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmulsu_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmulsu_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}
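
; Here the scalar operand is loaded from memory, so its extension is explicit
; in the IR. A zero-extended scalar narrower than the source element type is
; non-negative under both interpretations, which is why some of these select
; vwmul.vx rather than vwmulsu.vx.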
define <8 x i16> @vwmulsu_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lb a1, 0(a1)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmulsu_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulsu_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lhu a0, 0(a1)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lbu a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lhu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lhu a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lwu a0, 0(a1)
; RV64-NEXT:    vwmulsu.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = zext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}
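
; A scalar made non-negative by an 'and' or a zext can be used directly as
; the unsigned operand of vwmulsu.vx. Only the full low-byte mask (255) folds
; away entirely; the 254 mask in _and1 must still be materialized with andi.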
define <8 x i16> @vwmulsu_vx_v8i16_i8_and(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = and i16 %y, 255
  %c = insertelement <8 x i16> poison, i16 %b, i32 0
  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
  %e = sext <8 x i8> %a to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <8 x i16> @vwmulsu_vx_v8i16_i8_and1(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    andi a0, a1, 254
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = and i16 %y, 254
  %c = insertelement <8 x i16> poison, i16 %b, i32 0
  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
  %e = sext <8 x i8> %a to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulsu_vx_v4i32_i16_and(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = and i32 %y, 65535
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <4 x i32> @vwmulsu_vx_v4i32_i16_zext(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_zext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = zext i16 %y to i32
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}