; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

; Fixed-length vector tests for selecting the widening signed-by-unsigned
; multiply-accumulate. Both llc invocations above share the default FileCheck
; prefix, so the expected assembly must be identical on riscv32 and riscv64.
;
; Pattern under test in every function:
;   acc = %z + (sext(narrow operand) * zext(narrow operand))
;
; The first group loads both narrow operands from memory and expects the
; vector-vector form. The second group splats a scalar for the sign-extended
; operand and expects the vector-scalar form.

;===----------------------------------------------------------------------===;
; Vector-vector: sext(load %x) * zext(load %y) + %z
;===----------------------------------------------------------------------===;

; 2 lanes, i8 sources widened to i16.
define <2 x i16> @vwmaccsu_v2i16(ptr %x, ptr %y, <2 x i16> %z) {
; CHECK-LABEL: vwmaccsu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %sx = load <2 x i8>, ptr %x
  %ux = load <2 x i8>, ptr %y
  %sx.ext = sext <2 x i8> %sx to <2 x i16>
  %ux.ext = zext <2 x i8> %ux to <2 x i16>
  %prod = mul <2 x i16> %sx.ext, %ux.ext
  %acc = add <2 x i16> %prod, %z
  ret <2 x i16> %acc
}

; 4 lanes, i8 sources widened to i16.
define <4 x i16> @vwmaccsu_v4i16(ptr %x, ptr %y, <4 x i16> %z) {
; CHECK-LABEL: vwmaccsu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %sx = load <4 x i8>, ptr %x
  %ux = load <4 x i8>, ptr %y
  %sx.ext = sext <4 x i8> %sx to <4 x i16>
  %ux.ext = zext <4 x i8> %ux to <4 x i16>
  %prod = mul <4 x i16> %sx.ext, %ux.ext
  %acc = add <4 x i16> %prod, %z
  ret <4 x i16> %acc
}

; 2 lanes, i16 sources widened to i32.
define <2 x i32> @vwmaccsu_v2i32(ptr %x, ptr %y, <2 x i32> %z) {
; CHECK-LABEL: vwmaccsu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %sx = load <2 x i16>, ptr %x
  %ux = load <2 x i16>, ptr %y
  %sx.ext = sext <2 x i16> %sx to <2 x i32>
  %ux.ext = zext <2 x i16> %ux to <2 x i32>
  %prod = mul <2 x i32> %sx.ext, %ux.ext
  %acc = add <2 x i32> %prod, %z
  ret <2 x i32> %acc
}

; 8 lanes, i8 sources widened to i16.
define <8 x i16> @vwmaccsu_v8i16(ptr %x, ptr %y, <8 x i16> %z) {
; CHECK-LABEL: vwmaccsu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %sx = load <8 x i8>, ptr %x
  %ux = load <8 x i8>, ptr %y
  %sx.ext = sext <8 x i8> %sx to <8 x i16>
  %ux.ext = zext <8 x i8> %ux to <8 x i16>
  %prod = mul <8 x i16> %sx.ext, %ux.ext
  %acc = add <8 x i16> %prod, %z
  ret <8 x i16> %acc
}

; 4 lanes, i16 sources widened to i32.
define <4 x i32> @vwmaccsu_v4i32(ptr %x, ptr %y, <4 x i32> %z) {
; CHECK-LABEL: vwmaccsu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %sx = load <4 x i16>, ptr %x
  %ux = load <4 x i16>, ptr %y
  %sx.ext = sext <4 x i16> %sx to <4 x i32>
  %ux.ext = zext <4 x i16> %ux to <4 x i32>
  %prod = mul <4 x i32> %sx.ext, %ux.ext
  %acc = add <4 x i32> %prod, %z
  ret <4 x i32> %acc
}

; 2 lanes, i32 sources widened to i64.
define <2 x i64> @vwmaccsu_v2i64(ptr %x, ptr %y, <2 x i64> %z) {
; CHECK-LABEL: vwmaccsu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %sx = load <2 x i32>, ptr %x
  %ux = load <2 x i32>, ptr %y
  %sx.ext = sext <2 x i32> %sx to <2 x i64>
  %ux.ext = zext <2 x i32> %ux to <2 x i64>
  %prod = mul <2 x i64> %sx.ext, %ux.ext
  %acc = add <2 x i64> %prod, %z
  ret <2 x i64> %acc
}

; 16 lanes, i8 sources widened to i16.
define <16 x i16> @vwmaccsu_v16i16(ptr %x, ptr %y, <16 x i16> %z) {
; CHECK-LABEL: vwmaccsu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %sx = load <16 x i8>, ptr %x
  %ux = load <16 x i8>, ptr %y
  %sx.ext = sext <16 x i8> %sx to <16 x i16>
  %ux.ext = zext <16 x i8> %ux to <16 x i16>
  %prod = mul <16 x i16> %sx.ext, %ux.ext
  %acc = add <16 x i16> %prod, %z
  ret <16 x i16> %acc
}

; 8 lanes, i16 sources widened to i32.
define <8 x i32> @vwmaccsu_v8i32(ptr %x, ptr %y, <8 x i32> %z) {
; CHECK-LABEL: vwmaccsu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %sx = load <8 x i16>, ptr %x
  %ux = load <8 x i16>, ptr %y
  %sx.ext = sext <8 x i16> %sx to <8 x i32>
  %ux.ext = zext <8 x i16> %ux to <8 x i32>
  %prod = mul <8 x i32> %sx.ext, %ux.ext
  %acc = add <8 x i32> %prod, %z
  ret <8 x i32> %acc
}

; 4 lanes, i32 sources widened to i64.
define <4 x i64> @vwmaccsu_v4i64(ptr %x, ptr %y, <4 x i64> %z) {
; CHECK-LABEL: vwmaccsu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %sx = load <4 x i32>, ptr %x
  %ux = load <4 x i32>, ptr %y
  %sx.ext = sext <4 x i32> %sx to <4 x i64>
  %ux.ext = zext <4 x i32> %ux to <4 x i64>
  %prod = mul <4 x i64> %sx.ext, %ux.ext
  %acc = add <4 x i64> %prod, %z
  ret <4 x i64> %acc
}

; 32 lanes, i8 sources widened to i16. The expected assembly materialises the
; vector length in a2 and uses vsetvli rather than vsetivli.
define <32 x i16> @vwmaccsu_v32i16(ptr %x, ptr %y, <32 x i16> %z) {
; CHECK-LABEL: vwmaccsu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %sx = load <32 x i8>, ptr %x
  %ux = load <32 x i8>, ptr %y
  %sx.ext = sext <32 x i8> %sx to <32 x i16>
  %ux.ext = zext <32 x i8> %ux to <32 x i16>
  %prod = mul <32 x i16> %sx.ext, %ux.ext
  %acc = add <32 x i16> %prod, %z
  ret <32 x i16> %acc
}

; 16 lanes, i16 sources widened to i32.
define <16 x i32> @vwmaccsu_v16i32(ptr %x, ptr %y, <16 x i32> %z) {
; CHECK-LABEL: vwmaccsu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %sx = load <16 x i16>, ptr %x
  %ux = load <16 x i16>, ptr %y
  %sx.ext = sext <16 x i16> %sx to <16 x i32>
  %ux.ext = zext <16 x i16> %ux to <16 x i32>
  %prod = mul <16 x i32> %sx.ext, %ux.ext
  %acc = add <16 x i32> %prod, %z
  ret <16 x i32> %acc
}

; 8 lanes, i32 sources widened to i64.
define <8 x i64> @vwmaccsu_v8i64(ptr %x, ptr %y, <8 x i64> %z) {
; CHECK-LABEL: vwmaccsu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %sx = load <8 x i32>, ptr %x
  %ux = load <8 x i32>, ptr %y
  %sx.ext = sext <8 x i32> %sx to <8 x i64>
  %ux.ext = zext <8 x i32> %ux to <8 x i64>
  %prod = mul <8 x i64> %sx.ext, %ux.ext
  %acc = add <8 x i64> %prod, %z
  ret <8 x i64> %acc
}

; 64 lanes, i8 sources widened to i16. Vector length is passed through a2.
define <64 x i16> @vwmaccsu_v64i16(ptr %x, ptr %y, <64 x i16> %z) {
; CHECK-LABEL: vwmaccsu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %sx = load <64 x i8>, ptr %x
  %ux = load <64 x i8>, ptr %y
  %sx.ext = sext <64 x i8> %sx to <64 x i16>
  %ux.ext = zext <64 x i8> %ux to <64 x i16>
  %prod = mul <64 x i16> %sx.ext, %ux.ext
  %acc = add <64 x i16> %prod, %z
  ret <64 x i16> %acc
}

; 32 lanes, i16 sources widened to i32. Vector length is passed through a2.
define <32 x i32> @vwmaccsu_v32i32(ptr %x, ptr %y, <32 x i32> %z) {
; CHECK-LABEL: vwmaccsu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %sx = load <32 x i16>, ptr %x
  %ux = load <32 x i16>, ptr %y
  %sx.ext = sext <32 x i16> %sx to <32 x i32>
  %ux.ext = zext <32 x i16> %ux to <32 x i32>
  %prod = mul <32 x i32> %sx.ext, %ux.ext
  %acc = add <32 x i32> %prod, %z
  ret <32 x i32> %acc
}

; 16 lanes, i32 sources widened to i64.
define <16 x i64> @vwmaccsu_v16i64(ptr %x, ptr %y, <16 x i64> %z) {
; CHECK-LABEL: vwmaccsu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmaccsu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %sx = load <16 x i32>, ptr %x
  %ux = load <16 x i32>, ptr %y
  %sx.ext = sext <16 x i32> %sx to <16 x i64>
  %ux.ext = zext <16 x i32> %ux to <16 x i64>
  %prod = mul <16 x i64> %sx.ext, %ux.ext
  %acc = add <16 x i64> %prod, %z
  ret <16 x i64> %acc
}

;===----------------------------------------------------------------------===;
; Vector-scalar: zext(load %x) * sext(splat %y) + %z
;===----------------------------------------------------------------------===;

; 2 lanes, i8 sources widened to i16; scalar %y is splatted then sign-extended.
define <2 x i16> @vwmaccsu_vx_v2i16(ptr %x, i8 %y, <2 x i16> %z) {
; CHECK-LABEL: vwmaccsu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %vec = load <2 x i8>, ptr %x
  %ins = insertelement <2 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <2 x i8> %ins, <2 x i8> poison, <2 x i32> zeroinitializer
  %vec.zext = zext <2 x i8> %vec to <2 x i16>
  %splat.sext = sext <2 x i8> %splat to <2 x i16>
  %prod = mul <2 x i16> %vec.zext, %splat.sext
  %acc = add <2 x i16> %prod, %z
  ret <2 x i16> %acc
}

; 4 lanes, i8 sources widened to i16.
define <4 x i16> @vwmaccsu_vx_v4i16(ptr %x, i8 %y, <4 x i16> %z) {
; CHECK-LABEL: vwmaccsu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %vec = load <4 x i8>, ptr %x
  %ins = insertelement <4 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <4 x i8> %ins, <4 x i8> poison, <4 x i32> zeroinitializer
  %vec.zext = zext <4 x i8> %vec to <4 x i16>
  %splat.sext = sext <4 x i8> %splat to <4 x i16>
  %prod = mul <4 x i16> %vec.zext, %splat.sext
  %acc = add <4 x i16> %prod, %z
  ret <4 x i16> %acc
}

; 2 lanes, i16 sources widened to i32.
define <2 x i32> @vwmaccsu_vx_v2i32(ptr %x, i16 %y, <2 x i32> %z) {
; CHECK-LABEL: vwmaccsu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %vec = load <2 x i16>, ptr %x
  %ins = insertelement <2 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> zeroinitializer
  %vec.zext = zext <2 x i16> %vec to <2 x i32>
  %splat.sext = sext <2 x i16> %splat to <2 x i32>
  %prod = mul <2 x i32> %vec.zext, %splat.sext
  %acc = add <2 x i32> %prod, %z
  ret <2 x i32> %acc
}

; 8 lanes, i8 sources widened to i16.
define <8 x i16> @vwmaccsu_vx_v8i16(ptr %x, i8 %y, <8 x i16> %z) {
; CHECK-LABEL: vwmaccsu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %vec = load <8 x i8>, ptr %x
  %ins = insertelement <8 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <8 x i8> %ins, <8 x i8> poison, <8 x i32> zeroinitializer
  %vec.zext = zext <8 x i8> %vec to <8 x i16>
  %splat.sext = sext <8 x i8> %splat to <8 x i16>
  %prod = mul <8 x i16> %vec.zext, %splat.sext
  %acc = add <8 x i16> %prod, %z
  ret <8 x i16> %acc
}

; 4 lanes, i16 sources widened to i32.
define <4 x i32> @vwmaccsu_vx_v4i32(ptr %x, i16 %y, <4 x i32> %z) {
; CHECK-LABEL: vwmaccsu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %vec = load <4 x i16>, ptr %x
  %ins = insertelement <4 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <4 x i16> %ins, <4 x i16> poison, <4 x i32> zeroinitializer
  %vec.zext = zext <4 x i16> %vec to <4 x i32>
  %splat.sext = sext <4 x i16> %splat to <4 x i32>
  %prod = mul <4 x i32> %vec.zext, %splat.sext
  %acc = add <4 x i32> %prod, %z
  ret <4 x i32> %acc
}

; 2 lanes, i32 sources widened to i64.
define <2 x i64> @vwmaccsu_vx_v2i64(ptr %x, i32 %y, <2 x i64> %z) {
; CHECK-LABEL: vwmaccsu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %vec = load <2 x i32>, ptr %x
  %ins = insertelement <2 x i32> poison, i32 %y, i64 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> poison, <2 x i32> zeroinitializer
  %vec.zext = zext <2 x i32> %vec to <2 x i64>
  %splat.sext = sext <2 x i32> %splat to <2 x i64>
  %prod = mul <2 x i64> %vec.zext, %splat.sext
  %acc = add <2 x i64> %prod, %z
  ret <2 x i64> %acc
}

; 16 lanes, i8 sources widened to i16.
define <16 x i16> @vwmaccsu_vx_v16i16(ptr %x, i8 %y, <16 x i16> %z) {
; CHECK-LABEL: vwmaccsu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v10
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %ins = insertelement <16 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %vec.zext = zext <16 x i8> %vec to <16 x i16>
  %splat.sext = sext <16 x i8> %splat to <16 x i16>
  %prod = mul <16 x i16> %vec.zext, %splat.sext
  %acc = add <16 x i16> %prod, %z
  ret <16 x i16> %acc
}

; 8 lanes, i16 sources widened to i32.
define <8 x i32> @vwmaccsu_vx_v8i32(ptr %x, i16 %y, <8 x i32> %z) {
; CHECK-LABEL: vwmaccsu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v10
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %ins = insertelement <8 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %vec.zext = zext <8 x i16> %vec to <8 x i32>
  %splat.sext = sext <8 x i16> %splat to <8 x i32>
  %prod = mul <8 x i32> %vec.zext, %splat.sext
  %acc = add <8 x i32> %prod, %z
  ret <8 x i32> %acc
}

; 4 lanes, i32 sources widened to i64.
define <4 x i64> @vwmaccsu_vx_v4i64(ptr %x, i32 %y, <4 x i64> %z) {
; CHECK-LABEL: vwmaccsu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v10
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %ins = insertelement <4 x i32> poison, i32 %y, i64 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %vec.zext = zext <4 x i32> %vec to <4 x i64>
  %splat.sext = sext <4 x i32> %splat to <4 x i64>
  %prod = mul <4 x i64> %vec.zext, %splat.sext
  %acc = add <4 x i64> %prod, %z
  ret <4 x i64> %acc
}

; 32 lanes, i8 sources widened to i16. Vector length is passed through a2.
define <32 x i16> @vwmaccsu_vx_v32i16(ptr %x, i8 %y, <32 x i16> %z) {
; CHECK-LABEL: vwmaccsu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v12
; CHECK-NEXT:    ret
  %vec = load <32 x i8>, ptr %x
  %ins = insertelement <32 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <32 x i8> %ins, <32 x i8> poison, <32 x i32> zeroinitializer
  %vec.zext = zext <32 x i8> %vec to <32 x i16>
  %splat.sext = sext <32 x i8> %splat to <32 x i16>
  %prod = mul <32 x i16> %vec.zext, %splat.sext
  %acc = add <32 x i16> %prod, %z
  ret <32 x i16> %acc
}

; 16 lanes, i16 sources widened to i32.
define <16 x i32> @vwmaccsu_vx_v16i32(ptr %x, i16 %y, <16 x i32> %z) {
; CHECK-LABEL: vwmaccsu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v12
; CHECK-NEXT:    ret
  %vec = load <16 x i16>, ptr %x
  %ins = insertelement <16 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <16 x i16> %ins, <16 x i16> poison, <16 x i32> zeroinitializer
  %vec.zext = zext <16 x i16> %vec to <16 x i32>
  %splat.sext = sext <16 x i16> %splat to <16 x i32>
  %prod = mul <16 x i32> %vec.zext, %splat.sext
  %acc = add <16 x i32> %prod, %z
  ret <16 x i32> %acc
}

; 8 lanes, i32 sources widened to i64.
define <8 x i64> @vwmaccsu_vx_v8i64(ptr %x, i32 %y, <8 x i64> %z) {
; CHECK-LABEL: vwmaccsu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v12
; CHECK-NEXT:    ret
  %vec = load <8 x i32>, ptr %x
  %ins = insertelement <8 x i32> poison, i32 %y, i64 0
  %splat = shufflevector <8 x i32> %ins, <8 x i32> poison, <8 x i32> zeroinitializer
  %vec.zext = zext <8 x i32> %vec to <8 x i64>
  %splat.sext = sext <8 x i32> %splat to <8 x i64>
  %prod = mul <8 x i64> %vec.zext, %splat.sext
  %acc = add <8 x i64> %prod, %z
  ret <8 x i64> %acc
}

; 64 lanes, i8 sources widened to i16. Vector length is passed through a2.
define <64 x i16> @vwmaccsu_vx_v64i16(ptr %x, i8 %y, <64 x i16> %z) {
; CHECK-LABEL: vwmaccsu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v16
; CHECK-NEXT:    ret
  %vec = load <64 x i8>, ptr %x
  %ins = insertelement <64 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
  %vec.zext = zext <64 x i8> %vec to <64 x i16>
  %splat.sext = sext <64 x i8> %splat to <64 x i16>
  %prod = mul <64 x i16> %vec.zext, %splat.sext
  %acc = add <64 x i16> %prod, %z
  ret <64 x i16> %acc
}

; 32 lanes, i16 sources widened to i32. Vector length is passed through a2.
define <32 x i32> @vwmaccsu_vx_v32i32(ptr %x, i16 %y, <32 x i32> %z) {
; CHECK-LABEL: vwmaccsu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v16
; CHECK-NEXT:    ret
  %vec = load <32 x i16>, ptr %x
  %ins = insertelement <32 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
  %vec.zext = zext <32 x i16> %vec to <32 x i32>
  %splat.sext = sext <32 x i16> %splat to <32 x i32>
  %prod = mul <32 x i32> %vec.zext, %splat.sext
  %acc = add <32 x i32> %prod, %z
  ret <32 x i32> %acc
}

; 16 lanes, i32 sources widened to i64.
define <16 x i64> @vwmaccsu_vx_v16i64(ptr %x, i32 %y, <16 x i64> %z) {
; CHECK-LABEL: vwmaccsu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmaccsu.vx v8, a1, v16
; CHECK-NEXT:    ret
  %vec = load <16 x i32>, ptr %x
  %ins = insertelement <16 x i32> poison, i32 %y, i64 0
  %splat = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
  %vec.zext = zext <16 x i32> %vec to <16 x i64>
  %splat.sext = sext <16 x i32> %splat to <16 x i64>
  %prod = mul <16 x i64> %vec.zext, %splat.sext
  %acc = add <16 x i64> %prod, %z
  ret <16 x i64> %acc
}