; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <2 x i16> @vwmul_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <2 x i16> @vwmul_v2i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: vwmul_v2i16_multiple_users:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vle8.v v10, (a2)
; CHECK-NEXT:    vwmul.vv v11, v8, v9
; CHECK-NEXT:    vwmul.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vor.vv v8, v11, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %b2 = load <2 x i8>, ptr %z
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %d2 = sext <2 x i8> %b2 to <2 x i16>
  %e = mul <2 x i16> %c, %d
  %f = mul <2 x i16> %c, %d2
  %g = or <2 x i16> %e, %f
  ret <2 x i16> %g
}

define <4 x i16> @vwmul_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmul_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = sext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmul_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = sext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmul_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmul_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = sext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmul_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmul_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmul_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmul_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = sext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmul_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = sext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmul_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = sext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmul_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = sext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmul_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = sext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmul_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = sext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}

define <128 x i16> @vwmul_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = sext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmul_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = sext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmul_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = sext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}

define <2 x i32> @vwmul_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vsext.vf2 v11, v9
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmul_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vwmul.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmul_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vsext.vf4 v11, v8
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <2 x i16> @vwmul_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmul_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = sext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmul_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = sext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmul_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = sext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmul_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = sext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmul_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = sext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmul_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = sext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmul_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = sext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmul_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = sext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmul_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = sext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmul_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = sext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmul_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = sext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmul_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = sext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmul_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = sext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmul_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = sext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}

define <8 x i16> @vwmul_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lb a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmul_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v8i16_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vsext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmul_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lb a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmul_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmul_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lw a0, 0(a1)
; CHECK-NEXT:    vsext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <2 x i64> @vwmul_vx_v2i64_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    lb a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    lw a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = sext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) {
; RV32-LABEL: vwmul_vx_v2i64_i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lw a2, 0(a1)
; RV32-NEXT:    lw a1, 4(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmul_vx_v2i64_i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    ld a0, 0(a1)
; RV64-NEXT:    vsext.vf2 v9, v8
; RV64-NEXT:    vmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i16> @vwmul_v2i16_multiuse(ptr %x, ptr %y, ptr %z, ptr %w) {
; CHECK-LABEL: vwmul_v2i16_multiuse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vle8.v v10, (a2)
; CHECK-NEXT:    vle8.v v11, (a3)
; CHECK-NEXT:    vsext.vf2 v12, v8
; CHECK-NEXT:    vsext.vf2 v8, v9
; CHECK-NEXT:    vsext.vf2 v9, v10
; CHECK-NEXT:    vsext.vf2 v10, v11
; CHECK-NEXT:    vmul.vv v11, v12, v10
; CHECK-NEXT:    vmul.vv v10, v8, v10
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vor.vv v9, v11, v10
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = load <2 x i8>, ptr %z
  %d = load <2 x i8>, ptr %w

  %as = sext <2 x i8> %a to <2 x i16>
  %bs = sext <2 x i8> %b to <2 x i16>
  %cs = sext <2 x i8> %c to <2 x i16>
  %ds = sext <2 x i8> %d to <2 x i16>

  %e = mul <2 x i16> %as, %ds
  %f = mul <2 x i16> %bs, %ds ; shares 1 use with %e
  %g = udiv <2 x i16> %bs, %cs ; shares 1 use with %f, and no uses with %e

  %h = or <2 x i16> %e, %f
  %i = or <2 x i16> %h, %g
  ret <2 x i16> %i
}