; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Codegen tests for the RISC-V vector extension (-mattr=+v) on fixed-length
; vectors. Every function subtracts two sign-extended values, and the
; autogenerated assertions record the assembly llc is expected to emit, built
; around the widening subtract instructions vwsub.vv, vwsub.vx and vwsub.wv.
; The riscv32 and riscv64 invocations share one set of assertions, except for
; the last function, which has separate expectations per target.
; Do not edit the assertion lines by hand; regenerate them with the script
; named on the first line of this file.

; ---------------------------------------------------------------------------
; Vector-vector form: sub (sext a), (sext b).
; Both operands are loaded at the narrow element type, sign-extended to twice
; that width and subtracted. The expected assembly is two unit-stride loads at
; the narrow element width followed by a single vwsub.vv, with no separate
; sign-extension instruction. For element counts of 32 and 64 the expected
; assembly sets the vector length with li + vsetvli instead of vsetivli.
; ---------------------------------------------------------------------------

define <2 x i16> @vwsub_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v10, (a1)
; CHECK-NEXT: vwsub.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = sub <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <4 x i16> @vwsub_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v10, (a1)
; CHECK-NEXT: vwsub.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = sub <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwsub_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vwsub.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = sext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = sub <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwsub_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v10, (a1)
; CHECK-NEXT: vwsub.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = sext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = sub <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwsub_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vwsub.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = sub <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwsub_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vle32.v v10, (a1)
; CHECK-NEXT: vwsub.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = sext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = sub <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwsub_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vle8.v v11, (a1)
; CHECK-NEXT: vwsub.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = sub <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwsub_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vle16.v v11, (a1)
; CHECK-NEXT: vwsub.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = sub <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwsub_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vle32.v v11, (a1)
; CHECK-NEXT: vwsub.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = sub <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwsub_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v12, (a0)
; CHECK-NEXT: vle8.v v14, (a1)
; CHECK-NEXT: vwsub.vv v8, v12, v14
; CHECK-NEXT: ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = sext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = sub <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwsub_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vle16.v v14, (a1)
; CHECK-NEXT: vwsub.vv v8, v12, v14
; CHECK-NEXT: ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = sext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = sub <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwsub_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vle32.v v14, (a1)
; CHECK-NEXT: vwsub.vv v8, v12, v14
; CHECK-NEXT: ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = sext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = sub <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwsub_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v64i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vle8.v v20, (a1)
; CHECK-NEXT: vwsub.vv v8, v16, v20
; CHECK-NEXT: ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = sext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = sub <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwsub_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vle16.v v20, (a1)
; CHECK-NEXT: vwsub.vv v8, v16, v20
; CHECK-NEXT: ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = sext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = sub <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwsub_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vle32.v v20, (a1)
; CHECK-NEXT: vwsub.vv v8, v16, v20
; CHECK-NEXT: ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = sext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = sub <16 x i64> %c, %d
  ret <16 x i64> %e
}

; ---------------------------------------------------------------------------
; Same IR pattern, but each narrow operand is loaded into a full m8 register
; group. The expected assembly performs two vwsub.vv operations at m4: one on
; the upper halves (extracted with vslidedown) and one on the lower halves,
; with whole-register spills and reloads (vs8r.v / vl8r.v) on the stack in
; between. These functions are marked nounwind (presumably to keep unwind
; directives out of the expected output; confirm before removing it).
; ---------------------------------------------------------------------------

define <128 x i16> @vwsub_v128i16(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsub_v128i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vle8.v v0, (a1)
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vslidedown.vx v8, v0, a0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vwsub.vv v24, v16, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwsub.vv v8, v16, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = sext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = sub <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwsub_v64i32(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsub_v64i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vle16.v v0, (a1)
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vslidedown.vx v8, v0, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vwsub.vv v24, v16, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwsub.vv v8, v16, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = sext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = sub <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwsub_v32i64(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsub_v32i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vle32.v v0, (a1)
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vslidedown.vi v8, v0, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vwsub.vv v24, v16, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwsub.vv v8, v16, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = sext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = sub <32 x i64> %c, %d
  ret <32 x i64> %e
}

; ---------------------------------------------------------------------------
; Sign extensions by more than a factor of two, or from operands of different
; widths. The expected assembly first brings each operand to half the result
; width with vsext (vf2 or vf4) where needed, then uses a single vwsub.vv.
; ---------------------------------------------------------------------------

; Both operands go from i8 to i32; each is expected to get a vsext.vf2.
define <2 x i32> @vwsub_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v2i32_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vsext.vf2 v10, v8
; CHECK-NEXT: vsext.vf2 v11, v9
; CHECK-NEXT: vwsub.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = sub <2 x i32> %c, %d
  ret <2 x i32> %e
}

; Left operand is i8, right operand is i16; only the i8 operand is expected to
; get a vsext.vf2.
define <4 x i32> @vwsub_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v4i32_v4i8_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vsext.vf2 v10, v8
; CHECK-NEXT: vwsub.vv v8, v10, v9
; CHECK-NEXT: ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = sub <4 x i32> %c, %d
  ret <4 x i32> %e
}

; Left operand is i32, right operand is i8; the i8 operand is expected to get
; a vsext.vf4 up to i32.
define <4 x i64> @vwsub_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_v4i64_v4i32_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsext.vf4 v11, v8
; CHECK-NEXT: vwsub.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = sub <4 x i64> %c, %d
  ret <4 x i64> %e
}

; ---------------------------------------------------------------------------
; Vector-scalar form: sub (sext a), (sext (splat y)).
; The right operand is a narrow scalar argument broadcast with
; insertelement + shufflevector (all-zero mask) and then sign-extended. The
; expected assembly is one load followed by vwsub.vx taking the scalar
; directly from a1, with no explicit splat instruction.
; ---------------------------------------------------------------------------

define <2 x i16> @vwsub_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsub_vx_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vwsub.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = sub <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwsub_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsub_vx_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vwsub.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = sext <4 x i8> %c to <4 x i16>
  %f = sub <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwsub_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsub_vx_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vwsub.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = sext <2 x i16> %c to <2 x i32>
  %f = sub <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwsub_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsub_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vwsub.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = sext <8 x i8> %c to <8 x i16>
  %f = sub <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwsub_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsub_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vwsub.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = sext <4 x i16> %c to <4 x i32>
  %f = sub <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwsub_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwsub_vx_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vwsub.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = sext <2 x i32> %c to <2 x i64>
  %f = sub <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwsub_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsub_vx_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vwsub.vx v8, v10, a1
; CHECK-NEXT: ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = sext <16 x i8> %c to <16 x i16>
  %f = sub <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwsub_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsub_vx_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vwsub.vx v8, v10, a1
; CHECK-NEXT: ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = sext <8 x i16> %c to <8 x i32>
  %f = sub <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwsub_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwsub_vx_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vwsub.vx v8, v10, a1
; CHECK-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = sext <4 x i32> %c to <4 x i64>
  %f = sub <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwsub_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsub_vx_v32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v12, (a0)
; CHECK-NEXT: vwsub.vx v8, v12, a1
; CHECK-NEXT: ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = sext <32 x i8> %c to <32 x i16>
  %f = sub <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwsub_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsub_vx_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vwsub.vx v8, v12, a1
; CHECK-NEXT: ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = sext <16 x i16> %c to <16 x i32>
  %f = sub <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwsub_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwsub_vx_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vwsub.vx v8, v12, a1
; CHECK-NEXT: ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = sext <8 x i32> %c to <8 x i64>
  %f = sub <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwsub_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsub_vx_v64i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vwsub.vx v8, v16, a1
; CHECK-NEXT: ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = sext <64 x i8> %c to <64 x i16>
  %f = sub <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwsub_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsub_vx_v32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vwsub.vx v8, v16, a1
; CHECK-NEXT: ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = sext <32 x i16> %c to <32 x i32>
  %f = sub <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwsub_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwsub_vx_v16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vwsub.vx v8, v16, a1
; CHECK-NEXT: ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = sext <16 x i32> %c to <16 x i64>
  %f = sub <16 x i64> %d, %e
  ret <16 x i64> %f
}

; ---------------------------------------------------------------------------
; Splat on the left: sub (splat s), (sext a).
; The scalar is loaded from memory, optionally sign-extended as a scalar, and
; broadcast at the wide element type; it is the minuend, and only the vector
; operand is sign-extended as a vector. In the expected assembly the splat is
; materialized with vmv.v.x (or a zero-stride vlse64.v on riscv32 for i64).
; The subtract is vwsub.vv when the splat is built at the narrow element
; width, and vwsub.wv when it is built at the wide element width.
; ---------------------------------------------------------------------------

define <8 x i16> @vwsub_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_vx_v8i16_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lb a1, 0(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vwsub.vv v8, v10, v9
; CHECK-NEXT: ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = sub <8 x i16> %e, %f
  ret <8 x i16> %g
}

; The scalar is already i16 (no scalar sext), so the splat is built at e16 and
; the expected subtract is vwsub.wv.
define <8 x i16> @vwsub_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_vx_v8i16_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lh a1, 0(a1)
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = sub <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwsub_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_vx_v4i32_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lb a1, 0(a1)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vwsub.vv v8, v10, v9
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = sub <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwsub_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_vx_v4i32_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lh a1, 0(a1)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vwsub.vv v8, v10, v9
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = sub <4 x i32> %e, %f
  ret <4 x i32> %g
}

; The scalar is already i32 (no scalar sext), so the splat is built at e32 and
; the expected subtract is vwsub.wv.
define <4 x i32> @vwsub_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsub_vx_v4i32_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lw a1, 0(a1)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = sub <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsub_vx_v2i64_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lb a1, 0(a1)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vwsub.vv v8, v10, v9
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = sub <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsub_vx_v2i64_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lh a1, 0(a1)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vwsub.vv v8, v10, v9
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = sub <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwsub_vx_v2i64_i32(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsub_vx_v2i64_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lw a1, 0(a1)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vwsub.vv v8, v10, v9
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = sext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = sub <2 x i64> %e, %f
  ret <2 x i64> %g
}

; The scalar is already i64, and this is the only function whose expectations
; differ per target. On riscv32 the expected assembly loads the scalar as two
; 32-bit words, stores them to a stack slot and broadcasts it with a
; zero-stride vlse64.v; on riscv64 it uses ld + vmv.v.x at e64. Both targets
; then use vwsub.wv.
define <2 x i64> @vwsub_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwsub_vx_v2i64_i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwsub.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vwsub_vx_v2i64_i64:
; RV64: # %bb.0:
; RV64-NEXT: ld a1, 0(a1)
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle32.v v9, (a0)
; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwsub.wv v8, v8, v9
; RV64-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = sub <2 x i64> %e, %f
  ret <2 x i64> %g
}