; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
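
; The tests below cover the vector-vector (vremu.vv) and vector-scalar
; (vremu.vx) forms of unsigned remainder, plus urem by a splatted constant.
; For the non-power-of-two constant -7 (2^SEW - 7 when read unsigned), the
; quotient is computed by a magic-number multiply: in every case here the
; multiplier is 2^(SEW-3)+1 and the combined shift is 2*SEW-3 (vmulhu
; contributes SEW bits and the explicit vsrl the remaining SEW-3), after
; which vnmsac forms x - q*c in a single multiply-subtract.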

define <vscale x 1 x i8> @vremu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i8> %va, %vb
  ret <vscale x 1 x i8> %vc
}

define <vscale x 1 x i8> @vremu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  %vc = urem <vscale x 1 x i8> %va, %splat
  ret <vscale x 1 x i8> %vc
}

define <vscale x 1 x i8> @vremu_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv1i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i8> %va, splat (i8 -7)
  ret <vscale x 1 x i8> %vc
}

define <vscale x 2 x i8> @vremu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i8> %va, %vb
  ret <vscale x 2 x i8> %vc
}

define <vscale x 2 x i8> @vremu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
  %vc = urem <vscale x 2 x i8> %va, %splat
  ret <vscale x 2 x i8> %vc
}

define <vscale x 2 x i8> @vremu_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv2i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i8> %va, splat (i8 -7)
  ret <vscale x 2 x i8> %vc
}

define <vscale x 4 x i8> @vremu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i8> %va, %vb
  ret <vscale x 4 x i8> %vc
}

define <vscale x 4 x i8> @vremu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
  %vc = urem <vscale x 4 x i8> %va, %splat
  ret <vscale x 4 x i8> %vc
}

define <vscale x 4 x i8> @vremu_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv4i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i8> %va, splat (i8 -7)
  ret <vscale x 4 x i8> %vc
}

define <vscale x 8 x i8> @vremu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i8> %va, %vb
  ret <vscale x 8 x i8> %vc
}

define <vscale x 8 x i8> @vremu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
  %vc = urem <vscale x 8 x i8> %va, %splat
  ret <vscale x 8 x i8> %vc
}

define <vscale x 8 x i8> @vremu_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv8i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i8> %va, splat (i8 -7)
  ret <vscale x 8 x i8> %vc
}

define <vscale x 16 x i8> @vremu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i8> %va, %vb
  ret <vscale x 16 x i8> %vc
}

define <vscale x 16 x i8> @vremu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %vc = urem <vscale x 16 x i8> %va, %splat
  ret <vscale x 16 x i8> %vc
}

define <vscale x 16 x i8> @vremu_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv16i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmulhu.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v10, v10, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i8> %va, splat (i8 -7)
  ret <vscale x 16 x i8> %vc
}

define <vscale x 32 x i8> @vremu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 32 x i8> %va, %vb
  ret <vscale x 32 x i8> %vc
}

define <vscale x 32 x i8> @vremu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
  %vc = urem <vscale x 32 x i8> %va, %splat
  ret <vscale x 32 x i8> %vc
}

define <vscale x 32 x i8> @vremu_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv32i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmulhu.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v12, v12, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 32 x i8> %va, splat (i8 -7)
  ret <vscale x 32 x i8> %vc
}

define <vscale x 64 x i8> @vremu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 64 x i8> %va, %vb
  ret <vscale x 64 x i8> %vc
}

define <vscale x 64 x i8> @vremu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
  %vc = urem <vscale x 64 x i8> %va, %splat
  ret <vscale x 64 x i8> %vc
}

define <vscale x 64 x i8> @vremu_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv64i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT:    vmulhu.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v16, v16, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 64 x i8> %va, splat (i8 -7)
  ret <vscale x 64 x i8> %vc
}

define <vscale x 1 x i16> @vremu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i16> %va, %vb
  ret <vscale x 1 x i16> %vc
}

define <vscale x 1 x i16> @vremu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
  %vc = urem <vscale x 1 x i16> %va, %splat
  ret <vscale x 1 x i16> %vc
}
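
; From i16 up, the magic multiplier no longer fits in a 12-bit immediate;
; here 8193 (0x2001) is materialized with lui+addi before the vmulhu.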
define <vscale x 1 x i16> @vremu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv1i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i16> %va, splat (i16 -7)
  ret <vscale x 1 x i16> %vc
}

define <vscale x 2 x i16> @vremu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i16> %va, %vb
  ret <vscale x 2 x i16> %vc
}

define <vscale x 2 x i16> @vremu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
  %vc = urem <vscale x 2 x i16> %va, %splat
  ret <vscale x 2 x i16> %vc
}

define <vscale x 2 x i16> @vremu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv2i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i16> %va, splat (i16 -7)
  ret <vscale x 2 x i16> %vc
}

define <vscale x 4 x i16> @vremu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i16> %va, %vb
  ret <vscale x 4 x i16> %vc
}

define <vscale x 4 x i16> @vremu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %vc = urem <vscale x 4 x i16> %va, %splat
  ret <vscale x 4 x i16> %vc
}

define <vscale x 4 x i16> @vremu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv4i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i16> %va, splat (i16 -7)
  ret <vscale x 4 x i16> %vc
}

define <vscale x 8 x i16> @vremu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i16> %va, %vb
  ret <vscale x 8 x i16> %vc
}

define <vscale x 8 x i16> @vremu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %vc = urem <vscale x 8 x i16> %va, %splat
  ret <vscale x 8 x i16> %vc
}

define <vscale x 8 x i16> @vremu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv8i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmulhu.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v10, v10, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i16> %va, splat (i16 -7)
  ret <vscale x 8 x i16> %vc
}

define <vscale x 16 x i16> @vremu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i16> %va, %vb
  ret <vscale x 16 x i16> %vc
}

define <vscale x 16 x i16> @vremu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %vc = urem <vscale x 16 x i16> %va, %splat
  ret <vscale x 16 x i16> %vc
}

define <vscale x 16 x i16> @vremu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv16i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmulhu.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v12, v12, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i16> %va, splat (i16 -7)
  ret <vscale x 16 x i16> %vc
}

define <vscale x 32 x i16> @vremu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 32 x i16> %va, %vb
  ret <vscale x 32 x i16> %vc
}

define <vscale x 32 x i16> @vremu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
  %vc = urem <vscale x 32 x i16> %va, %splat
  ret <vscale x 32 x i16> %vc
}

define <vscale x 32 x i16> @vremu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv32i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT:    vmulhu.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v16, v16, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 32 x i16> %va, splat (i16 -7)
  ret <vscale x 32 x i16> %vc
}

define <vscale x 1 x i32> @vremu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i32> %va, %vb
  ret <vscale x 1 x i32> %vc
}

define <vscale x 1 x i32> @vremu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
  %vc = urem <vscale x 1 x i32> %va, %splat
  ret <vscale x 1 x i32> %vc
}

define <vscale x 1 x i32> @vremu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv1i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i32> %va, splat (i32 -7)
  ret <vscale x 1 x i32> %vc
}

define <vscale x 2 x i32> @vremu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i32> %va, %vb
  ret <vscale x 2 x i32> %vc
}

define <vscale x 2 x i32> @vremu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
  %vc = urem <vscale x 2 x i32> %va, %splat
  ret <vscale x 2 x i32> %vc
}

define <vscale x 2 x i32> @vremu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i32> %va, splat (i32 -7)
  ret <vscale x 2 x i32> %vc
}

define <vscale x 4 x i32> @vremu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i32> %va, %vb
  ret <vscale x 4 x i32> %vc
}

define <vscale x 4 x i32> @vremu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %vc = urem <vscale x 4 x i32> %va, %splat
  ret <vscale x 4 x i32> %vc
}

define <vscale x 4 x i32> @vremu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv4i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmulhu.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v10, v10, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i32> %va, splat (i32 -7)
  ret <vscale x 4 x i32> %vc
}

define <vscale x 8 x i32> @vremu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i32> %va, %vb
  ret <vscale x 8 x i32> %vc
}

define <vscale x 8 x i32> @vremu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
  %vc = urem <vscale x 8 x i32> %va, %splat
  ret <vscale x 8 x i32> %vc
}

define <vscale x 8 x i32> @vremu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv8i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmulhu.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v12, v12, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i32> %va, splat (i32 -7)
  ret <vscale x 8 x i32> %vc
}

define <vscale x 16 x i32> @vremu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i32> %va, %vb
  ret <vscale x 16 x i32> %vc
}

define <vscale x 16 x i32> @vremu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
  %vc = urem <vscale x 16 x i32> %va, %splat
  ret <vscale x 16 x i32> %vc
}

define <vscale x 16 x i32> @vremu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv16i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; CHECK-NEXT:    vmulhu.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v16, v16, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i32> %va, splat (i32 -7)
  ret <vscale x 16 x i32> %vc
}
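
; For i64 the lowerings diverge. A 64-bit scalar does not fit in a 32-bit
; GPR, so RV32 stores the two halves to the stack and splats the value with
; a zero-stride vlse64. The Zve64x runs also take a different path for the
; constant case: Zve64* does not include the vmulh/vmulhu variants at
; EEW=64, so the magic-number expansion is unavailable and the divisor is
; simply materialized for a plain vremu.vx.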

define <vscale x 1 x i64> @vremu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) {
; CHECK-LABEL: vremu_vv_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i64> %va, %vb
  ret <vscale x 1 x i64> %vc
}

define <vscale x 1 x i64> @vremu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32-LABEL: vremu_vx_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vremu.vv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vremu_vx_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT:    vremu.vx v8, v8, a0
; RV64-NEXT:    ret
  %head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vc = urem <vscale x 1 x i64> %va, %splat
  ret <vscale x 1 x i64> %vc
}

define <vscale x 1 x i64> @vremu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; RV32-V-LABEL: vremu_vi_nxv1i64_0:
; RV32-V:       # %bb.0:
; RV32-V-NEXT:    addi sp, sp, -16
; RV32-V-NEXT:    .cfi_def_cfa_offset 16
; RV32-V-NEXT:    lui a0, 131072
; RV32-V-NEXT:    li a1, 1
; RV32-V-NEXT:    sw a1, 8(sp)
; RV32-V-NEXT:    sw a0, 12(sp)
; RV32-V-NEXT:    addi a0, sp, 8
; RV32-V-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-V-NEXT:    vlse64.v v9, (a0), zero
; RV32-V-NEXT:    li a0, 61
; RV32-V-NEXT:    vmulhu.vv v9, v8, v9
; RV32-V-NEXT:    vsrl.vx v9, v9, a0
; RV32-V-NEXT:    li a0, -7
; RV32-V-NEXT:    vnmsac.vx v8, a0, v9
; RV32-V-NEXT:    addi sp, sp, 16
; RV32-V-NEXT:    .cfi_def_cfa_offset 0
; RV32-V-NEXT:    ret
;
; ZVE64X-LABEL: vremu_vi_nxv1i64_0:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    li a0, -7
; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; ZVE64X-NEXT:    vremu.vx v8, v8, a0
; ZVE64X-NEXT:    ret
;
; RV64-V-LABEL: vremu_vi_nxv1i64_0:
; RV64-V:       # %bb.0:
; RV64-V-NEXT:    li a0, 1
; RV64-V-NEXT:    slli a0, a0, 61
; RV64-V-NEXT:    addi a0, a0, 1
; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV64-V-NEXT:    vmulhu.vx v9, v8, a0
; RV64-V-NEXT:    li a0, 61
; RV64-V-NEXT:    vsrl.vx v9, v9, a0
; RV64-V-NEXT:    li a0, -7
; RV64-V-NEXT:    vnmsac.vx v8, a0, v9
; RV64-V-NEXT:    ret
  %vc = urem <vscale x 1 x i64> %va, splat (i64 -7)
  ret <vscale x 1 x i64> %vc
}

; fold (urem x, pow2) -> (and x, pow2-1)
define <vscale x 1 x i64> @vremu_vi_nxv1i64_1(<vscale x 1 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv1i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i64> %va, splat (i64 16)
  ret <vscale x 1 x i64> %vc
}
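
; A power of two shifted left by a variable amount is still a power of two,
; so the same mask trick applies with the mask computed at run time: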
; fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
define <vscale x 1 x i64> @vremu_vi_nxv1i64_2(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) {
; CHECK-LABEL: vremu_vi_nxv1i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a0
; CHECK-NEXT:    vsll.vv v9, v10, v9
; CHECK-NEXT:    vadd.vi v9, v9, -1
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = shl <vscale x 1 x i64> splat (i64 16), %vb
  %vd = urem <vscale x 1 x i64> %va, %vc
  ret <vscale x 1 x i64> %vd
}

define <vscale x 2 x i64> @vremu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) {
; CHECK-LABEL: vremu_vv_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i64> %va, %vb
  ret <vscale x 2 x i64> %vc
}

define <vscale x 2 x i64> @vremu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32-LABEL: vremu_vx_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vremu.vv v8, v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vremu_vx_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV64-NEXT:    vremu.vx v8, v8, a0
; RV64-NEXT:    ret
  %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %vc = urem <vscale x 2 x i64> %va, %splat
  ret <vscale x 2 x i64> %vc
}

define <vscale x 2 x i64> @vremu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; RV32-V-LABEL: vremu_vi_nxv2i64_0:
; RV32-V:       # %bb.0:
; RV32-V-NEXT:    addi sp, sp, -16
; RV32-V-NEXT:    .cfi_def_cfa_offset 16
; RV32-V-NEXT:    lui a0, 131072
; RV32-V-NEXT:    li a1, 1
; RV32-V-NEXT:    sw a1, 8(sp)
; RV32-V-NEXT:    sw a0, 12(sp)
; RV32-V-NEXT:    addi a0, sp, 8
; RV32-V-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-V-NEXT:    vlse64.v v10, (a0), zero
; RV32-V-NEXT:    li a0, 61
; RV32-V-NEXT:    vmulhu.vv v10, v8, v10
; RV32-V-NEXT:    vsrl.vx v10, v10, a0
; RV32-V-NEXT:    li a0, -7
; RV32-V-NEXT:    vnmsac.vx v8, a0, v10
; RV32-V-NEXT:    addi sp, sp, 16
; RV32-V-NEXT:    .cfi_def_cfa_offset 0
; RV32-V-NEXT:    ret
;
; ZVE64X-LABEL: vremu_vi_nxv2i64_0:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    li a0, -7
; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; ZVE64X-NEXT:    vremu.vx v8, v8, a0
; ZVE64X-NEXT:    ret
;
; RV64-V-LABEL: vremu_vi_nxv2i64_0:
; RV64-V:       # %bb.0:
; RV64-V-NEXT:    li a0, 1
; RV64-V-NEXT:    slli a0, a0, 61
; RV64-V-NEXT:    addi a0, a0, 1
; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV64-V-NEXT:    vmulhu.vx v10, v8, a0
; RV64-V-NEXT:    li a0, 61
; RV64-V-NEXT:    vsrl.vx v10, v10, a0
; RV64-V-NEXT:    li a0, -7
; RV64-V-NEXT:    vnmsac.vx v8, a0, v10
; RV64-V-NEXT:    ret
  %vc = urem <vscale x 2 x i64> %va, splat (i64 -7)
  ret <vscale x 2 x i64> %vc
}

; fold (urem x, pow2) -> (and x, pow2-1)
define <vscale x 2 x i64> @vremu_vi_nxv2i64_1(<vscale x 2 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv2i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i64> %va, splat (i64 16)
  ret <vscale x 2 x i64> %vc
}

; fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
define <vscale x 2 x i64> @vremu_vi_nxv2i64_2(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) {
; CHECK-LABEL: vremu_vi_nxv2i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vsll.vv v10, v12, v10
; CHECK-NEXT:    vadd.vi v10, v10, -1
; CHECK-NEXT:    vand.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = shl <vscale x 2 x i64> splat (i64 16), %vb
  %vd = urem <vscale x 2 x i64> %va, %vc
  ret <vscale x 2 x i64> %vd
}

define <vscale x 4 x i64> @vremu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb) {
; CHECK-LABEL: vremu_vv_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i64> %va, %vb
  ret <vscale x 4 x i64> %vc
}

define <vscale x 4 x i64> @vremu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32-LABEL: vremu_vx_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vremu.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vremu_vx_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV64-NEXT:    vremu.vx v8, v8, a0
; RV64-NEXT:    ret
  %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
  %vc = urem <vscale x 4 x i64> %va, %splat
  ret <vscale x 4 x i64> %vc
}

define <vscale x 4 x i64> @vremu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; RV32-V-LABEL: vremu_vi_nxv4i64_0:
; RV32-V:       # %bb.0:
; RV32-V-NEXT:    addi sp, sp, -16
; RV32-V-NEXT:    .cfi_def_cfa_offset 16
; RV32-V-NEXT:    lui a0, 131072
; RV32-V-NEXT:    li a1, 1
; RV32-V-NEXT:    sw a1, 8(sp)
; RV32-V-NEXT:    sw a0, 12(sp)
; RV32-V-NEXT:    addi a0, sp, 8
; RV32-V-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-V-NEXT:    vlse64.v v12, (a0), zero
; RV32-V-NEXT:    li a0, 61
; RV32-V-NEXT:    vmulhu.vv v12, v8, v12
; RV32-V-NEXT:    vsrl.vx v12, v12, a0
; RV32-V-NEXT:    li a0, -7
; RV32-V-NEXT:    vnmsac.vx v8, a0, v12
; RV32-V-NEXT:    addi sp, sp, 16
; RV32-V-NEXT:    .cfi_def_cfa_offset 0
; RV32-V-NEXT:    ret
;
; ZVE64X-LABEL: vremu_vi_nxv4i64_0:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    li a0, -7
; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; ZVE64X-NEXT:    vremu.vx v8, v8, a0
; ZVE64X-NEXT:    ret
;
; RV64-V-LABEL: vremu_vi_nxv4i64_0:
; RV64-V:       # %bb.0:
; RV64-V-NEXT:    li a0, 1
; RV64-V-NEXT:    slli a0, a0, 61
; RV64-V-NEXT:    addi a0, a0, 1
; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV64-V-NEXT:    vmulhu.vx v12, v8, a0
; RV64-V-NEXT:    li a0, 61
; RV64-V-NEXT:    vsrl.vx v12, v12, a0
; RV64-V-NEXT:    li a0, -7
; RV64-V-NEXT:    vnmsac.vx v8, a0, v12
; RV64-V-NEXT:    ret
  %vc = urem <vscale x 4 x i64> %va, splat (i64 -7)
  ret <vscale x 4 x i64> %vc
}

; fold (urem x, pow2) -> (and x, pow2-1)
define <vscale x 4 x i64> @vremu_vi_nxv4i64_1(<vscale x 4 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv4i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i64> %va, splat (i64 16)
  ret <vscale x 4 x i64> %vc
}

; fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
define <vscale x 4 x i64> @vremu_vi_nxv4i64_2(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb) {
; CHECK-LABEL: vremu_vi_nxv4i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmv.v.x v16, a0
; CHECK-NEXT:    vsll.vv v12, v16, v12
; CHECK-NEXT:    vadd.vi v12, v12, -1
; CHECK-NEXT:    vand.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = shl <vscale x 4 x i64> splat (i64 16), %vb
  %vd = urem <vscale x 4 x i64> %va, %vc
  ret <vscale x 4 x i64> %vd
}

define <vscale x 8 x i64> @vremu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb) {
; CHECK-LABEL: vremu_vv_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i64> %va, %vb
  ret <vscale x 8 x i64> %vc
}

define <vscale x 8 x i64> @vremu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32-LABEL: vremu_vx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vremu.vv v8, v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vremu_vx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vremu.vx v8, v8, a0
; RV64-NEXT:    ret
  %head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %vc = urem <vscale x 8 x i64> %va, %splat
  ret <vscale x 8 x i64> %vc
}

define <vscale x 8 x i64> @vremu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; RV32-V-LABEL: vremu_vi_nxv8i64_0:
; RV32-V:       # %bb.0:
; RV32-V-NEXT:    addi sp, sp, -16
; RV32-V-NEXT:    .cfi_def_cfa_offset 16
; RV32-V-NEXT:    lui a0, 131072
; RV32-V-NEXT:    li a1, 1
; RV32-V-NEXT:    sw a1, 8(sp)
; RV32-V-NEXT:    sw a0, 12(sp)
; RV32-V-NEXT:    addi a0, sp, 8
; RV32-V-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-V-NEXT:    vlse64.v v16, (a0), zero
; RV32-V-NEXT:    li a0, 61
; RV32-V-NEXT:    vmulhu.vv v16, v8, v16
; RV32-V-NEXT:    vsrl.vx v16, v16, a0
; RV32-V-NEXT:    li a0, -7
; RV32-V-NEXT:    vnmsac.vx v8, a0, v16
; RV32-V-NEXT:    addi sp, sp, 16
; RV32-V-NEXT:    .cfi_def_cfa_offset 0
; RV32-V-NEXT:    ret
;
; ZVE64X-LABEL: vremu_vi_nxv8i64_0:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    li a0, -7
; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; ZVE64X-NEXT:    vremu.vx v8, v8, a0
; ZVE64X-NEXT:    ret
;
; RV64-V-LABEL: vremu_vi_nxv8i64_0:
; RV64-V:       # %bb.0:
; RV64-V-NEXT:    li a0, 1
; RV64-V-NEXT:    slli a0, a0, 61
; RV64-V-NEXT:    addi a0, a0, 1
; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-V-NEXT:    vmulhu.vx v16, v8, a0
; RV64-V-NEXT:    li a0, 61
; RV64-V-NEXT:    vsrl.vx v16, v16, a0
; RV64-V-NEXT:    li a0, -7
; RV64-V-NEXT:    vnmsac.vx v8, a0, v16
; RV64-V-NEXT:    ret
  %vc = urem <vscale x 8 x i64> %va, splat (i64 -7)
  ret <vscale x 8 x i64> %vc
}

; fold (urem x, pow2) -> (and x, pow2-1)
define <vscale x 8 x i64> @vremu_vi_nxv8i64_1(<vscale x 8 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv8i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i64> %va, splat (i64 16)
  ret <vscale x 8 x i64> %vc
}

; fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
define <vscale x 8 x i64> @vremu_vi_nxv8i64_2(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb) {
; CHECK-LABEL: vremu_vi_nxv8i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; CHECK-NEXT:    vmv.v.x v24, a0
; CHECK-NEXT:    vsll.vv v16, v24, v16
; CHECK-NEXT:    vadd.vi v16, v16, -1
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = shl <vscale x 8 x i64> splat (i64 16), %vb
  %vd = urem <vscale x 8 x i64> %va, %vc
  ret <vscale x 8 x i64> %vd
}