1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 4; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 6; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb,+m -target-abi=ilp32d \ 7; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB 8; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb,+m -target-abi=lp64d \ 9; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB 10 11declare <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32) 12 13define <vscale x 1 x i16> @vp_bswap_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 14; CHECK-LABEL: vp_bswap_nxv1i16: 15; CHECK: # %bb.0: 16; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 17; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 18; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 19; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 20; CHECK-NEXT: ret 21; 22; CHECK-ZVKB-LABEL: vp_bswap_nxv1i16: 23; CHECK-ZVKB: # %bb.0: 24; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 25; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 26; CHECK-ZVKB-NEXT: ret 27 %v = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl) 28 ret <vscale x 1 x i16> %v 29} 30 31define <vscale x 1 x i16> @vp_bswap_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { 32; CHECK-LABEL: vp_bswap_nxv1i16_unmasked: 33; CHECK: # %bb.0: 34; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 35; CHECK-NEXT: vsrl.vi v9, v8, 8 36; CHECK-NEXT: vsll.vi v8, v8, 8 37; CHECK-NEXT: vor.vv v8, v8, v9 38; CHECK-NEXT: ret 39; 40; CHECK-ZVKB-LABEL: vp_bswap_nxv1i16_unmasked: 41; CHECK-ZVKB: # %bb.0: 42; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 43; CHECK-ZVKB-NEXT: vrev8.v v8, v8 44; CHECK-ZVKB-NEXT: ret 45 %v = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 46 ret <vscale x 1 x i16> %v 47} 48 49declare <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32) 50 51define <vscale x 2 x i16> @vp_bswap_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 52; CHECK-LABEL: vp_bswap_nxv2i16: 53; CHECK: # %bb.0: 54; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 55; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 56; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 57; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 58; CHECK-NEXT: ret 59; 60; CHECK-ZVKB-LABEL: vp_bswap_nxv2i16: 61; CHECK-ZVKB: # %bb.0: 62; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 63; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 64; CHECK-ZVKB-NEXT: ret 65 %v = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl) 66 ret <vscale x 2 x i16> %v 67} 68 69define <vscale x 2 x i16> @vp_bswap_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { 70; CHECK-LABEL: vp_bswap_nxv2i16_unmasked: 71; CHECK: # %bb.0: 72; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 73; CHECK-NEXT: vsrl.vi v9, v8, 8 74; CHECK-NEXT: vsll.vi v8, v8, 8 75; CHECK-NEXT: vor.vv v8, v8, v9 76; CHECK-NEXT: ret 77; 78; CHECK-ZVKB-LABEL: vp_bswap_nxv2i16_unmasked: 79; CHECK-ZVKB: # %bb.0: 80; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 81; CHECK-ZVKB-NEXT: vrev8.v v8, v8 82; CHECK-ZVKB-NEXT: ret 83 %v = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> %va, 
<vscale x 2 x i1> splat (i1 true), i32 %evl) 84 ret <vscale x 2 x i16> %v 85} 86 87declare <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32) 88 89define <vscale x 4 x i16> @vp_bswap_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 90; CHECK-LABEL: vp_bswap_nxv4i16: 91; CHECK: # %bb.0: 92; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 93; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 94; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 95; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 96; CHECK-NEXT: ret 97; 98; CHECK-ZVKB-LABEL: vp_bswap_nxv4i16: 99; CHECK-ZVKB: # %bb.0: 100; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 101; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 102; CHECK-ZVKB-NEXT: ret 103 %v = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) 104 ret <vscale x 4 x i16> %v 105} 106 107define <vscale x 4 x i16> @vp_bswap_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { 108; CHECK-LABEL: vp_bswap_nxv4i16_unmasked: 109; CHECK: # %bb.0: 110; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 111; CHECK-NEXT: vsrl.vi v9, v8, 8 112; CHECK-NEXT: vsll.vi v8, v8, 8 113; CHECK-NEXT: vor.vv v8, v8, v9 114; CHECK-NEXT: ret 115; 116; CHECK-ZVKB-LABEL: vp_bswap_nxv4i16_unmasked: 117; CHECK-ZVKB: # %bb.0: 118; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 119; CHECK-ZVKB-NEXT: vrev8.v v8, v8 120; CHECK-ZVKB-NEXT: ret 121 %v = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 122 ret <vscale x 4 x i16> %v 123} 124 125declare <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32) 126 127define <vscale x 8 x i16> @vp_bswap_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 128; CHECK-LABEL: vp_bswap_nxv8i16: 129; CHECK: # %bb.0: 130; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 131; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t 132; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 133; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 134; CHECK-NEXT: ret 135; 136; CHECK-ZVKB-LABEL: vp_bswap_nxv8i16: 137; CHECK-ZVKB: # %bb.0: 138; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 139; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 140; CHECK-ZVKB-NEXT: ret 141 %v = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl) 142 ret <vscale x 8 x i16> %v 143} 144 145define <vscale x 8 x i16> @vp_bswap_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { 146; CHECK-LABEL: vp_bswap_nxv8i16_unmasked: 147; CHECK: # %bb.0: 148; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 149; CHECK-NEXT: vsrl.vi v10, v8, 8 150; CHECK-NEXT: vsll.vi v8, v8, 8 151; CHECK-NEXT: vor.vv v8, v8, v10 152; CHECK-NEXT: ret 153; 154; CHECK-ZVKB-LABEL: vp_bswap_nxv8i16_unmasked: 155; CHECK-ZVKB: # %bb.0: 156; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 157; CHECK-ZVKB-NEXT: vrev8.v v8, v8 158; CHECK-ZVKB-NEXT: ret 159 %v = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 160 ret <vscale x 8 x i16> %v 161} 162 163declare <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32) 164 165define <vscale x 16 x i16> @vp_bswap_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 166; CHECK-LABEL: vp_bswap_nxv16i16: 167; CHECK: # %bb.0: 168; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 169; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t 170; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 171; CHECK-NEXT: vor.vv 
v8, v8, v12, v0.t 172; CHECK-NEXT: ret 173; 174; CHECK-ZVKB-LABEL: vp_bswap_nxv16i16: 175; CHECK-ZVKB: # %bb.0: 176; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 177; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 178; CHECK-ZVKB-NEXT: ret 179 %v = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl) 180 ret <vscale x 16 x i16> %v 181} 182 183define <vscale x 16 x i16> @vp_bswap_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { 184; CHECK-LABEL: vp_bswap_nxv16i16_unmasked: 185; CHECK: # %bb.0: 186; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 187; CHECK-NEXT: vsrl.vi v12, v8, 8 188; CHECK-NEXT: vsll.vi v8, v8, 8 189; CHECK-NEXT: vor.vv v8, v8, v12 190; CHECK-NEXT: ret 191; 192; CHECK-ZVKB-LABEL: vp_bswap_nxv16i16_unmasked: 193; CHECK-ZVKB: # %bb.0: 194; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 195; CHECK-ZVKB-NEXT: vrev8.v v8, v8 196; CHECK-ZVKB-NEXT: ret 197 %v = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 198 ret <vscale x 16 x i16> %v 199} 200 201declare <vscale x 32 x i16> @llvm.vp.bswap.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32) 202 203define <vscale x 32 x i16> @vp_bswap_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 204; CHECK-LABEL: vp_bswap_nxv32i16: 205; CHECK: # %bb.0: 206; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 207; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t 208; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 209; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 210; CHECK-NEXT: ret 211; 212; CHECK-ZVKB-LABEL: vp_bswap_nxv32i16: 213; CHECK-ZVKB: # %bb.0: 214; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 215; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 216; CHECK-ZVKB-NEXT: ret 217 %v = call <vscale x 32 x i16> @llvm.vp.bswap.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl) 218 ret <vscale x 32 x i16> %v 219} 220 221define <vscale x 32 x i16> @vp_bswap_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { 222; CHECK-LABEL: vp_bswap_nxv32i16_unmasked: 223; CHECK: # %bb.0: 224; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 225; CHECK-NEXT: vsrl.vi v16, v8, 8 226; CHECK-NEXT: vsll.vi v8, v8, 8 227; CHECK-NEXT: vor.vv v8, v8, v16 228; CHECK-NEXT: ret 229; 230; CHECK-ZVKB-LABEL: vp_bswap_nxv32i16_unmasked: 231; CHECK-ZVKB: # %bb.0: 232; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 233; CHECK-ZVKB-NEXT: vrev8.v v8, v8 234; CHECK-ZVKB-NEXT: ret 235 %v = call <vscale x 32 x i16> @llvm.vp.bswap.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) 236 ret <vscale x 32 x i16> %v 237} 238 239declare <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32) 240 241define <vscale x 1 x i32> @vp_bswap_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 242; CHECK-LABEL: vp_bswap_nxv1i32: 243; CHECK: # %bb.0: 244; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 245; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 246; CHECK-NEXT: lui a0, 16 247; CHECK-NEXT: addi a0, a0, -256 248; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 249; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t 250; CHECK-NEXT: vor.vv v9, v9, v10, v0.t 251; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 252; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t 253; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 254; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 255; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 256; CHECK-NEXT: ret 257; 258; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32: 259; CHECK-ZVKB: # %bb.0: 260; CHECK-ZVKB-NEXT: vsetvli 
zero, a0, e32, mf2, ta, ma 261; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 262; CHECK-ZVKB-NEXT: ret 263 %v = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl) 264 ret <vscale x 1 x i32> %v 265} 266 267define <vscale x 1 x i32> @vp_bswap_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { 268; CHECK-LABEL: vp_bswap_nxv1i32_unmasked: 269; CHECK: # %bb.0: 270; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 271; CHECK-NEXT: vsrl.vi v9, v8, 8 272; CHECK-NEXT: lui a0, 16 273; CHECK-NEXT: vsrl.vi v10, v8, 24 274; CHECK-NEXT: addi a0, a0, -256 275; CHECK-NEXT: vand.vx v9, v9, a0 276; CHECK-NEXT: vor.vv v9, v9, v10 277; CHECK-NEXT: vand.vx v10, v8, a0 278; CHECK-NEXT: vsll.vi v10, v10, 8 279; CHECK-NEXT: vsll.vi v8, v8, 24 280; CHECK-NEXT: vor.vv v8, v8, v10 281; CHECK-NEXT: vor.vv v8, v8, v9 282; CHECK-NEXT: ret 283; 284; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32_unmasked: 285; CHECK-ZVKB: # %bb.0: 286; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 287; CHECK-ZVKB-NEXT: vrev8.v v8, v8 288; CHECK-ZVKB-NEXT: ret 289 %v = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 290 ret <vscale x 1 x i32> %v 291} 292 293declare <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32) 294 295define <vscale x 2 x i32> @vp_bswap_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 296; CHECK-LABEL: vp_bswap_nxv2i32: 297; CHECK: # %bb.0: 298; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 299; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 300; CHECK-NEXT: lui a0, 16 301; CHECK-NEXT: addi a0, a0, -256 302; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 303; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t 304; CHECK-NEXT: vor.vv v9, v9, v10, v0.t 305; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 306; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t 307; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 308; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 309; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 310; CHECK-NEXT: ret 311; 312; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32: 313; CHECK-ZVKB: # %bb.0: 314; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 315; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 316; CHECK-ZVKB-NEXT: ret 317 %v = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) 318 ret <vscale x 2 x i32> %v 319} 320 321define <vscale x 2 x i32> @vp_bswap_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { 322; CHECK-LABEL: vp_bswap_nxv2i32_unmasked: 323; CHECK: # %bb.0: 324; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 325; CHECK-NEXT: vsrl.vi v9, v8, 8 326; CHECK-NEXT: lui a0, 16 327; CHECK-NEXT: vsrl.vi v10, v8, 24 328; CHECK-NEXT: addi a0, a0, -256 329; CHECK-NEXT: vand.vx v9, v9, a0 330; CHECK-NEXT: vor.vv v9, v9, v10 331; CHECK-NEXT: vand.vx v10, v8, a0 332; CHECK-NEXT: vsll.vi v10, v10, 8 333; CHECK-NEXT: vsll.vi v8, v8, 24 334; CHECK-NEXT: vor.vv v8, v8, v10 335; CHECK-NEXT: vor.vv v8, v8, v9 336; CHECK-NEXT: ret 337; 338; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32_unmasked: 339; CHECK-ZVKB: # %bb.0: 340; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 341; CHECK-ZVKB-NEXT: vrev8.v v8, v8 342; CHECK-ZVKB-NEXT: ret 343 %v = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 344 ret <vscale x 2 x i32> %v 345} 346 347declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32) 348 349define <vscale x 4 x i32> @vp_bswap_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> 
%m, i32 zeroext %evl) { 350; CHECK-LABEL: vp_bswap_nxv4i32: 351; CHECK: # %bb.0: 352; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 353; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t 354; CHECK-NEXT: lui a0, 16 355; CHECK-NEXT: addi a0, a0, -256 356; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 357; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t 358; CHECK-NEXT: vor.vv v10, v10, v12, v0.t 359; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 360; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t 361; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 362; CHECK-NEXT: vor.vv v8, v8, v12, v0.t 363; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 364; CHECK-NEXT: ret 365; 366; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32: 367; CHECK-ZVKB: # %bb.0: 368; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 369; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 370; CHECK-ZVKB-NEXT: ret 371 %v = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl) 372 ret <vscale x 4 x i32> %v 373} 374 375define <vscale x 4 x i32> @vp_bswap_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { 376; CHECK-LABEL: vp_bswap_nxv4i32_unmasked: 377; CHECK: # %bb.0: 378; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 379; CHECK-NEXT: vsrl.vi v10, v8, 8 380; CHECK-NEXT: lui a0, 16 381; CHECK-NEXT: vsrl.vi v12, v8, 24 382; CHECK-NEXT: addi a0, a0, -256 383; CHECK-NEXT: vand.vx v10, v10, a0 384; CHECK-NEXT: vor.vv v10, v10, v12 385; CHECK-NEXT: vand.vx v12, v8, a0 386; CHECK-NEXT: vsll.vi v12, v12, 8 387; CHECK-NEXT: vsll.vi v8, v8, 24 388; CHECK-NEXT: vor.vv v8, v8, v12 389; CHECK-NEXT: vor.vv v8, v8, v10 390; CHECK-NEXT: ret 391; 392; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32_unmasked: 393; CHECK-ZVKB: # %bb.0: 394; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 395; CHECK-ZVKB-NEXT: vrev8.v v8, v8 396; CHECK-ZVKB-NEXT: ret 397 %v = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 398 ret <vscale x 4 x i32> %v 399} 400 401declare <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32) 402 403define <vscale x 8 x i32> @vp_bswap_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 404; CHECK-LABEL: vp_bswap_nxv8i32: 405; CHECK: # %bb.0: 406; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 407; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t 408; CHECK-NEXT: lui a0, 16 409; CHECK-NEXT: addi a0, a0, -256 410; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 411; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t 412; CHECK-NEXT: vor.vv v12, v12, v16, v0.t 413; CHECK-NEXT: vand.vx v16, v8, a0, v0.t 414; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t 415; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 416; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 417; CHECK-NEXT: vor.vv v8, v8, v12, v0.t 418; CHECK-NEXT: ret 419; 420; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32: 421; CHECK-ZVKB: # %bb.0: 422; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 423; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 424; CHECK-ZVKB-NEXT: ret 425 %v = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl) 426 ret <vscale x 8 x i32> %v 427} 428 429define <vscale x 8 x i32> @vp_bswap_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { 430; CHECK-LABEL: vp_bswap_nxv8i32_unmasked: 431; CHECK: # %bb.0: 432; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 433; CHECK-NEXT: vsrl.vi v12, v8, 8 434; CHECK-NEXT: lui a0, 16 435; CHECK-NEXT: vsrl.vi v16, v8, 24 436; CHECK-NEXT: addi a0, a0, -256 437; CHECK-NEXT: vand.vx v12, v12, a0 438; CHECK-NEXT: vor.vv v12, v12, v16 439; CHECK-NEXT: vand.vx v16, v8, a0 440; 
CHECK-NEXT: vsll.vi v16, v16, 8 441; CHECK-NEXT: vsll.vi v8, v8, 24 442; CHECK-NEXT: vor.vv v8, v8, v16 443; CHECK-NEXT: vor.vv v8, v8, v12 444; CHECK-NEXT: ret 445; 446; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32_unmasked: 447; CHECK-ZVKB: # %bb.0: 448; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 449; CHECK-ZVKB-NEXT: vrev8.v v8, v8 450; CHECK-ZVKB-NEXT: ret 451 %v = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 452 ret <vscale x 8 x i32> %v 453} 454 455declare <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32) 456 457define <vscale x 16 x i32> @vp_bswap_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 458; CHECK-LABEL: vp_bswap_nxv16i32: 459; CHECK: # %bb.0: 460; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 461; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t 462; CHECK-NEXT: lui a0, 16 463; CHECK-NEXT: addi a0, a0, -256 464; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 465; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t 466; CHECK-NEXT: vor.vv v16, v16, v24, v0.t 467; CHECK-NEXT: vand.vx v24, v8, a0, v0.t 468; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t 469; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 470; CHECK-NEXT: vor.vv v8, v8, v24, v0.t 471; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 472; CHECK-NEXT: ret 473; 474; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32: 475; CHECK-ZVKB: # %bb.0: 476; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 477; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 478; CHECK-ZVKB-NEXT: ret 479 %v = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl) 480 ret <vscale x 16 x i32> %v 481} 482 483define <vscale x 16 x i32> @vp_bswap_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { 484; CHECK-LABEL: vp_bswap_nxv16i32_unmasked: 485; CHECK: # %bb.0: 486; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 487; CHECK-NEXT: vsrl.vi v16, v8, 8 488; CHECK-NEXT: lui a0, 16 489; CHECK-NEXT: vsrl.vi v24, v8, 24 490; CHECK-NEXT: addi a0, a0, -256 491; CHECK-NEXT: vand.vx v16, v16, a0 492; CHECK-NEXT: vor.vv v16, v16, v24 493; CHECK-NEXT: vand.vx v24, v8, a0 494; CHECK-NEXT: vsll.vi v24, v24, 8 495; CHECK-NEXT: vsll.vi v8, v8, 24 496; CHECK-NEXT: vor.vv v8, v8, v24 497; CHECK-NEXT: vor.vv v8, v8, v16 498; CHECK-NEXT: ret 499; 500; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32_unmasked: 501; CHECK-ZVKB: # %bb.0: 502; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 503; CHECK-ZVKB-NEXT: vrev8.v v8, v8 504; CHECK-ZVKB-NEXT: ret 505 %v = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 506 ret <vscale x 16 x i32> %v 507} 508 509declare <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32) 510 511define <vscale x 1 x i64> @vp_bswap_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 512; RV32-LABEL: vp_bswap_nxv1i64: 513; RV32: # %bb.0: 514; RV32-NEXT: addi sp, sp, -16 515; RV32-NEXT: .cfi_def_cfa_offset 16 516; RV32-NEXT: lui a1, 1044480 517; RV32-NEXT: li a2, 56 518; RV32-NEXT: lui a3, 16 519; RV32-NEXT: li a4, 40 520; RV32-NEXT: lui a5, 4080 521; RV32-NEXT: addi a6, sp, 8 522; RV32-NEXT: sw a1, 8(sp) 523; RV32-NEXT: sw zero, 12(sp) 524; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 525; RV32-NEXT: vsll.vx v9, v8, a2, v0.t 526; RV32-NEXT: addi a0, a3, -256 527; RV32-NEXT: vand.vx v10, v8, a0, v0.t 528; RV32-NEXT: vlse64.v v11, (a6), zero 529; RV32-NEXT: vsll.vx v10, v10, a4, v0.t 530; RV32-NEXT: vor.vv v9, v9, v10, v0.t 531; 
RV32-NEXT: vand.vx v10, v8, a5, v0.t 532; RV32-NEXT: vsll.vi v10, v10, 24, v0.t 533; RV32-NEXT: vand.vv v12, v8, v11, v0.t 534; RV32-NEXT: vsll.vi v12, v12, 8, v0.t 535; RV32-NEXT: vor.vv v10, v10, v12, v0.t 536; RV32-NEXT: vor.vv v9, v9, v10, v0.t 537; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t 538; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t 539; RV32-NEXT: vand.vx v12, v12, a0, v0.t 540; RV32-NEXT: vor.vv v10, v12, v10, v0.t 541; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t 542; RV32-NEXT: vand.vx v12, v12, a5, v0.t 543; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 544; RV32-NEXT: vand.vv v8, v8, v11, v0.t 545; RV32-NEXT: vor.vv v8, v8, v12, v0.t 546; RV32-NEXT: vor.vv v8, v8, v10, v0.t 547; RV32-NEXT: vor.vv v8, v9, v8, v0.t 548; RV32-NEXT: addi sp, sp, 16 549; RV32-NEXT: .cfi_def_cfa_offset 0 550; RV32-NEXT: ret 551; 552; RV64-LABEL: vp_bswap_nxv1i64: 553; RV64: # %bb.0: 554; RV64-NEXT: lui a1, 4080 555; RV64-NEXT: li a2, 255 556; RV64-NEXT: li a3, 56 557; RV64-NEXT: lui a4, 16 558; RV64-NEXT: li a5, 40 559; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 560; RV64-NEXT: vand.vx v9, v8, a1, v0.t 561; RV64-NEXT: slli a2, a2, 24 562; RV64-NEXT: addiw a0, a4, -256 563; RV64-NEXT: vsll.vi v9, v9, 24, v0.t 564; RV64-NEXT: vand.vx v10, v8, a2, v0.t 565; RV64-NEXT: vsll.vi v10, v10, 8, v0.t 566; RV64-NEXT: vor.vv v9, v9, v10, v0.t 567; RV64-NEXT: vsll.vx v10, v8, a3, v0.t 568; RV64-NEXT: vand.vx v11, v8, a0, v0.t 569; RV64-NEXT: vsll.vx v11, v11, a5, v0.t 570; RV64-NEXT: vor.vv v10, v10, v11, v0.t 571; RV64-NEXT: vor.vv v9, v10, v9, v0.t 572; RV64-NEXT: vsrl.vx v10, v8, a3, v0.t 573; RV64-NEXT: vsrl.vx v11, v8, a5, v0.t 574; RV64-NEXT: vand.vx v11, v11, a0, v0.t 575; RV64-NEXT: vor.vv v10, v11, v10, v0.t 576; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t 577; RV64-NEXT: vand.vx v11, v11, a1, v0.t 578; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 579; RV64-NEXT: vand.vx v8, v8, a2, v0.t 580; RV64-NEXT: vor.vv v8, v8, v11, v0.t 581; RV64-NEXT: vor.vv v8, v8, v10, v0.t 582; RV64-NEXT: vor.vv v8, v9, v8, v0.t 583; RV64-NEXT: ret 584; 585; CHECK-ZVKB-LABEL: vp_bswap_nxv1i64: 586; CHECK-ZVKB: # %bb.0: 587; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 588; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 589; CHECK-ZVKB-NEXT: ret 590 %v = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) 591 ret <vscale x 1 x i64> %v 592} 593 594define <vscale x 1 x i64> @vp_bswap_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { 595; RV32-LABEL: vp_bswap_nxv1i64_unmasked: 596; RV32: # %bb.0: 597; RV32-NEXT: addi sp, sp, -16 598; RV32-NEXT: .cfi_def_cfa_offset 16 599; RV32-NEXT: lui a1, 1044480 600; RV32-NEXT: li a2, 56 601; RV32-NEXT: lui a3, 16 602; RV32-NEXT: li a4, 40 603; RV32-NEXT: lui a5, 4080 604; RV32-NEXT: addi a6, sp, 8 605; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 606; RV32-NEXT: vsrl.vi v9, v8, 24 607; RV32-NEXT: sw a1, 8(sp) 608; RV32-NEXT: sw zero, 12(sp) 609; RV32-NEXT: vsll.vx v10, v8, a2 610; RV32-NEXT: addi a0, a3, -256 611; RV32-NEXT: vsrl.vx v11, v8, a2 612; RV32-NEXT: vsrl.vx v12, v8, a4 613; RV32-NEXT: vand.vx v13, v8, a0 614; RV32-NEXT: vand.vx v12, v12, a0 615; RV32-NEXT: vor.vv v11, v12, v11 616; RV32-NEXT: vlse64.v v12, (a6), zero 617; RV32-NEXT: vsll.vx v13, v13, a4 618; RV32-NEXT: vor.vv v10, v10, v13 619; RV32-NEXT: vsrl.vi v13, v8, 8 620; RV32-NEXT: vand.vx v9, v9, a5 621; RV32-NEXT: vand.vv v13, v13, v12 622; RV32-NEXT: vor.vv v9, v13, v9 623; RV32-NEXT: vand.vv v12, v8, v12 624; RV32-NEXT: vand.vx v8, v8, a5 625; RV32-NEXT: vsll.vi v8, v8, 24 626; RV32-NEXT: vsll.vi v12, 
v12, 8 627; RV32-NEXT: vor.vv v8, v8, v12 628; RV32-NEXT: vor.vv v8, v10, v8 629; RV32-NEXT: vor.vv v9, v9, v11 630; RV32-NEXT: vor.vv v8, v8, v9 631; RV32-NEXT: addi sp, sp, 16 632; RV32-NEXT: .cfi_def_cfa_offset 0 633; RV32-NEXT: ret 634; 635; RV64-LABEL: vp_bswap_nxv1i64_unmasked: 636; RV64: # %bb.0: 637; RV64-NEXT: lui a1, 4080 638; RV64-NEXT: li a2, 255 639; RV64-NEXT: li a3, 56 640; RV64-NEXT: lui a4, 16 641; RV64-NEXT: li a5, 40 642; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 643; RV64-NEXT: vsrl.vi v9, v8, 24 644; RV64-NEXT: vsrl.vi v10, v8, 8 645; RV64-NEXT: addiw a0, a4, -256 646; RV64-NEXT: vsrl.vx v11, v8, a3 647; RV64-NEXT: vsrl.vx v12, v8, a5 648; RV64-NEXT: vand.vx v12, v12, a0 649; RV64-NEXT: vor.vv v11, v12, v11 650; RV64-NEXT: vand.vx v12, v8, a1 651; RV64-NEXT: slli a2, a2, 24 652; RV64-NEXT: vand.vx v9, v9, a1 653; RV64-NEXT: vsll.vi v12, v12, 24 654; RV64-NEXT: vand.vx v10, v10, a2 655; RV64-NEXT: vor.vv v9, v10, v9 656; RV64-NEXT: vand.vx v10, v8, a2 657; RV64-NEXT: vsll.vi v10, v10, 8 658; RV64-NEXT: vor.vv v10, v12, v10 659; RV64-NEXT: vsll.vx v12, v8, a3 660; RV64-NEXT: vand.vx v8, v8, a0 661; RV64-NEXT: vsll.vx v8, v8, a5 662; RV64-NEXT: vor.vv v8, v12, v8 663; RV64-NEXT: vor.vv v8, v8, v10 664; RV64-NEXT: vor.vv v9, v9, v11 665; RV64-NEXT: vor.vv v8, v8, v9 666; RV64-NEXT: ret 667; 668; CHECK-ZVKB-LABEL: vp_bswap_nxv1i64_unmasked: 669; CHECK-ZVKB: # %bb.0: 670; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 671; CHECK-ZVKB-NEXT: vrev8.v v8, v8 672; CHECK-ZVKB-NEXT: ret 673 %v = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 674 ret <vscale x 1 x i64> %v 675} 676 677declare <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32) 678 679define <vscale x 2 x i64> @vp_bswap_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 680; RV32-LABEL: vp_bswap_nxv2i64: 681; RV32: # %bb.0: 682; RV32-NEXT: addi sp, sp, -16 683; RV32-NEXT: .cfi_def_cfa_offset 16 684; RV32-NEXT: lui a1, 1044480 685; RV32-NEXT: li a2, 56 686; RV32-NEXT: lui a3, 16 687; RV32-NEXT: li a4, 40 688; RV32-NEXT: lui a5, 4080 689; RV32-NEXT: addi a6, sp, 8 690; RV32-NEXT: sw a1, 8(sp) 691; RV32-NEXT: sw zero, 12(sp) 692; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 693; RV32-NEXT: vsll.vx v10, v8, a2, v0.t 694; RV32-NEXT: addi a0, a3, -256 695; RV32-NEXT: vand.vx v12, v8, a0, v0.t 696; RV32-NEXT: vlse64.v v14, (a6), zero 697; RV32-NEXT: vsll.vx v12, v12, a4, v0.t 698; RV32-NEXT: vor.vv v10, v10, v12, v0.t 699; RV32-NEXT: vand.vx v12, v8, a5, v0.t 700; RV32-NEXT: vsll.vi v12, v12, 24, v0.t 701; RV32-NEXT: vand.vv v16, v8, v14, v0.t 702; RV32-NEXT: vsll.vi v16, v16, 8, v0.t 703; RV32-NEXT: vor.vv v12, v12, v16, v0.t 704; RV32-NEXT: vor.vv v10, v10, v12, v0.t 705; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t 706; RV32-NEXT: vsrl.vx v16, v8, a4, v0.t 707; RV32-NEXT: vand.vx v16, v16, a0, v0.t 708; RV32-NEXT: vor.vv v12, v16, v12, v0.t 709; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t 710; RV32-NEXT: vand.vx v16, v16, a5, v0.t 711; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 712; RV32-NEXT: vand.vv v8, v8, v14, v0.t 713; RV32-NEXT: vor.vv v8, v8, v16, v0.t 714; RV32-NEXT: vor.vv v8, v8, v12, v0.t 715; RV32-NEXT: vor.vv v8, v10, v8, v0.t 716; RV32-NEXT: addi sp, sp, 16 717; RV32-NEXT: .cfi_def_cfa_offset 0 718; RV32-NEXT: ret 719; 720; RV64-LABEL: vp_bswap_nxv2i64: 721; RV64: # %bb.0: 722; RV64-NEXT: lui a1, 4080 723; RV64-NEXT: li a2, 255 724; RV64-NEXT: li a3, 56 725; RV64-NEXT: lui a4, 16 726; RV64-NEXT: li 
a5, 40 727; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 728; RV64-NEXT: vand.vx v10, v8, a1, v0.t 729; RV64-NEXT: slli a2, a2, 24 730; RV64-NEXT: addiw a0, a4, -256 731; RV64-NEXT: vsll.vi v10, v10, 24, v0.t 732; RV64-NEXT: vand.vx v12, v8, a2, v0.t 733; RV64-NEXT: vsll.vi v12, v12, 8, v0.t 734; RV64-NEXT: vor.vv v10, v10, v12, v0.t 735; RV64-NEXT: vsll.vx v12, v8, a3, v0.t 736; RV64-NEXT: vand.vx v14, v8, a0, v0.t 737; RV64-NEXT: vsll.vx v14, v14, a5, v0.t 738; RV64-NEXT: vor.vv v12, v12, v14, v0.t 739; RV64-NEXT: vor.vv v10, v12, v10, v0.t 740; RV64-NEXT: vsrl.vx v12, v8, a3, v0.t 741; RV64-NEXT: vsrl.vx v14, v8, a5, v0.t 742; RV64-NEXT: vand.vx v14, v14, a0, v0.t 743; RV64-NEXT: vor.vv v12, v14, v12, v0.t 744; RV64-NEXT: vsrl.vi v14, v8, 24, v0.t 745; RV64-NEXT: vand.vx v14, v14, a1, v0.t 746; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 747; RV64-NEXT: vand.vx v8, v8, a2, v0.t 748; RV64-NEXT: vor.vv v8, v8, v14, v0.t 749; RV64-NEXT: vor.vv v8, v8, v12, v0.t 750; RV64-NEXT: vor.vv v8, v10, v8, v0.t 751; RV64-NEXT: ret 752; 753; CHECK-ZVKB-LABEL: vp_bswap_nxv2i64: 754; CHECK-ZVKB: # %bb.0: 755; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 756; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 757; CHECK-ZVKB-NEXT: ret 758 %v = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl) 759 ret <vscale x 2 x i64> %v 760} 761 762define <vscale x 2 x i64> @vp_bswap_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { 763; RV32-LABEL: vp_bswap_nxv2i64_unmasked: 764; RV32: # %bb.0: 765; RV32-NEXT: addi sp, sp, -16 766; RV32-NEXT: .cfi_def_cfa_offset 16 767; RV32-NEXT: lui a1, 1044480 768; RV32-NEXT: li a2, 56 769; RV32-NEXT: lui a3, 16 770; RV32-NEXT: li a4, 40 771; RV32-NEXT: lui a5, 4080 772; RV32-NEXT: addi a6, sp, 8 773; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 774; RV32-NEXT: vsrl.vi v10, v8, 24 775; RV32-NEXT: sw a1, 8(sp) 776; RV32-NEXT: sw zero, 12(sp) 777; RV32-NEXT: vsll.vx v12, v8, a2 778; RV32-NEXT: addi a0, a3, -256 779; RV32-NEXT: vsrl.vx v14, v8, a2 780; RV32-NEXT: vsrl.vx v16, v8, a4 781; RV32-NEXT: vand.vx v18, v8, a0 782; RV32-NEXT: vand.vx v16, v16, a0 783; RV32-NEXT: vor.vv v14, v16, v14 784; RV32-NEXT: vlse64.v v16, (a6), zero 785; RV32-NEXT: vsll.vx v18, v18, a4 786; RV32-NEXT: vor.vv v12, v12, v18 787; RV32-NEXT: vsrl.vi v18, v8, 8 788; RV32-NEXT: vand.vx v10, v10, a5 789; RV32-NEXT: vand.vv v18, v18, v16 790; RV32-NEXT: vor.vv v10, v18, v10 791; RV32-NEXT: vand.vv v16, v8, v16 792; RV32-NEXT: vand.vx v8, v8, a5 793; RV32-NEXT: vsll.vi v8, v8, 24 794; RV32-NEXT: vsll.vi v16, v16, 8 795; RV32-NEXT: vor.vv v8, v8, v16 796; RV32-NEXT: vor.vv v8, v12, v8 797; RV32-NEXT: vor.vv v10, v10, v14 798; RV32-NEXT: vor.vv v8, v8, v10 799; RV32-NEXT: addi sp, sp, 16 800; RV32-NEXT: .cfi_def_cfa_offset 0 801; RV32-NEXT: ret 802; 803; RV64-LABEL: vp_bswap_nxv2i64_unmasked: 804; RV64: # %bb.0: 805; RV64-NEXT: lui a1, 4080 806; RV64-NEXT: li a2, 255 807; RV64-NEXT: li a3, 56 808; RV64-NEXT: lui a4, 16 809; RV64-NEXT: li a5, 40 810; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 811; RV64-NEXT: vsrl.vi v10, v8, 24 812; RV64-NEXT: vsrl.vi v12, v8, 8 813; RV64-NEXT: addiw a0, a4, -256 814; RV64-NEXT: vsrl.vx v14, v8, a3 815; RV64-NEXT: vsrl.vx v16, v8, a5 816; RV64-NEXT: vand.vx v16, v16, a0 817; RV64-NEXT: vor.vv v14, v16, v14 818; RV64-NEXT: vand.vx v16, v8, a1 819; RV64-NEXT: slli a2, a2, 24 820; RV64-NEXT: vand.vx v10, v10, a1 821; RV64-NEXT: vsll.vi v16, v16, 24 822; RV64-NEXT: vand.vx v12, v12, a2 823; RV64-NEXT: vor.vv v10, v12, v10 824; 
RV64-NEXT: vand.vx v12, v8, a2 825; RV64-NEXT: vsll.vi v12, v12, 8 826; RV64-NEXT: vor.vv v12, v16, v12 827; RV64-NEXT: vsll.vx v16, v8, a3 828; RV64-NEXT: vand.vx v8, v8, a0 829; RV64-NEXT: vsll.vx v8, v8, a5 830; RV64-NEXT: vor.vv v8, v16, v8 831; RV64-NEXT: vor.vv v8, v8, v12 832; RV64-NEXT: vor.vv v10, v10, v14 833; RV64-NEXT: vor.vv v8, v8, v10 834; RV64-NEXT: ret 835; 836; CHECK-ZVKB-LABEL: vp_bswap_nxv2i64_unmasked: 837; CHECK-ZVKB: # %bb.0: 838; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 839; CHECK-ZVKB-NEXT: vrev8.v v8, v8 840; CHECK-ZVKB-NEXT: ret 841 %v = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 842 ret <vscale x 2 x i64> %v 843} 844 845declare <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32) 846 847define <vscale x 4 x i64> @vp_bswap_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 848; RV32-LABEL: vp_bswap_nxv4i64: 849; RV32: # %bb.0: 850; RV32-NEXT: addi sp, sp, -16 851; RV32-NEXT: .cfi_def_cfa_offset 16 852; RV32-NEXT: lui a1, 1044480 853; RV32-NEXT: li a2, 56 854; RV32-NEXT: lui a3, 16 855; RV32-NEXT: li a4, 40 856; RV32-NEXT: lui a5, 4080 857; RV32-NEXT: addi a6, sp, 8 858; RV32-NEXT: sw a1, 8(sp) 859; RV32-NEXT: sw zero, 12(sp) 860; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 861; RV32-NEXT: vsll.vx v16, v8, a2, v0.t 862; RV32-NEXT: addi a0, a3, -256 863; RV32-NEXT: vand.vx v20, v8, a0, v0.t 864; RV32-NEXT: vlse64.v v12, (a6), zero 865; RV32-NEXT: vsll.vx v20, v20, a4, v0.t 866; RV32-NEXT: vor.vv v16, v16, v20, v0.t 867; RV32-NEXT: vand.vx v20, v8, a5, v0.t 868; RV32-NEXT: vsll.vi v20, v20, 24, v0.t 869; RV32-NEXT: vand.vv v24, v8, v12, v0.t 870; RV32-NEXT: vsll.vi v24, v24, 8, v0.t 871; RV32-NEXT: vor.vv v20, v20, v24, v0.t 872; RV32-NEXT: vor.vv v16, v16, v20, v0.t 873; RV32-NEXT: vsrl.vx v20, v8, a2, v0.t 874; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t 875; RV32-NEXT: vand.vx v24, v24, a0, v0.t 876; RV32-NEXT: vor.vv v20, v24, v20, v0.t 877; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t 878; RV32-NEXT: vand.vx v24, v24, a5, v0.t 879; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 880; RV32-NEXT: vand.vv v8, v8, v12, v0.t 881; RV32-NEXT: vor.vv v8, v8, v24, v0.t 882; RV32-NEXT: vor.vv v8, v8, v20, v0.t 883; RV32-NEXT: vor.vv v8, v16, v8, v0.t 884; RV32-NEXT: addi sp, sp, 16 885; RV32-NEXT: .cfi_def_cfa_offset 0 886; RV32-NEXT: ret 887; 888; RV64-LABEL: vp_bswap_nxv4i64: 889; RV64: # %bb.0: 890; RV64-NEXT: lui a1, 4080 891; RV64-NEXT: li a2, 255 892; RV64-NEXT: li a3, 56 893; RV64-NEXT: lui a4, 16 894; RV64-NEXT: li a5, 40 895; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 896; RV64-NEXT: vand.vx v12, v8, a1, v0.t 897; RV64-NEXT: slli a2, a2, 24 898; RV64-NEXT: addiw a0, a4, -256 899; RV64-NEXT: vsll.vi v12, v12, 24, v0.t 900; RV64-NEXT: vand.vx v16, v8, a2, v0.t 901; RV64-NEXT: vsll.vi v16, v16, 8, v0.t 902; RV64-NEXT: vor.vv v12, v12, v16, v0.t 903; RV64-NEXT: vsll.vx v16, v8, a3, v0.t 904; RV64-NEXT: vand.vx v20, v8, a0, v0.t 905; RV64-NEXT: vsll.vx v20, v20, a5, v0.t 906; RV64-NEXT: vor.vv v16, v16, v20, v0.t 907; RV64-NEXT: vor.vv v12, v16, v12, v0.t 908; RV64-NEXT: vsrl.vx v16, v8, a3, v0.t 909; RV64-NEXT: vsrl.vx v20, v8, a5, v0.t 910; RV64-NEXT: vand.vx v20, v20, a0, v0.t 911; RV64-NEXT: vor.vv v16, v20, v16, v0.t 912; RV64-NEXT: vsrl.vi v20, v8, 24, v0.t 913; RV64-NEXT: vand.vx v20, v20, a1, v0.t 914; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 915; RV64-NEXT: vand.vx v8, v8, a2, v0.t 916; RV64-NEXT: vor.vv v8, v8, v20, v0.t 917; RV64-NEXT: vor.vv v8, v8, 
v16, v0.t 918; RV64-NEXT: vor.vv v8, v12, v8, v0.t 919; RV64-NEXT: ret 920; 921; CHECK-ZVKB-LABEL: vp_bswap_nxv4i64: 922; CHECK-ZVKB: # %bb.0: 923; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 924; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 925; CHECK-ZVKB-NEXT: ret 926 %v = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl) 927 ret <vscale x 4 x i64> %v 928} 929 930define <vscale x 4 x i64> @vp_bswap_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { 931; RV32-LABEL: vp_bswap_nxv4i64_unmasked: 932; RV32: # %bb.0: 933; RV32-NEXT: addi sp, sp, -16 934; RV32-NEXT: .cfi_def_cfa_offset 16 935; RV32-NEXT: lui a1, 1044480 936; RV32-NEXT: li a2, 56 937; RV32-NEXT: lui a3, 16 938; RV32-NEXT: li a4, 40 939; RV32-NEXT: lui a5, 4080 940; RV32-NEXT: addi a6, sp, 8 941; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 942; RV32-NEXT: vsrl.vi v12, v8, 24 943; RV32-NEXT: sw a1, 8(sp) 944; RV32-NEXT: sw zero, 12(sp) 945; RV32-NEXT: vsll.vx v16, v8, a2 946; RV32-NEXT: addi a0, a3, -256 947; RV32-NEXT: vsrl.vx v20, v8, a2 948; RV32-NEXT: vsrl.vx v24, v8, a4 949; RV32-NEXT: vand.vx v28, v8, a0 950; RV32-NEXT: vand.vx v24, v24, a0 951; RV32-NEXT: vor.vv v20, v24, v20 952; RV32-NEXT: vlse64.v v24, (a6), zero 953; RV32-NEXT: vsll.vx v28, v28, a4 954; RV32-NEXT: vor.vv v16, v16, v28 955; RV32-NEXT: vsrl.vi v28, v8, 8 956; RV32-NEXT: vand.vx v12, v12, a5 957; RV32-NEXT: vand.vv v28, v28, v24 958; RV32-NEXT: vor.vv v12, v28, v12 959; RV32-NEXT: vand.vv v24, v8, v24 960; RV32-NEXT: vand.vx v8, v8, a5 961; RV32-NEXT: vsll.vi v8, v8, 24 962; RV32-NEXT: vsll.vi v24, v24, 8 963; RV32-NEXT: vor.vv v8, v8, v24 964; RV32-NEXT: vor.vv v8, v16, v8 965; RV32-NEXT: vor.vv v12, v12, v20 966; RV32-NEXT: vor.vv v8, v8, v12 967; RV32-NEXT: addi sp, sp, 16 968; RV32-NEXT: .cfi_def_cfa_offset 0 969; RV32-NEXT: ret 970; 971; RV64-LABEL: vp_bswap_nxv4i64_unmasked: 972; RV64: # %bb.0: 973; RV64-NEXT: lui a1, 4080 974; RV64-NEXT: li a2, 255 975; RV64-NEXT: li a3, 56 976; RV64-NEXT: lui a4, 16 977; RV64-NEXT: li a5, 40 978; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 979; RV64-NEXT: vsrl.vi v12, v8, 24 980; RV64-NEXT: vsrl.vi v16, v8, 8 981; RV64-NEXT: addiw a0, a4, -256 982; RV64-NEXT: vsrl.vx v20, v8, a3 983; RV64-NEXT: vsrl.vx v24, v8, a5 984; RV64-NEXT: vand.vx v24, v24, a0 985; RV64-NEXT: vor.vv v20, v24, v20 986; RV64-NEXT: vand.vx v24, v8, a1 987; RV64-NEXT: slli a2, a2, 24 988; RV64-NEXT: vand.vx v12, v12, a1 989; RV64-NEXT: vsll.vi v24, v24, 24 990; RV64-NEXT: vand.vx v16, v16, a2 991; RV64-NEXT: vor.vv v12, v16, v12 992; RV64-NEXT: vand.vx v16, v8, a2 993; RV64-NEXT: vsll.vi v16, v16, 8 994; RV64-NEXT: vor.vv v16, v24, v16 995; RV64-NEXT: vsll.vx v24, v8, a3 996; RV64-NEXT: vand.vx v8, v8, a0 997; RV64-NEXT: vsll.vx v8, v8, a5 998; RV64-NEXT: vor.vv v8, v24, v8 999; RV64-NEXT: vor.vv v8, v8, v16 1000; RV64-NEXT: vor.vv v12, v12, v20 1001; RV64-NEXT: vor.vv v8, v8, v12 1002; RV64-NEXT: ret 1003; 1004; CHECK-ZVKB-LABEL: vp_bswap_nxv4i64_unmasked: 1005; CHECK-ZVKB: # %bb.0: 1006; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1007; CHECK-ZVKB-NEXT: vrev8.v v8, v8 1008; CHECK-ZVKB-NEXT: ret 1009 %v = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1010 ret <vscale x 4 x i64> %v 1011} 1012 1013declare <vscale x 7 x i64> @llvm.vp.bswap.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32) 1014 1015define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { 
1016; RV32-LABEL: vp_bswap_nxv7i64: 1017; RV32: # %bb.0: 1018; RV32-NEXT: addi sp, sp, -16 1019; RV32-NEXT: .cfi_def_cfa_offset 16 1020; RV32-NEXT: csrr a1, vlenb 1021; RV32-NEXT: li a2, 24 1022; RV32-NEXT: mul a1, a1, a2 1023; RV32-NEXT: sub sp, sp, a1 1024; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 1025; RV32-NEXT: lui a1, 1044480 1026; RV32-NEXT: li a2, 56 1027; RV32-NEXT: lui a3, 16 1028; RV32-NEXT: li a4, 40 1029; RV32-NEXT: addi a5, sp, 8 1030; RV32-NEXT: sw a1, 8(sp) 1031; RV32-NEXT: sw zero, 12(sp) 1032; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1033; RV32-NEXT: vsll.vx v16, v8, a2, v0.t 1034; RV32-NEXT: addi a0, a3, -256 1035; RV32-NEXT: vand.vx v24, v8, a0, v0.t 1036; RV32-NEXT: vsll.vx v24, v24, a4, v0.t 1037; RV32-NEXT: vor.vv v16, v16, v24, v0.t 1038; RV32-NEXT: csrr a1, vlenb 1039; RV32-NEXT: slli a1, a1, 4 1040; RV32-NEXT: add a1, sp, a1 1041; RV32-NEXT: addi a1, a1, 16 1042; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1043; RV32-NEXT: vlse64.v v16, (a5), zero 1044; RV32-NEXT: csrr a1, vlenb 1045; RV32-NEXT: slli a1, a1, 3 1046; RV32-NEXT: add a1, sp, a1 1047; RV32-NEXT: addi a1, a1, 16 1048; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1049; RV32-NEXT: lui a1, 4080 1050; RV32-NEXT: vand.vx v24, v8, a1, v0.t 1051; RV32-NEXT: vsll.vi v24, v24, 24, v0.t 1052; RV32-NEXT: addi a3, sp, 16 1053; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill 1054; RV32-NEXT: vand.vv v24, v8, v16, v0.t 1055; RV32-NEXT: vsll.vi v16, v24, 8, v0.t 1056; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload 1057; RV32-NEXT: vor.vv v16, v24, v16, v0.t 1058; RV32-NEXT: csrr a3, vlenb 1059; RV32-NEXT: slli a3, a3, 4 1060; RV32-NEXT: add a3, sp, a3 1061; RV32-NEXT: addi a3, a3, 16 1062; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload 1063; RV32-NEXT: vor.vv v16, v24, v16, v0.t 1064; RV32-NEXT: csrr a3, vlenb 1065; RV32-NEXT: slli a3, a3, 4 1066; RV32-NEXT: add a3, sp, a3 1067; RV32-NEXT: addi a3, a3, 16 1068; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 1069; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t 1070; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t 1071; RV32-NEXT: vand.vx v24, v24, a0, v0.t 1072; RV32-NEXT: vor.vv v16, v24, v16, v0.t 1073; RV32-NEXT: addi a0, sp, 16 1074; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1075; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t 1076; RV32-NEXT: vand.vx v24, v24, a1, v0.t 1077; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 1078; RV32-NEXT: csrr a0, vlenb 1079; RV32-NEXT: slli a0, a0, 3 1080; RV32-NEXT: add a0, sp, a0 1081; RV32-NEXT: addi a0, a0, 16 1082; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1083; RV32-NEXT: vand.vv v8, v8, v16, v0.t 1084; RV32-NEXT: vor.vv v8, v8, v24, v0.t 1085; RV32-NEXT: addi a0, sp, 16 1086; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1087; RV32-NEXT: vor.vv v8, v8, v16, v0.t 1088; RV32-NEXT: csrr a0, vlenb 1089; RV32-NEXT: slli a0, a0, 4 1090; RV32-NEXT: add a0, sp, a0 1091; RV32-NEXT: addi a0, a0, 16 1092; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1093; RV32-NEXT: vor.vv v8, v16, v8, v0.t 1094; RV32-NEXT: csrr a0, vlenb 1095; RV32-NEXT: li a1, 24 1096; RV32-NEXT: mul a0, a0, a1 1097; RV32-NEXT: add sp, sp, a0 1098; RV32-NEXT: .cfi_def_cfa sp, 16 1099; RV32-NEXT: addi sp, sp, 16 1100; RV32-NEXT: .cfi_def_cfa_offset 0 1101; RV32-NEXT: ret 1102; 1103; RV64-LABEL: vp_bswap_nxv7i64: 1104; RV64: # %bb.0: 1105; RV64-NEXT: addi sp, sp, -16 1106; RV64-NEXT: .cfi_def_cfa_offset 
16 1107; RV64-NEXT: csrr a1, vlenb 1108; RV64-NEXT: slli a1, a1, 3 1109; RV64-NEXT: sub sp, sp, a1 1110; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 1111; RV64-NEXT: lui a1, 4080 1112; RV64-NEXT: li a2, 255 1113; RV64-NEXT: li a3, 56 1114; RV64-NEXT: lui a4, 16 1115; RV64-NEXT: li a5, 40 1116; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1117; RV64-NEXT: vand.vx v16, v8, a1, v0.t 1118; RV64-NEXT: slli a2, a2, 24 1119; RV64-NEXT: addiw a0, a4, -256 1120; RV64-NEXT: vsll.vi v16, v16, 24, v0.t 1121; RV64-NEXT: vand.vx v24, v8, a2, v0.t 1122; RV64-NEXT: vsll.vi v24, v24, 8, v0.t 1123; RV64-NEXT: vor.vv v16, v16, v24, v0.t 1124; RV64-NEXT: addi a4, sp, 16 1125; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 1126; RV64-NEXT: vsll.vx v24, v8, a3, v0.t 1127; RV64-NEXT: vand.vx v16, v8, a0, v0.t 1128; RV64-NEXT: vsll.vx v16, v16, a5, v0.t 1129; RV64-NEXT: vor.vv v16, v24, v16, v0.t 1130; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload 1131; RV64-NEXT: vor.vv v16, v16, v24, v0.t 1132; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 1133; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t 1134; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t 1135; RV64-NEXT: vand.vx v16, v16, a0, v0.t 1136; RV64-NEXT: vor.vv v24, v16, v24, v0.t 1137; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t 1138; RV64-NEXT: vand.vx v16, v16, a1, v0.t 1139; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 1140; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1141; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1142; RV64-NEXT: vor.vv v8, v8, v24, v0.t 1143; RV64-NEXT: addi a0, sp, 16 1144; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1145; RV64-NEXT: vor.vv v8, v16, v8, v0.t 1146; RV64-NEXT: csrr a0, vlenb 1147; RV64-NEXT: slli a0, a0, 3 1148; RV64-NEXT: add sp, sp, a0 1149; RV64-NEXT: .cfi_def_cfa sp, 16 1150; RV64-NEXT: addi sp, sp, 16 1151; RV64-NEXT: .cfi_def_cfa_offset 0 1152; RV64-NEXT: ret 1153; 1154; CHECK-ZVKB-LABEL: vp_bswap_nxv7i64: 1155; CHECK-ZVKB: # %bb.0: 1156; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1157; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 1158; CHECK-ZVKB-NEXT: ret 1159 %v = call <vscale x 7 x i64> @llvm.vp.bswap.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl) 1160 ret <vscale x 7 x i64> %v 1161} 1162 1163define <vscale x 7 x i64> @vp_bswap_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) { 1164; RV32-LABEL: vp_bswap_nxv7i64_unmasked: 1165; RV32: # %bb.0: 1166; RV32-NEXT: addi sp, sp, -16 1167; RV32-NEXT: .cfi_def_cfa_offset 16 1168; RV32-NEXT: csrr a1, vlenb 1169; RV32-NEXT: slli a1, a1, 4 1170; RV32-NEXT: sub sp, sp, a1 1171; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 1172; RV32-NEXT: lui a1, 1044480 1173; RV32-NEXT: li a2, 56 1174; RV32-NEXT: lui a3, 16 1175; RV32-NEXT: li a4, 40 1176; RV32-NEXT: lui a5, 4080 1177; RV32-NEXT: addi a6, sp, 8 1178; RV32-NEXT: sw a1, 8(sp) 1179; RV32-NEXT: sw zero, 12(sp) 1180; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1181; RV32-NEXT: vsll.vx v24, v8, a2 1182; RV32-NEXT: addi a0, a3, -256 1183; RV32-NEXT: vsrl.vx v16, v8, a2 1184; RV32-NEXT: vsrl.vx v0, v8, a4 1185; RV32-NEXT: vand.vx v0, v0, a0 1186; RV32-NEXT: vor.vv v16, v0, v16 1187; RV32-NEXT: csrr a1, vlenb 1188; RV32-NEXT: slli a1, a1, 3 1189; RV32-NEXT: add a1, sp, a1 1190; RV32-NEXT: addi a1, a1, 16 1191; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1192; RV32-NEXT: vand.vx v0, v8, a0 1193; RV32-NEXT: vsll.vx v0, v0, a4 
1194; RV32-NEXT: vor.vv v16, v24, v0 1195; RV32-NEXT: addi a0, sp, 16 1196; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1197; RV32-NEXT: vlse64.v v0, (a6), zero 1198; RV32-NEXT: vsrl.vi v16, v8, 24 1199; RV32-NEXT: vand.vx v16, v16, a5 1200; RV32-NEXT: vsrl.vi v24, v8, 8 1201; RV32-NEXT: vand.vv v24, v24, v0 1202; RV32-NEXT: vor.vv v16, v24, v16 1203; RV32-NEXT: vand.vv v24, v8, v0 1204; RV32-NEXT: vand.vx v8, v8, a5 1205; RV32-NEXT: vsll.vi v8, v8, 24 1206; RV32-NEXT: vsll.vi v24, v24, 8 1207; RV32-NEXT: vor.vv v8, v8, v24 1208; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 1209; RV32-NEXT: vor.vv v8, v24, v8 1210; RV32-NEXT: csrr a0, vlenb 1211; RV32-NEXT: slli a0, a0, 3 1212; RV32-NEXT: add a0, sp, a0 1213; RV32-NEXT: addi a0, a0, 16 1214; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 1215; RV32-NEXT: vor.vv v16, v16, v24 1216; RV32-NEXT: vor.vv v8, v8, v16 1217; RV32-NEXT: csrr a0, vlenb 1218; RV32-NEXT: slli a0, a0, 4 1219; RV32-NEXT: add sp, sp, a0 1220; RV32-NEXT: .cfi_def_cfa sp, 16 1221; RV32-NEXT: addi sp, sp, 16 1222; RV32-NEXT: .cfi_def_cfa_offset 0 1223; RV32-NEXT: ret 1224; 1225; RV64-LABEL: vp_bswap_nxv7i64_unmasked: 1226; RV64: # %bb.0: 1227; RV64-NEXT: addi sp, sp, -16 1228; RV64-NEXT: .cfi_def_cfa_offset 16 1229; RV64-NEXT: csrr a1, vlenb 1230; RV64-NEXT: slli a1, a1, 3 1231; RV64-NEXT: sub sp, sp, a1 1232; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 1233; RV64-NEXT: lui a1, 4080 1234; RV64-NEXT: li a2, 255 1235; RV64-NEXT: li a3, 56 1236; RV64-NEXT: lui a4, 16 1237; RV64-NEXT: li a5, 40 1238; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1239; RV64-NEXT: vsrl.vi v24, v8, 24 1240; RV64-NEXT: addiw a0, a4, -256 1241; RV64-NEXT: vsrl.vx v16, v8, a3 1242; RV64-NEXT: vsrl.vx v0, v8, a5 1243; RV64-NEXT: vand.vx v0, v0, a0 1244; RV64-NEXT: vor.vv v16, v0, v16 1245; RV64-NEXT: addi a4, sp, 16 1246; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 1247; RV64-NEXT: vsrl.vi v0, v8, 8 1248; RV64-NEXT: slli a2, a2, 24 1249; RV64-NEXT: vand.vx v24, v24, a1 1250; RV64-NEXT: vand.vx v0, v0, a2 1251; RV64-NEXT: vor.vv v24, v0, v24 1252; RV64-NEXT: vand.vx v0, v8, a1 1253; RV64-NEXT: vsll.vi v0, v0, 24 1254; RV64-NEXT: vand.vx v16, v8, a2 1255; RV64-NEXT: vsll.vi v16, v16, 8 1256; RV64-NEXT: vor.vv v16, v0, v16 1257; RV64-NEXT: vsll.vx v0, v8, a3 1258; RV64-NEXT: vand.vx v8, v8, a0 1259; RV64-NEXT: vsll.vx v8, v8, a5 1260; RV64-NEXT: vor.vv v8, v0, v8 1261; RV64-NEXT: vor.vv v8, v8, v16 1262; RV64-NEXT: addi a0, sp, 16 1263; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1264; RV64-NEXT: vor.vv v16, v24, v16 1265; RV64-NEXT: vor.vv v8, v8, v16 1266; RV64-NEXT: csrr a0, vlenb 1267; RV64-NEXT: slli a0, a0, 3 1268; RV64-NEXT: add sp, sp, a0 1269; RV64-NEXT: .cfi_def_cfa sp, 16 1270; RV64-NEXT: addi sp, sp, 16 1271; RV64-NEXT: .cfi_def_cfa_offset 0 1272; RV64-NEXT: ret 1273; 1274; CHECK-ZVKB-LABEL: vp_bswap_nxv7i64_unmasked: 1275; CHECK-ZVKB: # %bb.0: 1276; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1277; CHECK-ZVKB-NEXT: vrev8.v v8, v8 1278; CHECK-ZVKB-NEXT: ret 1279 %v = call <vscale x 7 x i64> @llvm.vp.bswap.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl) 1280 ret <vscale x 7 x i64> %v 1281} 1282 1283declare <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32) 1284 1285define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1286; 
RV32-LABEL: vp_bswap_nxv8i64: 1287; RV32: # %bb.0: 1288; RV32-NEXT: addi sp, sp, -16 1289; RV32-NEXT: .cfi_def_cfa_offset 16 1290; RV32-NEXT: csrr a1, vlenb 1291; RV32-NEXT: li a2, 24 1292; RV32-NEXT: mul a1, a1, a2 1293; RV32-NEXT: sub sp, sp, a1 1294; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 1295; RV32-NEXT: lui a1, 1044480 1296; RV32-NEXT: li a2, 56 1297; RV32-NEXT: lui a3, 16 1298; RV32-NEXT: li a4, 40 1299; RV32-NEXT: addi a5, sp, 8 1300; RV32-NEXT: sw a1, 8(sp) 1301; RV32-NEXT: sw zero, 12(sp) 1302; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1303; RV32-NEXT: vsll.vx v16, v8, a2, v0.t 1304; RV32-NEXT: addi a0, a3, -256 1305; RV32-NEXT: vand.vx v24, v8, a0, v0.t 1306; RV32-NEXT: vsll.vx v24, v24, a4, v0.t 1307; RV32-NEXT: vor.vv v16, v16, v24, v0.t 1308; RV32-NEXT: csrr a1, vlenb 1309; RV32-NEXT: slli a1, a1, 4 1310; RV32-NEXT: add a1, sp, a1 1311; RV32-NEXT: addi a1, a1, 16 1312; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1313; RV32-NEXT: vlse64.v v16, (a5), zero 1314; RV32-NEXT: csrr a1, vlenb 1315; RV32-NEXT: slli a1, a1, 3 1316; RV32-NEXT: add a1, sp, a1 1317; RV32-NEXT: addi a1, a1, 16 1318; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1319; RV32-NEXT: lui a1, 4080 1320; RV32-NEXT: vand.vx v24, v8, a1, v0.t 1321; RV32-NEXT: vsll.vi v24, v24, 24, v0.t 1322; RV32-NEXT: addi a3, sp, 16 1323; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill 1324; RV32-NEXT: vand.vv v24, v8, v16, v0.t 1325; RV32-NEXT: vsll.vi v16, v24, 8, v0.t 1326; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload 1327; RV32-NEXT: vor.vv v16, v24, v16, v0.t 1328; RV32-NEXT: csrr a3, vlenb 1329; RV32-NEXT: slli a3, a3, 4 1330; RV32-NEXT: add a3, sp, a3 1331; RV32-NEXT: addi a3, a3, 16 1332; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload 1333; RV32-NEXT: vor.vv v16, v24, v16, v0.t 1334; RV32-NEXT: csrr a3, vlenb 1335; RV32-NEXT: slli a3, a3, 4 1336; RV32-NEXT: add a3, sp, a3 1337; RV32-NEXT: addi a3, a3, 16 1338; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 1339; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t 1340; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t 1341; RV32-NEXT: vand.vx v24, v24, a0, v0.t 1342; RV32-NEXT: vor.vv v16, v24, v16, v0.t 1343; RV32-NEXT: addi a0, sp, 16 1344; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1345; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t 1346; RV32-NEXT: vand.vx v24, v24, a1, v0.t 1347; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 1348; RV32-NEXT: csrr a0, vlenb 1349; RV32-NEXT: slli a0, a0, 3 1350; RV32-NEXT: add a0, sp, a0 1351; RV32-NEXT: addi a0, a0, 16 1352; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1353; RV32-NEXT: vand.vv v8, v8, v16, v0.t 1354; RV32-NEXT: vor.vv v8, v8, v24, v0.t 1355; RV32-NEXT: addi a0, sp, 16 1356; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1357; RV32-NEXT: vor.vv v8, v8, v16, v0.t 1358; RV32-NEXT: csrr a0, vlenb 1359; RV32-NEXT: slli a0, a0, 4 1360; RV32-NEXT: add a0, sp, a0 1361; RV32-NEXT: addi a0, a0, 16 1362; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1363; RV32-NEXT: vor.vv v8, v16, v8, v0.t 1364; RV32-NEXT: csrr a0, vlenb 1365; RV32-NEXT: li a1, 24 1366; RV32-NEXT: mul a0, a0, a1 1367; RV32-NEXT: add sp, sp, a0 1368; RV32-NEXT: .cfi_def_cfa sp, 16 1369; RV32-NEXT: addi sp, sp, 16 1370; RV32-NEXT: .cfi_def_cfa_offset 0 1371; RV32-NEXT: ret 1372; 1373; RV64-LABEL: vp_bswap_nxv8i64: 1374; RV64: # %bb.0: 1375; RV64-NEXT: addi sp, sp, -16 1376; RV64-NEXT: .cfi_def_cfa_offset 16 
1377; RV64-NEXT: csrr a1, vlenb 1378; RV64-NEXT: slli a1, a1, 3 1379; RV64-NEXT: sub sp, sp, a1 1380; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 1381; RV64-NEXT: lui a1, 4080 1382; RV64-NEXT: li a2, 255 1383; RV64-NEXT: li a3, 56 1384; RV64-NEXT: lui a4, 16 1385; RV64-NEXT: li a5, 40 1386; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1387; RV64-NEXT: vand.vx v16, v8, a1, v0.t 1388; RV64-NEXT: slli a2, a2, 24 1389; RV64-NEXT: addiw a0, a4, -256 1390; RV64-NEXT: vsll.vi v16, v16, 24, v0.t 1391; RV64-NEXT: vand.vx v24, v8, a2, v0.t 1392; RV64-NEXT: vsll.vi v24, v24, 8, v0.t 1393; RV64-NEXT: vor.vv v16, v16, v24, v0.t 1394; RV64-NEXT: addi a4, sp, 16 1395; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 1396; RV64-NEXT: vsll.vx v24, v8, a3, v0.t 1397; RV64-NEXT: vand.vx v16, v8, a0, v0.t 1398; RV64-NEXT: vsll.vx v16, v16, a5, v0.t 1399; RV64-NEXT: vor.vv v16, v24, v16, v0.t 1400; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload 1401; RV64-NEXT: vor.vv v16, v16, v24, v0.t 1402; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 1403; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t 1404; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t 1405; RV64-NEXT: vand.vx v16, v16, a0, v0.t 1406; RV64-NEXT: vor.vv v24, v16, v24, v0.t 1407; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t 1408; RV64-NEXT: vand.vx v16, v16, a1, v0.t 1409; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 1410; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1411; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1412; RV64-NEXT: vor.vv v8, v8, v24, v0.t 1413; RV64-NEXT: addi a0, sp, 16 1414; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1415; RV64-NEXT: vor.vv v8, v16, v8, v0.t 1416; RV64-NEXT: csrr a0, vlenb 1417; RV64-NEXT: slli a0, a0, 3 1418; RV64-NEXT: add sp, sp, a0 1419; RV64-NEXT: .cfi_def_cfa sp, 16 1420; RV64-NEXT: addi sp, sp, 16 1421; RV64-NEXT: .cfi_def_cfa_offset 0 1422; RV64-NEXT: ret 1423; 1424; CHECK-ZVKB-LABEL: vp_bswap_nxv8i64: 1425; CHECK-ZVKB: # %bb.0: 1426; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1427; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t 1428; CHECK-ZVKB-NEXT: ret 1429 %v = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl) 1430 ret <vscale x 8 x i64> %v 1431} 1432 1433define <vscale x 8 x i64> @vp_bswap_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { 1434; RV32-LABEL: vp_bswap_nxv8i64_unmasked: 1435; RV32: # %bb.0: 1436; RV32-NEXT: addi sp, sp, -16 1437; RV32-NEXT: .cfi_def_cfa_offset 16 1438; RV32-NEXT: csrr a1, vlenb 1439; RV32-NEXT: slli a1, a1, 4 1440; RV32-NEXT: sub sp, sp, a1 1441; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 1442; RV32-NEXT: lui a1, 1044480 1443; RV32-NEXT: li a2, 56 1444; RV32-NEXT: lui a3, 16 1445; RV32-NEXT: li a4, 40 1446; RV32-NEXT: lui a5, 4080 1447; RV32-NEXT: addi a6, sp, 8 1448; RV32-NEXT: sw a1, 8(sp) 1449; RV32-NEXT: sw zero, 12(sp) 1450; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1451; RV32-NEXT: vsll.vx v24, v8, a2 1452; RV32-NEXT: addi a0, a3, -256 1453; RV32-NEXT: vsrl.vx v16, v8, a2 1454; RV32-NEXT: vsrl.vx v0, v8, a4 1455; RV32-NEXT: vand.vx v0, v0, a0 1456; RV32-NEXT: vor.vv v16, v0, v16 1457; RV32-NEXT: csrr a1, vlenb 1458; RV32-NEXT: slli a1, a1, 3 1459; RV32-NEXT: add a1, sp, a1 1460; RV32-NEXT: addi a1, a1, 16 1461; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1462; RV32-NEXT: vand.vx v0, v8, a0 1463; RV32-NEXT: vsll.vx v0, v0, a4 
1464; RV32-NEXT: vor.vv v16, v24, v0 1465; RV32-NEXT: addi a0, sp, 16 1466; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1467; RV32-NEXT: vlse64.v v0, (a6), zero 1468; RV32-NEXT: vsrl.vi v16, v8, 24 1469; RV32-NEXT: vand.vx v16, v16, a5 1470; RV32-NEXT: vsrl.vi v24, v8, 8 1471; RV32-NEXT: vand.vv v24, v24, v0 1472; RV32-NEXT: vor.vv v16, v24, v16 1473; RV32-NEXT: vand.vv v24, v8, v0 1474; RV32-NEXT: vand.vx v8, v8, a5 1475; RV32-NEXT: vsll.vi v8, v8, 24 1476; RV32-NEXT: vsll.vi v24, v24, 8 1477; RV32-NEXT: vor.vv v8, v8, v24 1478; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 1479; RV32-NEXT: vor.vv v8, v24, v8 1480; RV32-NEXT: csrr a0, vlenb 1481; RV32-NEXT: slli a0, a0, 3 1482; RV32-NEXT: add a0, sp, a0 1483; RV32-NEXT: addi a0, a0, 16 1484; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 1485; RV32-NEXT: vor.vv v16, v16, v24 1486; RV32-NEXT: vor.vv v8, v8, v16 1487; RV32-NEXT: csrr a0, vlenb 1488; RV32-NEXT: slli a0, a0, 4 1489; RV32-NEXT: add sp, sp, a0 1490; RV32-NEXT: .cfi_def_cfa sp, 16 1491; RV32-NEXT: addi sp, sp, 16 1492; RV32-NEXT: .cfi_def_cfa_offset 0 1493; RV32-NEXT: ret 1494; 1495; RV64-LABEL: vp_bswap_nxv8i64_unmasked: 1496; RV64: # %bb.0: 1497; RV64-NEXT: addi sp, sp, -16 1498; RV64-NEXT: .cfi_def_cfa_offset 16 1499; RV64-NEXT: csrr a1, vlenb 1500; RV64-NEXT: slli a1, a1, 3 1501; RV64-NEXT: sub sp, sp, a1 1502; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 1503; RV64-NEXT: lui a1, 4080 1504; RV64-NEXT: li a2, 255 1505; RV64-NEXT: li a3, 56 1506; RV64-NEXT: lui a4, 16 1507; RV64-NEXT: li a5, 40 1508; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1509; RV64-NEXT: vsrl.vi v24, v8, 24 1510; RV64-NEXT: addiw a0, a4, -256 1511; RV64-NEXT: vsrl.vx v16, v8, a3 1512; RV64-NEXT: vsrl.vx v0, v8, a5 1513; RV64-NEXT: vand.vx v0, v0, a0 1514; RV64-NEXT: vor.vv v16, v0, v16 1515; RV64-NEXT: addi a4, sp, 16 1516; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 1517; RV64-NEXT: vsrl.vi v0, v8, 8 1518; RV64-NEXT: slli a2, a2, 24 1519; RV64-NEXT: vand.vx v24, v24, a1 1520; RV64-NEXT: vand.vx v0, v0, a2 1521; RV64-NEXT: vor.vv v24, v0, v24 1522; RV64-NEXT: vand.vx v0, v8, a1 1523; RV64-NEXT: vsll.vi v0, v0, 24 1524; RV64-NEXT: vand.vx v16, v8, a2 1525; RV64-NEXT: vsll.vi v16, v16, 8 1526; RV64-NEXT: vor.vv v16, v0, v16 1527; RV64-NEXT: vsll.vx v0, v8, a3 1528; RV64-NEXT: vand.vx v8, v8, a0 1529; RV64-NEXT: vsll.vx v8, v8, a5 1530; RV64-NEXT: vor.vv v8, v0, v8 1531; RV64-NEXT: vor.vv v8, v8, v16 1532; RV64-NEXT: addi a0, sp, 16 1533; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1534; RV64-NEXT: vor.vv v16, v24, v16 1535; RV64-NEXT: vor.vv v8, v8, v16 1536; RV64-NEXT: csrr a0, vlenb 1537; RV64-NEXT: slli a0, a0, 3 1538; RV64-NEXT: add sp, sp, a0 1539; RV64-NEXT: .cfi_def_cfa sp, 16 1540; RV64-NEXT: addi sp, sp, 16 1541; RV64-NEXT: .cfi_def_cfa_offset 0 1542; RV64-NEXT: ret 1543; 1544; CHECK-ZVKB-LABEL: vp_bswap_nxv8i64_unmasked: 1545; CHECK-ZVKB: # %bb.0: 1546; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1547; CHECK-ZVKB-NEXT: vrev8.v v8, v8 1548; CHECK-ZVKB-NEXT: ret 1549 %v = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1550 ret <vscale x 8 x i64> %v 1551} 1552 1553; Test splitting. Use i16 version for easier check. 
declare <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i1>, i32)

define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_nxv64i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv64i16:
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-ZVKB-NEXT: vmv1r.v v24, v0
; CHECK-ZVKB-NEXT: csrr a1, vlenb
; CHECK-ZVKB-NEXT: srli a2, a1, 1
; CHECK-ZVKB-NEXT: slli a1, a1, 2
; CHECK-ZVKB-NEXT: vslidedown.vx v0, v0, a2
; CHECK-ZVKB-NEXT: sub a2, a0, a1
; CHECK-ZVKB-NEXT: sltu a3, a0, a2
; CHECK-ZVKB-NEXT: addi a3, a3, -1
; CHECK-ZVKB-NEXT: and a2, a3, a2
; CHECK-ZVKB-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v16, v16, v0.t
; CHECK-ZVKB-NEXT: bltu a0, a1, .LBB32_2
; CHECK-ZVKB-NEXT: # %bb.1:
; CHECK-ZVKB-NEXT: mv a0, a1
; CHECK-ZVKB-NEXT: .LBB32_2:
; CHECK-ZVKB-NEXT: vmv1r.v v0, v24
; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
; CHECK-ZVKB-NEXT: ret
  %v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
  ret <vscale x 64 x i16> %v
}

define <vscale x 64 x i16> @vp_bswap_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bswap_nxv64i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v16, 8
; CHECK-NEXT: vsll.vi v16, v16, 8
; CHECK-NEXT: vor.vv v16, v16, v24
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v24
; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv64i16_unmasked:
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: csrr a1, vlenb
; CHECK-ZVKB-NEXT: slli a1, a1, 2
; CHECK-ZVKB-NEXT: sub a2, a0, a1
; CHECK-ZVKB-NEXT: sltu a3, a0, a2
; CHECK-ZVKB-NEXT: addi a3, a3, -1
; CHECK-ZVKB-NEXT: and a2, a3, a2
; CHECK-ZVKB-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v16, v16
; CHECK-ZVKB-NEXT: bltu a0, a1, .LBB33_2
; CHECK-ZVKB-NEXT: # %bb.1:
; CHECK-ZVKB-NEXT: mv a0, a1
; CHECK-ZVKB-NEXT: .LBB33_2:
; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v8, v8
; CHECK-ZVKB-NEXT: ret
  %v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 64 x i16> %v
}

; Test promotion.
declare <vscale x 1 x i48> @llvm.vp.bswap.nxv1i48(<vscale x 1 x i48>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i48> @vp_bswap_nxv1i48(<vscale x 1 x i48> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bswap_nxv1i48:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: li a2, 56
; RV32-NEXT: lui a3, 16
; RV32-NEXT: li a4, 40
; RV32-NEXT: lui a5, 4080
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a2, v0.t
; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vand.vx v10, v8, a0, v0.t
; RV32-NEXT: vlse64.v v11, (a6), zero
; RV32-NEXT: vsll.vx v10, v10, a4, v0.t
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vx v10, v8, a5, v0.t
; RV32-NEXT: vsll.vi v10, v10, 24, v0.t
; RV32-NEXT: vand.vv v12, v8, v11, v0.t
; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t
; RV32-NEXT: vand.vx v12, v12, a0, v0.t
; RV32-NEXT: vor.vv v10, v12, v10, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
; RV32-NEXT: vand.vx v12, v12, a5, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: vand.vv v8, v8, v11, v0.t
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vor.vv v8, v9, v8, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 16, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bswap_nxv1i48:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: li a2, 255
; RV64-NEXT: li a3, 56
; RV64-NEXT: lui a4, 16
; RV64-NEXT: li a5, 40
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vand.vx v9, v8, a1, v0.t
; RV64-NEXT: slli a2, a2, 24
; RV64-NEXT: addiw a0, a4, -256
; RV64-NEXT: vsll.vi v9, v9, 24, v0.t
; RV64-NEXT: vand.vx v10, v8, a2, v0.t
; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
; RV64-NEXT: vor.vv v9, v9, v10, v0.t
; RV64-NEXT: vsll.vx v10, v8, a3, v0.t
; RV64-NEXT: vand.vx v11, v8, a0, v0.t
; RV64-NEXT: vsll.vx v11, v11, a5, v0.t
; RV64-NEXT: vor.vv v10, v10, v11, v0.t
; RV64-NEXT: vor.vv v9, v10, v9, v0.t
; RV64-NEXT: vsrl.vx v10, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v11, v8, a5, v0.t
; RV64-NEXT: vand.vx v11, v11, a0, v0.t
; RV64-NEXT: vor.vv v10, v11, v10, v0.t
; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t
; RV64-NEXT: vand.vx v11, v11, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vor.vv v8, v8, v11, v0.t
; RV64-NEXT: vor.vv v8, v8, v10, v0.t
; RV64-NEXT: vor.vv v8, v9, v8, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 16, v0.t
; RV64-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv1i48:
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVKB-NEXT: vrev8.v v8, v8, v0.t
; CHECK-ZVKB-NEXT: vsrl.vi v8, v8, 16, v0.t
; CHECK-ZVKB-NEXT: ret
  %v = call <vscale x 1 x i48> @llvm.vp.bswap.nxv1i48(<vscale x 1 x i48> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i48> %v
}