; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB

define <vscale x 1 x i16> @bswap_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-LABEL: bswap_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bswap_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-LABEL: bswap_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bswap_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-LABEL: bswap_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bswap_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-LABEL: bswap_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bswap_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-LABEL: bswap_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv16i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bswap_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: bswap_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv32i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16>)

define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-LABEL: bswap_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-LABEL: bswap_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-LABEL: bswap_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsll.vi v12, v12, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-LABEL: bswap_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsll.vi v16, v16, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bswap_nxv16i32(<vscale x 16 x i32> %va) {
; CHECK-LABEL: bswap_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v24, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vor.vv v16, v16, v24
; CHECK-NEXT:    vand.vx v24, v8, a0
; CHECK-NEXT:    vsll.vi v24, v24, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v24
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv16i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)

define <vscale x 1 x i64> @bswap_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bswap_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v11, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v12, v8, a1
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vlse64.v v13, (a5), zero
; RV32-NEXT:    vor.vv v10, v11, v10
; RV32-NEXT:    vand.vx v11, v8, a0
; RV32-NEXT:    vsll.vx v11, v11, a2
; RV32-NEXT:    vor.vv v11, v12, v11
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vx v9, v9, a4
; RV32-NEXT:    vand.vv v12, v12, v13
; RV32-NEXT:    vor.vv v9, v12, v9
; RV32-NEXT:    vand.vv v12, v8, v13
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v12, v12, 8
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v11, v8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vsrl.vx v11, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v11, v11, a2
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v9, v9, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v11, v11, a4
; RV64-NEXT:    vor.vv v9, v11, v9
; RV64-NEXT:    vand.vx v11, v8, a3
; RV64-NEXT:    vsll.vi v11, v11, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vand.vx v10, v8, a4
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bswap_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bswap_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    vsrl.vx v14, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v16, v8, a1
; RV32-NEXT:    vand.vx v14, v14, a0
; RV32-NEXT:    vlse64.v v18, (a5), zero
; RV32-NEXT:    vor.vv v12, v14, v12
; RV32-NEXT:    vand.vx v14, v8, a0
; RV32-NEXT:    vsll.vx v14, v14, a2
; RV32-NEXT:    vor.vv v14, v16, v14
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vand.vx v10, v10, a4
; RV32-NEXT:    vand.vv v16, v16, v18
; RV32-NEXT:    vor.vv v10, v16, v10
; RV32-NEXT:    vand.vv v16, v8, v18
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v10, v10, v12
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v14, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    vsrl.vx v14, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v14, v14, a2
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v14, v14, a4
; RV64-NEXT:    vor.vv v10, v14, v10
; RV64-NEXT:    vand.vx v14, v8, a3
; RV64-NEXT:    vsll.vi v14, v14, 24
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vand.vx v12, v8, a4
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bswap_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bswap_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v20, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v24, v8, a1
; RV32-NEXT:    vand.vx v20, v20, a0
; RV32-NEXT:    vlse64.v v28, (a5), zero
; RV32-NEXT:    vor.vv v16, v20, v16
; RV32-NEXT:    vand.vx v20, v8, a0
; RV32-NEXT:    vsll.vx v20, v20, a2
; RV32-NEXT:    vor.vv v20, v24, v20
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vx v12, v12, a4
; RV32-NEXT:    vand.vv v24, v24, v28
; RV32-NEXT:    vor.vv v12, v24, v12
; RV32-NEXT:    vand.vv v24, v8, v28
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v24, v24, 8
; RV32-NEXT:    vor.vv v12, v12, v16
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v20, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vsrl.vx v20, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v20, v20, a2
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v20, v20, a4
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vand.vx v20, v8, a3
; RV64-NEXT:    vsll.vi v20, v20, 24
; RV64-NEXT:    vor.vv v12, v12, v16
; RV64-NEXT:    vand.vx v16, v8, a4
; RV64-NEXT:    vsll.vi v16, v16, 8
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsll.vx v20, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)

define <vscale x 8 x i64> @bswap_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bswap_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v24, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v0, v8, a1
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsll.vx v16, v16, a2
; RV32-NEXT:    vor.vv v16, v0, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v0, (a5), zero
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v16, v24
; RV32-NEXT:    vand.vv v16, v8, v0
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vsrl.vx v0, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v0, v0, a2
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v0, v0, a4
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vand.vx v0, v8, a3
; RV64-NEXT:    vsll.vi v0, v0, 24
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vand.vx v24, v8, a4
; RV64-NEXT:    vsll.vi v24, v24, 8
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsll.vx v0, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)