; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB

define void @bswap_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVKB-LABEL: bswap_v8i16:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVKB-NEXT:    vle16.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse16.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)

define void @bswap_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -256
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a1
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVKB-LABEL: bswap_v4i32:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVKB-NEXT:    vle32.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse32.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

define void @bswap_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: bswap_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    lui a4, 16
; RV32-NEXT:    lui a5, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a1, a4, -256
; RV32-NEXT:    vlse64.v v9, (a6), zero
; RV32-NEXT:    vsrl.vx v10, v8, a2
; RV32-NEXT:    vsrl.vx v11, v8, a3
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    vsll.vx v13, v8, a2
; RV32-NEXT:    vand.vx v11, v11, a1
; RV32-NEXT:    vor.vv v10, v11, v10
; RV32-NEXT:    vand.vx v11, v8, a1
; RV32-NEXT:    vsll.vx v11, v11, a3
; RV32-NEXT:    vor.vv v11, v13, v11
; RV32-NEXT:    vsrl.vi v13, v8, 8
; RV32-NEXT:    vand.vx v12, v12, a5
; RV32-NEXT:    vand.vv v13, v13, v9
; RV32-NEXT:    vor.vv v12, v13, v12
; RV32-NEXT:    vand.vv v9, v8, v9
; RV32-NEXT:    vand.vx v8, v8, a5
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v9, v9, 8
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vor.vv v8, v11, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a2, 40
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    lui a4, 4080
; RV64-NEXT:    li a5, 255
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    slli a5, a5, 24
; RV64-NEXT:    vsrl.vx v9, v8, a1
; RV64-NEXT:    vsrl.vx v10, v8, a2
; RV64-NEXT:    vsrl.vi v11, v8, 24
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vand.vx v10, v8, a5
; RV64-NEXT:    vand.vx v11, v11, a4
; RV64-NEXT:    vand.vx v12, v12, a5
; RV64-NEXT:    vor.vv v11, v12, v11
; RV64-NEXT:    vand.vx v12, v8, a4
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vsll.vi v12, v12, 24
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsll.vx v12, v8, a1
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vsll.vx v8, v8, a2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vor.vv v9, v11, v9
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
;
; ZVKB-LABEL: bswap_v2i64:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVKB-NEXT:    vle64.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse64.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

define void @bswap_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVKB-LABEL: bswap_v16i16:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVKB-NEXT:    vle16.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse16.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)

define void @bswap_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -256
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    vand.vx v10, v10, a1
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vand.vx v12, v8, a1
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vsll.vi v12, v12, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVKB-LABEL: bswap_v8i32:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVKB-NEXT:    vle32.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse32.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)

define void @bswap_v4i64(ptr %x, ptr %y) {
; RV32-LABEL: bswap_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    lui a4, 16
; RV32-NEXT:    lui a5, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a1, a4, -256
; RV32-NEXT:    vlse64.v v10, (a6), zero
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    vsrl.vx v14, v8, a3
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vsll.vx v18, v8, a2
; RV32-NEXT:    vand.vx v14, v14, a1
; RV32-NEXT:    vor.vv v12, v14, v12
; RV32-NEXT:    vand.vx v14, v8, a1
; RV32-NEXT:    vsll.vx v14, v14, a3
; RV32-NEXT:    vor.vv v14, v18, v14
; RV32-NEXT:    vsrl.vi v18, v8, 8
; RV32-NEXT:    vand.vx v16, v16, a5
; RV32-NEXT:    vand.vv v18, v18, v10
; RV32-NEXT:    vor.vv v16, v18, v16
; RV32-NEXT:    vand.vv v10, v8, v10
; RV32-NEXT:    vand.vx v8, v8, a5
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v10, v10, 8
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v14, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a2, 40
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    lui a4, 4080
; RV64-NEXT:    li a5, 255
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    slli a5, a5, 24
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    vsrl.vx v12, v8, a2
; RV64-NEXT:    vsrl.vi v14, v8, 24
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vand.vx v12, v8, a5
; RV64-NEXT:    vand.vx v14, v14, a4
; RV64-NEXT:    vand.vx v16, v16, a5
; RV64-NEXT:    vor.vv v14, v16, v14
; RV64-NEXT:    vand.vx v16, v8, a4
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vsll.vi v16, v16, 24
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsll.vx v16, v8, a1
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vsll.vx v8, v8, a2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vor.vv v10, v14, v10
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
;
; ZVKB-LABEL: bswap_v4i64:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVKB-NEXT:    vle64.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse64.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)