; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB

define void @bitreverse_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: bitreverse_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: lui a1, 3
; CHECK-NEXT: addi a1, a1, 819
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, 1365
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)

define void @bitreverse_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: bitreverse_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a1, 16
; CHECK-NEXT: addi a1, a1, -256
; CHECK-NEXT: vsrl.vi v9, v8, 8
; CHECK-NEXT: vsrl.vi v10, v8, 24
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: vor.vv v9, v9, v10
; CHECK-NEXT: vand.vx v10, v8, a1
; CHECK-NEXT: lui a1, 61681
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vsll.vi v10, v10, 8
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: lui a1, 209715
; CHECK-NEXT: addi a1, a1, 819
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: lui a1, 349525
; CHECK-NEXT: addi a1, a1, 1365
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a1
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v4i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)

define void @bitreverse_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: bitreverse_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a2, 1044480
; RV32-NEXT: li a3, 56
; RV32-NEXT: li a4, 40
; RV32-NEXT: lui a5, 16
; RV32-NEXT: lui a1, 4080
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a2, a5, -256
; RV32-NEXT: vlse64.v v9, (a6), zero
; RV32-NEXT: vsrl.vx v10, v8, a3
; RV32-NEXT: vsrl.vx v11, v8, a4
; RV32-NEXT: vsrl.vi v12, v8, 24
; RV32-NEXT: vsll.vx v13, v8, a3
; RV32-NEXT: vand.vx v11, v11, a2
; RV32-NEXT: vor.vv v10, v11, v10
; RV32-NEXT: vand.vx v11, v8, a2
; RV32-NEXT: vsll.vx v11, v11, a4
; RV32-NEXT: vor.vv v11, v13, v11
; RV32-NEXT: vsrl.vi v13, v8, 8
; RV32-NEXT: vand.vx v12, v12, a1
; RV32-NEXT: vand.vv v13, v13, v9
; RV32-NEXT: vor.vv v12, v13, v12
; RV32-NEXT: lui a2, 61681
; RV32-NEXT: lui a3, 209715
; RV32-NEXT: lui a4, 349525
; RV32-NEXT: addi a2, a2, -241
; RV32-NEXT: addi a3, a3, 819
; RV32-NEXT: addi a4, a4, 1365
; RV32-NEXT: vor.vv v10, v12, v10
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v12, a2
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vand.vv v9, v8, v9
; RV32-NEXT: vand.vx v8, v8, a1
; RV32-NEXT: vsll.vi v8, v8, 24
; RV32-NEXT: vsll.vi v9, v9, 8
; RV32-NEXT: vor.vv v8, v8, v9
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v9, a3
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vor.vv v8, v11, v8
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v11, a4
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 2
; RV32-NEXT: vand.vv v8, v8, v9
; RV32-NEXT: vand.vv v9, v10, v9
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: vand.vv v8, v8, v11
; RV32-NEXT: vand.vv v9, v9, v11
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v9, v8
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: li a1, 56
; RV64-NEXT: li a2, 40
; RV64-NEXT: lui a3, 16
; RV64-NEXT: lui a4, 4080
; RV64-NEXT: li a5, 255
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: slli a5, a5, 24
; RV64-NEXT: vsrl.vx v9, v8, a1
; RV64-NEXT: vsrl.vx v10, v8, a2
; RV64-NEXT: vsrl.vi v11, v8, 24
; RV64-NEXT: vsrl.vi v12, v8, 8
; RV64-NEXT: vand.vx v10, v10, a3
; RV64-NEXT: vor.vv v9, v10, v9
; RV64-NEXT: vand.vx v10, v8, a5
; RV64-NEXT: vand.vx v11, v11, a4
; RV64-NEXT: vand.vx v12, v12, a5
; RV64-NEXT: vor.vv v11, v12, v11
; RV64-NEXT: vand.vx v12, v8, a4
; RV64-NEXT: vsll.vi v10, v10, 8
; RV64-NEXT: vsll.vi v12, v12, 24
; RV64-NEXT: vor.vv v10, v12, v10
; RV64-NEXT: vsll.vx v12, v8, a1
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vsll.vx v8, v8, a2
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: lui a1, 61681
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: lui a3, 349525
; RV64-NEXT: addiw a1, a1, -241
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: addiw a3, a3, 1365
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: slli a5, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a2, a2, a5
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: vor.vv v9, v11, v9
; RV64-NEXT: vor.vv v8, v8, v10
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 4
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 2
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vand.vx v9, v9, a2
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 1
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vand.vx v9, v9, a3
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v2i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)

define void @bitreverse_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: bitreverse_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vsrl.vi v10, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vsrl.vi v10, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: lui a1, 3
; CHECK-NEXT: addi a1, a1, 819
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, 1365
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v16i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)

define void @bitreverse_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: bitreverse_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a1, 16
; CHECK-NEXT: addi a1, a1, -256
; CHECK-NEXT: vsrl.vi v10, v8, 8
; CHECK-NEXT: vsrl.vi v12, v8, 24
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: vor.vv v10, v10, v12
; CHECK-NEXT: vand.vx v12, v8, a1
; CHECK-NEXT: lui a1, 61681
; CHECK-NEXT: addi a1, a1, -241
; CHECK-NEXT: vsll.vi v8, v8, 24
; CHECK-NEXT: vsll.vi v12, v12, 8
; CHECK-NEXT: vor.vv v8, v8, v12
; CHECK-NEXT: vor.vv v8, v8, v10
; CHECK-NEXT: vsrl.vi v10, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: lui a1, 209715
; CHECK-NEXT: addi a1, a1, 819
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: lui a1, 349525
; CHECK-NEXT: addi a1, a1, 1365
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v10, v8
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v8i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)

define void @bitreverse_v4i64(ptr %x, ptr %y) {
; RV32-LABEL: bitreverse_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a2, 1044480
; RV32-NEXT: li a3, 56
; RV32-NEXT: li a4, 40
; RV32-NEXT: lui a5, 16
; RV32-NEXT: lui a1, 4080
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a2, a5, -256
; RV32-NEXT: vlse64.v v10, (a6), zero
; RV32-NEXT: vsrl.vx v12, v8, a3
; RV32-NEXT: vsrl.vx v14, v8, a4
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vsll.vx v18, v8, a3
; RV32-NEXT: vand.vx v14, v14, a2
; RV32-NEXT: vor.vv v14, v14, v12
; RV32-NEXT: vand.vx v12, v8, a2
; RV32-NEXT: vsll.vx v12, v12, a4
; RV32-NEXT: vor.vv v12, v18, v12
; RV32-NEXT: vsrl.vi v18, v8, 8
; RV32-NEXT: vand.vx v16, v16, a1
; RV32-NEXT: vand.vv v18, v18, v10
; RV32-NEXT: vor.vv v16, v18, v16
; RV32-NEXT: lui a2, 61681
; RV32-NEXT: lui a3, 209715
; RV32-NEXT: lui a4, 349525
; RV32-NEXT: addi a2, a2, -241
; RV32-NEXT: addi a3, a3, 819
; RV32-NEXT: addi a4, a4, 1365
; RV32-NEXT: vor.vv v14, v16, v14
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v16, a2
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vand.vv v10, v8, v10
; RV32-NEXT: vand.vx v8, v8, a1
; RV32-NEXT: vsll.vi v8, v8, 24
; RV32-NEXT: vsll.vi v10, v10, 8
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v10, a3
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a4
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vor.vv v8, v8, v14
; RV32-NEXT: vsrl.vi v14, v8, 4
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vand.vv v14, v14, v16
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v14, v8
; RV32-NEXT: vsrl.vi v14, v8, 2
; RV32-NEXT: vand.vv v8, v8, v10
; RV32-NEXT: vand.vv v10, v14, v10
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: vand.vv v8, v8, v12
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v10, v8
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v14, (a0)
; RV64-NEXT: li a1, 56
; RV64-NEXT: li a2, 40
; RV64-NEXT: lui a3, 16
; RV64-NEXT: lui a4, 4080
; RV64-NEXT: li a5, 255
; RV64-NEXT: addiw a3, a3, -256
; RV64-NEXT: slli a5, a5, 24
; RV64-NEXT: vsrl.vx v8, v14, a1
; RV64-NEXT: vsrl.vx v10, v14, a2
; RV64-NEXT: vsrl.vi v12, v14, 24
; RV64-NEXT: vsrl.vi v16, v14, 8
; RV64-NEXT: vand.vx v10, v10, a3
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vand.vx v18, v14, a5
; RV64-NEXT: vand.vx v10, v12, a4
; RV64-NEXT: vand.vx v12, v16, a5
; RV64-NEXT: vor.vv v10, v12, v10
; RV64-NEXT: vand.vx v12, v14, a4
; RV64-NEXT: vsll.vi v16, v18, 8
; RV64-NEXT: vsll.vi v12, v12, 24
; RV64-NEXT: vor.vv v12, v12, v16
; RV64-NEXT: vsll.vx v16, v14, a1
; RV64-NEXT: vand.vx v14, v14, a3
; RV64-NEXT: vsll.vx v14, v14, a2
; RV64-NEXT: vor.vv v14, v16, v14
; RV64-NEXT: lui a1, 61681
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: lui a3, 349525
; RV64-NEXT: addiw a1, a1, -241
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: addiw a3, a3, 1365
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: slli a5, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a2, a2, a5
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vor.vv v10, v14, v12
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 4
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vand.vx v10, v10, a1
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 2
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vand.vx v10, v10, a2
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 1
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vand.vx v10, v10, a3
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v10, v8
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
;
; ZVBB-LABEL: bitreverse_v4i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vbrev.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)