; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB

define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> %va)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)

define <vscale x 2 x i8> @bitreverse_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> %va)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)

define <vscale x 4 x i8> @bitreverse_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> %va)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)

define <vscale x 8 x i8> @bitreverse_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> %va)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)

define <vscale x 16 x i8> @bitreverse_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vsll.vi v10, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> %va)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)

define <vscale x 32 x i8> @bitreverse_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vsll.vi v12, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv32i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8> %va)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8>)

define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vsll.vi v16, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv64i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8> %va)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)

define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv32i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)

define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vsll.vi v12, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vsll.vi v16, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v24, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vor.vv v16, v16, v24
; CHECK-NEXT:    vsll.vi v24, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v24, v8
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)

define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bitreverse_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v11, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v12, v8, a1
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vlse64.v v13, (a5), zero
; RV32-NEXT:    vor.vv v10, v11, v10
; RV32-NEXT:    vand.vx v11, v8, a0
; RV32-NEXT:    vsll.vx v11, v11, a2
; RV32-NEXT:    vor.vv v11, v12, v11
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vx v9, v9, a4
; RV32-NEXT:    vand.vv v12, v12, v13
; RV32-NEXT:    vor.vv v9, v12, v9
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v12, v8, v13
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v11, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v11, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vand.vv v9, v9, v12
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vand.vv v9, v9, v11
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    vsrl.vx v11, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v11, v11, a2
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v9, v9, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v11, v11, a4
; RV64-NEXT:    vor.vv v9, v11, v9
; RV64-NEXT:    vand.vx v11, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vand.vx v10, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v11, v11, 24
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v9, v9, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bitreverse_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v18, v8, a1
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vlse64.v v14, (a5), zero
; RV32-NEXT:    vor.vv v12, v12, v10
; RV32-NEXT:    vand.vx v10, v8, a0
; RV32-NEXT:    vsll.vx v10, v10, a2
; RV32-NEXT:    vor.vv v10, v18, v10
; RV32-NEXT:    vsrl.vi v18, v8, 8
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vand.vv v18, v18, v14
; RV32-NEXT:    vor.vv v16, v18, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v14, v8, v14
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vsetvli a3, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsll.vi v14, v14, 8
; RV32-NEXT:    vor.vv v8, v8, v14
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v14, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vand.vv v12, v12, v14
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v12, v10
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    vsrl.vx v14, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v14, v14, a2
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v14, v14, a4
; RV64-NEXT:    vor.vv v10, v14, v10
; RV64-NEXT:    vand.vx v14, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vand.vx v12, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v14, v14, 24
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v10, v10, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bitreverse_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v24, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    vsrl.vx v16, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v28, v8, a1
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vlse64.v v20, (a5), zero
; RV32-NEXT:    vor.vv v16, v16, v12
; RV32-NEXT:    vand.vx v12, v8, a0
; RV32-NEXT:    vsll.vx v12, v12, a2
; RV32-NEXT:    vor.vv v12, v28, v12
; RV32-NEXT:    vsrl.vi v28, v8, 8
; RV32-NEXT:    vand.vx v24, v24, a4
; RV32-NEXT:    vand.vv v28, v28, v20
; RV32-NEXT:    vor.vv v24, v28, v24
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v20, v8, v20
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v24, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsll.vi v20, v20, 8
; RV32-NEXT:    vor.vv v8, v8, v20
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v20, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vand.vv v16, v16, v20
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v16, v12
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    vsrl.vx v20, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v20, v20, a2
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v20, v20, a4
; RV64-NEXT:    vor.vv v20, v20, v16
; RV64-NEXT:    vand.vx v16, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vand.vx v20, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v16, v16, 24
; RV64-NEXT:    vsll.vi v20, v20, 8
; RV64-NEXT:    vor.vv v16, v16, v20
; RV64-NEXT:    vsll.vx v20, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v12, v12, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)

define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bitreverse_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v24, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v0, v8, a1
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsll.vx v16, v16, a2
; RV32-NEXT:    vor.vv v16, v0, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v0, (a5), zero
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v16, v24
; RV32-NEXT:    vand.vv v16, v8, v0
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsetvli a3, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a1
; RV64-NEXT:    vsrl.vx v0, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v0, v0, a2
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v0, v0, a4
; RV64-NEXT:    vor.vv v0, v0, v24
; RV64-NEXT:    vand.vx v24, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vand.vx v0, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v24, v24, 24
; RV64-NEXT:    vsll.vi v0, v0, 8
; RV64-NEXT:    vor.vv v24, v24, v0
; RV64-NEXT:    vsll.vx v0, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)