1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 4; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 6; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \ 7; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB 8; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \ 9; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB 10 11declare <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32) 12 13define <vscale x 1 x i8> @vp_bitreverse_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 14; CHECK-LABEL: vp_bitreverse_nxv1i8: 15; CHECK: # %bb.0: 16; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 17; CHECK-NEXT: vand.vi v9, v8, 15, v0.t 18; CHECK-NEXT: li a0, 51 19; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t 20; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t 21; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 22; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 23; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 24; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 25; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 26; CHECK-NEXT: li a0, 85 27; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 28; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 29; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 30; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 31; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 32; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 33; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 34; CHECK-NEXT: ret 35; 36; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8: 37; CHECK-ZVBB: # %bb.0: 38; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 39; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 40; CHECK-ZVBB-NEXT: ret 41 %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl) 42 ret <vscale x 1 x i8> %v 43} 44 45define <vscale x 1 x i8> @vp_bitreverse_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) { 46; CHECK-LABEL: vp_bitreverse_nxv1i8_unmasked: 47; CHECK: # %bb.0: 48; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 49; CHECK-NEXT: vand.vi v9, v8, 15 50; CHECK-NEXT: vsrl.vi v8, v8, 4 51; CHECK-NEXT: li a0, 51 52; CHECK-NEXT: vsll.vi v9, v9, 4 53; CHECK-NEXT: vand.vi v8, v8, 15 54; CHECK-NEXT: vor.vv v8, v8, v9 55; CHECK-NEXT: vsrl.vi v9, v8, 2 56; CHECK-NEXT: vand.vx v8, v8, a0 57; CHECK-NEXT: vand.vx v9, v9, a0 58; CHECK-NEXT: li a0, 85 59; CHECK-NEXT: vsll.vi v8, v8, 2 60; CHECK-NEXT: vor.vv v8, v9, v8 61; CHECK-NEXT: vsrl.vi v9, v8, 1 62; CHECK-NEXT: vand.vx v8, v8, a0 63; CHECK-NEXT: vand.vx v9, v9, a0 64; CHECK-NEXT: vadd.vv v8, v8, v8 65; CHECK-NEXT: vor.vv v8, v9, v8 66; CHECK-NEXT: ret 67; 68; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8_unmasked: 69; CHECK-ZVBB: # %bb.0: 70; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 71; CHECK-ZVBB-NEXT: vbrev.v v8, v8 72; CHECK-ZVBB-NEXT: ret 73 %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 74 ret <vscale x 1 x i8> %v 75} 76 77declare <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32) 78 79define <vscale x 2 x i8> @vp_bitreverse_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 80; CHECK-LABEL: vp_bitreverse_nxv2i8: 81; CHECK: # %bb.0: 82; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 83; CHECK-NEXT: vand.vi v9, v8, 15, v0.t 84; CHECK-NEXT: li 
a0, 51 85; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t 86; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t 87; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 88; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 89; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 90; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 91; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 92; CHECK-NEXT: li a0, 85 93; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 94; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 95; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 96; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 97; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 98; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 99; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 100; CHECK-NEXT: ret 101; 102; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8: 103; CHECK-ZVBB: # %bb.0: 104; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 105; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 106; CHECK-ZVBB-NEXT: ret 107 %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl) 108 ret <vscale x 2 x i8> %v 109} 110 111define <vscale x 2 x i8> @vp_bitreverse_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) { 112; CHECK-LABEL: vp_bitreverse_nxv2i8_unmasked: 113; CHECK: # %bb.0: 114; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 115; CHECK-NEXT: vand.vi v9, v8, 15 116; CHECK-NEXT: vsrl.vi v8, v8, 4 117; CHECK-NEXT: li a0, 51 118; CHECK-NEXT: vsll.vi v9, v9, 4 119; CHECK-NEXT: vand.vi v8, v8, 15 120; CHECK-NEXT: vor.vv v8, v8, v9 121; CHECK-NEXT: vsrl.vi v9, v8, 2 122; CHECK-NEXT: vand.vx v8, v8, a0 123; CHECK-NEXT: vand.vx v9, v9, a0 124; CHECK-NEXT: li a0, 85 125; CHECK-NEXT: vsll.vi v8, v8, 2 126; CHECK-NEXT: vor.vv v8, v9, v8 127; CHECK-NEXT: vsrl.vi v9, v8, 1 128; CHECK-NEXT: vand.vx v8, v8, a0 129; CHECK-NEXT: vand.vx v9, v9, a0 130; CHECK-NEXT: vadd.vv v8, v8, v8 131; CHECK-NEXT: vor.vv v8, v9, v8 132; CHECK-NEXT: ret 133; 134; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8_unmasked: 135; CHECK-ZVBB: # %bb.0: 136; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 137; CHECK-ZVBB-NEXT: vbrev.v v8, v8 138; CHECK-ZVBB-NEXT: ret 139 %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 140 ret <vscale x 2 x i8> %v 141} 142 143declare <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32) 144 145define <vscale x 4 x i8> @vp_bitreverse_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 146; CHECK-LABEL: vp_bitreverse_nxv4i8: 147; CHECK: # %bb.0: 148; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 149; CHECK-NEXT: vand.vi v9, v8, 15, v0.t 150; CHECK-NEXT: li a0, 51 151; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t 152; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t 153; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 154; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 155; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 156; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 157; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 158; CHECK-NEXT: li a0, 85 159; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 160; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 161; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 162; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 163; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 164; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 165; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 166; CHECK-NEXT: ret 167; 168; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8: 169; CHECK-ZVBB: # %bb.0: 170; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 171; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 172; CHECK-ZVBB-NEXT: ret 173 %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl) 174 ret <vscale x 4 x i8> %v 175} 176 
177define <vscale x 4 x i8> @vp_bitreverse_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) { 178; CHECK-LABEL: vp_bitreverse_nxv4i8_unmasked: 179; CHECK: # %bb.0: 180; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 181; CHECK-NEXT: vand.vi v9, v8, 15 182; CHECK-NEXT: vsrl.vi v8, v8, 4 183; CHECK-NEXT: li a0, 51 184; CHECK-NEXT: vsll.vi v9, v9, 4 185; CHECK-NEXT: vand.vi v8, v8, 15 186; CHECK-NEXT: vor.vv v8, v8, v9 187; CHECK-NEXT: vsrl.vi v9, v8, 2 188; CHECK-NEXT: vand.vx v8, v8, a0 189; CHECK-NEXT: vand.vx v9, v9, a0 190; CHECK-NEXT: li a0, 85 191; CHECK-NEXT: vsll.vi v8, v8, 2 192; CHECK-NEXT: vor.vv v8, v9, v8 193; CHECK-NEXT: vsrl.vi v9, v8, 1 194; CHECK-NEXT: vand.vx v8, v8, a0 195; CHECK-NEXT: vand.vx v9, v9, a0 196; CHECK-NEXT: vadd.vv v8, v8, v8 197; CHECK-NEXT: vor.vv v8, v9, v8 198; CHECK-NEXT: ret 199; 200; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8_unmasked: 201; CHECK-ZVBB: # %bb.0: 202; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 203; CHECK-ZVBB-NEXT: vbrev.v v8, v8 204; CHECK-ZVBB-NEXT: ret 205 %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 206 ret <vscale x 4 x i8> %v 207} 208 209declare <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32) 210 211define <vscale x 8 x i8> @vp_bitreverse_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 212; CHECK-LABEL: vp_bitreverse_nxv8i8: 213; CHECK: # %bb.0: 214; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 215; CHECK-NEXT: vand.vi v9, v8, 15, v0.t 216; CHECK-NEXT: li a0, 51 217; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t 218; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t 219; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 220; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 221; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 222; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 223; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 224; CHECK-NEXT: li a0, 85 225; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 226; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 227; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 228; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 229; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 230; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 231; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 232; CHECK-NEXT: ret 233; 234; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8: 235; CHECK-ZVBB: # %bb.0: 236; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma 237; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 238; CHECK-ZVBB-NEXT: ret 239 %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) 240 ret <vscale x 8 x i8> %v 241} 242 243define <vscale x 8 x i8> @vp_bitreverse_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) { 244; CHECK-LABEL: vp_bitreverse_nxv8i8_unmasked: 245; CHECK: # %bb.0: 246; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 247; CHECK-NEXT: vand.vi v9, v8, 15 248; CHECK-NEXT: vsrl.vi v8, v8, 4 249; CHECK-NEXT: li a0, 51 250; CHECK-NEXT: vsll.vi v9, v9, 4 251; CHECK-NEXT: vand.vi v8, v8, 15 252; CHECK-NEXT: vor.vv v8, v8, v9 253; CHECK-NEXT: vsrl.vi v9, v8, 2 254; CHECK-NEXT: vand.vx v8, v8, a0 255; CHECK-NEXT: vand.vx v9, v9, a0 256; CHECK-NEXT: li a0, 85 257; CHECK-NEXT: vsll.vi v8, v8, 2 258; CHECK-NEXT: vor.vv v8, v9, v8 259; CHECK-NEXT: vsrl.vi v9, v8, 1 260; CHECK-NEXT: vand.vx v8, v8, a0 261; CHECK-NEXT: vand.vx v9, v9, a0 262; CHECK-NEXT: vadd.vv v8, v8, v8 263; CHECK-NEXT: vor.vv v8, v9, v8 264; CHECK-NEXT: ret 265; 266; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8_unmasked: 267; CHECK-ZVBB: # %bb.0: 268; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma 269; 
CHECK-ZVBB-NEXT: vbrev.v v8, v8 270; CHECK-ZVBB-NEXT: ret 271 %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 272 ret <vscale x 8 x i8> %v 273} 274 275declare <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32) 276 277define <vscale x 16 x i8> @vp_bitreverse_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 278; CHECK-LABEL: vp_bitreverse_nxv16i8: 279; CHECK: # %bb.0: 280; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma 281; CHECK-NEXT: vand.vi v10, v8, 15, v0.t 282; CHECK-NEXT: li a0, 51 283; CHECK-NEXT: vsll.vi v10, v10, 4, v0.t 284; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t 285; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 286; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 287; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t 288; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 289; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 290; CHECK-NEXT: li a0, 85 291; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 292; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 293; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 294; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 295; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 296; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 297; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 298; CHECK-NEXT: ret 299; 300; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8: 301; CHECK-ZVBB: # %bb.0: 302; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma 303; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 304; CHECK-ZVBB-NEXT: ret 305 %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl) 306 ret <vscale x 16 x i8> %v 307} 308 309define <vscale x 16 x i8> @vp_bitreverse_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) { 310; CHECK-LABEL: vp_bitreverse_nxv16i8_unmasked: 311; CHECK: # %bb.0: 312; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma 313; CHECK-NEXT: vand.vi v10, v8, 15 314; CHECK-NEXT: vsrl.vi v8, v8, 4 315; CHECK-NEXT: li a0, 51 316; CHECK-NEXT: vsll.vi v10, v10, 4 317; CHECK-NEXT: vand.vi v8, v8, 15 318; CHECK-NEXT: vor.vv v8, v8, v10 319; CHECK-NEXT: vsrl.vi v10, v8, 2 320; CHECK-NEXT: vand.vx v8, v8, a0 321; CHECK-NEXT: vand.vx v10, v10, a0 322; CHECK-NEXT: li a0, 85 323; CHECK-NEXT: vsll.vi v8, v8, 2 324; CHECK-NEXT: vor.vv v8, v10, v8 325; CHECK-NEXT: vsrl.vi v10, v8, 1 326; CHECK-NEXT: vand.vx v8, v8, a0 327; CHECK-NEXT: vand.vx v10, v10, a0 328; CHECK-NEXT: vadd.vv v8, v8, v8 329; CHECK-NEXT: vor.vv v8, v10, v8 330; CHECK-NEXT: ret 331; 332; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8_unmasked: 333; CHECK-ZVBB: # %bb.0: 334; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma 335; CHECK-ZVBB-NEXT: vbrev.v v8, v8 336; CHECK-ZVBB-NEXT: ret 337 %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 338 ret <vscale x 16 x i8> %v 339} 340 341declare <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32) 342 343define <vscale x 32 x i8> @vp_bitreverse_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 344; CHECK-LABEL: vp_bitreverse_nxv32i8: 345; CHECK: # %bb.0: 346; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma 347; CHECK-NEXT: vand.vi v12, v8, 15, v0.t 348; CHECK-NEXT: li a0, 51 349; CHECK-NEXT: vsll.vi v12, v12, 4, v0.t 350; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t 351; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 352; CHECK-NEXT: vor.vv v8, v8, v12, v0.t 353; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t 354; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 355; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 356; 
CHECK-NEXT: li a0, 85 357; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 358; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 359; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 360; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 361; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 362; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 363; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 364; CHECK-NEXT: ret 365; 366; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8: 367; CHECK-ZVBB: # %bb.0: 368; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma 369; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 370; CHECK-ZVBB-NEXT: ret 371 %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl) 372 ret <vscale x 32 x i8> %v 373} 374 375define <vscale x 32 x i8> @vp_bitreverse_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) { 376; CHECK-LABEL: vp_bitreverse_nxv32i8_unmasked: 377; CHECK: # %bb.0: 378; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma 379; CHECK-NEXT: vand.vi v12, v8, 15 380; CHECK-NEXT: vsrl.vi v8, v8, 4 381; CHECK-NEXT: li a0, 51 382; CHECK-NEXT: vsll.vi v12, v12, 4 383; CHECK-NEXT: vand.vi v8, v8, 15 384; CHECK-NEXT: vor.vv v8, v8, v12 385; CHECK-NEXT: vsrl.vi v12, v8, 2 386; CHECK-NEXT: vand.vx v8, v8, a0 387; CHECK-NEXT: vand.vx v12, v12, a0 388; CHECK-NEXT: li a0, 85 389; CHECK-NEXT: vsll.vi v8, v8, 2 390; CHECK-NEXT: vor.vv v8, v12, v8 391; CHECK-NEXT: vsrl.vi v12, v8, 1 392; CHECK-NEXT: vand.vx v8, v8, a0 393; CHECK-NEXT: vand.vx v12, v12, a0 394; CHECK-NEXT: vadd.vv v8, v8, v8 395; CHECK-NEXT: vor.vv v8, v12, v8 396; CHECK-NEXT: ret 397; 398; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8_unmasked: 399; CHECK-ZVBB: # %bb.0: 400; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma 401; CHECK-ZVBB-NEXT: vbrev.v v8, v8 402; CHECK-ZVBB-NEXT: ret 403 %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) 404 ret <vscale x 32 x i8> %v 405} 406 407declare <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32) 408 409define <vscale x 64 x i8> @vp_bitreverse_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { 410; CHECK-LABEL: vp_bitreverse_nxv64i8: 411; CHECK: # %bb.0: 412; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma 413; CHECK-NEXT: vand.vi v16, v8, 15, v0.t 414; CHECK-NEXT: li a0, 51 415; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t 416; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t 417; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 418; CHECK-NEXT: vor.vv v16, v8, v16, v0.t 419; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t 420; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 421; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 422; CHECK-NEXT: li a0, 85 423; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t 424; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 425; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 426; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 427; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 428; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 429; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 430; CHECK-NEXT: ret 431; 432; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8: 433; CHECK-ZVBB: # %bb.0: 434; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma 435; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 436; CHECK-ZVBB-NEXT: ret 437 %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl) 438 ret <vscale x 64 x i8> %v 439} 440 441define <vscale x 64 x i8> @vp_bitreverse_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) { 442; CHECK-LABEL: vp_bitreverse_nxv64i8_unmasked: 443; CHECK: # %bb.0: 444; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma 445; 
CHECK-NEXT: vand.vi v16, v8, 15 446; CHECK-NEXT: vsrl.vi v8, v8, 4 447; CHECK-NEXT: li a0, 51 448; CHECK-NEXT: vsll.vi v16, v16, 4 449; CHECK-NEXT: vand.vi v8, v8, 15 450; CHECK-NEXT: vor.vv v8, v8, v16 451; CHECK-NEXT: vsrl.vi v16, v8, 2 452; CHECK-NEXT: vand.vx v8, v8, a0 453; CHECK-NEXT: vand.vx v16, v16, a0 454; CHECK-NEXT: li a0, 85 455; CHECK-NEXT: vsll.vi v8, v8, 2 456; CHECK-NEXT: vor.vv v8, v16, v8 457; CHECK-NEXT: vsrl.vi v16, v8, 1 458; CHECK-NEXT: vand.vx v8, v8, a0 459; CHECK-NEXT: vand.vx v16, v16, a0 460; CHECK-NEXT: vadd.vv v8, v8, v8 461; CHECK-NEXT: vor.vv v8, v16, v8 462; CHECK-NEXT: ret 463; 464; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8_unmasked: 465; CHECK-ZVBB: # %bb.0: 466; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma 467; CHECK-ZVBB-NEXT: vbrev.v v8, v8 468; CHECK-ZVBB-NEXT: ret 469 %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl) 470 ret <vscale x 64 x i8> %v 471} 472 473declare <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32) 474 475define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 476; CHECK-LABEL: vp_bitreverse_nxv1i16: 477; CHECK: # %bb.0: 478; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 479; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 480; CHECK-NEXT: lui a0, 1 481; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 482; CHECK-NEXT: addi a0, a0, -241 483; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 484; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 485; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 486; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 487; CHECK-NEXT: lui a0, 3 488; CHECK-NEXT: addi a0, a0, 819 489; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 490; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 491; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 492; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 493; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 494; CHECK-NEXT: lui a0, 5 495; CHECK-NEXT: addi a0, a0, 1365 496; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 497; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 498; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 499; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 500; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 501; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 502; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 503; CHECK-NEXT: ret 504; 505; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16: 506; CHECK-ZVBB: # %bb.0: 507; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 508; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 509; CHECK-ZVBB-NEXT: ret 510 %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl) 511 ret <vscale x 1 x i16> %v 512} 513 514define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { 515; CHECK-LABEL: vp_bitreverse_nxv1i16_unmasked: 516; CHECK: # %bb.0: 517; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 518; CHECK-NEXT: vsrl.vi v9, v8, 8 519; CHECK-NEXT: vsll.vi v8, v8, 8 520; CHECK-NEXT: lui a0, 1 521; CHECK-NEXT: vor.vv v8, v8, v9 522; CHECK-NEXT: addi a0, a0, -241 523; CHECK-NEXT: vsrl.vi v9, v8, 4 524; CHECK-NEXT: vand.vx v8, v8, a0 525; CHECK-NEXT: vand.vx v9, v9, a0 526; CHECK-NEXT: lui a0, 3 527; CHECK-NEXT: addi a0, a0, 819 528; CHECK-NEXT: vsll.vi v8, v8, 4 529; CHECK-NEXT: vor.vv v8, v9, v8 530; CHECK-NEXT: vsrl.vi v9, v8, 2 531; CHECK-NEXT: vand.vx v8, v8, a0 532; CHECK-NEXT: vand.vx v9, v9, a0 533; CHECK-NEXT: lui a0, 5 534; CHECK-NEXT: addi a0, a0, 1365 535; CHECK-NEXT: vsll.vi v8, v8, 2 536; CHECK-NEXT: vor.vv v8, v9, v8 537; CHECK-NEXT: vsrl.vi v9, v8, 1 538; 
CHECK-NEXT: vand.vx v8, v8, a0 539; CHECK-NEXT: vand.vx v9, v9, a0 540; CHECK-NEXT: vadd.vv v8, v8, v8 541; CHECK-NEXT: vor.vv v8, v9, v8 542; CHECK-NEXT: ret 543; 544; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked: 545; CHECK-ZVBB: # %bb.0: 546; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 547; CHECK-ZVBB-NEXT: vbrev.v v8, v8 548; CHECK-ZVBB-NEXT: ret 549 %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 550 ret <vscale x 1 x i16> %v 551} 552 553declare <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32) 554 555define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 556; CHECK-LABEL: vp_bitreverse_nxv2i16: 557; CHECK: # %bb.0: 558; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 559; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 560; CHECK-NEXT: lui a0, 1 561; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 562; CHECK-NEXT: addi a0, a0, -241 563; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 564; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 565; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 566; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 567; CHECK-NEXT: lui a0, 3 568; CHECK-NEXT: addi a0, a0, 819 569; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 570; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 571; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 572; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 573; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 574; CHECK-NEXT: lui a0, 5 575; CHECK-NEXT: addi a0, a0, 1365 576; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 577; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 578; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 579; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 580; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 581; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 582; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 583; CHECK-NEXT: ret 584; 585; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16: 586; CHECK-ZVBB: # %bb.0: 587; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 588; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 589; CHECK-ZVBB-NEXT: ret 590 %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl) 591 ret <vscale x 2 x i16> %v 592} 593 594define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { 595; CHECK-LABEL: vp_bitreverse_nxv2i16_unmasked: 596; CHECK: # %bb.0: 597; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 598; CHECK-NEXT: vsrl.vi v9, v8, 8 599; CHECK-NEXT: vsll.vi v8, v8, 8 600; CHECK-NEXT: lui a0, 1 601; CHECK-NEXT: vor.vv v8, v8, v9 602; CHECK-NEXT: addi a0, a0, -241 603; CHECK-NEXT: vsrl.vi v9, v8, 4 604; CHECK-NEXT: vand.vx v8, v8, a0 605; CHECK-NEXT: vand.vx v9, v9, a0 606; CHECK-NEXT: lui a0, 3 607; CHECK-NEXT: addi a0, a0, 819 608; CHECK-NEXT: vsll.vi v8, v8, 4 609; CHECK-NEXT: vor.vv v8, v9, v8 610; CHECK-NEXT: vsrl.vi v9, v8, 2 611; CHECK-NEXT: vand.vx v8, v8, a0 612; CHECK-NEXT: vand.vx v9, v9, a0 613; CHECK-NEXT: lui a0, 5 614; CHECK-NEXT: addi a0, a0, 1365 615; CHECK-NEXT: vsll.vi v8, v8, 2 616; CHECK-NEXT: vor.vv v8, v9, v8 617; CHECK-NEXT: vsrl.vi v9, v8, 1 618; CHECK-NEXT: vand.vx v8, v8, a0 619; CHECK-NEXT: vand.vx v9, v9, a0 620; CHECK-NEXT: vadd.vv v8, v8, v8 621; CHECK-NEXT: vor.vv v8, v9, v8 622; CHECK-NEXT: ret 623; 624; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked: 625; CHECK-ZVBB: # %bb.0: 626; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 627; CHECK-ZVBB-NEXT: vbrev.v v8, v8 628; CHECK-ZVBB-NEXT: ret 629 %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 
x i1> splat (i1 true), i32 %evl) 630 ret <vscale x 2 x i16> %v 631} 632 633declare <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32) 634 635define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 636; CHECK-LABEL: vp_bitreverse_nxv4i16: 637; CHECK: # %bb.0: 638; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 639; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 640; CHECK-NEXT: lui a0, 1 641; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 642; CHECK-NEXT: addi a0, a0, -241 643; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 644; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 645; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 646; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 647; CHECK-NEXT: lui a0, 3 648; CHECK-NEXT: addi a0, a0, 819 649; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 650; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 651; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 652; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 653; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 654; CHECK-NEXT: lui a0, 5 655; CHECK-NEXT: addi a0, a0, 1365 656; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 657; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 658; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 659; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 660; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 661; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 662; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 663; CHECK-NEXT: ret 664; 665; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16: 666; CHECK-ZVBB: # %bb.0: 667; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 668; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 669; CHECK-ZVBB-NEXT: ret 670 %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) 671 ret <vscale x 4 x i16> %v 672} 673 674define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { 675; CHECK-LABEL: vp_bitreverse_nxv4i16_unmasked: 676; CHECK: # %bb.0: 677; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 678; CHECK-NEXT: vsrl.vi v9, v8, 8 679; CHECK-NEXT: vsll.vi v8, v8, 8 680; CHECK-NEXT: lui a0, 1 681; CHECK-NEXT: vor.vv v8, v8, v9 682; CHECK-NEXT: addi a0, a0, -241 683; CHECK-NEXT: vsrl.vi v9, v8, 4 684; CHECK-NEXT: vand.vx v8, v8, a0 685; CHECK-NEXT: vand.vx v9, v9, a0 686; CHECK-NEXT: lui a0, 3 687; CHECK-NEXT: addi a0, a0, 819 688; CHECK-NEXT: vsll.vi v8, v8, 4 689; CHECK-NEXT: vor.vv v8, v9, v8 690; CHECK-NEXT: vsrl.vi v9, v8, 2 691; CHECK-NEXT: vand.vx v8, v8, a0 692; CHECK-NEXT: vand.vx v9, v9, a0 693; CHECK-NEXT: lui a0, 5 694; CHECK-NEXT: addi a0, a0, 1365 695; CHECK-NEXT: vsll.vi v8, v8, 2 696; CHECK-NEXT: vor.vv v8, v9, v8 697; CHECK-NEXT: vsrl.vi v9, v8, 1 698; CHECK-NEXT: vand.vx v8, v8, a0 699; CHECK-NEXT: vand.vx v9, v9, a0 700; CHECK-NEXT: vadd.vv v8, v8, v8 701; CHECK-NEXT: vor.vv v8, v9, v8 702; CHECK-NEXT: ret 703; 704; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked: 705; CHECK-ZVBB: # %bb.0: 706; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 707; CHECK-ZVBB-NEXT: vbrev.v v8, v8 708; CHECK-ZVBB-NEXT: ret 709 %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 710 ret <vscale x 4 x i16> %v 711} 712 713declare <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32) 714 715define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 716; CHECK-LABEL: vp_bitreverse_nxv8i16: 717; CHECK: # %bb.0: 718; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 719; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t 720; 
CHECK-NEXT: lui a0, 1 721; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 722; CHECK-NEXT: addi a0, a0, -241 723; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 724; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 725; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 726; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 727; CHECK-NEXT: lui a0, 3 728; CHECK-NEXT: addi a0, a0, 819 729; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 730; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 731; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t 732; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 733; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 734; CHECK-NEXT: lui a0, 5 735; CHECK-NEXT: addi a0, a0, 1365 736; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 737; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 738; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 739; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 740; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 741; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 742; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 743; CHECK-NEXT: ret 744; 745; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16: 746; CHECK-ZVBB: # %bb.0: 747; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 748; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 749; CHECK-ZVBB-NEXT: ret 750 %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl) 751 ret <vscale x 8 x i16> %v 752} 753 754define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { 755; CHECK-LABEL: vp_bitreverse_nxv8i16_unmasked: 756; CHECK: # %bb.0: 757; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 758; CHECK-NEXT: vsrl.vi v10, v8, 8 759; CHECK-NEXT: vsll.vi v8, v8, 8 760; CHECK-NEXT: lui a0, 1 761; CHECK-NEXT: vor.vv v8, v8, v10 762; CHECK-NEXT: addi a0, a0, -241 763; CHECK-NEXT: vsrl.vi v10, v8, 4 764; CHECK-NEXT: vand.vx v8, v8, a0 765; CHECK-NEXT: vand.vx v10, v10, a0 766; CHECK-NEXT: lui a0, 3 767; CHECK-NEXT: addi a0, a0, 819 768; CHECK-NEXT: vsll.vi v8, v8, 4 769; CHECK-NEXT: vor.vv v8, v10, v8 770; CHECK-NEXT: vsrl.vi v10, v8, 2 771; CHECK-NEXT: vand.vx v8, v8, a0 772; CHECK-NEXT: vand.vx v10, v10, a0 773; CHECK-NEXT: lui a0, 5 774; CHECK-NEXT: addi a0, a0, 1365 775; CHECK-NEXT: vsll.vi v8, v8, 2 776; CHECK-NEXT: vor.vv v8, v10, v8 777; CHECK-NEXT: vsrl.vi v10, v8, 1 778; CHECK-NEXT: vand.vx v8, v8, a0 779; CHECK-NEXT: vand.vx v10, v10, a0 780; CHECK-NEXT: vadd.vv v8, v8, v8 781; CHECK-NEXT: vor.vv v8, v10, v8 782; CHECK-NEXT: ret 783; 784; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked: 785; CHECK-ZVBB: # %bb.0: 786; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 787; CHECK-ZVBB-NEXT: vbrev.v v8, v8 788; CHECK-ZVBB-NEXT: ret 789 %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 790 ret <vscale x 8 x i16> %v 791} 792 793declare <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32) 794 795define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 796; CHECK-LABEL: vp_bitreverse_nxv16i16: 797; CHECK: # %bb.0: 798; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 799; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t 800; CHECK-NEXT: lui a0, 1 801; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 802; CHECK-NEXT: addi a0, a0, -241 803; CHECK-NEXT: vor.vv v8, v8, v12, v0.t 804; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 805; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 806; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 807; CHECK-NEXT: lui a0, 3 808; CHECK-NEXT: addi a0, a0, 819 809; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 810; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 811; CHECK-NEXT: 
vsrl.vi v12, v8, 2, v0.t 812; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 813; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 814; CHECK-NEXT: lui a0, 5 815; CHECK-NEXT: addi a0, a0, 1365 816; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 817; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 818; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 819; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 820; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 821; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 822; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 823; CHECK-NEXT: ret 824; 825; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16: 826; CHECK-ZVBB: # %bb.0: 827; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 828; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 829; CHECK-ZVBB-NEXT: ret 830 %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl) 831 ret <vscale x 16 x i16> %v 832} 833 834define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { 835; CHECK-LABEL: vp_bitreverse_nxv16i16_unmasked: 836; CHECK: # %bb.0: 837; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 838; CHECK-NEXT: vsrl.vi v12, v8, 8 839; CHECK-NEXT: vsll.vi v8, v8, 8 840; CHECK-NEXT: lui a0, 1 841; CHECK-NEXT: vor.vv v8, v8, v12 842; CHECK-NEXT: addi a0, a0, -241 843; CHECK-NEXT: vsrl.vi v12, v8, 4 844; CHECK-NEXT: vand.vx v8, v8, a0 845; CHECK-NEXT: vand.vx v12, v12, a0 846; CHECK-NEXT: lui a0, 3 847; CHECK-NEXT: addi a0, a0, 819 848; CHECK-NEXT: vsll.vi v8, v8, 4 849; CHECK-NEXT: vor.vv v8, v12, v8 850; CHECK-NEXT: vsrl.vi v12, v8, 2 851; CHECK-NEXT: vand.vx v8, v8, a0 852; CHECK-NEXT: vand.vx v12, v12, a0 853; CHECK-NEXT: lui a0, 5 854; CHECK-NEXT: addi a0, a0, 1365 855; CHECK-NEXT: vsll.vi v8, v8, 2 856; CHECK-NEXT: vor.vv v8, v12, v8 857; CHECK-NEXT: vsrl.vi v12, v8, 1 858; CHECK-NEXT: vand.vx v8, v8, a0 859; CHECK-NEXT: vand.vx v12, v12, a0 860; CHECK-NEXT: vadd.vv v8, v8, v8 861; CHECK-NEXT: vor.vv v8, v12, v8 862; CHECK-NEXT: ret 863; 864; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked: 865; CHECK-ZVBB: # %bb.0: 866; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 867; CHECK-ZVBB-NEXT: vbrev.v v8, v8 868; CHECK-ZVBB-NEXT: ret 869 %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 870 ret <vscale x 16 x i16> %v 871} 872 873declare <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32) 874 875define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 876; CHECK-LABEL: vp_bitreverse_nxv32i16: 877; CHECK: # %bb.0: 878; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 879; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t 880; CHECK-NEXT: lui a0, 1 881; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 882; CHECK-NEXT: addi a0, a0, -241 883; CHECK-NEXT: vor.vv v16, v8, v16, v0.t 884; CHECK-NEXT: vsrl.vi v8, v16, 4, v0.t 885; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 886; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 887; CHECK-NEXT: lui a0, 3 888; CHECK-NEXT: addi a0, a0, 819 889; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t 890; CHECK-NEXT: vor.vv v16, v8, v16, v0.t 891; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t 892; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 893; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 894; CHECK-NEXT: lui a0, 5 895; CHECK-NEXT: addi a0, a0, 1365 896; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t 897; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 898; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 899; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 900; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 901; CHECK-NEXT: 
vsll.vi v8, v8, 1, v0.t 902; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 903; CHECK-NEXT: ret 904; 905; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16: 906; CHECK-ZVBB: # %bb.0: 907; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 908; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 909; CHECK-ZVBB-NEXT: ret 910 %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl) 911 ret <vscale x 32 x i16> %v 912} 913 914define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { 915; CHECK-LABEL: vp_bitreverse_nxv32i16_unmasked: 916; CHECK: # %bb.0: 917; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 918; CHECK-NEXT: vsrl.vi v16, v8, 8 919; CHECK-NEXT: vsll.vi v8, v8, 8 920; CHECK-NEXT: lui a0, 1 921; CHECK-NEXT: vor.vv v8, v8, v16 922; CHECK-NEXT: addi a0, a0, -241 923; CHECK-NEXT: vsrl.vi v16, v8, 4 924; CHECK-NEXT: vand.vx v8, v8, a0 925; CHECK-NEXT: vand.vx v16, v16, a0 926; CHECK-NEXT: lui a0, 3 927; CHECK-NEXT: addi a0, a0, 819 928; CHECK-NEXT: vsll.vi v8, v8, 4 929; CHECK-NEXT: vor.vv v8, v16, v8 930; CHECK-NEXT: vsrl.vi v16, v8, 2 931; CHECK-NEXT: vand.vx v8, v8, a0 932; CHECK-NEXT: vand.vx v16, v16, a0 933; CHECK-NEXT: lui a0, 5 934; CHECK-NEXT: addi a0, a0, 1365 935; CHECK-NEXT: vsll.vi v8, v8, 2 936; CHECK-NEXT: vor.vv v8, v16, v8 937; CHECK-NEXT: vsrl.vi v16, v8, 1 938; CHECK-NEXT: vand.vx v8, v8, a0 939; CHECK-NEXT: vand.vx v16, v16, a0 940; CHECK-NEXT: vadd.vv v8, v8, v8 941; CHECK-NEXT: vor.vv v8, v16, v8 942; CHECK-NEXT: ret 943; 944; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked: 945; CHECK-ZVBB: # %bb.0: 946; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 947; CHECK-ZVBB-NEXT: vbrev.v v8, v8 948; CHECK-ZVBB-NEXT: ret 949 %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) 950 ret <vscale x 32 x i16> %v 951} 952 953declare <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32) 954 955define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 956; CHECK-LABEL: vp_bitreverse_nxv1i32: 957; CHECK: # %bb.0: 958; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 959; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 960; CHECK-NEXT: lui a0, 16 961; CHECK-NEXT: addi a0, a0, -256 962; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 963; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t 964; CHECK-NEXT: vor.vv v9, v9, v10, v0.t 965; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 966; CHECK-NEXT: lui a0, 61681 967; CHECK-NEXT: addi a0, a0, -241 968; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t 969; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 970; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 971; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 972; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 973; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 974; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 975; CHECK-NEXT: lui a0, 209715 976; CHECK-NEXT: addi a0, a0, 819 977; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 978; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 979; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 980; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 981; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 982; CHECK-NEXT: lui a0, 349525 983; CHECK-NEXT: addi a0, a0, 1365 984; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 985; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 986; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 987; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 988; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 989; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 990; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 991; CHECK-NEXT: ret 992; 993; 
CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32: 994; CHECK-ZVBB: # %bb.0: 995; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 996; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 997; CHECK-ZVBB-NEXT: ret 998 %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl) 999 ret <vscale x 1 x i32> %v 1000} 1001 1002define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { 1003; CHECK-LABEL: vp_bitreverse_nxv1i32_unmasked: 1004; CHECK: # %bb.0: 1005; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1006; CHECK-NEXT: vsrl.vi v9, v8, 8 1007; CHECK-NEXT: lui a0, 16 1008; CHECK-NEXT: vsrl.vi v10, v8, 24 1009; CHECK-NEXT: addi a0, a0, -256 1010; CHECK-NEXT: vand.vx v9, v9, a0 1011; CHECK-NEXT: vor.vv v9, v9, v10 1012; CHECK-NEXT: vsll.vi v10, v8, 24 1013; CHECK-NEXT: vand.vx v8, v8, a0 1014; CHECK-NEXT: lui a0, 61681 1015; CHECK-NEXT: addi a0, a0, -241 1016; CHECK-NEXT: vsll.vi v8, v8, 8 1017; CHECK-NEXT: vor.vv v8, v10, v8 1018; CHECK-NEXT: vor.vv v8, v8, v9 1019; CHECK-NEXT: vsrl.vi v9, v8, 4 1020; CHECK-NEXT: vand.vx v8, v8, a0 1021; CHECK-NEXT: vand.vx v9, v9, a0 1022; CHECK-NEXT: lui a0, 209715 1023; CHECK-NEXT: addi a0, a0, 819 1024; CHECK-NEXT: vsll.vi v8, v8, 4 1025; CHECK-NEXT: vor.vv v8, v9, v8 1026; CHECK-NEXT: vsrl.vi v9, v8, 2 1027; CHECK-NEXT: vand.vx v8, v8, a0 1028; CHECK-NEXT: vand.vx v9, v9, a0 1029; CHECK-NEXT: lui a0, 349525 1030; CHECK-NEXT: addi a0, a0, 1365 1031; CHECK-NEXT: vsll.vi v8, v8, 2 1032; CHECK-NEXT: vor.vv v8, v9, v8 1033; CHECK-NEXT: vsrl.vi v9, v8, 1 1034; CHECK-NEXT: vand.vx v8, v8, a0 1035; CHECK-NEXT: vand.vx v9, v9, a0 1036; CHECK-NEXT: vadd.vv v8, v8, v8 1037; CHECK-NEXT: vor.vv v8, v9, v8 1038; CHECK-NEXT: ret 1039; 1040; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked: 1041; CHECK-ZVBB: # %bb.0: 1042; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1043; CHECK-ZVBB-NEXT: vbrev.v v8, v8 1044; CHECK-ZVBB-NEXT: ret 1045 %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1046 ret <vscale x 1 x i32> %v 1047} 1048 1049declare <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32) 1050 1051define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1052; CHECK-LABEL: vp_bitreverse_nxv2i32: 1053; CHECK: # %bb.0: 1054; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1055; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 1056; CHECK-NEXT: lui a0, 16 1057; CHECK-NEXT: addi a0, a0, -256 1058; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 1059; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t 1060; CHECK-NEXT: vor.vv v9, v9, v10, v0.t 1061; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 1062; CHECK-NEXT: lui a0, 61681 1063; CHECK-NEXT: addi a0, a0, -241 1064; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t 1065; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 1066; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 1067; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 1068; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 1069; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 1070; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1071; CHECK-NEXT: lui a0, 209715 1072; CHECK-NEXT: addi a0, a0, 819 1073; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 1074; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 1075; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 1076; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 1077; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1078; CHECK-NEXT: lui a0, 349525 1079; CHECK-NEXT: addi a0, a0, 1365 1080; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 1081; CHECK-NEXT: vor.vv v8, v9, 
v8, v0.t 1082; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 1083; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 1084; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1085; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 1086; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 1087; CHECK-NEXT: ret 1088; 1089; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32: 1090; CHECK-ZVBB: # %bb.0: 1091; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1092; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 1093; CHECK-ZVBB-NEXT: ret 1094 %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) 1095 ret <vscale x 2 x i32> %v 1096} 1097 1098define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { 1099; CHECK-LABEL: vp_bitreverse_nxv2i32_unmasked: 1100; CHECK: # %bb.0: 1101; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1102; CHECK-NEXT: vsrl.vi v9, v8, 8 1103; CHECK-NEXT: lui a0, 16 1104; CHECK-NEXT: vsrl.vi v10, v8, 24 1105; CHECK-NEXT: addi a0, a0, -256 1106; CHECK-NEXT: vand.vx v9, v9, a0 1107; CHECK-NEXT: vor.vv v9, v9, v10 1108; CHECK-NEXT: vsll.vi v10, v8, 24 1109; CHECK-NEXT: vand.vx v8, v8, a0 1110; CHECK-NEXT: lui a0, 61681 1111; CHECK-NEXT: addi a0, a0, -241 1112; CHECK-NEXT: vsll.vi v8, v8, 8 1113; CHECK-NEXT: vor.vv v8, v10, v8 1114; CHECK-NEXT: vor.vv v8, v8, v9 1115; CHECK-NEXT: vsrl.vi v9, v8, 4 1116; CHECK-NEXT: vand.vx v8, v8, a0 1117; CHECK-NEXT: vand.vx v9, v9, a0 1118; CHECK-NEXT: lui a0, 209715 1119; CHECK-NEXT: addi a0, a0, 819 1120; CHECK-NEXT: vsll.vi v8, v8, 4 1121; CHECK-NEXT: vor.vv v8, v9, v8 1122; CHECK-NEXT: vsrl.vi v9, v8, 2 1123; CHECK-NEXT: vand.vx v8, v8, a0 1124; CHECK-NEXT: vand.vx v9, v9, a0 1125; CHECK-NEXT: lui a0, 349525 1126; CHECK-NEXT: addi a0, a0, 1365 1127; CHECK-NEXT: vsll.vi v8, v8, 2 1128; CHECK-NEXT: vor.vv v8, v9, v8 1129; CHECK-NEXT: vsrl.vi v9, v8, 1 1130; CHECK-NEXT: vand.vx v8, v8, a0 1131; CHECK-NEXT: vand.vx v9, v9, a0 1132; CHECK-NEXT: vadd.vv v8, v8, v8 1133; CHECK-NEXT: vor.vv v8, v9, v8 1134; CHECK-NEXT: ret 1135; 1136; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked: 1137; CHECK-ZVBB: # %bb.0: 1138; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1139; CHECK-ZVBB-NEXT: vbrev.v v8, v8 1140; CHECK-ZVBB-NEXT: ret 1141 %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1142 ret <vscale x 2 x i32> %v 1143} 1144 1145declare <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32) 1146 1147define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1148; CHECK-LABEL: vp_bitreverse_nxv4i32: 1149; CHECK: # %bb.0: 1150; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1151; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t 1152; CHECK-NEXT: lui a0, 16 1153; CHECK-NEXT: addi a0, a0, -256 1154; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 1155; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t 1156; CHECK-NEXT: vor.vv v10, v10, v12, v0.t 1157; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 1158; CHECK-NEXT: lui a0, 61681 1159; CHECK-NEXT: addi a0, a0, -241 1160; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t 1161; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 1162; CHECK-NEXT: vor.vv v8, v8, v12, v0.t 1163; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 1164; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 1165; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 1166; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1167; CHECK-NEXT: lui a0, 209715 1168; CHECK-NEXT: addi a0, a0, 819 1169; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 1170; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 
1171; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t 1172; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 1173; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1174; CHECK-NEXT: lui a0, 349525 1175; CHECK-NEXT: addi a0, a0, 1365 1176; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 1177; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 1178; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 1179; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 1180; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1181; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 1182; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 1183; CHECK-NEXT: ret 1184; 1185; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32: 1186; CHECK-ZVBB: # %bb.0: 1187; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1188; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 1189; CHECK-ZVBB-NEXT: ret 1190 %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl) 1191 ret <vscale x 4 x i32> %v 1192} 1193 1194define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { 1195; CHECK-LABEL: vp_bitreverse_nxv4i32_unmasked: 1196; CHECK: # %bb.0: 1197; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1198; CHECK-NEXT: vsrl.vi v10, v8, 8 1199; CHECK-NEXT: lui a0, 16 1200; CHECK-NEXT: vsrl.vi v12, v8, 24 1201; CHECK-NEXT: addi a0, a0, -256 1202; CHECK-NEXT: vand.vx v10, v10, a0 1203; CHECK-NEXT: vor.vv v10, v10, v12 1204; CHECK-NEXT: vsll.vi v12, v8, 24 1205; CHECK-NEXT: vand.vx v8, v8, a0 1206; CHECK-NEXT: lui a0, 61681 1207; CHECK-NEXT: addi a0, a0, -241 1208; CHECK-NEXT: vsll.vi v8, v8, 8 1209; CHECK-NEXT: vor.vv v8, v12, v8 1210; CHECK-NEXT: vor.vv v8, v8, v10 1211; CHECK-NEXT: vsrl.vi v10, v8, 4 1212; CHECK-NEXT: vand.vx v8, v8, a0 1213; CHECK-NEXT: vand.vx v10, v10, a0 1214; CHECK-NEXT: lui a0, 209715 1215; CHECK-NEXT: addi a0, a0, 819 1216; CHECK-NEXT: vsll.vi v8, v8, 4 1217; CHECK-NEXT: vor.vv v8, v10, v8 1218; CHECK-NEXT: vsrl.vi v10, v8, 2 1219; CHECK-NEXT: vand.vx v8, v8, a0 1220; CHECK-NEXT: vand.vx v10, v10, a0 1221; CHECK-NEXT: lui a0, 349525 1222; CHECK-NEXT: addi a0, a0, 1365 1223; CHECK-NEXT: vsll.vi v8, v8, 2 1224; CHECK-NEXT: vor.vv v8, v10, v8 1225; CHECK-NEXT: vsrl.vi v10, v8, 1 1226; CHECK-NEXT: vand.vx v8, v8, a0 1227; CHECK-NEXT: vand.vx v10, v10, a0 1228; CHECK-NEXT: vadd.vv v8, v8, v8 1229; CHECK-NEXT: vor.vv v8, v10, v8 1230; CHECK-NEXT: ret 1231; 1232; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked: 1233; CHECK-ZVBB: # %bb.0: 1234; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1235; CHECK-ZVBB-NEXT: vbrev.v v8, v8 1236; CHECK-ZVBB-NEXT: ret 1237 %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1238 ret <vscale x 4 x i32> %v 1239} 1240 1241declare <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32) 1242 1243define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1244; CHECK-LABEL: vp_bitreverse_nxv8i32: 1245; CHECK: # %bb.0: 1246; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1247; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t 1248; CHECK-NEXT: lui a0, 16 1249; CHECK-NEXT: addi a0, a0, -256 1250; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 1251; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t 1252; CHECK-NEXT: vor.vv v12, v12, v16, v0.t 1253; CHECK-NEXT: vand.vx v16, v8, a0, v0.t 1254; CHECK-NEXT: lui a0, 61681 1255; CHECK-NEXT: addi a0, a0, -241 1256; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t 1257; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 1258; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 1259; CHECK-NEXT: vor.vv v8, v8, 
v12, v0.t 1260; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 1261; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 1262; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1263; CHECK-NEXT: lui a0, 209715 1264; CHECK-NEXT: addi a0, a0, 819 1265; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 1266; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 1267; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t 1268; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 1269; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1270; CHECK-NEXT: lui a0, 349525 1271; CHECK-NEXT: addi a0, a0, 1365 1272; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 1273; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 1274; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 1275; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 1276; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1277; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 1278; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 1279; CHECK-NEXT: ret 1280; 1281; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32: 1282; CHECK-ZVBB: # %bb.0: 1283; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1284; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 1285; CHECK-ZVBB-NEXT: ret 1286 %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl) 1287 ret <vscale x 8 x i32> %v 1288} 1289 1290define <vscale x 8 x i32> @vp_bitreverse_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { 1291; CHECK-LABEL: vp_bitreverse_nxv8i32_unmasked: 1292; CHECK: # %bb.0: 1293; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1294; CHECK-NEXT: vsrl.vi v12, v8, 8 1295; CHECK-NEXT: lui a0, 16 1296; CHECK-NEXT: vsrl.vi v16, v8, 24 1297; CHECK-NEXT: addi a0, a0, -256 1298; CHECK-NEXT: vand.vx v12, v12, a0 1299; CHECK-NEXT: vor.vv v12, v12, v16 1300; CHECK-NEXT: vsll.vi v16, v8, 24 1301; CHECK-NEXT: vand.vx v8, v8, a0 1302; CHECK-NEXT: lui a0, 61681 1303; CHECK-NEXT: addi a0, a0, -241 1304; CHECK-NEXT: vsll.vi v8, v8, 8 1305; CHECK-NEXT: vor.vv v8, v16, v8 1306; CHECK-NEXT: vor.vv v8, v8, v12 1307; CHECK-NEXT: vsrl.vi v12, v8, 4 1308; CHECK-NEXT: vand.vx v8, v8, a0 1309; CHECK-NEXT: vand.vx v12, v12, a0 1310; CHECK-NEXT: lui a0, 209715 1311; CHECK-NEXT: addi a0, a0, 819 1312; CHECK-NEXT: vsll.vi v8, v8, 4 1313; CHECK-NEXT: vor.vv v8, v12, v8 1314; CHECK-NEXT: vsrl.vi v12, v8, 2 1315; CHECK-NEXT: vand.vx v8, v8, a0 1316; CHECK-NEXT: vand.vx v12, v12, a0 1317; CHECK-NEXT: lui a0, 349525 1318; CHECK-NEXT: addi a0, a0, 1365 1319; CHECK-NEXT: vsll.vi v8, v8, 2 1320; CHECK-NEXT: vor.vv v8, v12, v8 1321; CHECK-NEXT: vsrl.vi v12, v8, 1 1322; CHECK-NEXT: vand.vx v8, v8, a0 1323; CHECK-NEXT: vand.vx v12, v12, a0 1324; CHECK-NEXT: vadd.vv v8, v8, v8 1325; CHECK-NEXT: vor.vv v8, v12, v8 1326; CHECK-NEXT: ret 1327; 1328; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked: 1329; CHECK-ZVBB: # %bb.0: 1330; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1331; CHECK-ZVBB-NEXT: vbrev.v v8, v8 1332; CHECK-ZVBB-NEXT: ret 1333 %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1334 ret <vscale x 8 x i32> %v 1335} 1336 1337declare <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32) 1338 1339define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1340; CHECK-LABEL: vp_bitreverse_nxv16i32: 1341; CHECK: # %bb.0: 1342; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1343; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t 1344; CHECK-NEXT: lui a0, 16 1345; CHECK-NEXT: addi a0, a0, -256 1346; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 1347; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t 1348; 
CHECK-NEXT: vor.vv v16, v16, v24, v0.t 1349; CHECK-NEXT: vand.vx v24, v8, a0, v0.t 1350; CHECK-NEXT: lui a0, 61681 1351; CHECK-NEXT: addi a0, a0, -241 1352; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t 1353; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 1354; CHECK-NEXT: vor.vv v8, v8, v24, v0.t 1355; CHECK-NEXT: vor.vv v16, v8, v16, v0.t 1356; CHECK-NEXT: vsrl.vi v8, v16, 4, v0.t 1357; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1358; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 1359; CHECK-NEXT: lui a0, 209715 1360; CHECK-NEXT: addi a0, a0, 819 1361; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t 1362; CHECK-NEXT: vor.vv v16, v8, v16, v0.t 1363; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t 1364; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1365; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 1366; CHECK-NEXT: lui a0, 349525 1367; CHECK-NEXT: addi a0, a0, 1365 1368; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t 1369; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 1370; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 1371; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 1372; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1373; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 1374; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 1375; CHECK-NEXT: ret 1376; 1377; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32: 1378; CHECK-ZVBB: # %bb.0: 1379; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1380; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 1381; CHECK-ZVBB-NEXT: ret 1382 %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl) 1383 ret <vscale x 16 x i32> %v 1384} 1385 1386define <vscale x 16 x i32> @vp_bitreverse_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { 1387; CHECK-LABEL: vp_bitreverse_nxv16i32_unmasked: 1388; CHECK: # %bb.0: 1389; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1390; CHECK-NEXT: vsrl.vi v16, v8, 8 1391; CHECK-NEXT: lui a0, 16 1392; CHECK-NEXT: vsrl.vi v24, v8, 24 1393; CHECK-NEXT: addi a0, a0, -256 1394; CHECK-NEXT: vand.vx v16, v16, a0 1395; CHECK-NEXT: vor.vv v16, v16, v24 1396; CHECK-NEXT: vsll.vi v24, v8, 24 1397; CHECK-NEXT: vand.vx v8, v8, a0 1398; CHECK-NEXT: lui a0, 61681 1399; CHECK-NEXT: addi a0, a0, -241 1400; CHECK-NEXT: vsll.vi v8, v8, 8 1401; CHECK-NEXT: vor.vv v8, v24, v8 1402; CHECK-NEXT: vor.vv v8, v8, v16 1403; CHECK-NEXT: vsrl.vi v16, v8, 4 1404; CHECK-NEXT: vand.vx v8, v8, a0 1405; CHECK-NEXT: vand.vx v16, v16, a0 1406; CHECK-NEXT: lui a0, 209715 1407; CHECK-NEXT: addi a0, a0, 819 1408; CHECK-NEXT: vsll.vi v8, v8, 4 1409; CHECK-NEXT: vor.vv v8, v16, v8 1410; CHECK-NEXT: vsrl.vi v16, v8, 2 1411; CHECK-NEXT: vand.vx v8, v8, a0 1412; CHECK-NEXT: vand.vx v16, v16, a0 1413; CHECK-NEXT: lui a0, 349525 1414; CHECK-NEXT: addi a0, a0, 1365 1415; CHECK-NEXT: vsll.vi v8, v8, 2 1416; CHECK-NEXT: vor.vv v8, v16, v8 1417; CHECK-NEXT: vsrl.vi v16, v8, 1 1418; CHECK-NEXT: vand.vx v8, v8, a0 1419; CHECK-NEXT: vand.vx v16, v16, a0 1420; CHECK-NEXT: vadd.vv v8, v8, v8 1421; CHECK-NEXT: vor.vv v8, v16, v8 1422; CHECK-NEXT: ret 1423; 1424; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked: 1425; CHECK-ZVBB: # %bb.0: 1426; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1427; CHECK-ZVBB-NEXT: vbrev.v v8, v8 1428; CHECK-ZVBB-NEXT: ret 1429 %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1430 ret <vscale x 16 x i32> %v 1431} 1432 1433declare <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32) 1434 1435define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 
1436; RV32-LABEL: vp_bitreverse_nxv1i64: 1437; RV32: # %bb.0: 1438; RV32-NEXT: addi sp, sp, -16 1439; RV32-NEXT: .cfi_def_cfa_offset 16 1440; RV32-NEXT: lui a4, 1044480 1441; RV32-NEXT: li a3, 56 1442; RV32-NEXT: lui a5, 16 1443; RV32-NEXT: li a2, 40 1444; RV32-NEXT: lui a1, 4080 1445; RV32-NEXT: addi a6, sp, 8 1446; RV32-NEXT: sw a4, 8(sp) 1447; RV32-NEXT: sw zero, 12(sp) 1448; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1449; RV32-NEXT: vlse64.v v9, (a6), zero 1450; RV32-NEXT: lui a4, 61681 1451; RV32-NEXT: vsll.vx v10, v8, a3, v0.t 1452; RV32-NEXT: addi a5, a5, -256 1453; RV32-NEXT: vand.vx v11, v8, a5, v0.t 1454; RV32-NEXT: vsll.vx v11, v11, a2, v0.t 1455; RV32-NEXT: vor.vv v10, v10, v11, v0.t 1456; RV32-NEXT: vand.vx v11, v8, a1, v0.t 1457; RV32-NEXT: vsll.vi v11, v11, 24, v0.t 1458; RV32-NEXT: vand.vv v12, v8, v9, v0.t 1459; RV32-NEXT: vsll.vi v12, v12, 8, v0.t 1460; RV32-NEXT: vor.vv v11, v11, v12, v0.t 1461; RV32-NEXT: vor.vv v10, v10, v11, v0.t 1462; RV32-NEXT: vsrl.vx v11, v8, a3, v0.t 1463; RV32-NEXT: lui a3, 209715 1464; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t 1465; RV32-NEXT: lui a2, 349525 1466; RV32-NEXT: addi a4, a4, -241 1467; RV32-NEXT: addi a3, a3, 819 1468; RV32-NEXT: addi a2, a2, 1365 1469; RV32-NEXT: vand.vx v12, v12, a5, v0.t 1470; RV32-NEXT: vor.vv v11, v12, v11, v0.t 1471; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t 1472; RV32-NEXT: vand.vx v12, v12, a1, v0.t 1473; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 1474; RV32-NEXT: vand.vv v8, v8, v9, v0.t 1475; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 1476; RV32-NEXT: vmv.v.x v9, a4 1477; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1478; RV32-NEXT: vor.vv v8, v8, v12, v0.t 1479; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 1480; RV32-NEXT: vmv.v.x v12, a3 1481; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1482; RV32-NEXT: vor.vv v8, v8, v11, v0.t 1483; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 1484; RV32-NEXT: vmv.v.x v11, a2 1485; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1486; RV32-NEXT: vor.vv v8, v10, v8, v0.t 1487; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t 1488; RV32-NEXT: vand.vv v10, v10, v9, v0.t 1489; RV32-NEXT: vand.vv v8, v8, v9, v0.t 1490; RV32-NEXT: vsll.vi v8, v8, 4, v0.t 1491; RV32-NEXT: vor.vv v8, v10, v8, v0.t 1492; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t 1493; RV32-NEXT: vand.vv v9, v9, v12, v0.t 1494; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1495; RV32-NEXT: vsll.vi v8, v8, 2, v0.t 1496; RV32-NEXT: vor.vv v8, v9, v8, v0.t 1497; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t 1498; RV32-NEXT: vand.vv v9, v9, v11, v0.t 1499; RV32-NEXT: vand.vv v8, v8, v11, v0.t 1500; RV32-NEXT: vsll.vi v8, v8, 1, v0.t 1501; RV32-NEXT: vor.vv v8, v9, v8, v0.t 1502; RV32-NEXT: addi sp, sp, 16 1503; RV32-NEXT: .cfi_def_cfa_offset 0 1504; RV32-NEXT: ret 1505; 1506; RV64-LABEL: vp_bitreverse_nxv1i64: 1507; RV64: # %bb.0: 1508; RV64-NEXT: lui a1, 4080 1509; RV64-NEXT: li a3, 255 1510; RV64-NEXT: li a2, 56 1511; RV64-NEXT: lui a4, 16 1512; RV64-NEXT: lui a5, 61681 1513; RV64-NEXT: lui a6, 209715 1514; RV64-NEXT: lui a7, 349525 1515; RV64-NEXT: addiw a5, a5, -241 1516; RV64-NEXT: addiw a6, a6, 819 1517; RV64-NEXT: addiw a7, a7, 1365 1518; RV64-NEXT: slli t0, a5, 32 1519; RV64-NEXT: add t0, a5, t0 1520; RV64-NEXT: slli a5, a6, 32 1521; RV64-NEXT: add a6, a6, a5 1522; RV64-NEXT: slli a5, a7, 32 1523; RV64-NEXT: add a5, a7, a5 1524; RV64-NEXT: li a7, 40 1525; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1526; RV64-NEXT: vand.vx v9, v8, a1, v0.t 1527; RV64-NEXT: slli a3, a3, 24 1528; RV64-NEXT: addiw a0, a4, -256 1529; RV64-NEXT: vsll.vi v9, v9, 24, v0.t 1530; 
RV64-NEXT: vand.vx v10, v8, a3, v0.t 1531; RV64-NEXT: vsll.vi v10, v10, 8, v0.t 1532; RV64-NEXT: vor.vv v9, v9, v10, v0.t 1533; RV64-NEXT: vsll.vx v10, v8, a2, v0.t 1534; RV64-NEXT: vand.vx v11, v8, a0, v0.t 1535; RV64-NEXT: vsll.vx v11, v11, a7, v0.t 1536; RV64-NEXT: vor.vv v10, v10, v11, v0.t 1537; RV64-NEXT: vor.vv v9, v10, v9, v0.t 1538; RV64-NEXT: vsrl.vx v10, v8, a2, v0.t 1539; RV64-NEXT: vsrl.vx v11, v8, a7, v0.t 1540; RV64-NEXT: vand.vx v11, v11, a0, v0.t 1541; RV64-NEXT: vor.vv v10, v11, v10, v0.t 1542; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t 1543; RV64-NEXT: vand.vx v11, v11, a1, v0.t 1544; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 1545; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1546; RV64-NEXT: vor.vv v8, v8, v11, v0.t 1547; RV64-NEXT: vor.vv v8, v8, v10, v0.t 1548; RV64-NEXT: vor.vv v8, v9, v8, v0.t 1549; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t 1550; RV64-NEXT: vand.vx v9, v9, t0, v0.t 1551; RV64-NEXT: vand.vx v8, v8, t0, v0.t 1552; RV64-NEXT: vsll.vi v8, v8, 4, v0.t 1553; RV64-NEXT: vor.vv v8, v9, v8, v0.t 1554; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t 1555; RV64-NEXT: vand.vx v9, v9, a6, v0.t 1556; RV64-NEXT: vand.vx v8, v8, a6, v0.t 1557; RV64-NEXT: vsll.vi v8, v8, 2, v0.t 1558; RV64-NEXT: vor.vv v8, v9, v8, v0.t 1559; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t 1560; RV64-NEXT: vand.vx v9, v9, a5, v0.t 1561; RV64-NEXT: vand.vx v8, v8, a5, v0.t 1562; RV64-NEXT: vsll.vi v8, v8, 1, v0.t 1563; RV64-NEXT: vor.vv v8, v9, v8, v0.t 1564; RV64-NEXT: ret 1565; 1566; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64: 1567; CHECK-ZVBB: # %bb.0: 1568; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1569; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 1570; CHECK-ZVBB-NEXT: ret 1571 %v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) 1572 ret <vscale x 1 x i64> %v 1573} 1574 1575define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { 1576; RV32-LABEL: vp_bitreverse_nxv1i64_unmasked: 1577; RV32: # %bb.0: 1578; RV32-NEXT: addi sp, sp, -16 1579; RV32-NEXT: .cfi_def_cfa_offset 16 1580; RV32-NEXT: lui a1, 1044480 1581; RV32-NEXT: li a2, 56 1582; RV32-NEXT: lui a3, 16 1583; RV32-NEXT: li a4, 40 1584; RV32-NEXT: lui a5, 4080 1585; RV32-NEXT: addi a6, sp, 8 1586; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1587; RV32-NEXT: vsrl.vi v9, v8, 24 1588; RV32-NEXT: sw a1, 8(sp) 1589; RV32-NEXT: sw zero, 12(sp) 1590; RV32-NEXT: vsll.vx v10, v8, a2 1591; RV32-NEXT: addi a1, a3, -256 1592; RV32-NEXT: vsrl.vx v11, v8, a2 1593; RV32-NEXT: vsrl.vx v12, v8, a4 1594; RV32-NEXT: vand.vx v13, v8, a1 1595; RV32-NEXT: vand.vx v12, v12, a1 1596; RV32-NEXT: vor.vv v11, v12, v11 1597; RV32-NEXT: vlse64.v v12, (a6), zero 1598; RV32-NEXT: vsll.vx v13, v13, a4 1599; RV32-NEXT: vor.vv v10, v10, v13 1600; RV32-NEXT: vsrl.vi v13, v8, 8 1601; RV32-NEXT: vand.vx v9, v9, a5 1602; RV32-NEXT: vand.vv v13, v13, v12 1603; RV32-NEXT: vor.vv v9, v13, v9 1604; RV32-NEXT: lui a1, 61681 1605; RV32-NEXT: lui a2, 209715 1606; RV32-NEXT: lui a3, 349525 1607; RV32-NEXT: vand.vv v12, v8, v12 1608; RV32-NEXT: vand.vx v8, v8, a5 1609; RV32-NEXT: addi a1, a1, -241 1610; RV32-NEXT: addi a2, a2, 819 1611; RV32-NEXT: addi a3, a3, 1365 1612; RV32-NEXT: vsll.vi v8, v8, 24 1613; RV32-NEXT: vsll.vi v12, v12, 8 1614; RV32-NEXT: vor.vv v8, v8, v12 1615; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma 1616; RV32-NEXT: vmv.v.x v12, a1 1617; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1618; RV32-NEXT: vor.vv v9, v9, v11 1619; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 1620; RV32-NEXT: vmv.v.x v11, 
a2 1621; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1622; RV32-NEXT: vor.vv v8, v10, v8 1623; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 1624; RV32-NEXT: vmv.v.x v10, a3 1625; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1626; RV32-NEXT: vor.vv v8, v8, v9 1627; RV32-NEXT: vsrl.vi v9, v8, 4 1628; RV32-NEXT: vand.vv v8, v8, v12 1629; RV32-NEXT: vand.vv v9, v9, v12 1630; RV32-NEXT: vsll.vi v8, v8, 4 1631; RV32-NEXT: vor.vv v8, v9, v8 1632; RV32-NEXT: vsrl.vi v9, v8, 2 1633; RV32-NEXT: vand.vv v8, v8, v11 1634; RV32-NEXT: vand.vv v9, v9, v11 1635; RV32-NEXT: vsll.vi v8, v8, 2 1636; RV32-NEXT: vor.vv v8, v9, v8 1637; RV32-NEXT: vsrl.vi v9, v8, 1 1638; RV32-NEXT: vand.vv v8, v8, v10 1639; RV32-NEXT: vand.vv v9, v9, v10 1640; RV32-NEXT: vadd.vv v8, v8, v8 1641; RV32-NEXT: vor.vv v8, v9, v8 1642; RV32-NEXT: addi sp, sp, 16 1643; RV32-NEXT: .cfi_def_cfa_offset 0 1644; RV32-NEXT: ret 1645; 1646; RV64-LABEL: vp_bitreverse_nxv1i64_unmasked: 1647; RV64: # %bb.0: 1648; RV64-NEXT: lui a1, 4080 1649; RV64-NEXT: li a2, 255 1650; RV64-NEXT: li a3, 56 1651; RV64-NEXT: lui a4, 16 1652; RV64-NEXT: li a5, 40 1653; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1654; RV64-NEXT: vsrl.vi v9, v8, 24 1655; RV64-NEXT: vsrl.vi v10, v8, 8 1656; RV64-NEXT: addiw a0, a4, -256 1657; RV64-NEXT: vsrl.vx v11, v8, a3 1658; RV64-NEXT: vsrl.vx v12, v8, a5 1659; RV64-NEXT: vand.vx v12, v12, a0 1660; RV64-NEXT: vor.vv v11, v12, v11 1661; RV64-NEXT: vand.vx v12, v8, a1 1662; RV64-NEXT: slli a2, a2, 24 1663; RV64-NEXT: vand.vx v9, v9, a1 1664; RV64-NEXT: vsll.vi v12, v12, 24 1665; RV64-NEXT: vand.vx v10, v10, a2 1666; RV64-NEXT: vor.vv v9, v10, v9 1667; RV64-NEXT: vand.vx v10, v8, a2 1668; RV64-NEXT: vsll.vi v10, v10, 8 1669; RV64-NEXT: vor.vv v10, v12, v10 1670; RV64-NEXT: vsll.vx v12, v8, a3 1671; RV64-NEXT: vand.vx v8, v8, a0 1672; RV64-NEXT: vsll.vx v8, v8, a5 1673; RV64-NEXT: vor.vv v8, v12, v8 1674; RV64-NEXT: lui a0, 61681 1675; RV64-NEXT: lui a1, 209715 1676; RV64-NEXT: lui a2, 349525 1677; RV64-NEXT: addiw a0, a0, -241 1678; RV64-NEXT: addiw a1, a1, 819 1679; RV64-NEXT: addiw a2, a2, 1365 1680; RV64-NEXT: slli a3, a0, 32 1681; RV64-NEXT: slli a4, a1, 32 1682; RV64-NEXT: add a0, a0, a3 1683; RV64-NEXT: slli a3, a2, 32 1684; RV64-NEXT: add a1, a1, a4 1685; RV64-NEXT: add a2, a2, a3 1686; RV64-NEXT: vor.vv v9, v9, v11 1687; RV64-NEXT: vor.vv v8, v8, v10 1688; RV64-NEXT: vor.vv v8, v8, v9 1689; RV64-NEXT: vsrl.vi v9, v8, 4 1690; RV64-NEXT: vand.vx v8, v8, a0 1691; RV64-NEXT: vand.vx v9, v9, a0 1692; RV64-NEXT: vsll.vi v8, v8, 4 1693; RV64-NEXT: vor.vv v8, v9, v8 1694; RV64-NEXT: vsrl.vi v9, v8, 2 1695; RV64-NEXT: vand.vx v8, v8, a1 1696; RV64-NEXT: vand.vx v9, v9, a1 1697; RV64-NEXT: vsll.vi v8, v8, 2 1698; RV64-NEXT: vor.vv v8, v9, v8 1699; RV64-NEXT: vsrl.vi v9, v8, 1 1700; RV64-NEXT: vand.vx v8, v8, a2 1701; RV64-NEXT: vand.vx v9, v9, a2 1702; RV64-NEXT: vadd.vv v8, v8, v8 1703; RV64-NEXT: vor.vv v8, v9, v8 1704; RV64-NEXT: ret 1705; 1706; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64_unmasked: 1707; CHECK-ZVBB: # %bb.0: 1708; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1709; CHECK-ZVBB-NEXT: vbrev.v v8, v8 1710; CHECK-ZVBB-NEXT: ret 1711 %v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1712 ret <vscale x 1 x i64> %v 1713} 1714 1715declare <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32) 1716 1717define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext 
%evl) { 1718; RV32-LABEL: vp_bitreverse_nxv2i64: 1719; RV32: # %bb.0: 1720; RV32-NEXT: addi sp, sp, -16 1721; RV32-NEXT: .cfi_def_cfa_offset 16 1722; RV32-NEXT: lui a4, 1044480 1723; RV32-NEXT: li a3, 56 1724; RV32-NEXT: lui a5, 16 1725; RV32-NEXT: li a2, 40 1726; RV32-NEXT: lui a1, 4080 1727; RV32-NEXT: addi a6, sp, 8 1728; RV32-NEXT: sw a4, 8(sp) 1729; RV32-NEXT: sw zero, 12(sp) 1730; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1731; RV32-NEXT: vlse64.v v10, (a6), zero 1732; RV32-NEXT: lui a4, 61681 1733; RV32-NEXT: vsll.vx v12, v8, a3, v0.t 1734; RV32-NEXT: addi a5, a5, -256 1735; RV32-NEXT: vand.vx v14, v8, a5, v0.t 1736; RV32-NEXT: vsll.vx v14, v14, a2, v0.t 1737; RV32-NEXT: vor.vv v12, v12, v14, v0.t 1738; RV32-NEXT: vand.vx v14, v8, a1, v0.t 1739; RV32-NEXT: vsll.vi v14, v14, 24, v0.t 1740; RV32-NEXT: vand.vv v16, v8, v10, v0.t 1741; RV32-NEXT: vsll.vi v16, v16, 8, v0.t 1742; RV32-NEXT: vor.vv v14, v14, v16, v0.t 1743; RV32-NEXT: vor.vv v12, v12, v14, v0.t 1744; RV32-NEXT: vsrl.vx v14, v8, a3, v0.t 1745; RV32-NEXT: lui a3, 209715 1746; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t 1747; RV32-NEXT: lui a2, 349525 1748; RV32-NEXT: addi a4, a4, -241 1749; RV32-NEXT: addi a3, a3, 819 1750; RV32-NEXT: addi a2, a2, 1365 1751; RV32-NEXT: vand.vx v16, v16, a5, v0.t 1752; RV32-NEXT: vor.vv v14, v16, v14, v0.t 1753; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t 1754; RV32-NEXT: vand.vx v16, v16, a1, v0.t 1755; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 1756; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1757; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma 1758; RV32-NEXT: vmv.v.x v10, a4 1759; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1760; RV32-NEXT: vor.vv v8, v8, v16, v0.t 1761; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma 1762; RV32-NEXT: vmv.v.x v16, a3 1763; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1764; RV32-NEXT: vor.vv v8, v8, v14, v0.t 1765; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma 1766; RV32-NEXT: vmv.v.x v14, a2 1767; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1768; RV32-NEXT: vor.vv v8, v12, v8, v0.t 1769; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t 1770; RV32-NEXT: vand.vv v12, v12, v10, v0.t 1771; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1772; RV32-NEXT: vsll.vi v8, v8, 4, v0.t 1773; RV32-NEXT: vor.vv v8, v12, v8, v0.t 1774; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t 1775; RV32-NEXT: vand.vv v10, v10, v16, v0.t 1776; RV32-NEXT: vand.vv v8, v8, v16, v0.t 1777; RV32-NEXT: vsll.vi v8, v8, 2, v0.t 1778; RV32-NEXT: vor.vv v8, v10, v8, v0.t 1779; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t 1780; RV32-NEXT: vand.vv v10, v10, v14, v0.t 1781; RV32-NEXT: vand.vv v8, v8, v14, v0.t 1782; RV32-NEXT: vsll.vi v8, v8, 1, v0.t 1783; RV32-NEXT: vor.vv v8, v10, v8, v0.t 1784; RV32-NEXT: addi sp, sp, 16 1785; RV32-NEXT: .cfi_def_cfa_offset 0 1786; RV32-NEXT: ret 1787; 1788; RV64-LABEL: vp_bitreverse_nxv2i64: 1789; RV64: # %bb.0: 1790; RV64-NEXT: lui a1, 4080 1791; RV64-NEXT: li a3, 255 1792; RV64-NEXT: li a2, 56 1793; RV64-NEXT: lui a4, 16 1794; RV64-NEXT: lui a5, 61681 1795; RV64-NEXT: lui a6, 209715 1796; RV64-NEXT: lui a7, 349525 1797; RV64-NEXT: addiw a5, a5, -241 1798; RV64-NEXT: addiw a6, a6, 819 1799; RV64-NEXT: addiw a7, a7, 1365 1800; RV64-NEXT: slli t0, a5, 32 1801; RV64-NEXT: add t0, a5, t0 1802; RV64-NEXT: slli a5, a6, 32 1803; RV64-NEXT: add a6, a6, a5 1804; RV64-NEXT: slli a5, a7, 32 1805; RV64-NEXT: add a5, a7, a5 1806; RV64-NEXT: li a7, 40 1807; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1808; RV64-NEXT: vand.vx v10, v8, a1, v0.t 1809; RV64-NEXT: slli a3, a3, 24 1810; RV64-NEXT: addiw a0, a4, -256 1811; RV64-NEXT: vsll.vi v10, 
v10, 24, v0.t 1812; RV64-NEXT: vand.vx v12, v8, a3, v0.t 1813; RV64-NEXT: vsll.vi v12, v12, 8, v0.t 1814; RV64-NEXT: vor.vv v10, v10, v12, v0.t 1815; RV64-NEXT: vsll.vx v12, v8, a2, v0.t 1816; RV64-NEXT: vand.vx v14, v8, a0, v0.t 1817; RV64-NEXT: vsll.vx v14, v14, a7, v0.t 1818; RV64-NEXT: vor.vv v12, v12, v14, v0.t 1819; RV64-NEXT: vor.vv v10, v12, v10, v0.t 1820; RV64-NEXT: vsrl.vx v12, v8, a2, v0.t 1821; RV64-NEXT: vsrl.vx v14, v8, a7, v0.t 1822; RV64-NEXT: vand.vx v14, v14, a0, v0.t 1823; RV64-NEXT: vor.vv v12, v14, v12, v0.t 1824; RV64-NEXT: vsrl.vi v14, v8, 24, v0.t 1825; RV64-NEXT: vand.vx v14, v14, a1, v0.t 1826; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 1827; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1828; RV64-NEXT: vor.vv v8, v8, v14, v0.t 1829; RV64-NEXT: vor.vv v8, v8, v12, v0.t 1830; RV64-NEXT: vor.vv v8, v10, v8, v0.t 1831; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t 1832; RV64-NEXT: vand.vx v10, v10, t0, v0.t 1833; RV64-NEXT: vand.vx v8, v8, t0, v0.t 1834; RV64-NEXT: vsll.vi v8, v8, 4, v0.t 1835; RV64-NEXT: vor.vv v8, v10, v8, v0.t 1836; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t 1837; RV64-NEXT: vand.vx v10, v10, a6, v0.t 1838; RV64-NEXT: vand.vx v8, v8, a6, v0.t 1839; RV64-NEXT: vsll.vi v8, v8, 2, v0.t 1840; RV64-NEXT: vor.vv v8, v10, v8, v0.t 1841; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t 1842; RV64-NEXT: vand.vx v10, v10, a5, v0.t 1843; RV64-NEXT: vand.vx v8, v8, a5, v0.t 1844; RV64-NEXT: vsll.vi v8, v8, 1, v0.t 1845; RV64-NEXT: vor.vv v8, v10, v8, v0.t 1846; RV64-NEXT: ret 1847; 1848; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64: 1849; CHECK-ZVBB: # %bb.0: 1850; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1851; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 1852; CHECK-ZVBB-NEXT: ret 1853 %v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl) 1854 ret <vscale x 2 x i64> %v 1855} 1856 1857define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { 1858; RV32-LABEL: vp_bitreverse_nxv2i64_unmasked: 1859; RV32: # %bb.0: 1860; RV32-NEXT: addi sp, sp, -16 1861; RV32-NEXT: .cfi_def_cfa_offset 16 1862; RV32-NEXT: lui a1, 1044480 1863; RV32-NEXT: li a2, 56 1864; RV32-NEXT: lui a3, 16 1865; RV32-NEXT: li a4, 40 1866; RV32-NEXT: lui a5, 4080 1867; RV32-NEXT: addi a6, sp, 8 1868; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1869; RV32-NEXT: vsrl.vi v14, v8, 24 1870; RV32-NEXT: sw a1, 8(sp) 1871; RV32-NEXT: sw zero, 12(sp) 1872; RV32-NEXT: vsll.vx v12, v8, a2 1873; RV32-NEXT: addi a1, a3, -256 1874; RV32-NEXT: vsrl.vx v10, v8, a2 1875; RV32-NEXT: vsrl.vx v16, v8, a4 1876; RV32-NEXT: vand.vx v18, v8, a1 1877; RV32-NEXT: vand.vx v16, v16, a1 1878; RV32-NEXT: vor.vv v10, v16, v10 1879; RV32-NEXT: vlse64.v v16, (a6), zero 1880; RV32-NEXT: vsll.vx v18, v18, a4 1881; RV32-NEXT: vor.vv v12, v12, v18 1882; RV32-NEXT: vsrl.vi v18, v8, 8 1883; RV32-NEXT: vand.vx v14, v14, a5 1884; RV32-NEXT: vand.vv v18, v18, v16 1885; RV32-NEXT: vor.vv v14, v18, v14 1886; RV32-NEXT: lui a1, 61681 1887; RV32-NEXT: lui a2, 209715 1888; RV32-NEXT: lui a3, 349525 1889; RV32-NEXT: vand.vv v16, v8, v16 1890; RV32-NEXT: vand.vx v8, v8, a5 1891; RV32-NEXT: addi a1, a1, -241 1892; RV32-NEXT: addi a2, a2, 819 1893; RV32-NEXT: addi a3, a3, 1365 1894; RV32-NEXT: vsll.vi v8, v8, 24 1895; RV32-NEXT: vsll.vi v16, v16, 8 1896; RV32-NEXT: vor.vv v8, v8, v16 1897; RV32-NEXT: vsetvli a4, zero, e32, m2, ta, ma 1898; RV32-NEXT: vmv.v.x v16, a1 1899; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1900; RV32-NEXT: vor.vv v10, v14, v10 1901; RV32-NEXT: vsetvli a1, zero, 
e32, m2, ta, ma 1902; RV32-NEXT: vmv.v.x v14, a2 1903; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1904; RV32-NEXT: vor.vv v8, v12, v8 1905; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma 1906; RV32-NEXT: vmv.v.x v12, a3 1907; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1908; RV32-NEXT: vor.vv v8, v8, v10 1909; RV32-NEXT: vsrl.vi v10, v8, 4 1910; RV32-NEXT: vand.vv v8, v8, v16 1911; RV32-NEXT: vand.vv v10, v10, v16 1912; RV32-NEXT: vsll.vi v8, v8, 4 1913; RV32-NEXT: vor.vv v8, v10, v8 1914; RV32-NEXT: vsrl.vi v10, v8, 2 1915; RV32-NEXT: vand.vv v8, v8, v14 1916; RV32-NEXT: vand.vv v10, v10, v14 1917; RV32-NEXT: vsll.vi v8, v8, 2 1918; RV32-NEXT: vor.vv v8, v10, v8 1919; RV32-NEXT: vsrl.vi v10, v8, 1 1920; RV32-NEXT: vand.vv v8, v8, v12 1921; RV32-NEXT: vand.vv v10, v10, v12 1922; RV32-NEXT: vadd.vv v8, v8, v8 1923; RV32-NEXT: vor.vv v8, v10, v8 1924; RV32-NEXT: addi sp, sp, 16 1925; RV32-NEXT: .cfi_def_cfa_offset 0 1926; RV32-NEXT: ret 1927; 1928; RV64-LABEL: vp_bitreverse_nxv2i64_unmasked: 1929; RV64: # %bb.0: 1930; RV64-NEXT: lui a1, 4080 1931; RV64-NEXT: li a2, 255 1932; RV64-NEXT: li a3, 56 1933; RV64-NEXT: lui a4, 16 1934; RV64-NEXT: li a5, 40 1935; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1936; RV64-NEXT: vsrl.vi v12, v8, 24 1937; RV64-NEXT: vsrl.vi v14, v8, 8 1938; RV64-NEXT: addiw a0, a4, -256 1939; RV64-NEXT: vsrl.vx v10, v8, a3 1940; RV64-NEXT: vsrl.vx v16, v8, a5 1941; RV64-NEXT: vand.vx v16, v16, a0 1942; RV64-NEXT: vor.vv v10, v16, v10 1943; RV64-NEXT: vand.vx v16, v8, a1 1944; RV64-NEXT: slli a2, a2, 24 1945; RV64-NEXT: vand.vx v12, v12, a1 1946; RV64-NEXT: vsll.vi v16, v16, 24 1947; RV64-NEXT: vand.vx v14, v14, a2 1948; RV64-NEXT: vor.vv v12, v14, v12 1949; RV64-NEXT: vand.vx v14, v8, a2 1950; RV64-NEXT: vsll.vi v14, v14, 8 1951; RV64-NEXT: vor.vv v14, v16, v14 1952; RV64-NEXT: vsll.vx v16, v8, a3 1953; RV64-NEXT: vand.vx v8, v8, a0 1954; RV64-NEXT: vsll.vx v8, v8, a5 1955; RV64-NEXT: vor.vv v8, v16, v8 1956; RV64-NEXT: lui a0, 61681 1957; RV64-NEXT: lui a1, 209715 1958; RV64-NEXT: lui a2, 349525 1959; RV64-NEXT: addiw a0, a0, -241 1960; RV64-NEXT: addiw a1, a1, 819 1961; RV64-NEXT: addiw a2, a2, 1365 1962; RV64-NEXT: slli a3, a0, 32 1963; RV64-NEXT: slli a4, a1, 32 1964; RV64-NEXT: add a0, a0, a3 1965; RV64-NEXT: slli a3, a2, 32 1966; RV64-NEXT: add a1, a1, a4 1967; RV64-NEXT: add a2, a2, a3 1968; RV64-NEXT: vor.vv v10, v12, v10 1969; RV64-NEXT: vor.vv v8, v8, v14 1970; RV64-NEXT: vor.vv v8, v8, v10 1971; RV64-NEXT: vsrl.vi v10, v8, 4 1972; RV64-NEXT: vand.vx v8, v8, a0 1973; RV64-NEXT: vand.vx v10, v10, a0 1974; RV64-NEXT: vsll.vi v8, v8, 4 1975; RV64-NEXT: vor.vv v8, v10, v8 1976; RV64-NEXT: vsrl.vi v10, v8, 2 1977; RV64-NEXT: vand.vx v8, v8, a1 1978; RV64-NEXT: vand.vx v10, v10, a1 1979; RV64-NEXT: vsll.vi v8, v8, 2 1980; RV64-NEXT: vor.vv v8, v10, v8 1981; RV64-NEXT: vsrl.vi v10, v8, 1 1982; RV64-NEXT: vand.vx v8, v8, a2 1983; RV64-NEXT: vand.vx v10, v10, a2 1984; RV64-NEXT: vadd.vv v8, v8, v8 1985; RV64-NEXT: vor.vv v8, v10, v8 1986; RV64-NEXT: ret 1987; 1988; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64_unmasked: 1989; CHECK-ZVBB: # %bb.0: 1990; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1991; CHECK-ZVBB-NEXT: vbrev.v v8, v8 1992; CHECK-ZVBB-NEXT: ret 1993 %v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1994 ret <vscale x 2 x i64> %v 1995} 1996 1997declare <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32) 1998 1999define <vscale x 4 x i64> 
@vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 2000; RV32-LABEL: vp_bitreverse_nxv4i64: 2001; RV32: # %bb.0: 2002; RV32-NEXT: addi sp, sp, -16 2003; RV32-NEXT: .cfi_def_cfa_offset 16 2004; RV32-NEXT: lui a4, 1044480 2005; RV32-NEXT: li a3, 56 2006; RV32-NEXT: lui a5, 16 2007; RV32-NEXT: li a2, 40 2008; RV32-NEXT: lui a1, 4080 2009; RV32-NEXT: addi a6, sp, 8 2010; RV32-NEXT: sw a4, 8(sp) 2011; RV32-NEXT: sw zero, 12(sp) 2012; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2013; RV32-NEXT: vlse64.v v12, (a6), zero 2014; RV32-NEXT: lui a4, 61681 2015; RV32-NEXT: vsll.vx v16, v8, a3, v0.t 2016; RV32-NEXT: addi a5, a5, -256 2017; RV32-NEXT: vand.vx v20, v8, a5, v0.t 2018; RV32-NEXT: vsll.vx v20, v20, a2, v0.t 2019; RV32-NEXT: vor.vv v16, v16, v20, v0.t 2020; RV32-NEXT: vand.vx v20, v8, a1, v0.t 2021; RV32-NEXT: vsll.vi v20, v20, 24, v0.t 2022; RV32-NEXT: vand.vv v24, v8, v12, v0.t 2023; RV32-NEXT: vsll.vi v24, v24, 8, v0.t 2024; RV32-NEXT: vor.vv v20, v20, v24, v0.t 2025; RV32-NEXT: vor.vv v16, v16, v20, v0.t 2026; RV32-NEXT: vsrl.vx v20, v8, a3, v0.t 2027; RV32-NEXT: lui a3, 209715 2028; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t 2029; RV32-NEXT: lui a2, 349525 2030; RV32-NEXT: addi a4, a4, -241 2031; RV32-NEXT: addi a3, a3, 819 2032; RV32-NEXT: addi a2, a2, 1365 2033; RV32-NEXT: vand.vx v24, v24, a5, v0.t 2034; RV32-NEXT: vor.vv v20, v24, v20, v0.t 2035; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t 2036; RV32-NEXT: vand.vx v24, v24, a1, v0.t 2037; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 2038; RV32-NEXT: vand.vv v8, v8, v12, v0.t 2039; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 2040; RV32-NEXT: vmv.v.x v28, a4 2041; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2042; RV32-NEXT: vor.vv v8, v8, v24, v0.t 2043; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 2044; RV32-NEXT: vmv.v.x v12, a3 2045; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2046; RV32-NEXT: vor.vv v20, v8, v20, v0.t 2047; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 2048; RV32-NEXT: vmv.v.x v8, a2 2049; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2050; RV32-NEXT: vor.vv v16, v16, v20, v0.t 2051; RV32-NEXT: vsrl.vi v20, v16, 4, v0.t 2052; RV32-NEXT: vand.vv v20, v20, v28, v0.t 2053; RV32-NEXT: vand.vv v16, v16, v28, v0.t 2054; RV32-NEXT: vsll.vi v16, v16, 4, v0.t 2055; RV32-NEXT: vor.vv v16, v20, v16, v0.t 2056; RV32-NEXT: vsrl.vi v20, v16, 2, v0.t 2057; RV32-NEXT: vand.vv v20, v20, v12, v0.t 2058; RV32-NEXT: vand.vv v12, v16, v12, v0.t 2059; RV32-NEXT: vsll.vi v12, v12, 2, v0.t 2060; RV32-NEXT: vor.vv v12, v20, v12, v0.t 2061; RV32-NEXT: vsrl.vi v16, v12, 1, v0.t 2062; RV32-NEXT: vand.vv v16, v16, v8, v0.t 2063; RV32-NEXT: vand.vv v8, v12, v8, v0.t 2064; RV32-NEXT: vsll.vi v8, v8, 1, v0.t 2065; RV32-NEXT: vor.vv v8, v16, v8, v0.t 2066; RV32-NEXT: addi sp, sp, 16 2067; RV32-NEXT: .cfi_def_cfa_offset 0 2068; RV32-NEXT: ret 2069; 2070; RV64-LABEL: vp_bitreverse_nxv4i64: 2071; RV64: # %bb.0: 2072; RV64-NEXT: lui a1, 4080 2073; RV64-NEXT: li a3, 255 2074; RV64-NEXT: li a2, 56 2075; RV64-NEXT: lui a4, 16 2076; RV64-NEXT: lui a5, 61681 2077; RV64-NEXT: lui a6, 209715 2078; RV64-NEXT: lui a7, 349525 2079; RV64-NEXT: addiw a5, a5, -241 2080; RV64-NEXT: addiw a6, a6, 819 2081; RV64-NEXT: addiw a7, a7, 1365 2082; RV64-NEXT: slli t0, a5, 32 2083; RV64-NEXT: add t0, a5, t0 2084; RV64-NEXT: slli a5, a6, 32 2085; RV64-NEXT: add a6, a6, a5 2086; RV64-NEXT: slli a5, a7, 32 2087; RV64-NEXT: add a5, a7, a5 2088; RV64-NEXT: li a7, 40 2089; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2090; RV64-NEXT: vand.vx v12, v8, a1, v0.t 
2091; RV64-NEXT: slli a3, a3, 24 2092; RV64-NEXT: addiw a0, a4, -256 2093; RV64-NEXT: vsll.vi v12, v12, 24, v0.t 2094; RV64-NEXT: vand.vx v16, v8, a3, v0.t 2095; RV64-NEXT: vsll.vi v16, v16, 8, v0.t 2096; RV64-NEXT: vor.vv v12, v12, v16, v0.t 2097; RV64-NEXT: vsll.vx v16, v8, a2, v0.t 2098; RV64-NEXT: vand.vx v20, v8, a0, v0.t 2099; RV64-NEXT: vsll.vx v20, v20, a7, v0.t 2100; RV64-NEXT: vor.vv v16, v16, v20, v0.t 2101; RV64-NEXT: vor.vv v12, v16, v12, v0.t 2102; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t 2103; RV64-NEXT: vsrl.vx v20, v8, a7, v0.t 2104; RV64-NEXT: vand.vx v20, v20, a0, v0.t 2105; RV64-NEXT: vor.vv v16, v20, v16, v0.t 2106; RV64-NEXT: vsrl.vi v20, v8, 24, v0.t 2107; RV64-NEXT: vand.vx v20, v20, a1, v0.t 2108; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 2109; RV64-NEXT: vand.vx v8, v8, a3, v0.t 2110; RV64-NEXT: vor.vv v8, v8, v20, v0.t 2111; RV64-NEXT: vor.vv v8, v8, v16, v0.t 2112; RV64-NEXT: vor.vv v8, v12, v8, v0.t 2113; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t 2114; RV64-NEXT: vand.vx v12, v12, t0, v0.t 2115; RV64-NEXT: vand.vx v8, v8, t0, v0.t 2116; RV64-NEXT: vsll.vi v8, v8, 4, v0.t 2117; RV64-NEXT: vor.vv v8, v12, v8, v0.t 2118; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t 2119; RV64-NEXT: vand.vx v12, v12, a6, v0.t 2120; RV64-NEXT: vand.vx v8, v8, a6, v0.t 2121; RV64-NEXT: vsll.vi v8, v8, 2, v0.t 2122; RV64-NEXT: vor.vv v8, v12, v8, v0.t 2123; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t 2124; RV64-NEXT: vand.vx v12, v12, a5, v0.t 2125; RV64-NEXT: vand.vx v8, v8, a5, v0.t 2126; RV64-NEXT: vsll.vi v8, v8, 1, v0.t 2127; RV64-NEXT: vor.vv v8, v12, v8, v0.t 2128; RV64-NEXT: ret 2129; 2130; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64: 2131; CHECK-ZVBB: # %bb.0: 2132; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2133; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 2134; CHECK-ZVBB-NEXT: ret 2135 %v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl) 2136 ret <vscale x 4 x i64> %v 2137} 2138 2139define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { 2140; RV32-LABEL: vp_bitreverse_nxv4i64_unmasked: 2141; RV32: # %bb.0: 2142; RV32-NEXT: addi sp, sp, -16 2143; RV32-NEXT: .cfi_def_cfa_offset 16 2144; RV32-NEXT: lui a1, 1044480 2145; RV32-NEXT: li a2, 56 2146; RV32-NEXT: lui a3, 16 2147; RV32-NEXT: li a4, 40 2148; RV32-NEXT: lui a5, 4080 2149; RV32-NEXT: addi a6, sp, 8 2150; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2151; RV32-NEXT: vsrl.vi v20, v8, 24 2152; RV32-NEXT: sw a1, 8(sp) 2153; RV32-NEXT: sw zero, 12(sp) 2154; RV32-NEXT: vsll.vx v16, v8, a2 2155; RV32-NEXT: addi a1, a3, -256 2156; RV32-NEXT: vsrl.vx v12, v8, a2 2157; RV32-NEXT: vsrl.vx v24, v8, a4 2158; RV32-NEXT: vand.vx v28, v8, a1 2159; RV32-NEXT: vand.vx v24, v24, a1 2160; RV32-NEXT: vor.vv v12, v24, v12 2161; RV32-NEXT: vlse64.v v24, (a6), zero 2162; RV32-NEXT: vsll.vx v28, v28, a4 2163; RV32-NEXT: vor.vv v16, v16, v28 2164; RV32-NEXT: vsrl.vi v28, v8, 8 2165; RV32-NEXT: vand.vx v20, v20, a5 2166; RV32-NEXT: vand.vv v28, v28, v24 2167; RV32-NEXT: vor.vv v20, v28, v20 2168; RV32-NEXT: lui a1, 61681 2169; RV32-NEXT: lui a2, 209715 2170; RV32-NEXT: lui a3, 349525 2171; RV32-NEXT: vand.vv v24, v8, v24 2172; RV32-NEXT: vand.vx v8, v8, a5 2173; RV32-NEXT: addi a1, a1, -241 2174; RV32-NEXT: addi a2, a2, 819 2175; RV32-NEXT: addi a3, a3, 1365 2176; RV32-NEXT: vsll.vi v8, v8, 24 2177; RV32-NEXT: vsll.vi v24, v24, 8 2178; RV32-NEXT: vor.vv v8, v8, v24 2179; RV32-NEXT: vsetvli a4, zero, e32, m4, ta, ma 2180; RV32-NEXT: vmv.v.x v24, a1 2181; RV32-NEXT: vsetvli 
zero, a0, e64, m4, ta, ma 2182; RV32-NEXT: vor.vv v12, v20, v12 2183; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 2184; RV32-NEXT: vmv.v.x v20, a2 2185; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2186; RV32-NEXT: vor.vv v8, v16, v8 2187; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 2188; RV32-NEXT: vmv.v.x v16, a3 2189; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2190; RV32-NEXT: vor.vv v8, v8, v12 2191; RV32-NEXT: vsrl.vi v12, v8, 4 2192; RV32-NEXT: vand.vv v8, v8, v24 2193; RV32-NEXT: vand.vv v12, v12, v24 2194; RV32-NEXT: vsll.vi v8, v8, 4 2195; RV32-NEXT: vor.vv v8, v12, v8 2196; RV32-NEXT: vsrl.vi v12, v8, 2 2197; RV32-NEXT: vand.vv v8, v8, v20 2198; RV32-NEXT: vand.vv v12, v12, v20 2199; RV32-NEXT: vsll.vi v8, v8, 2 2200; RV32-NEXT: vor.vv v8, v12, v8 2201; RV32-NEXT: vsrl.vi v12, v8, 1 2202; RV32-NEXT: vand.vv v8, v8, v16 2203; RV32-NEXT: vand.vv v12, v12, v16 2204; RV32-NEXT: vadd.vv v8, v8, v8 2205; RV32-NEXT: vor.vv v8, v12, v8 2206; RV32-NEXT: addi sp, sp, 16 2207; RV32-NEXT: .cfi_def_cfa_offset 0 2208; RV32-NEXT: ret 2209; 2210; RV64-LABEL: vp_bitreverse_nxv4i64_unmasked: 2211; RV64: # %bb.0: 2212; RV64-NEXT: lui a1, 4080 2213; RV64-NEXT: li a2, 255 2214; RV64-NEXT: li a3, 56 2215; RV64-NEXT: lui a4, 16 2216; RV64-NEXT: li a5, 40 2217; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2218; RV64-NEXT: vsrl.vi v16, v8, 24 2219; RV64-NEXT: vsrl.vi v20, v8, 8 2220; RV64-NEXT: addiw a0, a4, -256 2221; RV64-NEXT: vsrl.vx v12, v8, a3 2222; RV64-NEXT: vsrl.vx v24, v8, a5 2223; RV64-NEXT: vand.vx v24, v24, a0 2224; RV64-NEXT: vor.vv v12, v24, v12 2225; RV64-NEXT: vand.vx v24, v8, a1 2226; RV64-NEXT: slli a2, a2, 24 2227; RV64-NEXT: vand.vx v16, v16, a1 2228; RV64-NEXT: vsll.vi v24, v24, 24 2229; RV64-NEXT: vand.vx v20, v20, a2 2230; RV64-NEXT: vor.vv v16, v20, v16 2231; RV64-NEXT: vand.vx v20, v8, a2 2232; RV64-NEXT: vsll.vi v20, v20, 8 2233; RV64-NEXT: vor.vv v20, v24, v20 2234; RV64-NEXT: vsll.vx v24, v8, a3 2235; RV64-NEXT: vand.vx v8, v8, a0 2236; RV64-NEXT: vsll.vx v8, v8, a5 2237; RV64-NEXT: vor.vv v8, v24, v8 2238; RV64-NEXT: lui a0, 61681 2239; RV64-NEXT: lui a1, 209715 2240; RV64-NEXT: lui a2, 349525 2241; RV64-NEXT: addiw a0, a0, -241 2242; RV64-NEXT: addiw a1, a1, 819 2243; RV64-NEXT: addiw a2, a2, 1365 2244; RV64-NEXT: slli a3, a0, 32 2245; RV64-NEXT: slli a4, a1, 32 2246; RV64-NEXT: add a0, a0, a3 2247; RV64-NEXT: slli a3, a2, 32 2248; RV64-NEXT: add a1, a1, a4 2249; RV64-NEXT: add a2, a2, a3 2250; RV64-NEXT: vor.vv v12, v16, v12 2251; RV64-NEXT: vor.vv v8, v8, v20 2252; RV64-NEXT: vor.vv v8, v8, v12 2253; RV64-NEXT: vsrl.vi v12, v8, 4 2254; RV64-NEXT: vand.vx v8, v8, a0 2255; RV64-NEXT: vand.vx v12, v12, a0 2256; RV64-NEXT: vsll.vi v8, v8, 4 2257; RV64-NEXT: vor.vv v8, v12, v8 2258; RV64-NEXT: vsrl.vi v12, v8, 2 2259; RV64-NEXT: vand.vx v8, v8, a1 2260; RV64-NEXT: vand.vx v12, v12, a1 2261; RV64-NEXT: vsll.vi v8, v8, 2 2262; RV64-NEXT: vor.vv v8, v12, v8 2263; RV64-NEXT: vsrl.vi v12, v8, 1 2264; RV64-NEXT: vand.vx v8, v8, a2 2265; RV64-NEXT: vand.vx v12, v12, a2 2266; RV64-NEXT: vadd.vv v8, v8, v8 2267; RV64-NEXT: vor.vv v8, v12, v8 2268; RV64-NEXT: ret 2269; 2270; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64_unmasked: 2271; CHECK-ZVBB: # %bb.0: 2272; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 2273; CHECK-ZVBB-NEXT: vbrev.v v8, v8 2274; CHECK-ZVBB-NEXT: ret 2275 %v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 2276 ret <vscale x 4 x i64> %v 2277} 2278 2279declare <vscale x 7 x i64> 
@llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32) 2280 2281define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { 2282; RV32-LABEL: vp_bitreverse_nxv7i64: 2283; RV32: # %bb.0: 2284; RV32-NEXT: addi sp, sp, -16 2285; RV32-NEXT: .cfi_def_cfa_offset 16 2286; RV32-NEXT: csrr a1, vlenb 2287; RV32-NEXT: li a2, 24 2288; RV32-NEXT: mul a1, a1, a2 2289; RV32-NEXT: sub sp, sp, a1 2290; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 2291; RV32-NEXT: lui a1, 1044480 2292; RV32-NEXT: li a2, 56 2293; RV32-NEXT: lui a3, 16 2294; RV32-NEXT: li a4, 40 2295; RV32-NEXT: addi a5, sp, 8 2296; RV32-NEXT: sw a1, 8(sp) 2297; RV32-NEXT: sw zero, 12(sp) 2298; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2299; RV32-NEXT: vsll.vx v16, v8, a2, v0.t 2300; RV32-NEXT: addi a1, a3, -256 2301; RV32-NEXT: vand.vx v24, v8, a1, v0.t 2302; RV32-NEXT: vsll.vx v24, v24, a4, v0.t 2303; RV32-NEXT: vor.vv v16, v16, v24, v0.t 2304; RV32-NEXT: csrr a3, vlenb 2305; RV32-NEXT: slli a3, a3, 4 2306; RV32-NEXT: add a3, sp, a3 2307; RV32-NEXT: addi a3, a3, 16 2308; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 2309; RV32-NEXT: vlse64.v v16, (a5), zero 2310; RV32-NEXT: csrr a3, vlenb 2311; RV32-NEXT: slli a3, a3, 3 2312; RV32-NEXT: add a3, sp, a3 2313; RV32-NEXT: addi a3, a3, 16 2314; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 2315; RV32-NEXT: lui a3, 4080 2316; RV32-NEXT: vand.vx v24, v8, a3, v0.t 2317; RV32-NEXT: vsll.vi v24, v24, 24, v0.t 2318; RV32-NEXT: addi a5, sp, 16 2319; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill 2320; RV32-NEXT: vand.vv v24, v8, v16, v0.t 2321; RV32-NEXT: vsll.vi v16, v24, 8, v0.t 2322; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload 2323; RV32-NEXT: vor.vv v16, v24, v16, v0.t 2324; RV32-NEXT: csrr a5, vlenb 2325; RV32-NEXT: slli a5, a5, 4 2326; RV32-NEXT: add a5, sp, a5 2327; RV32-NEXT: addi a5, a5, 16 2328; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload 2329; RV32-NEXT: vor.vv v16, v24, v16, v0.t 2330; RV32-NEXT: csrr a5, vlenb 2331; RV32-NEXT: slli a5, a5, 4 2332; RV32-NEXT: add a5, sp, a5 2333; RV32-NEXT: addi a5, a5, 16 2334; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill 2335; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t 2336; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t 2337; RV32-NEXT: vand.vx v24, v24, a1, v0.t 2338; RV32-NEXT: vor.vv v16, v24, v16, v0.t 2339; RV32-NEXT: addi a1, sp, 16 2340; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 2341; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t 2342; RV32-NEXT: vand.vx v24, v24, a3, v0.t 2343; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 2344; RV32-NEXT: csrr a1, vlenb 2345; RV32-NEXT: slli a1, a1, 3 2346; RV32-NEXT: add a1, sp, a1 2347; RV32-NEXT: addi a1, a1, 16 2348; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 2349; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2350; RV32-NEXT: vor.vv v8, v8, v24, v0.t 2351; RV32-NEXT: addi a1, sp, 16 2352; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 2353; RV32-NEXT: vor.vv v8, v8, v16, v0.t 2354; RV32-NEXT: lui a1, 61681 2355; RV32-NEXT: lui a2, 209715 2356; RV32-NEXT: lui a3, 349525 2357; RV32-NEXT: addi a1, a1, -241 2358; RV32-NEXT: addi a2, a2, 819 2359; RV32-NEXT: addi a3, a3, 1365 2360; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2361; RV32-NEXT: vmv.v.x v24, a1 2362; RV32-NEXT: csrr a1, vlenb 2363; RV32-NEXT: slli a1, a1, 4 2364; RV32-NEXT: add a1, sp, a1 2365; RV32-NEXT: addi a1, a1, 16 2366; 
RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 2367; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2368; RV32-NEXT: vor.vv v8, v16, v8, v0.t 2369; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 2370; RV32-NEXT: vand.vv v16, v16, v24, v0.t 2371; RV32-NEXT: vand.vv v24, v8, v24, v0.t 2372; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2373; RV32-NEXT: vmv.v.x v8, a2 2374; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2375; RV32-NEXT: vsll.vi v24, v24, 4, v0.t 2376; RV32-NEXT: vor.vv v24, v16, v24, v0.t 2377; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t 2378; RV32-NEXT: vand.vv v16, v16, v8, v0.t 2379; RV32-NEXT: vand.vv v24, v24, v8, v0.t 2380; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2381; RV32-NEXT: vmv.v.x v8, a3 2382; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2383; RV32-NEXT: vsll.vi v24, v24, 2, v0.t 2384; RV32-NEXT: vor.vv v16, v16, v24, v0.t 2385; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t 2386; RV32-NEXT: vand.vv v24, v24, v8, v0.t 2387; RV32-NEXT: vand.vv v8, v16, v8, v0.t 2388; RV32-NEXT: vsll.vi v8, v8, 1, v0.t 2389; RV32-NEXT: vor.vv v8, v24, v8, v0.t 2390; RV32-NEXT: csrr a0, vlenb 2391; RV32-NEXT: li a1, 24 2392; RV32-NEXT: mul a0, a0, a1 2393; RV32-NEXT: add sp, sp, a0 2394; RV32-NEXT: .cfi_def_cfa sp, 16 2395; RV32-NEXT: addi sp, sp, 16 2396; RV32-NEXT: .cfi_def_cfa_offset 0 2397; RV32-NEXT: ret 2398; 2399; RV64-LABEL: vp_bitreverse_nxv7i64: 2400; RV64: # %bb.0: 2401; RV64-NEXT: addi sp, sp, -16 2402; RV64-NEXT: .cfi_def_cfa_offset 16 2403; RV64-NEXT: csrr a1, vlenb 2404; RV64-NEXT: slli a1, a1, 3 2405; RV64-NEXT: sub sp, sp, a1 2406; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 2407; RV64-NEXT: lui a1, 4080 2408; RV64-NEXT: li a2, 255 2409; RV64-NEXT: li a3, 56 2410; RV64-NEXT: lui a4, 16 2411; RV64-NEXT: li a5, 40 2412; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2413; RV64-NEXT: vand.vx v16, v8, a1, v0.t 2414; RV64-NEXT: slli a2, a2, 24 2415; RV64-NEXT: addiw a0, a4, -256 2416; RV64-NEXT: vsll.vi v16, v16, 24, v0.t 2417; RV64-NEXT: vand.vx v24, v8, a2, v0.t 2418; RV64-NEXT: vsll.vi v24, v24, 8, v0.t 2419; RV64-NEXT: vor.vv v16, v16, v24, v0.t 2420; RV64-NEXT: addi a4, sp, 16 2421; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2422; RV64-NEXT: vsll.vx v24, v8, a3, v0.t 2423; RV64-NEXT: vand.vx v16, v8, a0, v0.t 2424; RV64-NEXT: vsll.vx v16, v16, a5, v0.t 2425; RV64-NEXT: vor.vv v16, v24, v16, v0.t 2426; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload 2427; RV64-NEXT: vor.vv v16, v16, v24, v0.t 2428; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2429; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t 2430; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t 2431; RV64-NEXT: vand.vx v16, v16, a0, v0.t 2432; RV64-NEXT: vor.vv v24, v16, v24, v0.t 2433; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t 2434; RV64-NEXT: vand.vx v16, v16, a1, v0.t 2435; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 2436; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2437; RV64-NEXT: vor.vv v8, v8, v16, v0.t 2438; RV64-NEXT: vor.vv v8, v8, v24, v0.t 2439; RV64-NEXT: lui a0, 61681 2440; RV64-NEXT: lui a1, 209715 2441; RV64-NEXT: lui a2, 349525 2442; RV64-NEXT: addiw a0, a0, -241 2443; RV64-NEXT: addiw a1, a1, 819 2444; RV64-NEXT: addiw a2, a2, 1365 2445; RV64-NEXT: slli a3, a0, 32 2446; RV64-NEXT: slli a4, a1, 32 2447; RV64-NEXT: add a0, a0, a3 2448; RV64-NEXT: slli a3, a2, 32 2449; RV64-NEXT: add a1, a1, a4 2450; RV64-NEXT: add a2, a2, a3 2451; RV64-NEXT: addi a3, sp, 16 2452; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 2453; RV64-NEXT: 
vor.vv v8, v16, v8, v0.t 2454; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2455; RV64-NEXT: vand.vx v16, v16, a0, v0.t 2456; RV64-NEXT: vand.vx v8, v8, a0, v0.t 2457; RV64-NEXT: vsll.vi v8, v8, 4, v0.t 2458; RV64-NEXT: vor.vv v8, v16, v8, v0.t 2459; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t 2460; RV64-NEXT: vand.vx v16, v16, a1, v0.t 2461; RV64-NEXT: vand.vx v8, v8, a1, v0.t 2462; RV64-NEXT: vsll.vi v8, v8, 2, v0.t 2463; RV64-NEXT: vor.vv v8, v16, v8, v0.t 2464; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 2465; RV64-NEXT: vand.vx v16, v16, a2, v0.t 2466; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2467; RV64-NEXT: vsll.vi v8, v8, 1, v0.t 2468; RV64-NEXT: vor.vv v8, v16, v8, v0.t 2469; RV64-NEXT: csrr a0, vlenb 2470; RV64-NEXT: slli a0, a0, 3 2471; RV64-NEXT: add sp, sp, a0 2472; RV64-NEXT: .cfi_def_cfa sp, 16 2473; RV64-NEXT: addi sp, sp, 16 2474; RV64-NEXT: .cfi_def_cfa_offset 0 2475; RV64-NEXT: ret 2476; 2477; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64: 2478; CHECK-ZVBB: # %bb.0: 2479; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2480; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 2481; CHECK-ZVBB-NEXT: ret 2482 %v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl) 2483 ret <vscale x 7 x i64> %v 2484} 2485 2486define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) { 2487; RV32-LABEL: vp_bitreverse_nxv7i64_unmasked: 2488; RV32: # %bb.0: 2489; RV32-NEXT: addi sp, sp, -16 2490; RV32-NEXT: .cfi_def_cfa_offset 16 2491; RV32-NEXT: csrr a1, vlenb 2492; RV32-NEXT: slli a1, a1, 4 2493; RV32-NEXT: sub sp, sp, a1 2494; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 2495; RV32-NEXT: lui a1, 1044480 2496; RV32-NEXT: li a2, 56 2497; RV32-NEXT: lui a3, 16 2498; RV32-NEXT: li a4, 40 2499; RV32-NEXT: lui a5, 4080 2500; RV32-NEXT: addi a6, sp, 8 2501; RV32-NEXT: sw a1, 8(sp) 2502; RV32-NEXT: sw zero, 12(sp) 2503; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2504; RV32-NEXT: vsll.vx v16, v8, a2 2505; RV32-NEXT: addi a1, a3, -256 2506; RV32-NEXT: vsrl.vx v24, v8, a2 2507; RV32-NEXT: vsrl.vx v0, v8, a4 2508; RV32-NEXT: vand.vx v0, v0, a1 2509; RV32-NEXT: vor.vv v24, v0, v24 2510; RV32-NEXT: addi a2, sp, 16 2511; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill 2512; RV32-NEXT: vand.vx v24, v8, a1 2513; RV32-NEXT: vsll.vx v24, v24, a4 2514; RV32-NEXT: vor.vv v16, v16, v24 2515; RV32-NEXT: csrr a1, vlenb 2516; RV32-NEXT: slli a1, a1, 3 2517; RV32-NEXT: add a1, sp, a1 2518; RV32-NEXT: addi a1, a1, 16 2519; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 2520; RV32-NEXT: vlse64.v v24, (a6), zero 2521; RV32-NEXT: vsrl.vi v16, v8, 24 2522; RV32-NEXT: vand.vx v16, v16, a5 2523; RV32-NEXT: vsrl.vi v0, v8, 8 2524; RV32-NEXT: vand.vv v0, v0, v24 2525; RV32-NEXT: vor.vv v16, v0, v16 2526; RV32-NEXT: vand.vv v24, v8, v24 2527; RV32-NEXT: vand.vx v8, v8, a5 2528; RV32-NEXT: vsll.vi v8, v8, 24 2529; RV32-NEXT: vsll.vi v24, v24, 8 2530; RV32-NEXT: vor.vv v24, v8, v24 2531; RV32-NEXT: addi a1, sp, 16 2532; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload 2533; RV32-NEXT: vor.vv v8, v16, v8 2534; RV32-NEXT: lui a1, 61681 2535; RV32-NEXT: lui a2, 209715 2536; RV32-NEXT: lui a3, 349525 2537; RV32-NEXT: addi a1, a1, -241 2538; RV32-NEXT: addi a2, a2, 819 2539; RV32-NEXT: addi a3, a3, 1365 2540; RV32-NEXT: csrr a4, vlenb 2541; RV32-NEXT: slli a4, a4, 3 2542; RV32-NEXT: add a4, sp, a4 2543; RV32-NEXT: addi a4, a4, 16 2544; RV32-NEXT: vl8r.v v16, (a4) # 
Unknown-size Folded Reload 2545; RV32-NEXT: vor.vv v16, v16, v24 2546; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2547; RV32-NEXT: vmv.v.x v24, a1 2548; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2549; RV32-NEXT: vor.vv v8, v16, v8 2550; RV32-NEXT: vsrl.vi v16, v8, 4 2551; RV32-NEXT: vand.vv v8, v8, v24 2552; RV32-NEXT: vand.vv v16, v16, v24 2553; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2554; RV32-NEXT: vmv.v.x v24, a2 2555; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2556; RV32-NEXT: vsll.vi v8, v8, 4 2557; RV32-NEXT: vor.vv v8, v16, v8 2558; RV32-NEXT: vsrl.vi v16, v8, 2 2559; RV32-NEXT: vand.vv v8, v8, v24 2560; RV32-NEXT: vand.vv v16, v16, v24 2561; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2562; RV32-NEXT: vmv.v.x v24, a3 2563; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2564; RV32-NEXT: vsll.vi v8, v8, 2 2565; RV32-NEXT: vor.vv v8, v16, v8 2566; RV32-NEXT: vsrl.vi v16, v8, 1 2567; RV32-NEXT: vand.vv v8, v8, v24 2568; RV32-NEXT: vand.vv v16, v16, v24 2569; RV32-NEXT: vadd.vv v8, v8, v8 2570; RV32-NEXT: vor.vv v8, v16, v8 2571; RV32-NEXT: csrr a0, vlenb 2572; RV32-NEXT: slli a0, a0, 4 2573; RV32-NEXT: add sp, sp, a0 2574; RV32-NEXT: .cfi_def_cfa sp, 16 2575; RV32-NEXT: addi sp, sp, 16 2576; RV32-NEXT: .cfi_def_cfa_offset 0 2577; RV32-NEXT: ret 2578; 2579; RV64-LABEL: vp_bitreverse_nxv7i64_unmasked: 2580; RV64: # %bb.0: 2581; RV64-NEXT: addi sp, sp, -16 2582; RV64-NEXT: .cfi_def_cfa_offset 16 2583; RV64-NEXT: csrr a1, vlenb 2584; RV64-NEXT: slli a1, a1, 3 2585; RV64-NEXT: sub sp, sp, a1 2586; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 2587; RV64-NEXT: lui a1, 4080 2588; RV64-NEXT: li a2, 255 2589; RV64-NEXT: li a3, 56 2590; RV64-NEXT: lui a4, 16 2591; RV64-NEXT: li a5, 40 2592; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2593; RV64-NEXT: vsrl.vi v24, v8, 24 2594; RV64-NEXT: addiw a0, a4, -256 2595; RV64-NEXT: vsrl.vx v16, v8, a3 2596; RV64-NEXT: vsrl.vx v0, v8, a5 2597; RV64-NEXT: vand.vx v0, v0, a0 2598; RV64-NEXT: vor.vv v16, v0, v16 2599; RV64-NEXT: addi a4, sp, 16 2600; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2601; RV64-NEXT: vsrl.vi v0, v8, 8 2602; RV64-NEXT: slli a2, a2, 24 2603; RV64-NEXT: vand.vx v24, v24, a1 2604; RV64-NEXT: vand.vx v0, v0, a2 2605; RV64-NEXT: vor.vv v24, v0, v24 2606; RV64-NEXT: vand.vx v0, v8, a1 2607; RV64-NEXT: vsll.vi v0, v0, 24 2608; RV64-NEXT: vand.vx v16, v8, a2 2609; RV64-NEXT: vsll.vi v16, v16, 8 2610; RV64-NEXT: vor.vv v0, v0, v16 2611; RV64-NEXT: vsll.vx v16, v8, a3 2612; RV64-NEXT: vand.vx v8, v8, a0 2613; RV64-NEXT: vsll.vx v8, v8, a5 2614; RV64-NEXT: vor.vv v8, v16, v8 2615; RV64-NEXT: lui a0, 61681 2616; RV64-NEXT: lui a1, 209715 2617; RV64-NEXT: lui a2, 349525 2618; RV64-NEXT: addiw a0, a0, -241 2619; RV64-NEXT: addiw a1, a1, 819 2620; RV64-NEXT: addiw a2, a2, 1365 2621; RV64-NEXT: slli a3, a0, 32 2622; RV64-NEXT: slli a4, a1, 32 2623; RV64-NEXT: add a0, a0, a3 2624; RV64-NEXT: slli a3, a2, 32 2625; RV64-NEXT: add a1, a1, a4 2626; RV64-NEXT: add a2, a2, a3 2627; RV64-NEXT: addi a3, sp, 16 2628; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 2629; RV64-NEXT: vor.vv v16, v24, v16 2630; RV64-NEXT: vor.vv v8, v8, v0 2631; RV64-NEXT: vor.vv v8, v8, v16 2632; RV64-NEXT: vsrl.vi v16, v8, 4 2633; RV64-NEXT: vand.vx v8, v8, a0 2634; RV64-NEXT: vand.vx v16, v16, a0 2635; RV64-NEXT: vsll.vi v8, v8, 4 2636; RV64-NEXT: vor.vv v8, v16, v8 2637; RV64-NEXT: vsrl.vi v16, v8, 2 2638; RV64-NEXT: vand.vx v8, v8, a1 2639; RV64-NEXT: 
vand.vx v16, v16, a1 2640; RV64-NEXT: vsll.vi v8, v8, 2 2641; RV64-NEXT: vor.vv v8, v16, v8 2642; RV64-NEXT: vsrl.vi v16, v8, 1 2643; RV64-NEXT: vand.vx v8, v8, a2 2644; RV64-NEXT: vand.vx v16, v16, a2 2645; RV64-NEXT: vadd.vv v8, v8, v8 2646; RV64-NEXT: vor.vv v8, v16, v8 2647; RV64-NEXT: csrr a0, vlenb 2648; RV64-NEXT: slli a0, a0, 3 2649; RV64-NEXT: add sp, sp, a0 2650; RV64-NEXT: .cfi_def_cfa sp, 16 2651; RV64-NEXT: addi sp, sp, 16 2652; RV64-NEXT: .cfi_def_cfa_offset 0 2653; RV64-NEXT: ret 2654; 2655; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64_unmasked: 2656; CHECK-ZVBB: # %bb.0: 2657; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2658; CHECK-ZVBB-NEXT: vbrev.v v8, v8 2659; CHECK-ZVBB-NEXT: ret 2660 %v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl) 2661 ret <vscale x 7 x i64> %v 2662} 2663 2664declare <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32) 2665 2666define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2667; RV32-LABEL: vp_bitreverse_nxv8i64: 2668; RV32: # %bb.0: 2669; RV32-NEXT: addi sp, sp, -16 2670; RV32-NEXT: .cfi_def_cfa_offset 16 2671; RV32-NEXT: csrr a1, vlenb 2672; RV32-NEXT: li a2, 24 2673; RV32-NEXT: mul a1, a1, a2 2674; RV32-NEXT: sub sp, sp, a1 2675; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 2676; RV32-NEXT: lui a1, 1044480 2677; RV32-NEXT: li a2, 56 2678; RV32-NEXT: lui a3, 16 2679; RV32-NEXT: li a4, 40 2680; RV32-NEXT: addi a5, sp, 8 2681; RV32-NEXT: sw a1, 8(sp) 2682; RV32-NEXT: sw zero, 12(sp) 2683; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2684; RV32-NEXT: vsll.vx v16, v8, a2, v0.t 2685; RV32-NEXT: addi a1, a3, -256 2686; RV32-NEXT: vand.vx v24, v8, a1, v0.t 2687; RV32-NEXT: vsll.vx v24, v24, a4, v0.t 2688; RV32-NEXT: vor.vv v16, v16, v24, v0.t 2689; RV32-NEXT: csrr a3, vlenb 2690; RV32-NEXT: slli a3, a3, 4 2691; RV32-NEXT: add a3, sp, a3 2692; RV32-NEXT: addi a3, a3, 16 2693; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 2694; RV32-NEXT: vlse64.v v16, (a5), zero 2695; RV32-NEXT: csrr a3, vlenb 2696; RV32-NEXT: slli a3, a3, 3 2697; RV32-NEXT: add a3, sp, a3 2698; RV32-NEXT: addi a3, a3, 16 2699; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 2700; RV32-NEXT: lui a3, 4080 2701; RV32-NEXT: vand.vx v24, v8, a3, v0.t 2702; RV32-NEXT: vsll.vi v24, v24, 24, v0.t 2703; RV32-NEXT: addi a5, sp, 16 2704; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill 2705; RV32-NEXT: vand.vv v24, v8, v16, v0.t 2706; RV32-NEXT: vsll.vi v16, v24, 8, v0.t 2707; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload 2708; RV32-NEXT: vor.vv v16, v24, v16, v0.t 2709; RV32-NEXT: csrr a5, vlenb 2710; RV32-NEXT: slli a5, a5, 4 2711; RV32-NEXT: add a5, sp, a5 2712; RV32-NEXT: addi a5, a5, 16 2713; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload 2714; RV32-NEXT: vor.vv v16, v24, v16, v0.t 2715; RV32-NEXT: csrr a5, vlenb 2716; RV32-NEXT: slli a5, a5, 4 2717; RV32-NEXT: add a5, sp, a5 2718; RV32-NEXT: addi a5, a5, 16 2719; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill 2720; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t 2721; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t 2722; RV32-NEXT: vand.vx v24, v24, a1, v0.t 2723; RV32-NEXT: vor.vv v16, v24, v16, v0.t 2724; RV32-NEXT: addi a1, sp, 16 2725; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 2726; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t 
2727; RV32-NEXT: vand.vx v24, v24, a3, v0.t 2728; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 2729; RV32-NEXT: csrr a1, vlenb 2730; RV32-NEXT: slli a1, a1, 3 2731; RV32-NEXT: add a1, sp, a1 2732; RV32-NEXT: addi a1, a1, 16 2733; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 2734; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2735; RV32-NEXT: vor.vv v8, v8, v24, v0.t 2736; RV32-NEXT: addi a1, sp, 16 2737; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 2738; RV32-NEXT: vor.vv v8, v8, v16, v0.t 2739; RV32-NEXT: lui a1, 61681 2740; RV32-NEXT: lui a2, 209715 2741; RV32-NEXT: lui a3, 349525 2742; RV32-NEXT: addi a1, a1, -241 2743; RV32-NEXT: addi a2, a2, 819 2744; RV32-NEXT: addi a3, a3, 1365 2745; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2746; RV32-NEXT: vmv.v.x v24, a1 2747; RV32-NEXT: csrr a1, vlenb 2748; RV32-NEXT: slli a1, a1, 4 2749; RV32-NEXT: add a1, sp, a1 2750; RV32-NEXT: addi a1, a1, 16 2751; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 2752; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2753; RV32-NEXT: vor.vv v8, v16, v8, v0.t 2754; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 2755; RV32-NEXT: vand.vv v16, v16, v24, v0.t 2756; RV32-NEXT: vand.vv v24, v8, v24, v0.t 2757; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2758; RV32-NEXT: vmv.v.x v8, a2 2759; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2760; RV32-NEXT: vsll.vi v24, v24, 4, v0.t 2761; RV32-NEXT: vor.vv v24, v16, v24, v0.t 2762; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t 2763; RV32-NEXT: vand.vv v16, v16, v8, v0.t 2764; RV32-NEXT: vand.vv v24, v24, v8, v0.t 2765; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2766; RV32-NEXT: vmv.v.x v8, a3 2767; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2768; RV32-NEXT: vsll.vi v24, v24, 2, v0.t 2769; RV32-NEXT: vor.vv v16, v16, v24, v0.t 2770; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t 2771; RV32-NEXT: vand.vv v24, v24, v8, v0.t 2772; RV32-NEXT: vand.vv v8, v16, v8, v0.t 2773; RV32-NEXT: vsll.vi v8, v8, 1, v0.t 2774; RV32-NEXT: vor.vv v8, v24, v8, v0.t 2775; RV32-NEXT: csrr a0, vlenb 2776; RV32-NEXT: li a1, 24 2777; RV32-NEXT: mul a0, a0, a1 2778; RV32-NEXT: add sp, sp, a0 2779; RV32-NEXT: .cfi_def_cfa sp, 16 2780; RV32-NEXT: addi sp, sp, 16 2781; RV32-NEXT: .cfi_def_cfa_offset 0 2782; RV32-NEXT: ret 2783; 2784; RV64-LABEL: vp_bitreverse_nxv8i64: 2785; RV64: # %bb.0: 2786; RV64-NEXT: addi sp, sp, -16 2787; RV64-NEXT: .cfi_def_cfa_offset 16 2788; RV64-NEXT: csrr a1, vlenb 2789; RV64-NEXT: slli a1, a1, 3 2790; RV64-NEXT: sub sp, sp, a1 2791; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 2792; RV64-NEXT: lui a1, 4080 2793; RV64-NEXT: li a2, 255 2794; RV64-NEXT: li a3, 56 2795; RV64-NEXT: lui a4, 16 2796; RV64-NEXT: li a5, 40 2797; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2798; RV64-NEXT: vand.vx v16, v8, a1, v0.t 2799; RV64-NEXT: slli a2, a2, 24 2800; RV64-NEXT: addiw a0, a4, -256 2801; RV64-NEXT: vsll.vi v16, v16, 24, v0.t 2802; RV64-NEXT: vand.vx v24, v8, a2, v0.t 2803; RV64-NEXT: vsll.vi v24, v24, 8, v0.t 2804; RV64-NEXT: vor.vv v16, v16, v24, v0.t 2805; RV64-NEXT: addi a4, sp, 16 2806; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2807; RV64-NEXT: vsll.vx v24, v8, a3, v0.t 2808; RV64-NEXT: vand.vx v16, v8, a0, v0.t 2809; RV64-NEXT: vsll.vx v16, v16, a5, v0.t 2810; RV64-NEXT: vor.vv v16, v24, v16, v0.t 2811; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload 2812; RV64-NEXT: vor.vv v16, v16, v24, v0.t 2813; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2814; RV64-NEXT: 
vsrl.vx v24, v8, a3, v0.t 2815; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t 2816; RV64-NEXT: vand.vx v16, v16, a0, v0.t 2817; RV64-NEXT: vor.vv v24, v16, v24, v0.t 2818; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t 2819; RV64-NEXT: vand.vx v16, v16, a1, v0.t 2820; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 2821; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2822; RV64-NEXT: vor.vv v8, v8, v16, v0.t 2823; RV64-NEXT: vor.vv v8, v8, v24, v0.t 2824; RV64-NEXT: lui a0, 61681 2825; RV64-NEXT: lui a1, 209715 2826; RV64-NEXT: lui a2, 349525 2827; RV64-NEXT: addiw a0, a0, -241 2828; RV64-NEXT: addiw a1, a1, 819 2829; RV64-NEXT: addiw a2, a2, 1365 2830; RV64-NEXT: slli a3, a0, 32 2831; RV64-NEXT: slli a4, a1, 32 2832; RV64-NEXT: add a0, a0, a3 2833; RV64-NEXT: slli a3, a2, 32 2834; RV64-NEXT: add a1, a1, a4 2835; RV64-NEXT: add a2, a2, a3 2836; RV64-NEXT: addi a3, sp, 16 2837; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 2838; RV64-NEXT: vor.vv v8, v16, v8, v0.t 2839; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2840; RV64-NEXT: vand.vx v16, v16, a0, v0.t 2841; RV64-NEXT: vand.vx v8, v8, a0, v0.t 2842; RV64-NEXT: vsll.vi v8, v8, 4, v0.t 2843; RV64-NEXT: vor.vv v8, v16, v8, v0.t 2844; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t 2845; RV64-NEXT: vand.vx v16, v16, a1, v0.t 2846; RV64-NEXT: vand.vx v8, v8, a1, v0.t 2847; RV64-NEXT: vsll.vi v8, v8, 2, v0.t 2848; RV64-NEXT: vor.vv v8, v16, v8, v0.t 2849; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 2850; RV64-NEXT: vand.vx v16, v16, a2, v0.t 2851; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2852; RV64-NEXT: vsll.vi v8, v8, 1, v0.t 2853; RV64-NEXT: vor.vv v8, v16, v8, v0.t 2854; RV64-NEXT: csrr a0, vlenb 2855; RV64-NEXT: slli a0, a0, 3 2856; RV64-NEXT: add sp, sp, a0 2857; RV64-NEXT: .cfi_def_cfa sp, 16 2858; RV64-NEXT: addi sp, sp, 16 2859; RV64-NEXT: .cfi_def_cfa_offset 0 2860; RV64-NEXT: ret 2861; 2862; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64: 2863; CHECK-ZVBB: # %bb.0: 2864; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2865; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 2866; CHECK-ZVBB-NEXT: ret 2867 %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl) 2868 ret <vscale x 8 x i64> %v 2869} 2870 2871define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { 2872; RV32-LABEL: vp_bitreverse_nxv8i64_unmasked: 2873; RV32: # %bb.0: 2874; RV32-NEXT: addi sp, sp, -16 2875; RV32-NEXT: .cfi_def_cfa_offset 16 2876; RV32-NEXT: csrr a1, vlenb 2877; RV32-NEXT: slli a1, a1, 4 2878; RV32-NEXT: sub sp, sp, a1 2879; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 2880; RV32-NEXT: lui a1, 1044480 2881; RV32-NEXT: li a2, 56 2882; RV32-NEXT: lui a3, 16 2883; RV32-NEXT: li a4, 40 2884; RV32-NEXT: lui a5, 4080 2885; RV32-NEXT: addi a6, sp, 8 2886; RV32-NEXT: sw a1, 8(sp) 2887; RV32-NEXT: sw zero, 12(sp) 2888; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2889; RV32-NEXT: vsll.vx v16, v8, a2 2890; RV32-NEXT: addi a1, a3, -256 2891; RV32-NEXT: vsrl.vx v24, v8, a2 2892; RV32-NEXT: vsrl.vx v0, v8, a4 2893; RV32-NEXT: vand.vx v0, v0, a1 2894; RV32-NEXT: vor.vv v24, v0, v24 2895; RV32-NEXT: addi a2, sp, 16 2896; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill 2897; RV32-NEXT: vand.vx v24, v8, a1 2898; RV32-NEXT: vsll.vx v24, v24, a4 2899; RV32-NEXT: vor.vv v16, v16, v24 2900; RV32-NEXT: csrr a1, vlenb 2901; RV32-NEXT: slli a1, a1, 3 2902; RV32-NEXT: add a1, sp, a1 2903; RV32-NEXT: addi a1, a1, 16 2904; RV32-NEXT: vs8r.v v16, (a1) # 
Unknown-size Folded Spill 2905; RV32-NEXT: vlse64.v v24, (a6), zero 2906; RV32-NEXT: vsrl.vi v16, v8, 24 2907; RV32-NEXT: vand.vx v16, v16, a5 2908; RV32-NEXT: vsrl.vi v0, v8, 8 2909; RV32-NEXT: vand.vv v0, v0, v24 2910; RV32-NEXT: vor.vv v16, v0, v16 2911; RV32-NEXT: vand.vv v24, v8, v24 2912; RV32-NEXT: vand.vx v8, v8, a5 2913; RV32-NEXT: vsll.vi v8, v8, 24 2914; RV32-NEXT: vsll.vi v24, v24, 8 2915; RV32-NEXT: vor.vv v24, v8, v24 2916; RV32-NEXT: addi a1, sp, 16 2917; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload 2918; RV32-NEXT: vor.vv v8, v16, v8 2919; RV32-NEXT: lui a1, 61681 2920; RV32-NEXT: lui a2, 209715 2921; RV32-NEXT: lui a3, 349525 2922; RV32-NEXT: addi a1, a1, -241 2923; RV32-NEXT: addi a2, a2, 819 2924; RV32-NEXT: addi a3, a3, 1365 2925; RV32-NEXT: csrr a4, vlenb 2926; RV32-NEXT: slli a4, a4, 3 2927; RV32-NEXT: add a4, sp, a4 2928; RV32-NEXT: addi a4, a4, 16 2929; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload 2930; RV32-NEXT: vor.vv v16, v16, v24 2931; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2932; RV32-NEXT: vmv.v.x v24, a1 2933; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2934; RV32-NEXT: vor.vv v8, v16, v8 2935; RV32-NEXT: vsrl.vi v16, v8, 4 2936; RV32-NEXT: vand.vv v8, v8, v24 2937; RV32-NEXT: vand.vv v16, v16, v24 2938; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2939; RV32-NEXT: vmv.v.x v24, a2 2940; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2941; RV32-NEXT: vsll.vi v8, v8, 4 2942; RV32-NEXT: vor.vv v8, v16, v8 2943; RV32-NEXT: vsrl.vi v16, v8, 2 2944; RV32-NEXT: vand.vv v8, v8, v24 2945; RV32-NEXT: vand.vv v16, v16, v24 2946; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2947; RV32-NEXT: vmv.v.x v24, a3 2948; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2949; RV32-NEXT: vsll.vi v8, v8, 2 2950; RV32-NEXT: vor.vv v8, v16, v8 2951; RV32-NEXT: vsrl.vi v16, v8, 1 2952; RV32-NEXT: vand.vv v8, v8, v24 2953; RV32-NEXT: vand.vv v16, v16, v24 2954; RV32-NEXT: vadd.vv v8, v8, v8 2955; RV32-NEXT: vor.vv v8, v16, v8 2956; RV32-NEXT: csrr a0, vlenb 2957; RV32-NEXT: slli a0, a0, 4 2958; RV32-NEXT: add sp, sp, a0 2959; RV32-NEXT: .cfi_def_cfa sp, 16 2960; RV32-NEXT: addi sp, sp, 16 2961; RV32-NEXT: .cfi_def_cfa_offset 0 2962; RV32-NEXT: ret 2963; 2964; RV64-LABEL: vp_bitreverse_nxv8i64_unmasked: 2965; RV64: # %bb.0: 2966; RV64-NEXT: addi sp, sp, -16 2967; RV64-NEXT: .cfi_def_cfa_offset 16 2968; RV64-NEXT: csrr a1, vlenb 2969; RV64-NEXT: slli a1, a1, 3 2970; RV64-NEXT: sub sp, sp, a1 2971; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 2972; RV64-NEXT: lui a1, 4080 2973; RV64-NEXT: li a2, 255 2974; RV64-NEXT: li a3, 56 2975; RV64-NEXT: lui a4, 16 2976; RV64-NEXT: li a5, 40 2977; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2978; RV64-NEXT: vsrl.vi v24, v8, 24 2979; RV64-NEXT: addiw a0, a4, -256 2980; RV64-NEXT: vsrl.vx v16, v8, a3 2981; RV64-NEXT: vsrl.vx v0, v8, a5 2982; RV64-NEXT: vand.vx v0, v0, a0 2983; RV64-NEXT: vor.vv v16, v0, v16 2984; RV64-NEXT: addi a4, sp, 16 2985; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2986; RV64-NEXT: vsrl.vi v0, v8, 8 2987; RV64-NEXT: slli a2, a2, 24 2988; RV64-NEXT: vand.vx v24, v24, a1 2989; RV64-NEXT: vand.vx v0, v0, a2 2990; RV64-NEXT: vor.vv v24, v0, v24 2991; RV64-NEXT: vand.vx v0, v8, a1 2992; RV64-NEXT: vsll.vi v0, v0, 24 2993; RV64-NEXT: vand.vx v16, v8, a2 2994; RV64-NEXT: vsll.vi v16, v16, 8 2995; RV64-NEXT: vor.vv v0, v0, v16 2996; RV64-NEXT: vsll.vx v16, v8, a3 2997; RV64-NEXT: vand.vx v8, v8, a0 2998; 
RV64-NEXT: vsll.vx v8, v8, a5 2999; RV64-NEXT: vor.vv v8, v16, v8 3000; RV64-NEXT: lui a0, 61681 3001; RV64-NEXT: lui a1, 209715 3002; RV64-NEXT: lui a2, 349525 3003; RV64-NEXT: addiw a0, a0, -241 3004; RV64-NEXT: addiw a1, a1, 819 3005; RV64-NEXT: addiw a2, a2, 1365 3006; RV64-NEXT: slli a3, a0, 32 3007; RV64-NEXT: slli a4, a1, 32 3008; RV64-NEXT: add a0, a0, a3 3009; RV64-NEXT: slli a3, a2, 32 3010; RV64-NEXT: add a1, a1, a4 3011; RV64-NEXT: add a2, a2, a3 3012; RV64-NEXT: addi a3, sp, 16 3013; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 3014; RV64-NEXT: vor.vv v16, v24, v16 3015; RV64-NEXT: vor.vv v8, v8, v0 3016; RV64-NEXT: vor.vv v8, v8, v16 3017; RV64-NEXT: vsrl.vi v16, v8, 4 3018; RV64-NEXT: vand.vx v8, v8, a0 3019; RV64-NEXT: vand.vx v16, v16, a0 3020; RV64-NEXT: vsll.vi v8, v8, 4 3021; RV64-NEXT: vor.vv v8, v16, v8 3022; RV64-NEXT: vsrl.vi v16, v8, 2 3023; RV64-NEXT: vand.vx v8, v8, a1 3024; RV64-NEXT: vand.vx v16, v16, a1 3025; RV64-NEXT: vsll.vi v8, v8, 2 3026; RV64-NEXT: vor.vv v8, v16, v8 3027; RV64-NEXT: vsrl.vi v16, v8, 1 3028; RV64-NEXT: vand.vx v8, v8, a2 3029; RV64-NEXT: vand.vx v16, v16, a2 3030; RV64-NEXT: vadd.vv v8, v8, v8 3031; RV64-NEXT: vor.vv v8, v16, v8 3032; RV64-NEXT: csrr a0, vlenb 3033; RV64-NEXT: slli a0, a0, 3 3034; RV64-NEXT: add sp, sp, a0 3035; RV64-NEXT: .cfi_def_cfa sp, 16 3036; RV64-NEXT: addi sp, sp, 16 3037; RV64-NEXT: .cfi_def_cfa_offset 0 3038; RV64-NEXT: ret 3039; 3040; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64_unmasked: 3041; CHECK-ZVBB: # %bb.0: 3042; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3043; CHECK-ZVBB-NEXT: vbrev.v v8, v8 3044; CHECK-ZVBB-NEXT: ret 3045 %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 3046 ret <vscale x 8 x i64> %v 3047} 3048 3049; Test splitting. Use i16 version for easier check. 
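; A <vscale x 64 x i16> operand does not fit in a single LMUL=8 register group at
; SEW=16, so the expected code below (a sketch of the current lowering, inferred from
; the checks that follow) splits the operation into two m8 halves: the high half runs
; with evl minus vscale x 32 elements (clamped to 0) using a mask obtained with
; vslidedown.vx, and the low half runs with min(evl, vscale x 32) elements after the
; .LBB46_2 branch.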
3050declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i1>, i32) 3051 3052define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { 3053; CHECK-LABEL: vp_bitreverse_nxv64i16: 3054; CHECK: # %bb.0: 3055; CHECK-NEXT: addi sp, sp, -16 3056; CHECK-NEXT: .cfi_def_cfa_offset 16 3057; CHECK-NEXT: csrr a1, vlenb 3058; CHECK-NEXT: slli a1, a1, 4 3059; CHECK-NEXT: sub sp, sp, a1 3060; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 3061; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma 3062; CHECK-NEXT: vmv1r.v v24, v0 3063; CHECK-NEXT: csrr a1, vlenb 3064; CHECK-NEXT: slli a1, a1, 3 3065; CHECK-NEXT: add a1, sp, a1 3066; CHECK-NEXT: addi a1, a1, 16 3067; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 3068; CHECK-NEXT: csrr a3, vlenb 3069; CHECK-NEXT: lui a1, 1 3070; CHECK-NEXT: lui a2, 3 3071; CHECK-NEXT: srli a4, a3, 1 3072; CHECK-NEXT: slli a3, a3, 2 3073; CHECK-NEXT: vslidedown.vx v0, v0, a4 3074; CHECK-NEXT: sub a4, a0, a3 3075; CHECK-NEXT: sltu a5, a0, a4 3076; CHECK-NEXT: addi a5, a5, -1 3077; CHECK-NEXT: and a5, a5, a4 3078; CHECK-NEXT: lui a6, 5 3079; CHECK-NEXT: addi a4, a1, -241 3080; CHECK-NEXT: addi a2, a2, 819 3081; CHECK-NEXT: addi a1, a6, 1365 3082; CHECK-NEXT: vsetvli zero, a5, e16, m8, ta, ma 3083; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t 3084; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t 3085; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 3086; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 3087; CHECK-NEXT: vand.vx v16, v16, a4, v0.t 3088; CHECK-NEXT: vand.vx v8, v8, a4, v0.t 3089; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 3090; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 3091; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t 3092; CHECK-NEXT: vand.vx v16, v16, a2, v0.t 3093; CHECK-NEXT: vand.vx v8, v8, a2, v0.t 3094; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 3095; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 3096; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 3097; CHECK-NEXT: vand.vx v16, v16, a1, v0.t 3098; CHECK-NEXT: vand.vx v8, v8, a1, v0.t 3099; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 3100; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 3101; CHECK-NEXT: addi a5, sp, 16 3102; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill 3103; CHECK-NEXT: bltu a0, a3, .LBB46_2 3104; CHECK-NEXT: # %bb.1: 3105; CHECK-NEXT: mv a0, a3 3106; CHECK-NEXT: .LBB46_2: 3107; CHECK-NEXT: vmv1r.v v0, v24 3108; CHECK-NEXT: csrr a3, vlenb 3109; CHECK-NEXT: slli a3, a3, 3 3110; CHECK-NEXT: add a3, sp, a3 3111; CHECK-NEXT: addi a3, a3, 16 3112; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload 3113; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 3114; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t 3115; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 3116; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 3117; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 3118; CHECK-NEXT: vand.vx v16, v16, a4, v0.t 3119; CHECK-NEXT: vand.vx v8, v8, a4, v0.t 3120; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 3121; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 3122; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t 3123; CHECK-NEXT: vand.vx v16, v16, a2, v0.t 3124; CHECK-NEXT: vand.vx v8, v8, a2, v0.t 3125; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 3126; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 3127; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 3128; CHECK-NEXT: vand.vx v16, v16, a1, v0.t 3129; CHECK-NEXT: vand.vx v8, v8, a1, v0.t 3130; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 3131; CHECK-NEXT: vor.vv v8, v16, v8, v0.t 3132; CHECK-NEXT: addi a0, sp, 16 3133; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size 
Folded Reload 3134; CHECK-NEXT: csrr a0, vlenb 3135; CHECK-NEXT: slli a0, a0, 4 3136; CHECK-NEXT: add sp, sp, a0 3137; CHECK-NEXT: .cfi_def_cfa sp, 16 3138; CHECK-NEXT: addi sp, sp, 16 3139; CHECK-NEXT: .cfi_def_cfa_offset 0 3140; CHECK-NEXT: ret 3141; 3142; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16: 3143; CHECK-ZVBB: # %bb.0: 3144; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma 3145; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 3146; CHECK-ZVBB-NEXT: csrr a1, vlenb 3147; CHECK-ZVBB-NEXT: srli a2, a1, 1 3148; CHECK-ZVBB-NEXT: slli a1, a1, 2 3149; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 3150; CHECK-ZVBB-NEXT: sub a2, a0, a1 3151; CHECK-ZVBB-NEXT: sltu a3, a0, a2 3152; CHECK-ZVBB-NEXT: addi a3, a3, -1 3153; CHECK-ZVBB-NEXT: and a2, a3, a2 3154; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma 3155; CHECK-ZVBB-NEXT: vbrev.v v16, v16, v0.t 3156; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2 3157; CHECK-ZVBB-NEXT: # %bb.1: 3158; CHECK-ZVBB-NEXT: mv a0, a1 3159; CHECK-ZVBB-NEXT: .LBB46_2: 3160; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 3161; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 3162; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 3163; CHECK-ZVBB-NEXT: ret 3164 %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl) 3165 ret <vscale x 64 x i16> %v 3166} 3167 3168define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) { 3169; CHECK-LABEL: vp_bitreverse_nxv64i16_unmasked: 3170; CHECK: # %bb.0: 3171; CHECK-NEXT: csrr a3, vlenb 3172; CHECK-NEXT: lui a1, 1 3173; CHECK-NEXT: lui a2, 3 3174; CHECK-NEXT: slli a3, a3, 2 3175; CHECK-NEXT: sub a4, a0, a3 3176; CHECK-NEXT: sltu a5, a0, a4 3177; CHECK-NEXT: addi a5, a5, -1 3178; CHECK-NEXT: and a5, a5, a4 3179; CHECK-NEXT: lui a6, 5 3180; CHECK-NEXT: addi a4, a1, -241 3181; CHECK-NEXT: addi a2, a2, 819 3182; CHECK-NEXT: addi a1, a6, 1365 3183; CHECK-NEXT: vsetvli zero, a5, e16, m8, ta, ma 3184; CHECK-NEXT: vsrl.vi v24, v16, 8 3185; CHECK-NEXT: vsll.vi v16, v16, 8 3186; CHECK-NEXT: vor.vv v16, v16, v24 3187; CHECK-NEXT: vsrl.vi v24, v16, 4 3188; CHECK-NEXT: vand.vx v16, v16, a4 3189; CHECK-NEXT: vand.vx v24, v24, a4 3190; CHECK-NEXT: vsll.vi v16, v16, 4 3191; CHECK-NEXT: vor.vv v16, v24, v16 3192; CHECK-NEXT: vsrl.vi v24, v16, 2 3193; CHECK-NEXT: vand.vx v16, v16, a2 3194; CHECK-NEXT: vand.vx v24, v24, a2 3195; CHECK-NEXT: vsll.vi v16, v16, 2 3196; CHECK-NEXT: vor.vv v16, v24, v16 3197; CHECK-NEXT: vsrl.vi v24, v16, 1 3198; CHECK-NEXT: vand.vx v16, v16, a1 3199; CHECK-NEXT: vand.vx v24, v24, a1 3200; CHECK-NEXT: vadd.vv v16, v16, v16 3201; CHECK-NEXT: vor.vv v16, v24, v16 3202; CHECK-NEXT: bltu a0, a3, .LBB47_2 3203; CHECK-NEXT: # %bb.1: 3204; CHECK-NEXT: mv a0, a3 3205; CHECK-NEXT: .LBB47_2: 3206; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 3207; CHECK-NEXT: vsrl.vi v24, v8, 8 3208; CHECK-NEXT: vsll.vi v8, v8, 8 3209; CHECK-NEXT: vor.vv v8, v8, v24 3210; CHECK-NEXT: vsrl.vi v24, v8, 4 3211; CHECK-NEXT: vand.vx v8, v8, a4 3212; CHECK-NEXT: vand.vx v24, v24, a4 3213; CHECK-NEXT: vsll.vi v8, v8, 4 3214; CHECK-NEXT: vor.vv v8, v24, v8 3215; CHECK-NEXT: vsrl.vi v24, v8, 2 3216; CHECK-NEXT: vand.vx v8, v8, a2 3217; CHECK-NEXT: vand.vx v24, v24, a2 3218; CHECK-NEXT: vsll.vi v8, v8, 2 3219; CHECK-NEXT: vor.vv v8, v24, v8 3220; CHECK-NEXT: vsrl.vi v24, v8, 1 3221; CHECK-NEXT: vand.vx v8, v8, a1 3222; CHECK-NEXT: vand.vx v24, v24, a1 3223; CHECK-NEXT: vadd.vv v8, v8, v8 3224; CHECK-NEXT: vor.vv v8, v24, v8 3225; CHECK-NEXT: ret 3226; 3227; CHECK-ZVBB-LABEL: 
vp_bitreverse_nxv64i16_unmasked: 3228; CHECK-ZVBB: # %bb.0: 3229; CHECK-ZVBB-NEXT: csrr a1, vlenb 3230; CHECK-ZVBB-NEXT: slli a1, a1, 2 3231; CHECK-ZVBB-NEXT: sub a2, a0, a1 3232; CHECK-ZVBB-NEXT: sltu a3, a0, a2 3233; CHECK-ZVBB-NEXT: addi a3, a3, -1 3234; CHECK-ZVBB-NEXT: and a2, a3, a2 3235; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma 3236; CHECK-ZVBB-NEXT: vbrev.v v16, v16 3237; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2 3238; CHECK-ZVBB-NEXT: # %bb.1: 3239; CHECK-ZVBB-NEXT: mv a0, a1 3240; CHECK-ZVBB-NEXT: .LBB47_2: 3241; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 3242; CHECK-ZVBB-NEXT: vbrev.v v8, v8 3243; CHECK-ZVBB-NEXT: ret 3244 %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl) 3245 ret <vscale x 64 x i16> %v 3246} 3247 3248; Test promotion. 3249declare <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32) 3250define <vscale x 1 x i9> @vp_bitreverse_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 3251; CHECK-LABEL: vp_bitreverse_nxv1i9: 3252; CHECK: # %bb.0: 3253; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 3254; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 3255; CHECK-NEXT: lui a0, 1 3256; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 3257; CHECK-NEXT: addi a0, a0, -241 3258; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3259; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 3260; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 3261; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3262; CHECK-NEXT: lui a0, 3 3263; CHECK-NEXT: addi a0, a0, 819 3264; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 3265; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 3266; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 3267; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 3268; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3269; CHECK-NEXT: lui a0, 5 3270; CHECK-NEXT: addi a0, a0, 1365 3271; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 3272; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 3273; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 3274; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 3275; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3276; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 3277; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 3278; CHECK-NEXT: vsrl.vi v8, v8, 7, v0.t 3279; CHECK-NEXT: ret 3280; 3281; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9: 3282; CHECK-ZVBB: # %bb.0: 3283; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 3284; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t 3285; CHECK-ZVBB-NEXT: vsrl.vi v8, v8, 7, v0.t 3286; CHECK-ZVBB-NEXT: ret 3287 %v = call <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl) 3288 ret <vscale x 1 x i9> %v 3289} 3290