; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

declare <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8>, <2 x i1>, i32)

define <2 x i8> @vp_bitreverse_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_bitreverse_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.bitreverse.v4i8(<4 x i8>, <4 x i1>, i32)

define <4 x i8> @vp_bitreverse_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.bitreverse.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_bitreverse_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.bitreverse.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.bitreverse.v8i8(<8 x i8>, <8 x i1>, i32)

define <8 x i8> @vp_bitreverse_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.bitreverse.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_bitreverse_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.bitreverse.v8i8(<8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.bitreverse.v16i8(<16 x i8>, <16 x i1>, i32)

define <16 x i8> @vp_bitreverse_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
;
CHECK-NEXT: vand.vx v9, v9, a0, v0.t 189; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 190; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 191; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 192; CHECK-NEXT: ret 193 %v = call <16 x i8> @llvm.vp.bitreverse.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl) 194 ret <16 x i8> %v 195} 196 197define <16 x i8> @vp_bitreverse_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { 198; CHECK-LABEL: vp_bitreverse_v16i8_unmasked: 199; CHECK: # %bb.0: 200; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 201; CHECK-NEXT: vand.vi v9, v8, 15 202; CHECK-NEXT: vsrl.vi v8, v8, 4 203; CHECK-NEXT: li a0, 51 204; CHECK-NEXT: vsll.vi v9, v9, 4 205; CHECK-NEXT: vand.vi v8, v8, 15 206; CHECK-NEXT: vor.vv v8, v8, v9 207; CHECK-NEXT: vsrl.vi v9, v8, 2 208; CHECK-NEXT: vand.vx v8, v8, a0 209; CHECK-NEXT: vand.vx v9, v9, a0 210; CHECK-NEXT: li a0, 85 211; CHECK-NEXT: vsll.vi v8, v8, 2 212; CHECK-NEXT: vor.vv v8, v9, v8 213; CHECK-NEXT: vsrl.vi v9, v8, 1 214; CHECK-NEXT: vand.vx v8, v8, a0 215; CHECK-NEXT: vand.vx v9, v9, a0 216; CHECK-NEXT: vadd.vv v8, v8, v8 217; CHECK-NEXT: vor.vv v8, v9, v8 218; CHECK-NEXT: ret 219 %v = call <16 x i8> @llvm.vp.bitreverse.v16i8(<16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl) 220 ret <16 x i8> %v 221} 222 223declare <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16>, <2 x i1>, i32) 224 225define <2 x i16> @vp_bitreverse_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { 226; CHECK-LABEL: vp_bitreverse_v2i16: 227; CHECK: # %bb.0: 228; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 229; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 230; CHECK-NEXT: lui a0, 1 231; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 232; CHECK-NEXT: addi a0, a0, -241 233; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 234; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 235; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 236; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 237; CHECK-NEXT: lui a0, 3 238; CHECK-NEXT: addi a0, a0, 819 239; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 240; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 241; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 242; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 243; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 244; CHECK-NEXT: lui a0, 5 245; CHECK-NEXT: addi a0, a0, 1365 246; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 247; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 248; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 249; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 250; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 251; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 252; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 253; CHECK-NEXT: ret 254 %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl) 255 ret <2 x i16> %v 256} 257 258define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { 259; CHECK-LABEL: vp_bitreverse_v2i16_unmasked: 260; CHECK: # %bb.0: 261; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 262; CHECK-NEXT: vsrl.vi v9, v8, 8 263; CHECK-NEXT: vsll.vi v8, v8, 8 264; CHECK-NEXT: lui a0, 1 265; CHECK-NEXT: vor.vv v8, v8, v9 266; CHECK-NEXT: addi a0, a0, -241 267; CHECK-NEXT: vsrl.vi v9, v8, 4 268; CHECK-NEXT: vand.vx v8, v8, a0 269; CHECK-NEXT: vand.vx v9, v9, a0 270; CHECK-NEXT: lui a0, 3 271; CHECK-NEXT: addi a0, a0, 819 272; CHECK-NEXT: vsll.vi v8, v8, 4 273; CHECK-NEXT: vor.vv v8, v9, v8 274; CHECK-NEXT: vsrl.vi v9, v8, 2 275; CHECK-NEXT: vand.vx v8, v8, a0 276; CHECK-NEXT: vand.vx v9, v9, a0 277; CHECK-NEXT: lui a0, 5 278; CHECK-NEXT: addi a0, a0, 1365 279; CHECK-NEXT: vsll.vi v8, v8, 2 280; CHECK-NEXT: vor.vv v8, v9, v8 281; CHECK-NEXT: vsrl.vi v9, v8, 1 282; CHECK-NEXT: vand.vx v8, v8, a0 283; CHECK-NEXT: vand.vx v9, v9, a0 284; 
CHECK-NEXT: vadd.vv v8, v8, v8 285; CHECK-NEXT: vor.vv v8, v9, v8 286; CHECK-NEXT: ret 287 %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl) 288 ret <2 x i16> %v 289} 290 291declare <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16>, <4 x i1>, i32) 292 293define <4 x i16> @vp_bitreverse_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { 294; CHECK-LABEL: vp_bitreverse_v4i16: 295; CHECK: # %bb.0: 296; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 297; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 298; CHECK-NEXT: lui a0, 1 299; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 300; CHECK-NEXT: addi a0, a0, -241 301; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 302; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 303; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 304; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 305; CHECK-NEXT: lui a0, 3 306; CHECK-NEXT: addi a0, a0, 819 307; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 308; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 309; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 310; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 311; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 312; CHECK-NEXT: lui a0, 5 313; CHECK-NEXT: addi a0, a0, 1365 314; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 315; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 316; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 317; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 318; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 319; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 320; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 321; CHECK-NEXT: ret 322 %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl) 323 ret <4 x i16> %v 324} 325 326define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { 327; CHECK-LABEL: vp_bitreverse_v4i16_unmasked: 328; CHECK: # %bb.0: 329; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 330; CHECK-NEXT: vsrl.vi v9, v8, 8 331; CHECK-NEXT: vsll.vi v8, v8, 8 332; CHECK-NEXT: lui a0, 1 333; CHECK-NEXT: vor.vv v8, v8, v9 334; CHECK-NEXT: addi a0, a0, -241 335; CHECK-NEXT: vsrl.vi v9, v8, 4 336; CHECK-NEXT: vand.vx v8, v8, a0 337; CHECK-NEXT: vand.vx v9, v9, a0 338; CHECK-NEXT: lui a0, 3 339; CHECK-NEXT: addi a0, a0, 819 340; CHECK-NEXT: vsll.vi v8, v8, 4 341; CHECK-NEXT: vor.vv v8, v9, v8 342; CHECK-NEXT: vsrl.vi v9, v8, 2 343; CHECK-NEXT: vand.vx v8, v8, a0 344; CHECK-NEXT: vand.vx v9, v9, a0 345; CHECK-NEXT: lui a0, 5 346; CHECK-NEXT: addi a0, a0, 1365 347; CHECK-NEXT: vsll.vi v8, v8, 2 348; CHECK-NEXT: vor.vv v8, v9, v8 349; CHECK-NEXT: vsrl.vi v9, v8, 1 350; CHECK-NEXT: vand.vx v8, v8, a0 351; CHECK-NEXT: vand.vx v9, v9, a0 352; CHECK-NEXT: vadd.vv v8, v8, v8 353; CHECK-NEXT: vor.vv v8, v9, v8 354; CHECK-NEXT: ret 355 %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) 356 ret <4 x i16> %v 357} 358 359declare <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16>, <8 x i1>, i32) 360 361define <8 x i16> @vp_bitreverse_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { 362; CHECK-LABEL: vp_bitreverse_v8i16: 363; CHECK: # %bb.0: 364; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 365; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 366; CHECK-NEXT: lui a0, 1 367; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 368; CHECK-NEXT: addi a0, a0, -241 369; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 370; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 371; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 372; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 373; CHECK-NEXT: lui a0, 3 374; CHECK-NEXT: addi a0, a0, 819 375; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 376; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 377; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 378; CHECK-NEXT: vand.vx v9, v9, a0, 
v0.t 379; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 380; CHECK-NEXT: lui a0, 5 381; CHECK-NEXT: addi a0, a0, 1365 382; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 383; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 384; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 385; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 386; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 387; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 388; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 389; CHECK-NEXT: ret 390 %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl) 391 ret <8 x i16> %v 392} 393 394define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { 395; CHECK-LABEL: vp_bitreverse_v8i16_unmasked: 396; CHECK: # %bb.0: 397; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 398; CHECK-NEXT: vsrl.vi v9, v8, 8 399; CHECK-NEXT: vsll.vi v8, v8, 8 400; CHECK-NEXT: lui a0, 1 401; CHECK-NEXT: vor.vv v8, v8, v9 402; CHECK-NEXT: addi a0, a0, -241 403; CHECK-NEXT: vsrl.vi v9, v8, 4 404; CHECK-NEXT: vand.vx v8, v8, a0 405; CHECK-NEXT: vand.vx v9, v9, a0 406; CHECK-NEXT: lui a0, 3 407; CHECK-NEXT: addi a0, a0, 819 408; CHECK-NEXT: vsll.vi v8, v8, 4 409; CHECK-NEXT: vor.vv v8, v9, v8 410; CHECK-NEXT: vsrl.vi v9, v8, 2 411; CHECK-NEXT: vand.vx v8, v8, a0 412; CHECK-NEXT: vand.vx v9, v9, a0 413; CHECK-NEXT: lui a0, 5 414; CHECK-NEXT: addi a0, a0, 1365 415; CHECK-NEXT: vsll.vi v8, v8, 2 416; CHECK-NEXT: vor.vv v8, v9, v8 417; CHECK-NEXT: vsrl.vi v9, v8, 1 418; CHECK-NEXT: vand.vx v8, v8, a0 419; CHECK-NEXT: vand.vx v9, v9, a0 420; CHECK-NEXT: vadd.vv v8, v8, v8 421; CHECK-NEXT: vor.vv v8, v9, v8 422; CHECK-NEXT: ret 423 %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl) 424 ret <8 x i16> %v 425} 426 427declare <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16>, <16 x i1>, i32) 428 429define <16 x i16> @vp_bitreverse_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { 430; CHECK-LABEL: vp_bitreverse_v16i16: 431; CHECK: # %bb.0: 432; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 433; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t 434; CHECK-NEXT: lui a0, 1 435; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t 436; CHECK-NEXT: addi a0, a0, -241 437; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 438; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 439; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 440; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 441; CHECK-NEXT: lui a0, 3 442; CHECK-NEXT: addi a0, a0, 819 443; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 444; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 445; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t 446; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 447; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 448; CHECK-NEXT: lui a0, 5 449; CHECK-NEXT: addi a0, a0, 1365 450; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 451; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 452; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 453; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 454; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 455; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 456; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 457; CHECK-NEXT: ret 458 %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl) 459 ret <16 x i16> %v 460} 461 462define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { 463; CHECK-LABEL: vp_bitreverse_v16i16_unmasked: 464; CHECK: # %bb.0: 465; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 466; CHECK-NEXT: vsrl.vi v10, v8, 8 467; CHECK-NEXT: vsll.vi v8, v8, 8 468; CHECK-NEXT: lui a0, 1 469; CHECK-NEXT: vor.vv v8, v8, v10 470; CHECK-NEXT: addi a0, a0, -241 471; CHECK-NEXT: vsrl.vi v10, v8, 4 472; CHECK-NEXT: vand.vx v8, v8, a0 473; 
CHECK-NEXT: vand.vx v10, v10, a0 474; CHECK-NEXT: lui a0, 3 475; CHECK-NEXT: addi a0, a0, 819 476; CHECK-NEXT: vsll.vi v8, v8, 4 477; CHECK-NEXT: vor.vv v8, v10, v8 478; CHECK-NEXT: vsrl.vi v10, v8, 2 479; CHECK-NEXT: vand.vx v8, v8, a0 480; CHECK-NEXT: vand.vx v10, v10, a0 481; CHECK-NEXT: lui a0, 5 482; CHECK-NEXT: addi a0, a0, 1365 483; CHECK-NEXT: vsll.vi v8, v8, 2 484; CHECK-NEXT: vor.vv v8, v10, v8 485; CHECK-NEXT: vsrl.vi v10, v8, 1 486; CHECK-NEXT: vand.vx v8, v8, a0 487; CHECK-NEXT: vand.vx v10, v10, a0 488; CHECK-NEXT: vadd.vv v8, v8, v8 489; CHECK-NEXT: vor.vv v8, v10, v8 490; CHECK-NEXT: ret 491 %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl) 492 ret <16 x i16> %v 493} 494 495declare <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32>, <2 x i1>, i32) 496 497define <2 x i32> @vp_bitreverse_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { 498; CHECK-LABEL: vp_bitreverse_v2i32: 499; CHECK: # %bb.0: 500; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 501; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 502; CHECK-NEXT: lui a0, 16 503; CHECK-NEXT: addi a0, a0, -256 504; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 505; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t 506; CHECK-NEXT: vor.vv v9, v9, v10, v0.t 507; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 508; CHECK-NEXT: lui a0, 61681 509; CHECK-NEXT: addi a0, a0, -241 510; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t 511; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 512; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 513; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 514; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 515; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 516; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 517; CHECK-NEXT: lui a0, 209715 518; CHECK-NEXT: addi a0, a0, 819 519; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 520; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 521; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 522; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 523; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 524; CHECK-NEXT: lui a0, 349525 525; CHECK-NEXT: addi a0, a0, 1365 526; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 527; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 528; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 529; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 530; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 531; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 532; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 533; CHECK-NEXT: ret 534 %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) 535 ret <2 x i32> %v 536} 537 538define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { 539; CHECK-LABEL: vp_bitreverse_v2i32_unmasked: 540; CHECK: # %bb.0: 541; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 542; CHECK-NEXT: vsrl.vi v9, v8, 8 543; CHECK-NEXT: lui a0, 16 544; CHECK-NEXT: vsrl.vi v10, v8, 24 545; CHECK-NEXT: addi a0, a0, -256 546; CHECK-NEXT: vand.vx v9, v9, a0 547; CHECK-NEXT: vor.vv v9, v9, v10 548; CHECK-NEXT: vsll.vi v10, v8, 24 549; CHECK-NEXT: vand.vx v8, v8, a0 550; CHECK-NEXT: lui a0, 61681 551; CHECK-NEXT: addi a0, a0, -241 552; CHECK-NEXT: vsll.vi v8, v8, 8 553; CHECK-NEXT: vor.vv v8, v10, v8 554; CHECK-NEXT: vor.vv v8, v8, v9 555; CHECK-NEXT: vsrl.vi v9, v8, 4 556; CHECK-NEXT: vand.vx v8, v8, a0 557; CHECK-NEXT: vand.vx v9, v9, a0 558; CHECK-NEXT: lui a0, 209715 559; CHECK-NEXT: addi a0, a0, 819 560; CHECK-NEXT: vsll.vi v8, v8, 4 561; CHECK-NEXT: vor.vv v8, v9, v8 562; CHECK-NEXT: vsrl.vi v9, v8, 2 563; CHECK-NEXT: vand.vx v8, v8, a0 564; CHECK-NEXT: vand.vx v9, v9, a0 565; CHECK-NEXT: lui a0, 349525 566; CHECK-NEXT: addi a0, a0, 1365 567; CHECK-NEXT: vsll.vi v8, v8, 2 568; CHECK-NEXT: 
vor.vv v8, v9, v8 569; CHECK-NEXT: vsrl.vi v9, v8, 1 570; CHECK-NEXT: vand.vx v8, v8, a0 571; CHECK-NEXT: vand.vx v9, v9, a0 572; CHECK-NEXT: vadd.vv v8, v8, v8 573; CHECK-NEXT: vor.vv v8, v9, v8 574; CHECK-NEXT: ret 575 %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl) 576 ret <2 x i32> %v 577} 578 579declare <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32>, <4 x i1>, i32) 580 581define <4 x i32> @vp_bitreverse_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { 582; CHECK-LABEL: vp_bitreverse_v4i32: 583; CHECK: # %bb.0: 584; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 585; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 586; CHECK-NEXT: lui a0, 16 587; CHECK-NEXT: addi a0, a0, -256 588; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 589; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t 590; CHECK-NEXT: vor.vv v9, v9, v10, v0.t 591; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 592; CHECK-NEXT: lui a0, 61681 593; CHECK-NEXT: addi a0, a0, -241 594; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t 595; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 596; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 597; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 598; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 599; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 600; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 601; CHECK-NEXT: lui a0, 209715 602; CHECK-NEXT: addi a0, a0, 819 603; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 604; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 605; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 606; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 607; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 608; CHECK-NEXT: lui a0, 349525 609; CHECK-NEXT: addi a0, a0, 1365 610; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 611; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 612; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 613; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 614; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 615; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 616; CHECK-NEXT: vor.vv v8, v9, v8, v0.t 617; CHECK-NEXT: ret 618 %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) 619 ret <4 x i32> %v 620} 621 622define <4 x i32> @vp_bitreverse_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { 623; CHECK-LABEL: vp_bitreverse_v4i32_unmasked: 624; CHECK: # %bb.0: 625; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 626; CHECK-NEXT: vsrl.vi v9, v8, 8 627; CHECK-NEXT: lui a0, 16 628; CHECK-NEXT: vsrl.vi v10, v8, 24 629; CHECK-NEXT: addi a0, a0, -256 630; CHECK-NEXT: vand.vx v9, v9, a0 631; CHECK-NEXT: vor.vv v9, v9, v10 632; CHECK-NEXT: vsll.vi v10, v8, 24 633; CHECK-NEXT: vand.vx v8, v8, a0 634; CHECK-NEXT: lui a0, 61681 635; CHECK-NEXT: addi a0, a0, -241 636; CHECK-NEXT: vsll.vi v8, v8, 8 637; CHECK-NEXT: vor.vv v8, v10, v8 638; CHECK-NEXT: vor.vv v8, v8, v9 639; CHECK-NEXT: vsrl.vi v9, v8, 4 640; CHECK-NEXT: vand.vx v8, v8, a0 641; CHECK-NEXT: vand.vx v9, v9, a0 642; CHECK-NEXT: lui a0, 209715 643; CHECK-NEXT: addi a0, a0, 819 644; CHECK-NEXT: vsll.vi v8, v8, 4 645; CHECK-NEXT: vor.vv v8, v9, v8 646; CHECK-NEXT: vsrl.vi v9, v8, 2 647; CHECK-NEXT: vand.vx v8, v8, a0 648; CHECK-NEXT: vand.vx v9, v9, a0 649; CHECK-NEXT: lui a0, 349525 650; CHECK-NEXT: addi a0, a0, 1365 651; CHECK-NEXT: vsll.vi v8, v8, 2 652; CHECK-NEXT: vor.vv v8, v9, v8 653; CHECK-NEXT: vsrl.vi v9, v8, 1 654; CHECK-NEXT: vand.vx v8, v8, a0 655; CHECK-NEXT: vand.vx v9, v9, a0 656; CHECK-NEXT: vadd.vv v8, v8, v8 657; CHECK-NEXT: vor.vv v8, v9, v8 658; CHECK-NEXT: ret 659 %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) 660 ret <4 x i32> %v 661} 662 663declare <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x 
i32>, <8 x i1>, i32) 664 665define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { 666; CHECK-LABEL: vp_bitreverse_v8i32: 667; CHECK: # %bb.0: 668; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 669; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t 670; CHECK-NEXT: lui a0, 16 671; CHECK-NEXT: addi a0, a0, -256 672; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 673; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t 674; CHECK-NEXT: vor.vv v10, v10, v12, v0.t 675; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 676; CHECK-NEXT: lui a0, 61681 677; CHECK-NEXT: addi a0, a0, -241 678; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t 679; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 680; CHECK-NEXT: vor.vv v8, v8, v12, v0.t 681; CHECK-NEXT: vor.vv v8, v8, v10, v0.t 682; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 683; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 684; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 685; CHECK-NEXT: lui a0, 209715 686; CHECK-NEXT: addi a0, a0, 819 687; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 688; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 689; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t 690; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 691; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 692; CHECK-NEXT: lui a0, 349525 693; CHECK-NEXT: addi a0, a0, 1365 694; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 695; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 696; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 697; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 698; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 699; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 700; CHECK-NEXT: vor.vv v8, v10, v8, v0.t 701; CHECK-NEXT: ret 702 %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) 703 ret <8 x i32> %v 704} 705 706define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { 707; CHECK-LABEL: vp_bitreverse_v8i32_unmasked: 708; CHECK: # %bb.0: 709; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 710; CHECK-NEXT: vsrl.vi v10, v8, 8 711; CHECK-NEXT: lui a0, 16 712; CHECK-NEXT: vsrl.vi v12, v8, 24 713; CHECK-NEXT: addi a0, a0, -256 714; CHECK-NEXT: vand.vx v10, v10, a0 715; CHECK-NEXT: vor.vv v10, v10, v12 716; CHECK-NEXT: vsll.vi v12, v8, 24 717; CHECK-NEXT: vand.vx v8, v8, a0 718; CHECK-NEXT: lui a0, 61681 719; CHECK-NEXT: addi a0, a0, -241 720; CHECK-NEXT: vsll.vi v8, v8, 8 721; CHECK-NEXT: vor.vv v8, v12, v8 722; CHECK-NEXT: vor.vv v8, v8, v10 723; CHECK-NEXT: vsrl.vi v10, v8, 4 724; CHECK-NEXT: vand.vx v8, v8, a0 725; CHECK-NEXT: vand.vx v10, v10, a0 726; CHECK-NEXT: lui a0, 209715 727; CHECK-NEXT: addi a0, a0, 819 728; CHECK-NEXT: vsll.vi v8, v8, 4 729; CHECK-NEXT: vor.vv v8, v10, v8 730; CHECK-NEXT: vsrl.vi v10, v8, 2 731; CHECK-NEXT: vand.vx v8, v8, a0 732; CHECK-NEXT: vand.vx v10, v10, a0 733; CHECK-NEXT: lui a0, 349525 734; CHECK-NEXT: addi a0, a0, 1365 735; CHECK-NEXT: vsll.vi v8, v8, 2 736; CHECK-NEXT: vor.vv v8, v10, v8 737; CHECK-NEXT: vsrl.vi v10, v8, 1 738; CHECK-NEXT: vand.vx v8, v8, a0 739; CHECK-NEXT: vand.vx v10, v10, a0 740; CHECK-NEXT: vadd.vv v8, v8, v8 741; CHECK-NEXT: vor.vv v8, v10, v8 742; CHECK-NEXT: ret 743 %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl) 744 ret <8 x i32> %v 745} 746 747declare <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32>, <16 x i1>, i32) 748 749define <16 x i32> @vp_bitreverse_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { 750; CHECK-LABEL: vp_bitreverse_v16i32: 751; CHECK: # %bb.0: 752; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 753; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t 754; CHECK-NEXT: lui a0, 16 755; CHECK-NEXT: addi a0, a0, -256 756; CHECK-NEXT: vand.vx 
v12, v12, a0, v0.t 757; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t 758; CHECK-NEXT: vor.vv v12, v12, v16, v0.t 759; CHECK-NEXT: vand.vx v16, v8, a0, v0.t 760; CHECK-NEXT: lui a0, 61681 761; CHECK-NEXT: addi a0, a0, -241 762; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t 763; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t 764; CHECK-NEXT: vor.vv v8, v8, v16, v0.t 765; CHECK-NEXT: vor.vv v8, v8, v12, v0.t 766; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 767; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 768; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 769; CHECK-NEXT: lui a0, 209715 770; CHECK-NEXT: addi a0, a0, 819 771; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t 772; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 773; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t 774; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 775; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 776; CHECK-NEXT: lui a0, 349525 777; CHECK-NEXT: addi a0, a0, 1365 778; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t 779; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 780; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 781; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 782; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 783; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t 784; CHECK-NEXT: vor.vv v8, v12, v8, v0.t 785; CHECK-NEXT: ret 786 %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) 787 ret <16 x i32> %v 788} 789 790define <16 x i32> @vp_bitreverse_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { 791; CHECK-LABEL: vp_bitreverse_v16i32_unmasked: 792; CHECK: # %bb.0: 793; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 794; CHECK-NEXT: vsrl.vi v12, v8, 8 795; CHECK-NEXT: lui a0, 16 796; CHECK-NEXT: vsrl.vi v16, v8, 24 797; CHECK-NEXT: addi a0, a0, -256 798; CHECK-NEXT: vand.vx v12, v12, a0 799; CHECK-NEXT: vor.vv v12, v12, v16 800; CHECK-NEXT: vsll.vi v16, v8, 24 801; CHECK-NEXT: vand.vx v8, v8, a0 802; CHECK-NEXT: lui a0, 61681 803; CHECK-NEXT: addi a0, a0, -241 804; CHECK-NEXT: vsll.vi v8, v8, 8 805; CHECK-NEXT: vor.vv v8, v16, v8 806; CHECK-NEXT: vor.vv v8, v8, v12 807; CHECK-NEXT: vsrl.vi v12, v8, 4 808; CHECK-NEXT: vand.vx v8, v8, a0 809; CHECK-NEXT: vand.vx v12, v12, a0 810; CHECK-NEXT: lui a0, 209715 811; CHECK-NEXT: addi a0, a0, 819 812; CHECK-NEXT: vsll.vi v8, v8, 4 813; CHECK-NEXT: vor.vv v8, v12, v8 814; CHECK-NEXT: vsrl.vi v12, v8, 2 815; CHECK-NEXT: vand.vx v8, v8, a0 816; CHECK-NEXT: vand.vx v12, v12, a0 817; CHECK-NEXT: lui a0, 349525 818; CHECK-NEXT: addi a0, a0, 1365 819; CHECK-NEXT: vsll.vi v8, v8, 2 820; CHECK-NEXT: vor.vv v8, v12, v8 821; CHECK-NEXT: vsrl.vi v12, v8, 1 822; CHECK-NEXT: vand.vx v8, v8, a0 823; CHECK-NEXT: vand.vx v12, v12, a0 824; CHECK-NEXT: vadd.vv v8, v8, v8 825; CHECK-NEXT: vor.vv v8, v12, v8 826; CHECK-NEXT: ret 827 %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl) 828 ret <16 x i32> %v 829} 830 831declare <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64>, <2 x i1>, i32) 832 833define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { 834; RV32-LABEL: vp_bitreverse_v2i64: 835; RV32: # %bb.0: 836; RV32-NEXT: addi sp, sp, -16 837; RV32-NEXT: .cfi_def_cfa_offset 16 838; RV32-NEXT: lui a4, 1044480 839; RV32-NEXT: li a3, 56 840; RV32-NEXT: lui a5, 16 841; RV32-NEXT: li a2, 40 842; RV32-NEXT: lui a1, 4080 843; RV32-NEXT: addi a6, sp, 8 844; RV32-NEXT: sw a4, 8(sp) 845; RV32-NEXT: sw zero, 12(sp) 846; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 847; RV32-NEXT: vlse64.v v9, (a6), zero 848; RV32-NEXT: lui a4, 61681 849; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 850; RV32-NEXT: vsll.vx v10, v8, a3, v0.t 851; RV32-NEXT: 
addi a5, a5, -256 852; RV32-NEXT: vand.vx v11, v8, a5, v0.t 853; RV32-NEXT: vsll.vx v11, v11, a2, v0.t 854; RV32-NEXT: vor.vv v10, v10, v11, v0.t 855; RV32-NEXT: vand.vx v11, v8, a1, v0.t 856; RV32-NEXT: vsll.vi v11, v11, 24, v0.t 857; RV32-NEXT: vand.vv v12, v8, v9, v0.t 858; RV32-NEXT: vsll.vi v12, v12, 8, v0.t 859; RV32-NEXT: vor.vv v11, v11, v12, v0.t 860; RV32-NEXT: vor.vv v10, v10, v11, v0.t 861; RV32-NEXT: vsrl.vx v11, v8, a3, v0.t 862; RV32-NEXT: lui a3, 209715 863; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t 864; RV32-NEXT: lui a2, 349525 865; RV32-NEXT: addi a4, a4, -241 866; RV32-NEXT: addi a3, a3, 819 867; RV32-NEXT: addi a2, a2, 1365 868; RV32-NEXT: vand.vx v12, v12, a5, v0.t 869; RV32-NEXT: vor.vv v11, v12, v11, v0.t 870; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t 871; RV32-NEXT: vand.vx v12, v12, a1, v0.t 872; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 873; RV32-NEXT: vand.vv v8, v8, v9, v0.t 874; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 875; RV32-NEXT: vmv.v.x v9, a4 876; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 877; RV32-NEXT: vor.vv v8, v8, v12, v0.t 878; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 879; RV32-NEXT: vmv.v.x v12, a3 880; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 881; RV32-NEXT: vor.vv v8, v8, v11, v0.t 882; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 883; RV32-NEXT: vmv.v.x v11, a2 884; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 885; RV32-NEXT: vor.vv v8, v10, v8, v0.t 886; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t 887; RV32-NEXT: vand.vv v10, v10, v9, v0.t 888; RV32-NEXT: vand.vv v8, v8, v9, v0.t 889; RV32-NEXT: vsll.vi v8, v8, 4, v0.t 890; RV32-NEXT: vor.vv v8, v10, v8, v0.t 891; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t 892; RV32-NEXT: vand.vv v9, v9, v12, v0.t 893; RV32-NEXT: vand.vv v8, v8, v12, v0.t 894; RV32-NEXT: vsll.vi v8, v8, 2, v0.t 895; RV32-NEXT: vor.vv v8, v9, v8, v0.t 896; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t 897; RV32-NEXT: vand.vv v9, v9, v11, v0.t 898; RV32-NEXT: vand.vv v8, v8, v11, v0.t 899; RV32-NEXT: vsll.vi v8, v8, 1, v0.t 900; RV32-NEXT: vor.vv v8, v9, v8, v0.t 901; RV32-NEXT: addi sp, sp, 16 902; RV32-NEXT: .cfi_def_cfa_offset 0 903; RV32-NEXT: ret 904; 905; RV64-LABEL: vp_bitreverse_v2i64: 906; RV64: # %bb.0: 907; RV64-NEXT: lui a1, 4080 908; RV64-NEXT: li a3, 255 909; RV64-NEXT: li a2, 56 910; RV64-NEXT: lui a4, 16 911; RV64-NEXT: lui a5, 61681 912; RV64-NEXT: lui a6, 209715 913; RV64-NEXT: lui a7, 349525 914; RV64-NEXT: addiw a5, a5, -241 915; RV64-NEXT: addiw a6, a6, 819 916; RV64-NEXT: addiw a7, a7, 1365 917; RV64-NEXT: slli t0, a5, 32 918; RV64-NEXT: add t0, a5, t0 919; RV64-NEXT: slli a5, a6, 32 920; RV64-NEXT: add a6, a6, a5 921; RV64-NEXT: slli a5, a7, 32 922; RV64-NEXT: add a5, a7, a5 923; RV64-NEXT: li a7, 40 924; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 925; RV64-NEXT: vand.vx v9, v8, a1, v0.t 926; RV64-NEXT: slli a3, a3, 24 927; RV64-NEXT: addiw a0, a4, -256 928; RV64-NEXT: vsll.vi v9, v9, 24, v0.t 929; RV64-NEXT: vand.vx v10, v8, a3, v0.t 930; RV64-NEXT: vsll.vi v10, v10, 8, v0.t 931; RV64-NEXT: vor.vv v9, v9, v10, v0.t 932; RV64-NEXT: vsll.vx v10, v8, a2, v0.t 933; RV64-NEXT: vand.vx v11, v8, a0, v0.t 934; RV64-NEXT: vsll.vx v11, v11, a7, v0.t 935; RV64-NEXT: vor.vv v10, v10, v11, v0.t 936; RV64-NEXT: vor.vv v9, v10, v9, v0.t 937; RV64-NEXT: vsrl.vx v10, v8, a2, v0.t 938; RV64-NEXT: vsrl.vx v11, v8, a7, v0.t 939; RV64-NEXT: vand.vx v11, v11, a0, v0.t 940; RV64-NEXT: vor.vv v10, v11, v10, v0.t 941; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t 942; RV64-NEXT: vand.vx v11, v11, a1, v0.t 943; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 944; RV64-NEXT: 
vand.vx v8, v8, a3, v0.t 945; RV64-NEXT: vor.vv v8, v8, v11, v0.t 946; RV64-NEXT: vor.vv v8, v8, v10, v0.t 947; RV64-NEXT: vor.vv v8, v9, v8, v0.t 948; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t 949; RV64-NEXT: vand.vx v9, v9, t0, v0.t 950; RV64-NEXT: vand.vx v8, v8, t0, v0.t 951; RV64-NEXT: vsll.vi v8, v8, 4, v0.t 952; RV64-NEXT: vor.vv v8, v9, v8, v0.t 953; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t 954; RV64-NEXT: vand.vx v9, v9, a6, v0.t 955; RV64-NEXT: vand.vx v8, v8, a6, v0.t 956; RV64-NEXT: vsll.vi v8, v8, 2, v0.t 957; RV64-NEXT: vor.vv v8, v9, v8, v0.t 958; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t 959; RV64-NEXT: vand.vx v9, v9, a5, v0.t 960; RV64-NEXT: vand.vx v8, v8, a5, v0.t 961; RV64-NEXT: vsll.vi v8, v8, 1, v0.t 962; RV64-NEXT: vor.vv v8, v9, v8, v0.t 963; RV64-NEXT: ret 964 %v = call <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl) 965 ret <2 x i64> %v 966} 967 968define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { 969; RV32-LABEL: vp_bitreverse_v2i64_unmasked: 970; RV32: # %bb.0: 971; RV32-NEXT: addi sp, sp, -16 972; RV32-NEXT: .cfi_def_cfa_offset 16 973; RV32-NEXT: lui a1, 1044480 974; RV32-NEXT: li a2, 56 975; RV32-NEXT: lui a3, 16 976; RV32-NEXT: li a4, 40 977; RV32-NEXT: lui a5, 4080 978; RV32-NEXT: addi a6, sp, 8 979; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 980; RV32-NEXT: vsrl.vi v9, v8, 24 981; RV32-NEXT: sw a1, 8(sp) 982; RV32-NEXT: sw zero, 12(sp) 983; RV32-NEXT: vsll.vx v10, v8, a2 984; RV32-NEXT: addi a1, a3, -256 985; RV32-NEXT: vsrl.vx v11, v8, a2 986; RV32-NEXT: vsrl.vx v12, v8, a4 987; RV32-NEXT: vand.vx v13, v8, a1 988; RV32-NEXT: vand.vx v12, v12, a1 989; RV32-NEXT: vor.vv v11, v12, v11 990; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 991; RV32-NEXT: vlse64.v v12, (a6), zero 992; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 993; RV32-NEXT: vsll.vx v13, v13, a4 994; RV32-NEXT: vor.vv v10, v10, v13 995; RV32-NEXT: vsrl.vi v13, v8, 8 996; RV32-NEXT: vand.vx v9, v9, a5 997; RV32-NEXT: vand.vv v13, v13, v12 998; RV32-NEXT: vor.vv v9, v13, v9 999; RV32-NEXT: lui a1, 61681 1000; RV32-NEXT: lui a2, 209715 1001; RV32-NEXT: lui a3, 349525 1002; RV32-NEXT: vand.vv v12, v8, v12 1003; RV32-NEXT: vand.vx v8, v8, a5 1004; RV32-NEXT: addi a1, a1, -241 1005; RV32-NEXT: addi a2, a2, 819 1006; RV32-NEXT: addi a3, a3, 1365 1007; RV32-NEXT: vsll.vi v8, v8, 24 1008; RV32-NEXT: vsll.vi v12, v12, 8 1009; RV32-NEXT: vor.vv v8, v8, v12 1010; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1011; RV32-NEXT: vmv.v.x v12, a1 1012; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1013; RV32-NEXT: vor.vv v9, v9, v11 1014; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1015; RV32-NEXT: vmv.v.x v11, a2 1016; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1017; RV32-NEXT: vor.vv v8, v10, v8 1018; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1019; RV32-NEXT: vmv.v.x v10, a3 1020; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1021; RV32-NEXT: vor.vv v8, v8, v9 1022; RV32-NEXT: vsrl.vi v9, v8, 4 1023; RV32-NEXT: vand.vv v8, v8, v12 1024; RV32-NEXT: vand.vv v9, v9, v12 1025; RV32-NEXT: vsll.vi v8, v8, 4 1026; RV32-NEXT: vor.vv v8, v9, v8 1027; RV32-NEXT: vsrl.vi v9, v8, 2 1028; RV32-NEXT: vand.vv v8, v8, v11 1029; RV32-NEXT: vand.vv v9, v9, v11 1030; RV32-NEXT: vsll.vi v8, v8, 2 1031; RV32-NEXT: vor.vv v8, v9, v8 1032; RV32-NEXT: vsrl.vi v9, v8, 1 1033; RV32-NEXT: vand.vv v8, v8, v10 1034; RV32-NEXT: vand.vv v9, v9, v10 1035; RV32-NEXT: vadd.vv v8, v8, v8 1036; RV32-NEXT: vor.vv v8, v9, v8 1037; RV32-NEXT: addi sp, sp, 16 1038; RV32-NEXT: .cfi_def_cfa_offset 0 1039; 
RV32-NEXT: ret 1040; 1041; RV64-LABEL: vp_bitreverse_v2i64_unmasked: 1042; RV64: # %bb.0: 1043; RV64-NEXT: lui a1, 4080 1044; RV64-NEXT: li a2, 255 1045; RV64-NEXT: li a3, 56 1046; RV64-NEXT: lui a4, 16 1047; RV64-NEXT: li a5, 40 1048; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1049; RV64-NEXT: vsrl.vi v9, v8, 24 1050; RV64-NEXT: vsrl.vi v10, v8, 8 1051; RV64-NEXT: addiw a0, a4, -256 1052; RV64-NEXT: vsrl.vx v11, v8, a3 1053; RV64-NEXT: vsrl.vx v12, v8, a5 1054; RV64-NEXT: vand.vx v12, v12, a0 1055; RV64-NEXT: vor.vv v11, v12, v11 1056; RV64-NEXT: vand.vx v12, v8, a1 1057; RV64-NEXT: slli a2, a2, 24 1058; RV64-NEXT: vand.vx v9, v9, a1 1059; RV64-NEXT: vsll.vi v12, v12, 24 1060; RV64-NEXT: vand.vx v10, v10, a2 1061; RV64-NEXT: vor.vv v9, v10, v9 1062; RV64-NEXT: vand.vx v10, v8, a2 1063; RV64-NEXT: vsll.vi v10, v10, 8 1064; RV64-NEXT: vor.vv v10, v12, v10 1065; RV64-NEXT: vsll.vx v12, v8, a3 1066; RV64-NEXT: vand.vx v8, v8, a0 1067; RV64-NEXT: vsll.vx v8, v8, a5 1068; RV64-NEXT: vor.vv v8, v12, v8 1069; RV64-NEXT: lui a0, 61681 1070; RV64-NEXT: lui a1, 209715 1071; RV64-NEXT: lui a2, 349525 1072; RV64-NEXT: addiw a0, a0, -241 1073; RV64-NEXT: addiw a1, a1, 819 1074; RV64-NEXT: addiw a2, a2, 1365 1075; RV64-NEXT: slli a3, a0, 32 1076; RV64-NEXT: slli a4, a1, 32 1077; RV64-NEXT: add a0, a0, a3 1078; RV64-NEXT: slli a3, a2, 32 1079; RV64-NEXT: add a1, a1, a4 1080; RV64-NEXT: add a2, a2, a3 1081; RV64-NEXT: vor.vv v9, v9, v11 1082; RV64-NEXT: vor.vv v8, v8, v10 1083; RV64-NEXT: vor.vv v8, v8, v9 1084; RV64-NEXT: vsrl.vi v9, v8, 4 1085; RV64-NEXT: vand.vx v8, v8, a0 1086; RV64-NEXT: vand.vx v9, v9, a0 1087; RV64-NEXT: vsll.vi v8, v8, 4 1088; RV64-NEXT: vor.vv v8, v9, v8 1089; RV64-NEXT: vsrl.vi v9, v8, 2 1090; RV64-NEXT: vand.vx v8, v8, a1 1091; RV64-NEXT: vand.vx v9, v9, a1 1092; RV64-NEXT: vsll.vi v8, v8, 2 1093; RV64-NEXT: vor.vv v8, v9, v8 1094; RV64-NEXT: vsrl.vi v9, v8, 1 1095; RV64-NEXT: vand.vx v8, v8, a2 1096; RV64-NEXT: vand.vx v9, v9, a2 1097; RV64-NEXT: vadd.vv v8, v8, v8 1098; RV64-NEXT: vor.vv v8, v9, v8 1099; RV64-NEXT: ret 1100 %v = call <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl) 1101 ret <2 x i64> %v 1102} 1103 1104declare <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64>, <4 x i1>, i32) 1105 1106define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { 1107; RV32-LABEL: vp_bitreverse_v4i64: 1108; RV32: # %bb.0: 1109; RV32-NEXT: addi sp, sp, -16 1110; RV32-NEXT: .cfi_def_cfa_offset 16 1111; RV32-NEXT: lui a4, 1044480 1112; RV32-NEXT: li a3, 56 1113; RV32-NEXT: lui a5, 16 1114; RV32-NEXT: li a2, 40 1115; RV32-NEXT: lui a1, 4080 1116; RV32-NEXT: addi a6, sp, 8 1117; RV32-NEXT: sw a4, 8(sp) 1118; RV32-NEXT: sw zero, 12(sp) 1119; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma 1120; RV32-NEXT: vlse64.v v10, (a6), zero 1121; RV32-NEXT: lui a4, 61681 1122; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1123; RV32-NEXT: vsll.vx v12, v8, a3, v0.t 1124; RV32-NEXT: addi a5, a5, -256 1125; RV32-NEXT: vand.vx v14, v8, a5, v0.t 1126; RV32-NEXT: vsll.vx v14, v14, a2, v0.t 1127; RV32-NEXT: vor.vv v12, v12, v14, v0.t 1128; RV32-NEXT: vand.vx v14, v8, a1, v0.t 1129; RV32-NEXT: vsll.vi v14, v14, 24, v0.t 1130; RV32-NEXT: vand.vv v16, v8, v10, v0.t 1131; RV32-NEXT: vsll.vi v16, v16, 8, v0.t 1132; RV32-NEXT: vor.vv v14, v14, v16, v0.t 1133; RV32-NEXT: vor.vv v12, v12, v14, v0.t 1134; RV32-NEXT: vsrl.vx v14, v8, a3, v0.t 1135; RV32-NEXT: lui a3, 209715 1136; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t 1137; RV32-NEXT: lui a2, 349525 1138; 
RV32-NEXT: addi a4, a4, -241 1139; RV32-NEXT: addi a3, a3, 819 1140; RV32-NEXT: addi a2, a2, 1365 1141; RV32-NEXT: vand.vx v16, v16, a5, v0.t 1142; RV32-NEXT: vor.vv v14, v16, v14, v0.t 1143; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t 1144; RV32-NEXT: vand.vx v16, v16, a1, v0.t 1145; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 1146; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1147; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1148; RV32-NEXT: vmv.v.x v10, a4 1149; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1150; RV32-NEXT: vor.vv v8, v8, v16, v0.t 1151; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1152; RV32-NEXT: vmv.v.x v16, a3 1153; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1154; RV32-NEXT: vor.vv v8, v8, v14, v0.t 1155; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1156; RV32-NEXT: vmv.v.x v14, a2 1157; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1158; RV32-NEXT: vor.vv v8, v12, v8, v0.t 1159; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t 1160; RV32-NEXT: vand.vv v12, v12, v10, v0.t 1161; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1162; RV32-NEXT: vsll.vi v8, v8, 4, v0.t 1163; RV32-NEXT: vor.vv v8, v12, v8, v0.t 1164; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t 1165; RV32-NEXT: vand.vv v10, v10, v16, v0.t 1166; RV32-NEXT: vand.vv v8, v8, v16, v0.t 1167; RV32-NEXT: vsll.vi v8, v8, 2, v0.t 1168; RV32-NEXT: vor.vv v8, v10, v8, v0.t 1169; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t 1170; RV32-NEXT: vand.vv v10, v10, v14, v0.t 1171; RV32-NEXT: vand.vv v8, v8, v14, v0.t 1172; RV32-NEXT: vsll.vi v8, v8, 1, v0.t 1173; RV32-NEXT: vor.vv v8, v10, v8, v0.t 1174; RV32-NEXT: addi sp, sp, 16 1175; RV32-NEXT: .cfi_def_cfa_offset 0 1176; RV32-NEXT: ret 1177; 1178; RV64-LABEL: vp_bitreverse_v4i64: 1179; RV64: # %bb.0: 1180; RV64-NEXT: lui a1, 4080 1181; RV64-NEXT: li a3, 255 1182; RV64-NEXT: li a2, 56 1183; RV64-NEXT: lui a4, 16 1184; RV64-NEXT: lui a5, 61681 1185; RV64-NEXT: lui a6, 209715 1186; RV64-NEXT: lui a7, 349525 1187; RV64-NEXT: addiw a5, a5, -241 1188; RV64-NEXT: addiw a6, a6, 819 1189; RV64-NEXT: addiw a7, a7, 1365 1190; RV64-NEXT: slli t0, a5, 32 1191; RV64-NEXT: add t0, a5, t0 1192; RV64-NEXT: slli a5, a6, 32 1193; RV64-NEXT: add a6, a6, a5 1194; RV64-NEXT: slli a5, a7, 32 1195; RV64-NEXT: add a5, a7, a5 1196; RV64-NEXT: li a7, 40 1197; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1198; RV64-NEXT: vand.vx v10, v8, a1, v0.t 1199; RV64-NEXT: slli a3, a3, 24 1200; RV64-NEXT: addiw a0, a4, -256 1201; RV64-NEXT: vsll.vi v10, v10, 24, v0.t 1202; RV64-NEXT: vand.vx v12, v8, a3, v0.t 1203; RV64-NEXT: vsll.vi v12, v12, 8, v0.t 1204; RV64-NEXT: vor.vv v10, v10, v12, v0.t 1205; RV64-NEXT: vsll.vx v12, v8, a2, v0.t 1206; RV64-NEXT: vand.vx v14, v8, a0, v0.t 1207; RV64-NEXT: vsll.vx v14, v14, a7, v0.t 1208; RV64-NEXT: vor.vv v12, v12, v14, v0.t 1209; RV64-NEXT: vor.vv v10, v12, v10, v0.t 1210; RV64-NEXT: vsrl.vx v12, v8, a2, v0.t 1211; RV64-NEXT: vsrl.vx v14, v8, a7, v0.t 1212; RV64-NEXT: vand.vx v14, v14, a0, v0.t 1213; RV64-NEXT: vor.vv v12, v14, v12, v0.t 1214; RV64-NEXT: vsrl.vi v14, v8, 24, v0.t 1215; RV64-NEXT: vand.vx v14, v14, a1, v0.t 1216; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 1217; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1218; RV64-NEXT: vor.vv v8, v8, v14, v0.t 1219; RV64-NEXT: vor.vv v8, v8, v12, v0.t 1220; RV64-NEXT: vor.vv v8, v10, v8, v0.t 1221; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t 1222; RV64-NEXT: vand.vx v10, v10, t0, v0.t 1223; RV64-NEXT: vand.vx v8, v8, t0, v0.t 1224; RV64-NEXT: vsll.vi v8, v8, 4, v0.t 1225; RV64-NEXT: vor.vv v8, v10, v8, v0.t 1226; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t 1227; RV64-NEXT: vand.vx v10, v10, a6, v0.t 1228; 
RV64-NEXT: vand.vx v8, v8, a6, v0.t 1229; RV64-NEXT: vsll.vi v8, v8, 2, v0.t 1230; RV64-NEXT: vor.vv v8, v10, v8, v0.t 1231; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t 1232; RV64-NEXT: vand.vx v10, v10, a5, v0.t 1233; RV64-NEXT: vand.vx v8, v8, a5, v0.t 1234; RV64-NEXT: vsll.vi v8, v8, 1, v0.t 1235; RV64-NEXT: vor.vv v8, v10, v8, v0.t 1236; RV64-NEXT: ret 1237 %v = call <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl) 1238 ret <4 x i64> %v 1239} 1240 1241define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { 1242; RV32-LABEL: vp_bitreverse_v4i64_unmasked: 1243; RV32: # %bb.0: 1244; RV32-NEXT: addi sp, sp, -16 1245; RV32-NEXT: .cfi_def_cfa_offset 16 1246; RV32-NEXT: lui a1, 1044480 1247; RV32-NEXT: li a2, 56 1248; RV32-NEXT: lui a3, 16 1249; RV32-NEXT: li a4, 40 1250; RV32-NEXT: lui a5, 4080 1251; RV32-NEXT: addi a6, sp, 8 1252; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1253; RV32-NEXT: vsrl.vi v14, v8, 24 1254; RV32-NEXT: sw a1, 8(sp) 1255; RV32-NEXT: sw zero, 12(sp) 1256; RV32-NEXT: vsll.vx v12, v8, a2 1257; RV32-NEXT: addi a1, a3, -256 1258; RV32-NEXT: vsrl.vx v10, v8, a2 1259; RV32-NEXT: vsrl.vx v16, v8, a4 1260; RV32-NEXT: vand.vx v18, v8, a1 1261; RV32-NEXT: vand.vx v16, v16, a1 1262; RV32-NEXT: vor.vv v10, v16, v10 1263; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma 1264; RV32-NEXT: vlse64.v v16, (a6), zero 1265; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1266; RV32-NEXT: vsll.vx v18, v18, a4 1267; RV32-NEXT: vor.vv v12, v12, v18 1268; RV32-NEXT: vsrl.vi v18, v8, 8 1269; RV32-NEXT: vand.vx v14, v14, a5 1270; RV32-NEXT: vand.vv v18, v18, v16 1271; RV32-NEXT: vor.vv v14, v18, v14 1272; RV32-NEXT: lui a1, 61681 1273; RV32-NEXT: lui a2, 209715 1274; RV32-NEXT: lui a3, 349525 1275; RV32-NEXT: vand.vv v16, v8, v16 1276; RV32-NEXT: vand.vx v8, v8, a5 1277; RV32-NEXT: addi a1, a1, -241 1278; RV32-NEXT: addi a2, a2, 819 1279; RV32-NEXT: addi a3, a3, 1365 1280; RV32-NEXT: vsll.vi v8, v8, 24 1281; RV32-NEXT: vsll.vi v16, v16, 8 1282; RV32-NEXT: vor.vv v8, v8, v16 1283; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1284; RV32-NEXT: vmv.v.x v16, a1 1285; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1286; RV32-NEXT: vor.vv v10, v14, v10 1287; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1288; RV32-NEXT: vmv.v.x v14, a2 1289; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1290; RV32-NEXT: vor.vv v8, v12, v8 1291; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1292; RV32-NEXT: vmv.v.x v12, a3 1293; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1294; RV32-NEXT: vor.vv v8, v8, v10 1295; RV32-NEXT: vsrl.vi v10, v8, 4 1296; RV32-NEXT: vand.vv v8, v8, v16 1297; RV32-NEXT: vand.vv v10, v10, v16 1298; RV32-NEXT: vsll.vi v8, v8, 4 1299; RV32-NEXT: vor.vv v8, v10, v8 1300; RV32-NEXT: vsrl.vi v10, v8, 2 1301; RV32-NEXT: vand.vv v8, v8, v14 1302; RV32-NEXT: vand.vv v10, v10, v14 1303; RV32-NEXT: vsll.vi v8, v8, 2 1304; RV32-NEXT: vor.vv v8, v10, v8 1305; RV32-NEXT: vsrl.vi v10, v8, 1 1306; RV32-NEXT: vand.vv v8, v8, v12 1307; RV32-NEXT: vand.vv v10, v10, v12 1308; RV32-NEXT: vadd.vv v8, v8, v8 1309; RV32-NEXT: vor.vv v8, v10, v8 1310; RV32-NEXT: addi sp, sp, 16 1311; RV32-NEXT: .cfi_def_cfa_offset 0 1312; RV32-NEXT: ret 1313; 1314; RV64-LABEL: vp_bitreverse_v4i64_unmasked: 1315; RV64: # %bb.0: 1316; RV64-NEXT: lui a1, 4080 1317; RV64-NEXT: li a2, 255 1318; RV64-NEXT: li a3, 56 1319; RV64-NEXT: lui a4, 16 1320; RV64-NEXT: li a5, 40 1321; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1322; RV64-NEXT: vsrl.vi v12, v8, 24 1323; RV64-NEXT: vsrl.vi v14, v8, 8 1324; 
RV64-NEXT: addiw a0, a4, -256 1325; RV64-NEXT: vsrl.vx v10, v8, a3 1326; RV64-NEXT: vsrl.vx v16, v8, a5 1327; RV64-NEXT: vand.vx v16, v16, a0 1328; RV64-NEXT: vor.vv v10, v16, v10 1329; RV64-NEXT: vand.vx v16, v8, a1 1330; RV64-NEXT: slli a2, a2, 24 1331; RV64-NEXT: vand.vx v12, v12, a1 1332; RV64-NEXT: vsll.vi v16, v16, 24 1333; RV64-NEXT: vand.vx v14, v14, a2 1334; RV64-NEXT: vor.vv v12, v14, v12 1335; RV64-NEXT: vand.vx v14, v8, a2 1336; RV64-NEXT: vsll.vi v14, v14, 8 1337; RV64-NEXT: vor.vv v14, v16, v14 1338; RV64-NEXT: vsll.vx v16, v8, a3 1339; RV64-NEXT: vand.vx v8, v8, a0 1340; RV64-NEXT: vsll.vx v8, v8, a5 1341; RV64-NEXT: vor.vv v8, v16, v8 1342; RV64-NEXT: lui a0, 61681 1343; RV64-NEXT: lui a1, 209715 1344; RV64-NEXT: lui a2, 349525 1345; RV64-NEXT: addiw a0, a0, -241 1346; RV64-NEXT: addiw a1, a1, 819 1347; RV64-NEXT: addiw a2, a2, 1365 1348; RV64-NEXT: slli a3, a0, 32 1349; RV64-NEXT: slli a4, a1, 32 1350; RV64-NEXT: add a0, a0, a3 1351; RV64-NEXT: slli a3, a2, 32 1352; RV64-NEXT: add a1, a1, a4 1353; RV64-NEXT: add a2, a2, a3 1354; RV64-NEXT: vor.vv v10, v12, v10 1355; RV64-NEXT: vor.vv v8, v8, v14 1356; RV64-NEXT: vor.vv v8, v8, v10 1357; RV64-NEXT: vsrl.vi v10, v8, 4 1358; RV64-NEXT: vand.vx v8, v8, a0 1359; RV64-NEXT: vand.vx v10, v10, a0 1360; RV64-NEXT: vsll.vi v8, v8, 4 1361; RV64-NEXT: vor.vv v8, v10, v8 1362; RV64-NEXT: vsrl.vi v10, v8, 2 1363; RV64-NEXT: vand.vx v8, v8, a1 1364; RV64-NEXT: vand.vx v10, v10, a1 1365; RV64-NEXT: vsll.vi v8, v8, 2 1366; RV64-NEXT: vor.vv v8, v10, v8 1367; RV64-NEXT: vsrl.vi v10, v8, 1 1368; RV64-NEXT: vand.vx v8, v8, a2 1369; RV64-NEXT: vand.vx v10, v10, a2 1370; RV64-NEXT: vadd.vv v8, v8, v8 1371; RV64-NEXT: vor.vv v8, v10, v8 1372; RV64-NEXT: ret 1373 %v = call <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl) 1374 ret <4 x i64> %v 1375} 1376 1377declare <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64>, <8 x i1>, i32) 1378 1379define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { 1380; RV32-LABEL: vp_bitreverse_v8i64: 1381; RV32: # %bb.0: 1382; RV32-NEXT: addi sp, sp, -16 1383; RV32-NEXT: .cfi_def_cfa_offset 16 1384; RV32-NEXT: lui a4, 1044480 1385; RV32-NEXT: li a3, 56 1386; RV32-NEXT: lui a5, 16 1387; RV32-NEXT: li a2, 40 1388; RV32-NEXT: lui a1, 4080 1389; RV32-NEXT: addi a6, sp, 8 1390; RV32-NEXT: sw a4, 8(sp) 1391; RV32-NEXT: sw zero, 12(sp) 1392; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1393; RV32-NEXT: vlse64.v v12, (a6), zero 1394; RV32-NEXT: lui a4, 61681 1395; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1396; RV32-NEXT: vsll.vx v16, v8, a3, v0.t 1397; RV32-NEXT: addi a5, a5, -256 1398; RV32-NEXT: vand.vx v20, v8, a5, v0.t 1399; RV32-NEXT: vsll.vx v20, v20, a2, v0.t 1400; RV32-NEXT: vor.vv v16, v16, v20, v0.t 1401; RV32-NEXT: vand.vx v20, v8, a1, v0.t 1402; RV32-NEXT: vsll.vi v20, v20, 24, v0.t 1403; RV32-NEXT: vand.vv v24, v8, v12, v0.t 1404; RV32-NEXT: vsll.vi v24, v24, 8, v0.t 1405; RV32-NEXT: vor.vv v20, v20, v24, v0.t 1406; RV32-NEXT: vor.vv v16, v16, v20, v0.t 1407; RV32-NEXT: vsrl.vx v20, v8, a3, v0.t 1408; RV32-NEXT: lui a3, 209715 1409; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t 1410; RV32-NEXT: lui a2, 349525 1411; RV32-NEXT: addi a4, a4, -241 1412; RV32-NEXT: addi a3, a3, 819 1413; RV32-NEXT: addi a2, a2, 1365 1414; RV32-NEXT: vand.vx v24, v24, a5, v0.t 1415; RV32-NEXT: vor.vv v20, v24, v20, v0.t 1416; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t 1417; RV32-NEXT: vand.vx v24, v24, a1, v0.t 1418; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t 1419; RV32-NEXT: vand.vv v8, 
v8, v12, v0.t 1420; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1421; RV32-NEXT: vmv.v.x v28, a4 1422; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1423; RV32-NEXT: vor.vv v8, v8, v24, v0.t 1424; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1425; RV32-NEXT: vmv.v.x v12, a3 1426; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1427; RV32-NEXT: vor.vv v20, v8, v20, v0.t 1428; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1429; RV32-NEXT: vmv.v.x v8, a2 1430; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1431; RV32-NEXT: vor.vv v16, v16, v20, v0.t 1432; RV32-NEXT: vsrl.vi v20, v16, 4, v0.t 1433; RV32-NEXT: vand.vv v20, v20, v28, v0.t 1434; RV32-NEXT: vand.vv v16, v16, v28, v0.t 1435; RV32-NEXT: vsll.vi v16, v16, 4, v0.t 1436; RV32-NEXT: vor.vv v16, v20, v16, v0.t 1437; RV32-NEXT: vsrl.vi v20, v16, 2, v0.t 1438; RV32-NEXT: vand.vv v20, v20, v12, v0.t 1439; RV32-NEXT: vand.vv v12, v16, v12, v0.t 1440; RV32-NEXT: vsll.vi v12, v12, 2, v0.t 1441; RV32-NEXT: vor.vv v12, v20, v12, v0.t 1442; RV32-NEXT: vsrl.vi v16, v12, 1, v0.t 1443; RV32-NEXT: vand.vv v16, v16, v8, v0.t 1444; RV32-NEXT: vand.vv v8, v12, v8, v0.t 1445; RV32-NEXT: vsll.vi v8, v8, 1, v0.t 1446; RV32-NEXT: vor.vv v8, v16, v8, v0.t 1447; RV32-NEXT: addi sp, sp, 16 1448; RV32-NEXT: .cfi_def_cfa_offset 0 1449; RV32-NEXT: ret 1450; 1451; RV64-LABEL: vp_bitreverse_v8i64: 1452; RV64: # %bb.0: 1453; RV64-NEXT: lui a1, 4080 1454; RV64-NEXT: li a3, 255 1455; RV64-NEXT: li a2, 56 1456; RV64-NEXT: lui a4, 16 1457; RV64-NEXT: lui a5, 61681 1458; RV64-NEXT: lui a6, 209715 1459; RV64-NEXT: lui a7, 349525 1460; RV64-NEXT: addiw a5, a5, -241 1461; RV64-NEXT: addiw a6, a6, 819 1462; RV64-NEXT: addiw a7, a7, 1365 1463; RV64-NEXT: slli t0, a5, 32 1464; RV64-NEXT: add t0, a5, t0 1465; RV64-NEXT: slli a5, a6, 32 1466; RV64-NEXT: add a6, a6, a5 1467; RV64-NEXT: slli a5, a7, 32 1468; RV64-NEXT: add a5, a7, a5 1469; RV64-NEXT: li a7, 40 1470; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1471; RV64-NEXT: vand.vx v12, v8, a1, v0.t 1472; RV64-NEXT: slli a3, a3, 24 1473; RV64-NEXT: addiw a0, a4, -256 1474; RV64-NEXT: vsll.vi v12, v12, 24, v0.t 1475; RV64-NEXT: vand.vx v16, v8, a3, v0.t 1476; RV64-NEXT: vsll.vi v16, v16, 8, v0.t 1477; RV64-NEXT: vor.vv v12, v12, v16, v0.t 1478; RV64-NEXT: vsll.vx v16, v8, a2, v0.t 1479; RV64-NEXT: vand.vx v20, v8, a0, v0.t 1480; RV64-NEXT: vsll.vx v20, v20, a7, v0.t 1481; RV64-NEXT: vor.vv v16, v16, v20, v0.t 1482; RV64-NEXT: vor.vv v12, v16, v12, v0.t 1483; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t 1484; RV64-NEXT: vsrl.vx v20, v8, a7, v0.t 1485; RV64-NEXT: vand.vx v20, v20, a0, v0.t 1486; RV64-NEXT: vor.vv v16, v20, v16, v0.t 1487; RV64-NEXT: vsrl.vi v20, v8, 24, v0.t 1488; RV64-NEXT: vand.vx v20, v20, a1, v0.t 1489; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t 1490; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1491; RV64-NEXT: vor.vv v8, v8, v20, v0.t 1492; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1493; RV64-NEXT: vor.vv v8, v12, v8, v0.t 1494; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t 1495; RV64-NEXT: vand.vx v12, v12, t0, v0.t 1496; RV64-NEXT: vand.vx v8, v8, t0, v0.t 1497; RV64-NEXT: vsll.vi v8, v8, 4, v0.t 1498; RV64-NEXT: vor.vv v8, v12, v8, v0.t 1499; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t 1500; RV64-NEXT: vand.vx v12, v12, a6, v0.t 1501; RV64-NEXT: vand.vx v8, v8, a6, v0.t 1502; RV64-NEXT: vsll.vi v8, v8, 2, v0.t 1503; RV64-NEXT: vor.vv v8, v12, v8, v0.t 1504; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t 1505; RV64-NEXT: vand.vx v12, v12, a5, v0.t 1506; RV64-NEXT: vand.vx v8, v8, a5, v0.t 1507; RV64-NEXT: vsll.vi v8, v8, 1, v0.t 1508; RV64-NEXT: vor.vv v8, v12, v8, 
v0.t 1509; RV64-NEXT: ret 1510 %v = call <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl) 1511 ret <8 x i64> %v 1512} 1513 1514define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { 1515; RV32-LABEL: vp_bitreverse_v8i64_unmasked: 1516; RV32: # %bb.0: 1517; RV32-NEXT: addi sp, sp, -16 1518; RV32-NEXT: .cfi_def_cfa_offset 16 1519; RV32-NEXT: lui a1, 1044480 1520; RV32-NEXT: li a2, 56 1521; RV32-NEXT: lui a3, 16 1522; RV32-NEXT: li a4, 40 1523; RV32-NEXT: lui a5, 4080 1524; RV32-NEXT: addi a6, sp, 8 1525; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1526; RV32-NEXT: vsrl.vi v20, v8, 24 1527; RV32-NEXT: sw a1, 8(sp) 1528; RV32-NEXT: sw zero, 12(sp) 1529; RV32-NEXT: vsll.vx v16, v8, a2 1530; RV32-NEXT: addi a1, a3, -256 1531; RV32-NEXT: vsrl.vx v12, v8, a2 1532; RV32-NEXT: vsrl.vx v24, v8, a4 1533; RV32-NEXT: vand.vx v28, v8, a1 1534; RV32-NEXT: vand.vx v24, v24, a1 1535; RV32-NEXT: vor.vv v12, v24, v12 1536; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1537; RV32-NEXT: vlse64.v v24, (a6), zero 1538; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1539; RV32-NEXT: vsll.vx v28, v28, a4 1540; RV32-NEXT: vor.vv v16, v16, v28 1541; RV32-NEXT: vsrl.vi v28, v8, 8 1542; RV32-NEXT: vand.vx v20, v20, a5 1543; RV32-NEXT: vand.vv v28, v28, v24 1544; RV32-NEXT: vor.vv v20, v28, v20 1545; RV32-NEXT: lui a1, 61681 1546; RV32-NEXT: lui a2, 209715 1547; RV32-NEXT: lui a3, 349525 1548; RV32-NEXT: vand.vv v24, v8, v24 1549; RV32-NEXT: vand.vx v8, v8, a5 1550; RV32-NEXT: addi a1, a1, -241 1551; RV32-NEXT: addi a2, a2, 819 1552; RV32-NEXT: addi a3, a3, 1365 1553; RV32-NEXT: vsll.vi v8, v8, 24 1554; RV32-NEXT: vsll.vi v24, v24, 8 1555; RV32-NEXT: vor.vv v8, v8, v24 1556; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1557; RV32-NEXT: vmv.v.x v24, a1 1558; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1559; RV32-NEXT: vor.vv v12, v20, v12 1560; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1561; RV32-NEXT: vmv.v.x v20, a2 1562; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1563; RV32-NEXT: vor.vv v8, v16, v8 1564; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1565; RV32-NEXT: vmv.v.x v16, a3 1566; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1567; RV32-NEXT: vor.vv v8, v8, v12 1568; RV32-NEXT: vsrl.vi v12, v8, 4 1569; RV32-NEXT: vand.vv v8, v8, v24 1570; RV32-NEXT: vand.vv v12, v12, v24 1571; RV32-NEXT: vsll.vi v8, v8, 4 1572; RV32-NEXT: vor.vv v8, v12, v8 1573; RV32-NEXT: vsrl.vi v12, v8, 2 1574; RV32-NEXT: vand.vv v8, v8, v20 1575; RV32-NEXT: vand.vv v12, v12, v20 1576; RV32-NEXT: vsll.vi v8, v8, 2 1577; RV32-NEXT: vor.vv v8, v12, v8 1578; RV32-NEXT: vsrl.vi v12, v8, 1 1579; RV32-NEXT: vand.vv v8, v8, v16 1580; RV32-NEXT: vand.vv v12, v12, v16 1581; RV32-NEXT: vadd.vv v8, v8, v8 1582; RV32-NEXT: vor.vv v8, v12, v8 1583; RV32-NEXT: addi sp, sp, 16 1584; RV32-NEXT: .cfi_def_cfa_offset 0 1585; RV32-NEXT: ret 1586; 1587; RV64-LABEL: vp_bitreverse_v8i64_unmasked: 1588; RV64: # %bb.0: 1589; RV64-NEXT: lui a1, 4080 1590; RV64-NEXT: li a2, 255 1591; RV64-NEXT: li a3, 56 1592; RV64-NEXT: lui a4, 16 1593; RV64-NEXT: li a5, 40 1594; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1595; RV64-NEXT: vsrl.vi v16, v8, 24 1596; RV64-NEXT: vsrl.vi v20, v8, 8 1597; RV64-NEXT: addiw a0, a4, -256 1598; RV64-NEXT: vsrl.vx v12, v8, a3 1599; RV64-NEXT: vsrl.vx v24, v8, a5 1600; RV64-NEXT: vand.vx v24, v24, a0 1601; RV64-NEXT: vor.vv v12, v24, v12 1602; RV64-NEXT: vand.vx v24, v8, a1 1603; RV64-NEXT: slli a2, a2, 24 1604; RV64-NEXT: vand.vx v16, v16, a1 1605; RV64-NEXT: vsll.vi v24, v24, 24 1606; 
; RV64-NEXT: vand.vx v20, v20, a2
; RV64-NEXT: vor.vv v16, v20, v16
; RV64-NEXT: vand.vx v20, v8, a2
; RV64-NEXT: vsll.vi v20, v20, 8
; RV64-NEXT: vor.vv v20, v24, v20
; RV64-NEXT: vsll.vx v24, v8, a3
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vx v8, v8, a5
; RV64-NEXT: vor.vv v8, v24, v8
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: lui a1, 209715
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: addiw a1, a1, 819
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: slli a3, a0, 32
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: vor.vv v12, v16, v12
; RV64-NEXT: vor.vv v8, v8, v20
; RV64-NEXT: vor.vv v8, v8, v12
; RV64-NEXT: vsrl.vi v12, v8, 4
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vand.vx v12, v12, a0
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: vsrl.vi v12, v8, 2
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vand.vx v12, v12, a1
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: vsrl.vi v12, v8, 1
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vand.vx v12, v12, a2
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v12, v8
; RV64-NEXT: ret
  %v = call <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

declare <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64>, <15 x i1>, i32)

define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v15i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv8r.v v24, v8
; RV32-NEXT: lui a2, 1044480
; RV32-NEXT: lui a3, 61681
; RV32-NEXT: lui a4, 209715
; RV32-NEXT: lui a5, 349525
; RV32-NEXT: li a1, 56
; RV32-NEXT: lui a6, 16
; RV32-NEXT: sw a2, 16(sp)
; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: addi a2, a3, -241
; RV32-NEXT: sw a2, 40(sp)
; RV32-NEXT: sw a2, 44(sp)
; RV32-NEXT: li a2, 40
; RV32-NEXT: addi a3, a4, 819
; RV32-NEXT: sw a3, 32(sp)
; RV32-NEXT: sw a3, 36(sp)
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: addi a4, a5, 1365
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
; RV32-NEXT: addi a5, a6, -256
; RV32-NEXT: sw a4, 24(sp)
; RV32-NEXT: sw a4, 28(sp)
; RV32-NEXT: vand.vx v8, v8, a5, v0.t
; RV32-NEXT: vsll.vx v8, v8, a2, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 4080
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vx v16, v24, a3, v0.t
; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
; RV32-NEXT: addi a4, sp, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v16, v24, a1, v0.t
; RV32-NEXT: vsrl.vx v8, v24, a2, v0.t
; RV32-NEXT: vand.vx v8, v8, a5, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v24, 24, v0.t
; RV32-NEXT: vand.vx v16, v8, a3, v0.t
; RV32-NEXT: vsrl.vi v8, v24, 8, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: vand.vv v24, v8, v24, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v24, v24, 4, v0.t
; RV32-NEXT: vor.vv v24, v16, v24, v0.t
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vand.vv v24, v24, v8, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v24, v24, 2, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT: vand.vv v24, v24, v8, v0.t
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v24, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v15i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: li a2, 255
; RV64-NEXT: li a3, 56
; RV64-NEXT: lui a4, 16
; RV64-NEXT: li a5, 40
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v8, a1, v0.t
; RV64-NEXT: slli a2, a2, 24
; RV64-NEXT: addiw a0, a4, -256
; RV64-NEXT: vsll.vi v16, v16, 24, v0.t
; RV64-NEXT: vand.vx v24, v8, a2, v0.t
; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT: vsll.vx v24, v8, a3, v0.t
; RV64-NEXT: vand.vx v16, v8, a0, v0.t
; RV64-NEXT: vsll.vx v16, v16, a5, v0.t
; RV64-NEXT: vor.vv v16, v24, v16, v0.t
; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vor.vv v24, v16, v24, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vor.vv v8, v8, v24, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: lui a1, 209715
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: addiw a1, a1, 819
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: slli a3, a0, 32
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vand.vx v8, v8, a1, v0.t
; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a2, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %v = call <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}

define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v15i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: lui a2, 61681
; RV32-NEXT: lui a3, 209715
; RV32-NEXT: lui a4, 349525
; RV32-NEXT: li a5, 56
; RV32-NEXT: lui a6, 16
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a5
; RV32-NEXT: vsrl.vx v24, v8, a5
; RV32-NEXT: li a5, 40
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: addi a1, a2, -241
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 4080
; RV32-NEXT: addi a2, a3, 819
; RV32-NEXT: sw a2, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: addi a3, a4, 1365
; RV32-NEXT: addi a4, a6, -256
; RV32-NEXT: vsrl.vx v0, v8, a5
; RV32-NEXT: sw a3, 24(sp)
; RV32-NEXT: sw a3, 28(sp)
; RV32-NEXT: vand.vx v0, v0, a4
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: addi a3, sp, 48
; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vand.vx v0, v8, a4
; RV32-NEXT: vsll.vx v0, v0, a5
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v0, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a1
; RV32-NEXT: vsrl.vi v24, v8, 8
; RV32-NEXT: vand.vv v24, v24, v0
; RV32-NEXT: vor.vv v16, v24, v16
; RV32-NEXT: vand.vv v24, v8, v0
; RV32-NEXT: vand.vx v8, v8, a1
; RV32-NEXT: vsll.vi v8, v8, 24
; RV32-NEXT: vsll.vi v24, v24, 8
; RV32-NEXT: vor.vv v0, v8, v24
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v24, v16, v0
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v24, v8, 4
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vand.vv v16, v24, v16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 2
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v15i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: li a2, 255
; RV64-NEXT: li a3, 56
; RV64-NEXT: lui a4, 16
; RV64-NEXT: li a5, 40
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 24
; RV64-NEXT: addiw a0, a4, -256
; RV64-NEXT: vsrl.vx v16, v8, a3
; RV64-NEXT: vsrl.vx v0, v8, a5
; RV64-NEXT: vand.vx v0, v0, a0
; RV64-NEXT: vor.vv v16, v0, v16
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vi v0, v8, 8
; RV64-NEXT: slli a2, a2, 24
; RV64-NEXT: vand.vx v24, v24, a1
; RV64-NEXT: vand.vx v0, v0, a2
; RV64-NEXT: vor.vv v24, v0, v24
; RV64-NEXT: vand.vx v0, v8, a1
; RV64-NEXT: vsll.vi v0, v0, 24
; RV64-NEXT: vand.vx v16, v8, a2
; RV64-NEXT: vsll.vi v16, v16, 8
; RV64-NEXT: vor.vv v0, v0, v16
; RV64-NEXT: vsll.vx v16, v8, a3
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vx v8, v8, a5
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: lui a1, 209715
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: addiw a1, a1, 819
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: slli a3, a0, 32
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v24, v16
; RV64-NEXT: vor.vv v8, v8, v0
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 2
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vand.vx v16, v16, a1
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vand.vx v16, v16, a2
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %v = call <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

declare <16 x i64> @llvm.vp.bitreverse.v16i64(<16 x i64>, <16 x i1>, i32)

define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v16i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv8r.v v24, v8
; RV32-NEXT: lui a2, 1044480
; RV32-NEXT: lui a3, 61681
; RV32-NEXT: lui a4, 209715
; RV32-NEXT: lui a5, 349525
; RV32-NEXT: li a1, 56
; RV32-NEXT: lui a6, 16
; RV32-NEXT: sw a2, 16(sp)
; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: addi a2, a3, -241
; RV32-NEXT: sw a2, 40(sp)
; RV32-NEXT: sw a2, 44(sp)
; RV32-NEXT: li a2, 40
; RV32-NEXT: addi a3, a4, 819
; RV32-NEXT: sw a3, 32(sp)
; RV32-NEXT: sw a3, 36(sp)
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: addi a4, a5, 1365
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
; RV32-NEXT: addi a5, a6, -256
; RV32-NEXT: sw a4, 24(sp)
; RV32-NEXT: sw a4, 28(sp)
; RV32-NEXT: vand.vx v8, v8, a5, v0.t
; RV32-NEXT: vsll.vx v8, v8, a2, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 4080
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vx v16, v24, a3, v0.t
; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
; RV32-NEXT: addi a4, sp, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v16, v24, a1, v0.t
; RV32-NEXT: vsrl.vx v8, v24, a2, v0.t
; RV32-NEXT: vand.vx v8, v8, a5, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v24, 24, v0.t
; RV32-NEXT: vand.vx v16, v8, a3, v0.t
; RV32-NEXT: vsrl.vi v8, v24, 8, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: vand.vv v24, v8, v24, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v24, v24, 4, v0.t
; RV32-NEXT: vor.vv v24, v16, v24, v0.t
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vand.vv v24, v24, v8, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v24, v24, 2, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT: vand.vv v24, v24, v8, v0.t
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v24, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v16i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: li a2, 255
; RV64-NEXT: li a3, 56
; RV64-NEXT: lui a4, 16
; RV64-NEXT: li a5, 40
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v8, a1, v0.t
; RV64-NEXT: slli a2, a2, 24
; RV64-NEXT: addiw a0, a4, -256
; RV64-NEXT: vsll.vi v16, v16, 24, v0.t
; RV64-NEXT: vand.vx v24, v8, a2, v0.t
; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT: vsll.vx v24, v8, a3, v0.t
; RV64-NEXT: vand.vx v16, v8, a0, v0.t
; RV64-NEXT: vsll.vx v16, v16, a5, v0.t
; RV64-NEXT: vor.vv v16, v24, v16, v0.t
; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v16, v24, v0.t
; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vor.vv v24, v16, v24, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vor.vv v8, v8, v24, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: lui a1, 209715
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: addiw a1, a1, 819
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: slli a3, a0, 32
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT: vand.vx v16, v16, a1, v0.t
; RV64-NEXT: vand.vx v8, v8, a1, v0.t
; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a2, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %v = call <16 x i64> @llvm.vp.bitreverse.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v16i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: lui a2, 61681
; RV32-NEXT: lui a3, 209715
; RV32-NEXT: lui a4, 349525
; RV32-NEXT: li a5, 56
; RV32-NEXT: lui a6, 16
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a5
; RV32-NEXT: vsrl.vx v24, v8, a5
; RV32-NEXT: li a5, 40
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: addi a1, a2, -241
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 4080
; RV32-NEXT: addi a2, a3, 819
; RV32-NEXT: sw a2, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: addi a3, a4, 1365
; RV32-NEXT: addi a4, a6, -256
; RV32-NEXT: vsrl.vx v0, v8, a5
; RV32-NEXT: sw a3, 24(sp)
; RV32-NEXT: sw a3, 28(sp)
; RV32-NEXT: vand.vx v0, v0, a4
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: addi a3, sp, 48
; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vand.vx v0, v8, a4
; RV32-NEXT: vsll.vx v0, v0, a5
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v0, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a1
; RV32-NEXT: vsrl.vi v24, v8, 8
; RV32-NEXT: vand.vv v24, v24, v0
; RV32-NEXT: vor.vv v16, v24, v16
; RV32-NEXT: vand.vv v24, v8, v0
; RV32-NEXT: vand.vx v8, v8, a1
; RV32-NEXT: vsll.vi v8, v8, 24
; RV32-NEXT: vsll.vi v24, v24, 8
; RV32-NEXT: vor.vv v0, v8, v24
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v24, v16, v0
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v24, v8, 4
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vand.vv v16, v24, v16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 2
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v16i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: lui a1, 4080
; RV64-NEXT: li a2, 255
; RV64-NEXT: li a3, 56
; RV64-NEXT: lui a4, 16
; RV64-NEXT: li a5, 40
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 24
; RV64-NEXT: addiw a0, a4, -256
; RV64-NEXT: vsrl.vx v16, v8, a3
; RV64-NEXT: vsrl.vx v0, v8, a5
; RV64-NEXT: vand.vx v0, v0, a0
; RV64-NEXT: vor.vv v16, v0, v16
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT: vsrl.vi v0, v8, 8
; RV64-NEXT: slli a2, a2, 24
; RV64-NEXT: vand.vx v24, v24, a1
; RV64-NEXT: vand.vx v0, v0, a2
; RV64-NEXT: vor.vv v24, v0, v24
; RV64-NEXT: vand.vx v0, v8, a1
; RV64-NEXT: vsll.vi v0, v0, 24
; RV64-NEXT: vand.vx v16, v8, a2
; RV64-NEXT: vsll.vi v16, v16, 8
; RV64-NEXT: vor.vv v0, v0, v16
; RV64-NEXT: vsll.vx v16, v8, a3
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vsll.vx v8, v8, a5
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: lui a1, 209715
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: addiw a1, a1, 819
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: slli a3, a0, 32
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vor.vv v16, v24, v16
; RV64-NEXT: vor.vv v8, v8, v0
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 2
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vand.vx v16, v16, a1
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vand.vx v16, v16, a2
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vor.vv v8, v16, v8
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %v = call <16 x i64> @llvm.vp.bitreverse.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

declare <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16>, <128 x i1>, i32)

define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v128i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v0, 8
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: lui a2, 3
; CHECK-NEXT: addi a3, a0, -64
; CHECK-NEXT: sltu a0, a0, a3
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a3, a0, a3
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: addi a4, a1, -241
; CHECK-NEXT: addi a1, a2, 819
; CHECK-NEXT: addi a0, a0, 1365
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v16, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
  ret <128 x i16> %v
}

define <128 x i16> @vp_bitreverse_v128i16_unmasked(<128 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v128i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v8, 8
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: lui a3, 3
; CHECK-NEXT: addi a4, a0, -64
; CHECK-NEXT: sltu a0, a0, a4
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a4
; CHECK-NEXT: lui a4, 5
; CHECK-NEXT: vor.vv v8, v8, v24
; CHECK-NEXT: addi a2, a2, -241
; CHECK-NEXT: addi a3, a3, 819
; CHECK-NEXT: addi a4, a4, 1365
; CHECK-NEXT: vsrl.vi v24, v8, 4
; CHECK-NEXT: vand.vx v8, v8, a2
; CHECK-NEXT: vand.vx v24, v24, a2
; CHECK-NEXT: vsll.vi v8, v8, 4
; CHECK-NEXT: vor.vv v8, v24, v8
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v16, 8
; CHECK-NEXT: vsll.vi v16, v16, 8
; CHECK-NEXT: vor.vv v16, v16, v24
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a3
; CHECK-NEXT: vand.vx v24, v24, a3
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vor.vv v8, v24, v8
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v16, 4
; CHECK-NEXT: vand.vx v16, v16, a2
; CHECK-NEXT: vand.vx v24, v24, a2
; CHECK-NEXT: vsll.vi v16, v16, 4
; CHECK-NEXT: vor.vv v16, v24, v16
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v8, 1
; CHECK-NEXT: vand.vx v8, v8, a4
; CHECK-NEXT: vand.vx v24, v24, a4
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vor.vv v8, v24, v8
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v24, v16, 2
; CHECK-NEXT: vand.vx v16, v16, a3
; CHECK-NEXT: vand.vx v24, v24, a3
; CHECK-NEXT: vsll.vi v16, v16, 2
; CHECK-NEXT: vor.vv v16, v24, v16
; CHECK-NEXT: vsrl.vi v24, v16, 1
; CHECK-NEXT: vand.vx v16, v16, a4
; CHECK-NEXT: vand.vx v24, v24, a4
; CHECK-NEXT: vadd.vv v16, v16, v16
; CHECK-NEXT: vor.vv v16, v24, v16
; CHECK-NEXT: ret
  %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> splat (i1 true), i32 %evl)
  ret <128 x i16> %v
}