; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVBB,RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVBB,RV32-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVBB,RV64-ZVBB

define <2 x i1> @reverse_v2i1(<2 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v2i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmsne.vi v0, v9, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v8, v0
; ZVBB-NEXT:    vsrl.vi v0, v8, 6
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i1> %res
}

define <4 x i1> @reverse_v4i1(<4 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v4i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 3
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v8, v0
; ZVBB-NEXT:    vsrl.vi v0, v8, 4
; ZVBB-NEXT:    ret
  %res = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i1> %res
}

define <8 x i1> @reverse_v8i1(<8 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v8i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 7
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v8i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i1> %res
}

define <16 x i1> @reverse_v16i1(<16 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v16i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vid.v v9
; NO-ZVBB-NEXT:    vrsub.vi v9, v9, 15
; NO-ZVBB-NEXT:    vrgather.vv v10, v8, v9
; NO-ZVBB-NEXT:    vmsne.vi v0, v10, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v16i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <16 x i1> %a, <16 x i1> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i1> %res
}

define <32 x i1> @reverse_v32i1(<32 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v32i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    li a0, 32
; NO-ZVBB-NEXT:    csrr a1, vlenb
; NO-ZVBB-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vid.v v8
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v10, 0
; NO-ZVBB-NEXT:    addi a2, a1, -1
; NO-ZVBB-NEXT:    slli a1, a1, 1
; NO-ZVBB-NEXT:    vmerge.vim v10, v10, 1, v0
; NO-ZVBB-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vrsub.vx v8, v8, a2
; NO-ZVBB-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vrgatherei16.vv v13, v10, v8
; NO-ZVBB-NEXT:    vrgatherei16.vv v12, v11, v8
; NO-ZVBB-NEXT:    addi a1, a1, -32
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vx v8, v12, a1
; NO-ZVBB-NEXT:    vmsne.vi v0, v8, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v32i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i1> %res
}

define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; NO-ZVBB-LABEL: reverse_v64i1:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    li a0, 64
; NO-ZVBB-NEXT:    csrr a1, vlenb
; NO-ZVBB-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vid.v v12
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; NO-ZVBB-NEXT:    vmv.v.i v8, 0
; NO-ZVBB-NEXT:    addi a2, a1, -1
; NO-ZVBB-NEXT:    slli a1, a1, 2
; NO-ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; NO-ZVBB-NEXT:    vrsub.vx v12, v12, a2
; NO-ZVBB-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; NO-ZVBB-NEXT:    vrgatherei16.vv v19, v8, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v18, v9, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v17, v10, v12
; NO-ZVBB-NEXT:    vrgatherei16.vv v16, v11, v12
; NO-ZVBB-NEXT:    addi a1, a1, -64
; NO-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vx v8, v16, a1
; NO-ZVBB-NEXT:    vmsne.vi v0, v8, 0
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v64i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vbrev.v v0, v0
; ZVBB-NEXT:    ret
  %res = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <64 x i1> %res
}

define <128 x i1> @reverse_v128i1(<128 x i1> %a) {
; CHECK-LABEL: reverse_v128i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vmv.v.i v16, 0
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    vmerge.vim v16, v16, 1, v0
; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v24, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v15, v16, v24
; CHECK-NEXT:    vrgatherei16.vv v14, v17, v24
; CHECK-NEXT:    vrgatherei16.vv v13, v18, v24
; CHECK-NEXT:    vrgatherei16.vv v12, v19, v24
; CHECK-NEXT:    vrgatherei16.vv v11, v20, v24
; CHECK-NEXT:    vrgatherei16.vv v10, v21, v24
; CHECK-NEXT:    vrgatherei16.vv v9, v22, v24
; CHECK-NEXT:    vrgatherei16.vv v8, v23, v24
; CHECK-NEXT:    addi a1, a1, -128
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %res = shufflevector <128 x i1> %a, <128 x i1> poison, <128 x i32> <i32 127, i32 126, i32 125, i32 124, i32 123, i32 122, i32 121, i32 120, i32 119, i32 118, i32 117, i32 116, i32 115, i32 114, i32 113, i32 112, i32 111, i32 110, i32 109, i32 108, i32 107, i32 106, i32 105, i32 104, i32 103, i32 102, i32 101, i32 100, i32 99, i32 98, i32 97, i32 96, i32 95, i32 94, i32 93, i32 92, i32 91, i32 90, i32 89, i32 88, i32 87, i32 86, i32 85, i32 84, i32 83, i32 82, i32 81, i32 80, i32 79, i32 78, i32 77, i32 76, i32 75, i32 74, i32 73, i32 72, i32 71, i32 70, i32 69, i32 68, i32 67, i32 66, i32 65, i32 64, i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <128 x i1> %res
}

define <1 x i8> @reverse_v1i8(<1 x i8> %a) {
; CHECK-LABEL: reverse_v1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i8> %a, <1 x i8> poison, <1 x i32> <i32 0>
  ret <1 x i8> %res
}

define <2 x i8> @reverse_v2i8(<2 x i8> %a) {
; NO-ZVBB-LABEL: reverse_v2i8:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vrev8.v v8, v8
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i8> %a, <2 x i8> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i8> %res
}

define <4 x i8> @reverse_v4i8(<4 x i8> %a) {
; CHECK-LABEL: reverse_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i8> %a, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i8> %res
}

define <8 x i8> @reverse_v8i8(<8 x i8> %a) {
; CHECK-LABEL: reverse_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %a, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %res
}

define <16 x i8> @reverse_v16i8(<16 x i8> %a) {
; CHECK-LABEL: reverse_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 15
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %res
}

define <32 x i8> @reverse_v32i8(<32 x i8> %a) {
; CHECK-LABEL: reverse_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v13, v8, v10
; CHECK-NEXT:    vrgatherei16.vv v12, v9, v10
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i8> %res
}

define <64 x i8> @reverse_v64i8(<64 x i8> %a) {
; CHECK-LABEL: reverse_v64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    addi a0, a0, -64
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v19, v8, v12
; CHECK-NEXT:    vrgatherei16.vv v18, v9, v12
; CHECK-NEXT:    vrgatherei16.vv v17, v10, v12
; CHECK-NEXT:    vrgatherei16.vv v16, v11, v12
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <64 x i8> %res
}

define <1 x i16> @reverse_v1i16(<1 x i16> %a) {
; CHECK-LABEL: reverse_v1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i16> %a, <1 x i16> poison, <1 x i32> <i32 0>
  ret <1 x i16> %res
}

define <2 x i16> @reverse_v2i16(<2 x i16> %a) {
; NO-ZVBB-LABEL: reverse_v2i16:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 16
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i16> %res
}

define <4 x i16> @reverse_v4i16(<4 x i16> %a) {
; CHECK-LABEL: reverse_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i16> %a, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i16> %res
}

define <8 x i16> @reverse_v8i16(<8 x i16> %a) {
; CHECK-LABEL: reverse_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %res
}

define <16 x i16> @reverse_v16i16(<16 x i16> %a) {
; CHECK-LABEL: reverse_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i16> %res
}

define <32 x i16> @reverse_v32i16(<32 x i16> %a) {
; CHECK-LABEL: reverse_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x i16> %a, <32 x i16> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i16> %res
}

define <1 x i32> @reverse_v1i32(<1 x i32> %a) {
; CHECK-LABEL: reverse_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i32> %a, <1 x i32> poison, <1 x i32> <i32 0>
  ret <1 x i32> %res
}

define <2 x i32> @reverse_v2i32(<2 x i32> %a) {
; NO-ZVBB-LABEL: reverse_v2i32:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 32
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i32> %a, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i32> %res
}

define <4 x i32> @reverse_v4i32(<4 x i32> %a) {
; CHECK-LABEL: reverse_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %res
}

define <8 x i32> @reverse_v8i32(<8 x i32> %a) {
; CHECK-LABEL: reverse_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32(<16 x i32> %a) {
; CHECK-LABEL: reverse_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v16
; CHECK-NEXT:    vrgather.vv v14, v9, v16
; CHECK-NEXT:    vrgather.vv v13, v10, v16
; CHECK-NEXT:    vrgather.vv v12, v11, v16
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}

define <1 x i64> @reverse_v1i64(<1 x i64> %a) {
; CHECK-LABEL: reverse_v1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x i64> %a, <1 x i64> poison, <1 x i32> <i32 0>
  ret <1 x i64> %res
}

define <2 x i64> @reverse_v2i64(<2 x i64> %a) {
; CHECK-LABEL: reverse_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %res
}

define <4 x i64> @reverse_v4i64(<4 x i64> %a) {
; CHECK-LABEL: reverse_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -4
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %res
}

define <8 x i64> @reverse_v8i64(<8 x i64> %a) {
; CHECK-LABEL: reverse_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i64> %res
}


define <1 x half> @reverse_v1f16(<1 x half> %a) {
; CHECK-LABEL: reverse_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x half> %a, <1 x half> poison, <1 x i32> <i32 0>
  ret <1 x half> %res
}

define <2 x half> @reverse_v2f16(<2 x half> %a) {
; NO-ZVBB-LABEL: reverse_v2f16:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 16
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x half> %res
}

define <4 x half> @reverse_v4f16(<4 x half> %a) {
; CHECK-LABEL: reverse_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x half> %a, <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x half> %res
}

define <8 x half> @reverse_v8f16(<8 x half> %a) {
; CHECK-LABEL: reverse_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 7
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %res
}

define <16 x half> @reverse_v16f16(<16 x half> %a) {
; CHECK-LABEL: reverse_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x half> %res
}

define <32 x half> @reverse_v32f16(<32 x half> %a) {
; CHECK-LABEL: reverse_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <32 x half> %a, <32 x half> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x half> %res
}

define <1 x float> @reverse_v1f32(<1 x float> %a) {
; CHECK-LABEL: reverse_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x float> %a, <1 x float> poison, <1 x i32> <i32 0>
  ret <1 x float> %res
}

define <2 x float> @reverse_v2f32(<2 x float> %a) {
; NO-ZVBB-LABEL: reverse_v2f32:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v9, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v9, v8, 1
; NO-ZVBB-NEXT:    vmv1r.v v8, v9
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v2f32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v8, v8, 32
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x float> %res
}

define <4 x float> @reverse_v4f32(<4 x float> %a) {
; CHECK-LABEL: reverse_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vi v10, v9, 3
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <8 x float> @reverse_v8f32(<8 x float> %a) {
; CHECK-LABEL: reverse_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %res
}

define <16 x float> @reverse_v16f32(<16 x float> %a) {
; CHECK-LABEL: reverse_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v16
; CHECK-NEXT:    vrgather.vv v14, v9, v16
; CHECK-NEXT:    vrgather.vv v13, v10, v16
; CHECK-NEXT:    vrgather.vv v12, v11, v16
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x float> %res
}

define <1 x double> @reverse_v1f64(<1 x double> %a) {
; CHECK-LABEL: reverse_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %res = shufflevector <1 x double> %a, <1 x double> poison, <1 x i32> <i32 0>
  ret <1 x double> %res
}

define <2 x double> @reverse_v2f64(<2 x double> %a) {
; CHECK-LABEL: reverse_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}

define <4 x double> @reverse_v4f64(<4 x double> %a) {
; CHECK-LABEL: reverse_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v10
; CHECK-NEXT:    vrgather.vv v12, v9, v10
; CHECK-NEXT:    addi a0, a0, -4
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    ret
  %res = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %res
}

define <8 x double> @reverse_v8f64(<8 x double> %a) {
; CHECK-LABEL: reverse_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vrgather.vv v19, v8, v12
; CHECK-NEXT:    vrgather.vv v18, v9, v12
; CHECK-NEXT:    vrgather.vv v17, v10, v12
; CHECK-NEXT:    vrgather.vv v16, v11, v12
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x double> %a, <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x double> %res
}


define <3 x i64> @reverse_v3i64(<3 x i64> %a) {
; CHECK-LABEL: reverse_v3i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <3 x i64> %a, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 0>
  ret <3 x i64> %res
}

define <6 x i64> @reverse_v6i64(<6 x i64> %a) {
; CHECK-LABEL: reverse_v6i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vi v16, v12, 5
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <6 x i64> %a, <6 x i64> poison, <6 x i32> <i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <6 x i64> %res
}

define <12 x i64> @reverse_v12i64(<12 x i64> %a) {
; CHECK-LABEL: reverse_v12i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vi v24, v16, 11
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %res = shufflevector <12 x i64> %a, <12 x i64> poison, <12 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <12 x i64> %res
}

define <4 x i8> @reverse_v4i8_2(<2 x i8> %a, <2 x i8> %b) {
; NO-ZVBB-LABEL: reverse_v4i8_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i8_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVBB-NEXT:    vrev8.v v10, v8
; ZVBB-NEXT:    vrev8.v v8, v9
; ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i8> %res
}

define <8 x i8> @reverse_v8i8_2(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: reverse_v8i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %res
}

define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: reverse_v16i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vrsub.vi v12, v11, 15
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT:    vrsub.vi v8, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %res
}

define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: reverse_v32i8_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v15, v8, v12
; CHECK-NEXT:    vrgatherei16.vv v14, v9, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vi v12, v8, 15
; CHECK-NEXT:    vslidedown.vx v8, v14, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v12, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i8> %res
}

define <4 x i16> @reverse_v4i16_2(<2 x i16> %a, <2 x i16> %b) {
; NO-ZVBB-LABEL: reverse_v4i16_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i16_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 16
; ZVBB-NEXT:    vror.vi v8, v9, 16
; ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i16> %res
}

define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: reverse_v8i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %res
}

define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: reverse_v16i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v14, v8, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i16> %res
}

define <32 x i16> @reverse_v32i16_2(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: reverse_v32i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v19, v8, v10
; CHECK-NEXT:    vrgather.vv v18, v9, v10
; CHECK-NEXT:    vrgather.vv v16, v11, v10
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vrsub.vi v20, v8, 15
; CHECK-NEXT:    vmv1r.v v17, v16
; CHECK-NEXT:    vslidedown.vx v8, v16, a0
; CHECK-NEXT:    vrgather.vv v8, v12, v20, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i16> %res
}

define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, <2 x i32> %b) {
; NO-ZVBB-LABEL: reverse_v4i32_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4i32_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 32
; ZVBB-NEXT:    vror.vi v8, v9, 32
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %res
}

define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: reverse_v8i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vrsub.vi v14, v8, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: reverse_v16i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v14
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v16, v14, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}

define <32 x i32> @reverse_v32i32_2(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: reverse_v32i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv4r.v v16, v12
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v20, v12, a1
; CHECK-NEXT:    vrgather.vv v15, v8, v20
; CHECK-NEXT:    vrgather.vv v14, v9, v20
; CHECK-NEXT:    vrgather.vv v13, v10, v20
; CHECK-NEXT:    vrgather.vv v12, v11, v20
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v8, v9, v20
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v20
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vrsub.vi v24, v20, 15
; CHECK-NEXT:    vmv2r.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v16, v24, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x i32> %res
}

define <4 x i64> @reverse_v4i64_2(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: reverse_v4i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vslidedown.vi v8, v9, 1
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    ret
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %res
}

define <8 x i64> @reverse_v8i64_2(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: reverse_v8i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v15
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v15, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i64> %res
}

define <4 x half> @reverse_v4f16_2(<2 x half> %a, <2 x half> %b) {
; NO-ZVBB-LABEL: reverse_v4f16_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4f16_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 16
; ZVBB-NEXT:    vror.vi v8, v9, 16
; ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x half> %res
}

define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: reverse_v8f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vid.v v11
; CHECK-NEXT:    vrsub.vi v12, v11, 7
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    vrsub.vi v8, v11, 3
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %res
}

define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: reverse_v16f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v14, v8, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x half> %res
}

define <32 x half> @reverse_v32f16_2(<16 x half> %a) {
; CHECK-LABEL: reverse_v32f16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    vrgather.vv v8, v9, v12
; CHECK-NEXT:    addi a0, a0, -32
; CHECK-NEXT:    vmv.v.v v9, v8
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    ret
  %res = shufflevector <16 x half> %a, <16 x half> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <32 x half> %res
}

define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) {
; NO-ZVBB-LABEL: reverse_v4f32_2:
; NO-ZVBB:       # %bb.0:
; NO-ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO-ZVBB-NEXT:    vslidedown.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslideup.vi v10, v8, 1
; NO-ZVBB-NEXT:    vslidedown.vi v8, v9, 1
; NO-ZVBB-NEXT:    vslideup.vi v8, v9, 1
; NO-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; NO-ZVBB-NEXT:    vslideup.vi v8, v10, 2
; NO-ZVBB-NEXT:    ret
;
; ZVBB-LABEL: reverse_v4f32_2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVBB-NEXT:    vror.vi v10, v8, 32
; ZVBB-NEXT:    vror.vi v8, v9, 32
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    ret
  %res = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: reverse_v8f32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v9, v9, a1
; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vrgather.vv v12, v11, v9
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vrsub.vi v14, v8, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %res
}

define <16 x float> @reverse_v16f32_2(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: reverse_v16f32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v14
; CHECK-NEXT:    li a1, 255
; CHECK-NEXT:    addi a0, a0, -16
; CHECK-NEXT:    vrsub.vi v16, v14, 7
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x float> %res
}

define <4 x double> @reverse_v4f64_2(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: reverse_v4f64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vslidedown.vi v8, v9, 1
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    ret
  %res = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %res
}

define <8 x double> @reverse_v8f64_2(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: reverse_v8f64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv2r.v v12, v10
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    srli a1, a0, 3
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vrsub.vx v14, v10, a1
; CHECK-NEXT:    vrgather.vv v11, v8, v14
; CHECK-NEXT:    vrgather.vv v10, v9, v14
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v15
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vv v8, v9, v14
; CHECK-NEXT:    vmv.v.i v0, 15
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v15, 3
; CHECK-NEXT:    addi a0, a0, -8
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x double> %res
}

; There is no corresponding v1i256 type, so make sure we don't crash if we try
; to lower via lowerBitreverseShuffle.
define <256 x i1> @reverse_v256i1(<256 x i1> %a) vscale_range(16, 1024) {
; CHECK-LABEL: reverse_v256i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 256
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v13, v10, v8
; CHECK-NEXT:    vrgatherei16.vv v12, v11, v8
; CHECK-NEXT:    addi a1, a1, -256
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v12, a1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %res = shufflevector <256 x i1> %a, <256 x i1> poison, <256 x i32> <i32 255, i32 254, i32 253, i32 252, i32 251, i32 250, i32 249, i32 248, i32 247, i32 246, i32 245, i32 244, i32 243, i32 242, i32 241, i32 240, i32 239, i32 238, i32 237, i32 236, i32 235, i32 234, i32 233, i32 232, i32 231, i32 230, i32 229, i32 228, i32 227, i32 226, i32 225, i32 224, i32 223, i32 222, i32 221, i32 220, i32 219, i32 218, i32 217, i32 216, i32 215, i32 214, i32 213, i32 212, i32 211, i32 210, i32 209, i32 208, i32 207, i32 206, i32 205, i32 204, i32 203, i32 202, i32 201, i32 200, i32 199, i32 198, i32 197, i32 196, i32 195, i32 194, i32 193, i32 192, i32 191, i32 190, i32 189, i32 188, i32 187, i32 186, i32 185, i32 184, i32 183, i32 182, i32 181, i32 180, i32 179, i32 178, i32 177, i32 176, i32 175, i32 174, i32 173, i32 172, i32 171, i32 170, i32 169, i32 168, i32 167, i32 166, i32 165, i32 164, i32 163, i32 162, i32 161, i32 160, i32 159, i32 158, i32 157, i32 156, i32 155, i32 154, i32 153, i32 152, i32 151, i32 150, i32 149, i32 148, i32 147, i32 146, i32 145, i32 144, i32 143, i32 142, i32 141, i32 140, i32 139, i32 138, i32 137, i32 136, i32 135, i32 134, i32 133, i32 132, i32 131, i32 130, i32 129, i32 128, i32 127, i32 126, i32 125, i32 124, i32 123, i32 122, i32 121, i32 120, i32 119, i32 118, i32 117, i32 116, i32 115, i32 114, i32 113, i32 112, i32 111, i32 110, i32 109, i32 108, i32 107, i32 106, i32 105, i32 104, i32 103, i32 102, i32 101, i32 100, i32 99, i32 98, i32 97, i32 96, i32 95, i32 94, i32 93, i32 92, i32 91, i32 90, i32 89, i32 88, i32 87, i32 86, i32 85, i32 84, i32 83, i32 82, i32 81, i32 80, i32 79, i32 78, i32 77, i32 76, i32 75, i32 74, i32 73, i32 72, i32 71, i32 70, i32 69, i32 68, i32 67, i32 66, i32 65, i32 64, i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <256 x i1> %res
}

define <8 x i32> @reverse_v8i32_exact_vlen_128(<8 x i32> %a) vscale_range(2, 2) {
; CHECK-LABEL: reverse_v8i32_exact_vlen_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 3
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

define <16 x i32> @reverse_v16i32_exact_vlen_256(<16 x i32> %a) vscale_range(4, 4) {
; CHECK-LABEL: reverse_v16i32_exact_vlen_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 7
; CHECK-NEXT:    vrgather.vv v11, v8, v12
; CHECK-NEXT:    vrgather.vv v10, v9, v12
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i32> %res
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV32-ZVBB: {{.*}}
; RV64: {{.*}}
; RV64-ZVBB: {{.*}}