; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b \
; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefixes=CHECK,V
; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+zve32f,+zvfh,+zvl256b \
; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVE32F

; This file tests lowering of stride-2 deinterleave shufflevectors (masks
; selecting every even or every odd element of a loaded fixed vector) to
; narrowing shifts (vnsrl.wi / vnsrl.wx) where profitable.  Interleaved-access
; lowering is disabled via -lower-interleaved-accesses=false so the
; shufflevector itself is what gets matched.  The V and ZVE32F prefixes cover
; the full V extension vs. an embedded (Zve32f) configuration; they diverge
; where the wider source element type needed by vnsrl is unavailable.

; Even-element deinterleave of <16 x i8>: matched as a vnsrl.wi with shift 0.
define void @vnsrl_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Odd-element deinterleave of <16 x i8>: vnsrl.wi with shift 8 (one element).
define void @vnsrl_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 8
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Even-element deinterleave of <8 x i16>.  V and ZVE32F both use vnsrl.wi but
; pick different register group sizes (mf4 vs mf2) for the narrowed result.
define void @vnsrl_0_i16(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i16:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; V-NEXT: vle16.v v8, (a0)
; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 0
; V-NEXT: vse16.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_i16:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVE32F-NEXT: vle16.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; ZVE32F-NEXT: vse16.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <8 x i16>, ptr %in, align 2
  %shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  store <4 x i16> %shuffle.i5, ptr %out, align 2
  ret void
}

; Odd-element deinterleave of <8 x i16>: vnsrl.wi with shift 16.
define void @vnsrl_16_i16(ptr %in, ptr %out) {
; V-LABEL: vnsrl_16_i16:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; V-NEXT: vle16.v v8, (a0)
; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 16
; V-NEXT: vse16.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_16_i16:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVE32F-NEXT: vle16.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVE32F-NEXT: vnsrl.wi v8, v8, 16
; ZVE32F-NEXT: vse16.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <8 x i16>, ptr %in, align 2
  %shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  store <4 x i16> %shuffle.i5, ptr %out, align 2
  ret void
}

; Same as vnsrl_0_i16 but with f16 elements: the bitwise vnsrl match also
; applies to floating-point element types.
define void @vnsrl_0_half(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_half:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; V-NEXT: vle16.v v8, (a0)
; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 0
; V-NEXT: vse16.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_half:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVE32F-NEXT: vle16.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; ZVE32F-NEXT: vse16.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <8 x half>, ptr %in, align 2
  %shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  store <4 x half> %shuffle.i5, ptr %out, align 2
  ret void
}

; Odd-element f16 deinterleave: vnsrl.wi with shift 16.
define void @vnsrl_16_half(ptr %in, ptr %out) {
; V-LABEL: vnsrl_16_half:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; V-NEXT: vle16.v v8, (a0)
; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 16
; V-NEXT: vse16.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_16_half:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVE32F-NEXT: vle16.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVE32F-NEXT: vnsrl.wi v8, v8, 16
; ZVE32F-NEXT: vse16.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <8 x half>, ptr %in, align 2
  %shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  store <4 x half> %shuffle.i5, ptr %out, align 2
  ret void
}

; Even-element deinterleave of <4 x i32>.  The vnsrl form needs a 64-bit wide
; source element; ZVE32F lacks that, so it lowers via slidedown/slideup
; instead of a narrowing shift.
define void @vnsrl_0_i32(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i32:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; V-NEXT: vle32.v v8, (a0)
; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 0
; V-NEXT: vse32.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_i32:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; ZVE32F-NEXT: vslideup.vi v8, v9, 1
; ZVE32F-NEXT: vse32.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <4 x i32>, ptr %in, align 4
  %shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
  store <2 x i32> %shuffle.i5, ptr %out, align 4
  ret void
}

; Odd-element deinterleave of <4 x i32>.  Shift amount 32 does not fit the
; 5-bit vnsrl.wi immediate, so V materializes it and uses vnsrl.wx; ZVE32F
; falls back to slidedown plus a masked vrgather.
define void @vnsrl_32_i32(ptr %in, ptr %out) {
; V-LABEL: vnsrl_32_i32:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; V-NEXT: vle32.v v8, (a0)
; V-NEXT: li a0, 32
; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; V-NEXT: vnsrl.wx v8, v8, a0
; V-NEXT: vse32.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_32_i32:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vmv.v.i v0, 1
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t
; ZVE32F-NEXT: vse32.v v9, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <4 x i32>, ptr %in, align 4
  %shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
  store <2 x i32> %shuffle.i5, ptr %out, align 4
  ret void
}

; f32 analogue of vnsrl_0_i32.
define void @vnsrl_0_float(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_float:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; V-NEXT: vle32.v v8, (a0)
; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 0
; V-NEXT: vse32.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_float:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; ZVE32F-NEXT: vslideup.vi v8, v9, 1
; ZVE32F-NEXT: vse32.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <4 x float>, ptr %in, align 4
  %shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  store <2 x float> %shuffle.i5, ptr %out, align 4
  ret void
}

; f32 analogue of vnsrl_32_i32.
define void @vnsrl_32_float(ptr %in, ptr %out) {
; V-LABEL: vnsrl_32_float:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; V-NEXT: vle32.v v8, (a0)
; V-NEXT: li a0, 32
; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; V-NEXT: vnsrl.wx v8, v8, a0
; V-NEXT: vse32.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_32_float:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vmv.v.i v0, 1
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t
; ZVE32F-NEXT: vse32.v v9, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <4 x float>, ptr %in, align 4
  %shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  store <2 x float> %shuffle.i5, ptr %out, align 4
  ret void
}

; i64 elements: no 128-bit source element exists, so no vnsrl for either
; configuration.  V uses slides; ZVE32F (no 64-bit vector elements) lowers to
; scalar ld/sd pairs.
define void @vnsrl_0_i64(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i64:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT: vle64.v v8, (a0)
; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; V-NEXT: vslidedown.vi v9, v8, 2
; V-NEXT: vslideup.vi v8, v9, 1
; V-NEXT: vse64.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_i64:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: ld a2, 0(a0)
; ZVE32F-NEXT: ld a0, 16(a0)
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <4 x i64>, ptr %in, align 8
  %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
  store <2 x i64> %shuffle.i5, ptr %out, align 8
  ret void
}

; Odd-element i64 deinterleave; see vnsrl_0_i64 for why no vnsrl is formed.
define void @vnsrl_64_i64(ptr %in, ptr %out) {
; V-LABEL: vnsrl_64_i64:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT: vle64.v v8, (a0)
; V-NEXT: vmv.v.i v0, 1
; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; V-NEXT: vslidedown.vi v9, v8, 2
; V-NEXT: vrgather.vi v9, v8, 1, v0.t
; V-NEXT: vse64.v v9, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_64_i64:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: ld a2, 8(a0)
; ZVE32F-NEXT: ld a0, 24(a0)
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <4 x i64>, ptr %in, align 8
  %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
  store <2 x i64> %shuffle.i5, ptr %out, align 8
  ret void
}

; f64 analogue of vnsrl_0_i64.
define void @vnsrl_0_double(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_double:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT: vle64.v v8, (a0)
; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; V-NEXT: vslidedown.vi v9, v8, 2
; V-NEXT: vslideup.vi v8, v9, 1
; V-NEXT: vse64.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_double:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: ld a2, 0(a0)
; ZVE32F-NEXT: ld a0, 16(a0)
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <4 x double>, ptr %in, align 8
  %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  store <2 x double> %shuffle.i5, ptr %out, align 8
  ret void
}

; f64 analogue of vnsrl_64_i64.
define void @vnsrl_64_double(ptr %in, ptr %out) {
; V-LABEL: vnsrl_64_double:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT: vle64.v v8, (a0)
; V-NEXT: vmv.v.i v0, 1
; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; V-NEXT: vslidedown.vi v9, v8, 2
; V-NEXT: vrgather.vi v9, v8, 1, v0.t
; V-NEXT: vse64.v v9, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_64_double:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: ld a2, 8(a0)
; ZVE32F-NEXT: ld a0, 24(a0)
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <4 x double>, ptr %in, align 8
  %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  store <2 x double> %shuffle.i5, ptr %out, align 8
  ret void
}

; Trailing undef mask elements still allow the vnsrl match.
define void @vnsrl_0_i8_undef(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8_undef:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Interior undef mask elements still allow the vnsrl match.
define void @vnsrl_0_i8_undef2(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8_undef2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12, i32 14>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; TODO: Allow an undef initial element
define void @vnsrl_0_i8_undef3(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8_undef3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, -32
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: lui a0, 24640
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: addi a0, a0, 6
; CHECK-NEXT: vadd.vv v9, v9, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vadd.vi v9, v9, -8
; CHECK-NEXT: vrgather.vv v11, v8, v10
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
; CHECK-NEXT: vrgather.vv v11, v8, v9, v0.t
; CHECK-NEXT: vse8.v v11, (a1)
; CHECK-NEXT: ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 6, i32 10, i32 12, i32 14>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Not a vnsrl (checking for a prior pattern matching bug)
define void @vnsrl_0_i8_undef_negative(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8_undef_negative:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI17_0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: li a0, 48
; CHECK-NEXT: vadd.vv v9, v9, v9
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vadd.vi v9, v9, -8
; CHECK-NEXT: vrgather.vv v11, v8, v10
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
; CHECK-NEXT: vrgather.vv v11, v8, v9, v0.t
; CHECK-NEXT: vse8.v v11, (a1)
; CHECK-NEXT: ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 1>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Deinterleave where the whole mask covers a single <8 x i8> source operand.
define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i8_single_src:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; V-NEXT: vle8.v v8, (a0)
; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 0
; V-NEXT: vse8.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_i8_single_src:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT: vle8.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; ZVE32F-NEXT: vse8.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <8 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  store <4 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Odd-element single-source variant: vnsrl.wi with shift 8.
define void @vnsrl_8_i8_single_src(ptr %in, ptr %out) {
; V-LABEL: vnsrl_8_i8_single_src:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; V-NEXT: vle8.v v8, (a0)
; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 8
; V-NEXT: vse8.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_8_i8_single_src:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT: vle8.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32F-NEXT: vnsrl.wi v8, v8, 8
; ZVE32F-NEXT: vse8.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <8 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  store <4 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Result vector is as wide as the source (trailing lanes undef); the vnsrl
; still only needs the low half, so the match applies.
define void @vnsrl_0_i8_single_wideuse(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i8_single_wideuse:
; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; V-NEXT: vle8.v v8, (a0)
; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; V-NEXT: vnsrl.wi v8, v8, 0
; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; V-NEXT: vse8.v v8, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_i8_single_wideuse:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT: vle8.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT: vse8.v v8, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <8 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Can't match the m8 result type as the source would have to be m16 which
; isn't a legal type.
define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i32_single_src_m8:
; V: # %bb.0: # %entry
; V-NEXT: li a2, 64
; V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; V-NEXT: vle32.v v8, (a0)
; V-NEXT: lui a0, 341
; V-NEXT: addiw a0, a0, 1365
; V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; V-NEXT: vmv.s.x v16, a0
; V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; V-NEXT: vcompress.vm v24, v8, v16
; V-NEXT: vse32.v v24, (a1)
; V-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_i32_single_src_m8:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: li a2, 64
; ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; ZVE32F-NEXT: vmv.v.i v16, 0
; ZVE32F-NEXT: lui a0, 341
; ZVE32F-NEXT: addi a0, a0, 1365
; ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; ZVE32F-NEXT: vmv.s.x v16, a0
; ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZVE32F-NEXT: vcompress.vm v24, v8, v16
; ZVE32F-NEXT: vse32.v v24, (a1)
; ZVE32F-NEXT: ret
entry:
  %0 = load <64 x i32>, ptr %in, align 4
  %shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <64 x i32> %shuffle.i5, ptr %out, align 4
  ret void
}