; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 -mattr=+sme2 -force-streaming < %s | FileCheck %s

define <vscale x 16 x i1> @pext_b(target("aarch64.svcount") %x) nounwind {
; CHECK-LABEL: pext_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    pext p0.b, pn8[2]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") %x, i32 2)
  ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @pext_h(target("aarch64.svcount") %x) nounwind {
; CHECK-LABEL: pext_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    pext p0.h, pn8[2]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") %x, i32 2)
  ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @pext_s(target("aarch64.svcount") %x) nounwind {
; CHECK-LABEL: pext_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    pext p0.s, pn8[2]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") %x, i32 2)
  ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @pext_d(target("aarch64.svcount") %x) nounwind {
; CHECK-LABEL: pext_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    pext p0.d, pn8[2]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") %x, i32 2)
  ret <vscale x 2 x i1> %res
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount"), i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount"), i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount"), i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount"), i32)

define {<vscale x 16 x i1>,<vscale x 16 x i1>} @pext_x2_b(target("aarch64.svcount") %x) nounwind {
; CHECK-LABEL: pext_x2_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    pext { p0.b, p1.b }, pn8[1]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call {<vscale x 16 x i1>,<vscale x 16 x i1>} @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") %x, i32 1)
  ret {<vscale x 16 x i1>,<vscale x 16 x i1>} %res
}

define {<vscale x 8 x i1>,<vscale x 8 x i1>} @pext_x2_h(target("aarch64.svcount") %x) nounwind {
; CHECK-LABEL: pext_x2_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    pext { p0.h, p1.h }, pn8[1]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call {<vscale x 8 x i1>,<vscale x 8 x i1>} @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") %x, i32 1)
  ret {<vscale x 8 x i1>,<vscale x 8 x i1>} %res
}

define {<vscale x 4 x i1>,<vscale x 4 x i1>} @pext_x2_s(target("aarch64.svcount") %x) nounwind {
; CHECK-LABEL: pext_x2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    pext { p0.s, p1.s }, pn8[1]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call {<vscale x 4 x i1>,<vscale x 4 x i1>} @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") %x, i32 1)
  ret {<vscale x 4 x i1>,<vscale x 4 x i1>} %res
}

define {<vscale x 2 x i1>,<vscale x 2 x i1>} @pext_x2_d(target("aarch64.svcount") %x) nounwind {
; CHECK-LABEL: pext_x2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    pext { p0.d, p1.d }, pn8[1]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call {<vscale x 2 x i1>,<vscale x 2 x i1>} @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") %x, i32 1)
  ret {<vscale x 2 x i1>,<vscale x 2 x i1>} %res
}

declare {<vscale x 16 x i1>,<vscale x 16 x i1>} @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount"), i32)
declare {<vscale x 8 x i1>,<vscale x 8 x i1>} @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount"), i32)
declare {<vscale x 4 x i1>,<vscale x 4 x i1>} @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount"), i32)
declare {<vscale x 2 x i1>,<vscale x 2 x i1>} @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount"), i32)

define target("aarch64.svcount") @ptrue_b() nounwind {
; CHECK-LABEL: ptrue_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue pn8.b
; CHECK-NEXT:    mov p0.b, p8.b
; CHECK-NEXT:    ret
  %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
  ret target("aarch64.svcount") %res
}

define target("aarch64.svcount") @ptrue_h() nounwind {
; CHECK-LABEL: ptrue_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue pn8.h
; CHECK-NEXT:    mov p0.b, p8.b
; CHECK-NEXT:    ret
  %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c16()
  ret target("aarch64.svcount") %res
}

define target("aarch64.svcount") @ptrue_s() nounwind {
; CHECK-LABEL: ptrue_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue pn8.s
; CHECK-NEXT:    mov p0.b, p8.b
; CHECK-NEXT:    ret
  %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c32()
  ret target("aarch64.svcount") %res
}

define target("aarch64.svcount") @ptrue_d() nounwind {
; CHECK-LABEL: ptrue_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue pn8.d
; CHECK-NEXT:    mov p0.b, p8.b
; CHECK-NEXT:    ret
  %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c64()
  ret target("aarch64.svcount") %res
}

declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c16()
declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c32()
declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c64()