1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s --check-prefixes=STRIDED 3; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS 4 5define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { 6; STRIDED-LABEL: ldnt1_x2_i8_z0_z8: 7; STRIDED: // %bb.0: 8; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 9; STRIDED-NEXT: addvl sp, sp, #-17 10; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 11; STRIDED-NEXT: mov p8.b, p0.b 12; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 13; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 14; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 15; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 16; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 17; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 18; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 19; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 20; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 21; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 22; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 23; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 24; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 25; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 26; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 27; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 28; STRIDED-NEXT: ldnt1b { z0.b, z8.b }, pn8/z, [x0] 29; STRIDED-NEXT: //APP 30; STRIDED-NEXT: nop 31; STRIDED-NEXT: //NO_APP 32; STRIDED-NEXT: ldr p8, [sp, #7, 
mul vl] // 2-byte Folded Reload 33; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 34; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 35; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 36; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 37; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 38; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 39; STRIDED-NEXT: mov z1.d, z8.d 40; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 41; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 42; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 43; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 44; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 45; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 46; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 47; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 48; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 49; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 50; STRIDED-NEXT: addvl sp, sp, #17 51; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 52; STRIDED-NEXT: ret 53; 54; CONTIGUOUS-LABEL: ldnt1_x2_i8_z0_z8: 55; CONTIGUOUS: // %bb.0: 56; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 57; CONTIGUOUS-NEXT: addvl sp, sp, #-16 58; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 59; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 60; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 61; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 62; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 63; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 64; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 65; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 66; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 67; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 68; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 69; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 70; CONTIGUOUS-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 71; CONTIGUOUS-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 72; CONTIGUOUS-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 73; CONTIGUOUS-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 74; CONTIGUOUS-NEXT: addvl sp, sp, #-2 75; CONTIGUOUS-NEXT: mov p8.b, p0.b 76; CONTIGUOUS-NEXT: ldnt1b { z0.b, z1.b }, pn8/z, [x0] 77; CONTIGUOUS-NEXT: str z0, [sp] 78; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 79; CONTIGUOUS-NEXT: //APP 80; CONTIGUOUS-NEXT: nop 81; CONTIGUOUS-NEXT: //NO_APP 82; CONTIGUOUS-NEXT: ldr z0, [sp] 83; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 84; CONTIGUOUS-NEXT: addvl sp, sp, #2 85; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 86; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 87; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 88; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 89; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 90; CONTIGUOUS-NEXT: ldr 
z18, [sp, #6, mul vl] // 16-byte Folded Reload 91; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 92; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 93; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 94; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 95; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 96; CONTIGUOUS-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 97; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 98; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 99; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 100; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 101; CONTIGUOUS-NEXT: addvl sp, sp, #16 102; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 103; CONTIGUOUS-NEXT: ret 104 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") %pn, ptr %ptr) 105 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 106 %res.v0 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 0 107 %v0 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %res.v0, i64 0) 108 %res.v1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 1 109 %v1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %v0, <vscale x 16 x i8> %res.v1, i64 16) 110 ret <vscale x 32 x i8> %v1 111} 112 113define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr %ptr, i64 %index) nounwind { 114; STRIDED-LABEL: ldnt1_x2_i8_z0_z8_scalar: 115; STRIDED: // %bb.0: 116; 
STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 117; STRIDED-NEXT: addvl sp, sp, #-17 118; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 119; STRIDED-NEXT: mov p8.b, p0.b 120; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 121; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 122; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 123; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 124; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 125; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 126; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 127; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 128; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 129; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 130; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 131; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 132; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 133; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 134; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 135; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 136; STRIDED-NEXT: ldnt1b { z0.b, z8.b }, pn8/z, [x0, x1] 137; STRIDED-NEXT: //APP 138; STRIDED-NEXT: nop 139; STRIDED-NEXT: //NO_APP 140; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 141; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 142; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 143; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 144; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 145; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 146; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 147; STRIDED-NEXT: mov z1.d, z8.d 148; STRIDED-NEXT: ldr z17, [sp, #7, 
mul vl] // 16-byte Folded Reload 149; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 150; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 151; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 152; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 153; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 154; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 155; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 156; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 157; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 158; STRIDED-NEXT: addvl sp, sp, #17 159; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 160; STRIDED-NEXT: ret 161; 162; CONTIGUOUS-LABEL: ldnt1_x2_i8_z0_z8_scalar: 163; CONTIGUOUS: // %bb.0: 164; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 165; CONTIGUOUS-NEXT: addvl sp, sp, #-16 166; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 167; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 168; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 169; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 170; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 171; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 172; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 173; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 174; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 175; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 176; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 177; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 178; CONTIGUOUS-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 179; CONTIGUOUS-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 180; 
CONTIGUOUS-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 181; CONTIGUOUS-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 182; CONTIGUOUS-NEXT: addvl sp, sp, #-2 183; CONTIGUOUS-NEXT: mov p8.b, p0.b 184; CONTIGUOUS-NEXT: ldnt1b { z0.b, z1.b }, pn8/z, [x0, x1] 185; CONTIGUOUS-NEXT: str z0, [sp] 186; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 187; CONTIGUOUS-NEXT: //APP 188; CONTIGUOUS-NEXT: nop 189; CONTIGUOUS-NEXT: //NO_APP 190; CONTIGUOUS-NEXT: ldr z0, [sp] 191; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 192; CONTIGUOUS-NEXT: addvl sp, sp, #2 193; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 194; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 195; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 196; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 197; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 198; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 199; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 200; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 201; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 202; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 203; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 204; CONTIGUOUS-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 205; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 206; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 207; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 208; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 209; CONTIGUOUS-NEXT: addvl sp, sp, #16 210; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 211; CONTIGUOUS-NEXT: ret 212 %base = getelementptr i8, ptr %ptr, i64 %index 213 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") %pn, ptr %base) 214 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 215 %res.v0 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 0 216 %v0 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %res.v0, i64 0) 217 %res.v1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 1 218 %v1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %v0, <vscale x 16 x i8> %res.v1, i64 16) 219 ret <vscale x 32 x i8> %v1 220} 221 222define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { 223; STRIDED-LABEL: ldnt1_x2_i16_z0_z8: 224; STRIDED: // %bb.0: 225; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 226; STRIDED-NEXT: addvl sp, sp, #-17 227; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 228; STRIDED-NEXT: mov p8.b, p0.b 229; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 230; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 231; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 232; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 233; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 234; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 235; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 236; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 237; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 238; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 239; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 240; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 241; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 242; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 243; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 244; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 245; STRIDED-NEXT: ldnt1h { z0.h, z8.h }, pn8/z, [x0] 246; STRIDED-NEXT: //APP 247; STRIDED-NEXT: nop 248; STRIDED-NEXT: //NO_APP 249; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 250; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 251; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 252; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 253; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 254; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 255; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 256; STRIDED-NEXT: mov z1.d, z8.d 257; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 258; 
STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 259; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 260; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 261; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 262; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 263; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 264; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 265; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 266; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 267; STRIDED-NEXT: addvl sp, sp, #17 268; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 269; STRIDED-NEXT: ret 270; 271; CONTIGUOUS-LABEL: ldnt1_x2_i16_z0_z8: 272; CONTIGUOUS: // %bb.0: 273; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 274; CONTIGUOUS-NEXT: addvl sp, sp, #-16 275; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 276; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 277; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 278; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 279; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 280; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 281; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 282; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 283; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 284; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 285; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 286; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 287; CONTIGUOUS-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 288; CONTIGUOUS-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 289; CONTIGUOUS-NEXT: str z10, [sp, #14, mul vl] // 
16-byte Folded Spill 290; CONTIGUOUS-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 291; CONTIGUOUS-NEXT: addvl sp, sp, #-2 292; CONTIGUOUS-NEXT: mov p8.b, p0.b 293; CONTIGUOUS-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] 294; CONTIGUOUS-NEXT: str z0, [sp] 295; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 296; CONTIGUOUS-NEXT: //APP 297; CONTIGUOUS-NEXT: nop 298; CONTIGUOUS-NEXT: //NO_APP 299; CONTIGUOUS-NEXT: ldr z0, [sp] 300; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 301; CONTIGUOUS-NEXT: addvl sp, sp, #2 302; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 303; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 304; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 305; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 306; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 307; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 308; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 309; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 310; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 311; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 312; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 313; CONTIGUOUS-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 314; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 315; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 316; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 317; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 318; CONTIGUOUS-NEXT: addvl sp, sp, #16 319; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 320; CONTIGUOUS-NEXT: ret 321 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") %pn, ptr %ptr) 322 call void asm sideeffect "nop", 
"~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 323 %res.v0 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 0 324 %v0 = call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> %res.v0, i64 0) 325 %res.v1 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 1 326 %v1 = call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> %v0, <vscale x 8 x i16> %res.v1, i64 8) 327 ret <vscale x 16 x i16> %v1 328} 329 330define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %z1, target("aarch64.svcount") %pn, ptr %ptr, i64 %index) nounwind { 331; STRIDED-LABEL: ldnt1_x2_i16_z0_z8_scalar: 332; STRIDED: // %bb.0: 333; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 334; STRIDED-NEXT: addvl sp, sp, #-17 335; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 336; STRIDED-NEXT: mov p8.b, p0.b 337; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 338; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 339; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 340; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 341; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 342; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 343; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 344; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 345; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 346; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 347; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 348; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 349; STRIDED-NEXT: str z11, [sp, #13, mul 
vl] // 16-byte Folded Spill 350; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 351; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 352; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 353; STRIDED-NEXT: ldnt1h { z0.h, z8.h }, pn8/z, [x0, x1, lsl #1] 354; STRIDED-NEXT: //APP 355; STRIDED-NEXT: nop 356; STRIDED-NEXT: //NO_APP 357; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 358; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 359; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 360; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 361; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 362; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 363; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 364; STRIDED-NEXT: mov z1.d, z8.d 365; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 366; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 367; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 368; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 369; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 370; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 371; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 372; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 373; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 374; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 375; STRIDED-NEXT: addvl sp, sp, #17 376; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 377; STRIDED-NEXT: ret 378; 379; CONTIGUOUS-LABEL: ldnt1_x2_i16_z0_z8_scalar: 380; CONTIGUOUS: // %bb.0: 381; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 382; CONTIGUOUS-NEXT: addvl sp, sp, #-16 383; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 384; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 385; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 386; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 387; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 388; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 389; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 390; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 391; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 392; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 393; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 394; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 395; CONTIGUOUS-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 396; CONTIGUOUS-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 397; CONTIGUOUS-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 398; CONTIGUOUS-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 399; CONTIGUOUS-NEXT: addvl sp, sp, #-2 400; CONTIGUOUS-NEXT: mov p8.b, p0.b 401; CONTIGUOUS-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0, x1, lsl #1] 402; CONTIGUOUS-NEXT: str z0, [sp] 403; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 404; CONTIGUOUS-NEXT: //APP 405; CONTIGUOUS-NEXT: nop 406; CONTIGUOUS-NEXT: //NO_APP 407; CONTIGUOUS-NEXT: ldr z0, [sp] 408; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 409; CONTIGUOUS-NEXT: addvl sp, sp, #2 410; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 411; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 412; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 413; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 414; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 
16-byte Folded Reload 415; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 416; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 417; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 418; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 419; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 420; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 421; CONTIGUOUS-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 422; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 423; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 424; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 425; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 426; CONTIGUOUS-NEXT: addvl sp, sp, #16 427; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 428; CONTIGUOUS-NEXT: ret 429 %base = getelementptr i16, ptr %ptr, i64 %index 430 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") %pn, ptr %base) 431 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 432 %res.v0 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 0 433 %v0 = call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> %res.v0, i64 0) 434 %res.v1 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 1 435 %v1 = call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> %v0, <vscale x 8 x i16> %res.v1, i64 8) 436 ret <vscale x 16 x i16> %v1 437} 438 439define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %z1, target("aarch64.svcount") %pn, 
ptr %ptr) nounwind { 440; STRIDED-LABEL: ldnt1_x2_i32_z0_z8: 441; STRIDED: // %bb.0: 442; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 443; STRIDED-NEXT: addvl sp, sp, #-17 444; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 445; STRIDED-NEXT: mov p8.b, p0.b 446; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 447; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 448; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 449; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 450; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 451; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 452; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 453; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 454; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 455; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 456; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 457; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 458; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 459; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 460; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 461; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 462; STRIDED-NEXT: ldnt1w { z0.s, z8.s }, pn8/z, [x0] 463; STRIDED-NEXT: //APP 464; STRIDED-NEXT: nop 465; STRIDED-NEXT: //NO_APP 466; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 467; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 468; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 469; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 470; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 471; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 472; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte 
Folded Reload 473; STRIDED-NEXT: mov z1.d, z8.d 474; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 475; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 476; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 477; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 478; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 479; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 480; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 481; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 482; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 483; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 484; STRIDED-NEXT: addvl sp, sp, #17 485; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 486; STRIDED-NEXT: ret 487; 488; CONTIGUOUS-LABEL: ldnt1_x2_i32_z0_z8: 489; CONTIGUOUS: // %bb.0: 490; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 491; CONTIGUOUS-NEXT: addvl sp, sp, #-16 492; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 493; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 494; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 495; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 496; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 497; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 498; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 499; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 500; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 501; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 502; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 503; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 504; CONTIGUOUS-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 505; 
CONTIGUOUS-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 506; CONTIGUOUS-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 507; CONTIGUOUS-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 508; CONTIGUOUS-NEXT: addvl sp, sp, #-2 509; CONTIGUOUS-NEXT: mov p8.b, p0.b 510; CONTIGUOUS-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0] 511; CONTIGUOUS-NEXT: str z0, [sp] 512; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 513; CONTIGUOUS-NEXT: //APP 514; CONTIGUOUS-NEXT: nop 515; CONTIGUOUS-NEXT: //NO_APP 516; CONTIGUOUS-NEXT: ldr z0, [sp] 517; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 518; CONTIGUOUS-NEXT: addvl sp, sp, #2 519; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 520; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 521; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 522; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 523; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 524; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 525; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 526; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 527; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 528; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 529; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 530; CONTIGUOUS-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 531; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 532; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 533; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 534; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 535; CONTIGUOUS-NEXT: addvl sp, sp, #16 536; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 537; CONTIGUOUS-NEXT: ret 538 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } 
@llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") %pn, ptr %ptr) 539 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 540 %res.v0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 0 541 %v0 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> %res.v0, i64 0) 542 %res.v1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 1 543 %v1 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %v0, <vscale x 4 x i32> %res.v1, i64 4) 544 ret <vscale x 8 x i32> %v1 545} 546
; ldnt1_x2_i32_z0_z8_scalar: two-vector ldnt1 of <vscale x 4 x i32> loaded from
; %ptr + %index (i32 getelementptr). The inline asm in the body clobbers z1-z7
; and z9-z31, i.e. every Z register except z0 and z8.
;  - STRIDED checks (+sme2, -force-streaming run): the load targets the strided
;    pair { z0.s, z8.s } with the index folded as [x0, x1, lsl #2], and z8 is
;    copied into z1 before the return.
;  - CONTIGUOUS checks (+sve2p1 run): the load targets { z0.s, z1.s }; both are
;    stored to the stack before the asm and reloaded after it.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with that script rather than editing them by hand.
547define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %z1, target("aarch64.svcount") %pn, ptr %ptr, i64 %index) nounwind { 548; STRIDED-LABEL: ldnt1_x2_i32_z0_z8_scalar: 549; STRIDED: // %bb.0: 550; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 551; STRIDED-NEXT: addvl sp, sp, #-17 552; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 553; STRIDED-NEXT: mov p8.b, p0.b 554; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 555; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 556; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 557; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 558; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 559; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 560; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 561; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 562; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 563; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 564; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 565; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 566; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 567; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 568; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 569; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 570; STRIDED-NEXT: ldnt1w { z0.s, z8.s }, pn8/z, [x0, x1, lsl #2] 571; STRIDED-NEXT: //APP 572; STRIDED-NEXT: nop 573; STRIDED-NEXT: //NO_APP 574; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 575; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 576; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 577; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 578; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 579; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 580; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 581; STRIDED-NEXT: mov z1.d, z8.d 582; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded 
Reload 583; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 584; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 585; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 586; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 587; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 588; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 589; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 590; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 591; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 592; STRIDED-NEXT: addvl sp, sp, #17 593; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 594; STRIDED-NEXT: ret 595; 596; CONTIGUOUS-LABEL: ldnt1_x2_i32_z0_z8_scalar: 597; CONTIGUOUS: // %bb.0: 598; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 599; CONTIGUOUS-NEXT: addvl sp, sp, #-16 600; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 601; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 602; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 603; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 604; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 605; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 606; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 607; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 608; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 609; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 610; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 611; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 612; CONTIGUOUS-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 613; CONTIGUOUS-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 614; CONTIGUOUS-NEXT: str z10, [sp, 
#14, mul vl] // 16-byte Folded Spill 615; CONTIGUOUS-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 616; CONTIGUOUS-NEXT: addvl sp, sp, #-2 617; CONTIGUOUS-NEXT: mov p8.b, p0.b 618; CONTIGUOUS-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0, x1, lsl #2] 619; CONTIGUOUS-NEXT: str z0, [sp] 620; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 621; CONTIGUOUS-NEXT: //APP 622; CONTIGUOUS-NEXT: nop 623; CONTIGUOUS-NEXT: //NO_APP 624; CONTIGUOUS-NEXT: ldr z0, [sp] 625; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 626; CONTIGUOUS-NEXT: addvl sp, sp, #2 627; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 628; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 629; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 630; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 631; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 632; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 633; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 634; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 635; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 636; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 637; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 638; CONTIGUOUS-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 639; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 640; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 641; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 642; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 643; CONTIGUOUS-NEXT: addvl sp, sp, #16 644; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 645; CONTIGUOUS-NEXT: ret 646 %base = getelementptr i32, ptr %ptr, i64 %index 647 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } 
@llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") %pn, ptr %base) 648 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 649 %res.v0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 0 650 %v0 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> %res.v0, i64 0) 651 %res.v1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 1 652 %v1 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %v0, <vscale x 4 x i32> %res.v1, i64 4) 653 ret <vscale x 8 x i32> %v1 654} 655
; ldnt1_x2_i64_z0_z8: two-vector ldnt1 of <vscale x 2 x i64> loaded directly
; from %ptr. The inline asm in the body clobbers z1-z7 and z9-z31, i.e. every
; Z register except z0 and z8.
;  - STRIDED checks (+sme2, -force-streaming run): the load targets the strided
;    pair { z0.d, z8.d } at [x0], and z8 is copied into z1 before the return.
;  - CONTIGUOUS checks (+sve2p1 run): the load targets { z0.d, z1.d }; both are
;    stored to the stack before the asm and reloaded after it.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with that script rather than editing them by hand.
656define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { 657; STRIDED-LABEL: ldnt1_x2_i64_z0_z8: 658; STRIDED: // %bb.0: 659; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 660; STRIDED-NEXT: addvl sp, sp, #-17 661; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 662; STRIDED-NEXT: mov p8.b, p0.b 663; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 664; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 665; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 666; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 667; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 668; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 669; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 670; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 671; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 672; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 673; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 674; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 675; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 676; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 677; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 678; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 679; STRIDED-NEXT: ldnt1d { z0.d, z8.d }, pn8/z, [x0] 680; STRIDED-NEXT: //APP 681; STRIDED-NEXT: nop 682; STRIDED-NEXT: //NO_APP 683; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 684; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 685; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 686; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 687; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 688; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 689; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 690; STRIDED-NEXT: mov z1.d, z8.d 691; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 692; 
STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 693; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 694; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 695; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 696; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 697; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 698; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 699; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 700; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 701; STRIDED-NEXT: addvl sp, sp, #17 702; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 703; STRIDED-NEXT: ret 704; 705; CONTIGUOUS-LABEL: ldnt1_x2_i64_z0_z8: 706; CONTIGUOUS: // %bb.0: 707; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 708; CONTIGUOUS-NEXT: addvl sp, sp, #-16 709; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 710; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 711; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 712; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 713; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 714; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 715; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 716; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 717; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 718; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 719; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 720; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 721; CONTIGUOUS-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 722; CONTIGUOUS-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 723; CONTIGUOUS-NEXT: str z10, [sp, #14, mul vl] // 
16-byte Folded Spill 724; CONTIGUOUS-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 725; CONTIGUOUS-NEXT: addvl sp, sp, #-2 726; CONTIGUOUS-NEXT: mov p8.b, p0.b 727; CONTIGUOUS-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0] 728; CONTIGUOUS-NEXT: str z0, [sp] 729; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 730; CONTIGUOUS-NEXT: //APP 731; CONTIGUOUS-NEXT: nop 732; CONTIGUOUS-NEXT: //NO_APP 733; CONTIGUOUS-NEXT: ldr z0, [sp] 734; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 735; CONTIGUOUS-NEXT: addvl sp, sp, #2 736; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 737; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 738; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 739; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 740; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 741; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 742; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 743; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 744; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 745; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 746; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 747; CONTIGUOUS-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 748; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 749; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 750; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 751; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 752; CONTIGUOUS-NEXT: addvl sp, sp, #16 753; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 754; CONTIGUOUS-NEXT: ret 755 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") %pn, ptr %ptr) 756 call void asm sideeffect "nop", 
"~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 757 %res.v0 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 0 758 %v0 = call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> %res.v0, i64 0) 759 %res.v1 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 1 760 %v1 = call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> %v0, <vscale x 2 x i64> %res.v1, i64 2) 761 ret <vscale x 4 x i64> %v1 762} 763
; ldnt1_x2_i64_z0_z8_scalar: two-vector ldnt1 of <vscale x 2 x i64> loaded from
; %ptr + %index (i64 getelementptr). The inline asm in the body clobbers z1-z7
; and z9-z31, i.e. every Z register except z0 and z8.
;  - STRIDED checks (+sme2, -force-streaming run): the load targets the strided
;    pair { z0.d, z8.d } with the index folded as [x0, x1, lsl #3], and z8 is
;    copied into z1 before the return.
;  - CONTIGUOUS checks (+sve2p1 run): the load targets { z0.d, z1.d }; both are
;    stored to the stack before the asm and reloaded after it.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with that script rather than editing them by hand.
764define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %z1, target("aarch64.svcount") %pn, ptr %ptr, i64 %index) nounwind { 765; STRIDED-LABEL: ldnt1_x2_i64_z0_z8_scalar: 766; STRIDED: // %bb.0: 767; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 768; STRIDED-NEXT: addvl sp, sp, #-17 769; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 770; STRIDED-NEXT: mov p8.b, p0.b 771; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 772; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 773; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 774; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 775; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 776; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 777; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 778; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 779; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 780; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 781; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 782; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 783; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 
16-byte Folded Spill 784; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 785; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 786; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 787; STRIDED-NEXT: ldnt1d { z0.d, z8.d }, pn8/z, [x0, x1, lsl #3] 788; STRIDED-NEXT: //APP 789; STRIDED-NEXT: nop 790; STRIDED-NEXT: //NO_APP 791; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 792; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 793; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 794; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 795; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 796; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 797; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 798; STRIDED-NEXT: mov z1.d, z8.d 799; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 800; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 801; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 802; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 803; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 804; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 805; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 806; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 807; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 808; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 809; STRIDED-NEXT: addvl sp, sp, #17 810; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 811; STRIDED-NEXT: ret 812; 813; CONTIGUOUS-LABEL: ldnt1_x2_i64_z0_z8_scalar: 814; CONTIGUOUS: // %bb.0: 815; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 816; CONTIGUOUS-NEXT: addvl sp, sp, #-16 817; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 818; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 819; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 820; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 821; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 822; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 823; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 824; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 825; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 826; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 827; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 828; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 829; CONTIGUOUS-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 830; CONTIGUOUS-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 831; CONTIGUOUS-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 832; CONTIGUOUS-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 833; CONTIGUOUS-NEXT: addvl sp, sp, #-2 834; CONTIGUOUS-NEXT: mov p8.b, p0.b 835; CONTIGUOUS-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0, x1, lsl #3] 836; CONTIGUOUS-NEXT: str z0, [sp] 837; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 838; CONTIGUOUS-NEXT: //APP 839; CONTIGUOUS-NEXT: nop 840; CONTIGUOUS-NEXT: //NO_APP 841; CONTIGUOUS-NEXT: ldr z0, [sp] 842; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 843; CONTIGUOUS-NEXT: addvl sp, sp, #2 844; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 845; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 846; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 847; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 848; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 
16-byte Folded Reload 849; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 850; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 851; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 852; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 853; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 854; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 855; CONTIGUOUS-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 856; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 857; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 858; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 859; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 860; CONTIGUOUS-NEXT: addvl sp, sp, #16 861; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 862; CONTIGUOUS-NEXT: ret 863 %base = getelementptr i64, ptr %ptr, i64 %index 864 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") %pn, ptr %base) 865 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 866 %res.v0 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 0 867 %v0 = call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> %res.v0, i64 0) 868 %res.v1 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 1 869 %v1 = call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> %v0, <vscale x 2 x i64> %res.v1, i64 2) 870 ret <vscale x 4 x i64> %v1 871} 872 873define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr 
%ptr) nounwind {
; ldnt1_x4_i8_z0_z4_z8_z12: four-vector ldnt1 of <vscale x 16 x i8> loaded
; directly from %ptr. The inline asm in the body clobbers z1-z3, z5-z7, z9-z11
; and z13-z31, i.e. every Z register except z0, z4, z8 and z12.
;  - STRIDED checks (+sme2, -force-streaming run): the load targets the strided
;    tuple { z0.b, z4.b, z8.b, z12.b } at [x0]; z4, z8 and z12 are copied into
;    z1, z2 and z3 before the return.
;  - CONTIGUOUS checks (+sve2p1 run): the load targets { z0.b - z3.b }; all four
;    are stored to the stack before the asm and reloaded after it.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with that script rather than editing them by hand.
874; STRIDED-LABEL: ldnt1_x4_i8_z0_z4_z8_z12: 875; STRIDED: // %bb.0: 876; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 877; STRIDED-NEXT: addvl sp, sp, #-17 878; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 879; STRIDED-NEXT: mov p8.b, p0.b 880; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 881; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 882; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 883; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 884; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 885; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 886; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 887; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 888; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 889; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 890; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 891; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 892; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 893; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 894; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 895; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 896; STRIDED-NEXT: ldnt1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0] 897; STRIDED-NEXT: //APP 898; STRIDED-NEXT: nop 899; STRIDED-NEXT: //NO_APP 900; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 901; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 902; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 903; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 904; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 905; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 906; STRIDED-NEXT: ldr z18, [sp, #6, mul 
vl] // 16-byte Folded Reload 907; STRIDED-NEXT: mov z2.d, z8.d 908; STRIDED-NEXT: mov z3.d, z12.d 909; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 910; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 911; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 912; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 913; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 914; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 915; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 916; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 917; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 918; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 919; STRIDED-NEXT: mov z1.d, z4.d 920; STRIDED-NEXT: addvl sp, sp, #17 921; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 922; STRIDED-NEXT: ret 923; 924; CONTIGUOUS-LABEL: ldnt1_x4_i8_z0_z4_z8_z12: 925; CONTIGUOUS: // %bb.0: 926; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 927; CONTIGUOUS-NEXT: addvl sp, sp, #-15 928; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 929; CONTIGUOUS-NEXT: ptrue pn8.b 930; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 931; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill 932; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 933; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 934; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 935; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 936; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 937; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 938; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 939; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 940; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 941; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 942; CONTIGUOUS-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill 943; CONTIGUOUS-NEXT: addvl sp, sp, #-4 944; CONTIGUOUS-NEXT: mov p8.b, p0.b 945; CONTIGUOUS-NEXT: ldnt1b { z0.b - z3.b }, pn8/z, [x0] 946; CONTIGUOUS-NEXT: str z0, [sp] 947; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 948; CONTIGUOUS-NEXT: str z2, [sp, #2, mul vl] 949; CONTIGUOUS-NEXT: str z3, [sp, #3, mul vl] 950; CONTIGUOUS-NEXT: //APP 951; CONTIGUOUS-NEXT: nop 952; CONTIGUOUS-NEXT: //NO_APP 953; CONTIGUOUS-NEXT: ldr z0, [sp] 954; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 955; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] 956; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] 957; CONTIGUOUS-NEXT: addvl sp, sp, #4 958; CONTIGUOUS-NEXT: ptrue pn8.b 959; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 960; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 961; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 962; 
CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 963; CONTIGUOUS-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload 964; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 965; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 966; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 967; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 968; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 969; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 970; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 971; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload 972; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 973; CONTIGUOUS-NEXT: addvl sp, sp, #15 974; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 975; CONTIGUOUS-NEXT: ret 976 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") %pn, ptr %ptr) 977 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 978 %res.v0 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 0 979 %v0 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %res.v0, i64 0) 980 %res.v1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 1 981 %v1 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %res.v1, i64 16) 982 %res.v2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 2 983 %v2 = call 
<vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v1, <vscale x 16 x i8> %res.v2, i64 32) 984 %res.v3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 3 985 %v3 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v2, <vscale x 16 x i8> %res.v3, i64 48) 986 ret <vscale x 64 x i8> %v3 987} 988 989define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr %ptr, i64 %index) nounwind { 990; STRIDED-LABEL: ldnt1_x4_i8_z0_z4_z8_z12_scalar: 991; STRIDED: // %bb.0: 992; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 993; STRIDED-NEXT: addvl sp, sp, #-17 994; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 995; STRIDED-NEXT: mov p8.b, p0.b 996; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 997; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 998; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 999; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1000; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1001; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1002; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1003; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1004; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1005; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1006; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1007; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 1008; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 1009; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 1010; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 1011; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 1012; 
STRIDED-NEXT: ldnt1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0, x1] 1013; STRIDED-NEXT: //APP 1014; STRIDED-NEXT: nop 1015; STRIDED-NEXT: //NO_APP 1016; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1017; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1018; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1019; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 1020; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1021; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1022; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1023; STRIDED-NEXT: mov z2.d, z8.d 1024; STRIDED-NEXT: mov z3.d, z12.d 1025; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1026; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1027; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1028; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1029; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1030; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 1031; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 1032; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 1033; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 1034; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 1035; STRIDED-NEXT: mov z1.d, z4.d 1036; STRIDED-NEXT: addvl sp, sp, #17 1037; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1038; STRIDED-NEXT: ret 1039; 1040; CONTIGUOUS-LABEL: ldnt1_x4_i8_z0_z4_z8_z12_scalar: 1041; CONTIGUOUS: // %bb.0: 1042; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1043; CONTIGUOUS-NEXT: addvl sp, sp, #-15 1044; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1045; CONTIGUOUS-NEXT: ptrue pn8.b 1046; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1047; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill 1048; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1049; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1050; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1051; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1052; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1053; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1054; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1055; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1056; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1057; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1058; CONTIGUOUS-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill 1059; CONTIGUOUS-NEXT: addvl sp, sp, #-4 1060; CONTIGUOUS-NEXT: mov p8.b, p0.b 1061; CONTIGUOUS-NEXT: ldnt1b { z0.b - z3.b }, pn8/z, [x0, x1] 1062; CONTIGUOUS-NEXT: str z0, [sp] 1063; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 1064; CONTIGUOUS-NEXT: str z2, [sp, #2, mul vl] 1065; CONTIGUOUS-NEXT: str z3, [sp, #3, mul vl] 1066; CONTIGUOUS-NEXT: //APP 1067; CONTIGUOUS-NEXT: nop 1068; CONTIGUOUS-NEXT: //NO_APP 1069; CONTIGUOUS-NEXT: ldr z0, [sp] 1070; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 1071; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] 1072; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] 1073; CONTIGUOUS-NEXT: addvl sp, sp, #4 1074; CONTIGUOUS-NEXT: ptrue pn8.b 1075; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1076; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1077; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul 
vl] // 16-byte Folded Reload 1078; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1079; CONTIGUOUS-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload 1080; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1081; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1082; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1083; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1084; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1085; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1086; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1087; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload 1088; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1089; CONTIGUOUS-NEXT: addvl sp, sp, #15 1090; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1091; CONTIGUOUS-NEXT: ret 1092 %base = getelementptr i8, ptr %ptr, i64 %index 1093 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") %pn, ptr %base) 1094 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 1095 %res.v0 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 0 1096 %v0 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %res.v0, i64 0) 1097 %res.v1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 1 1098 %v1 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %res.v1, i64 16) 1099 %res.v2 = extractvalue 
{ <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 2 1100 %v2 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v1, <vscale x 16 x i8> %res.v2, i64 32) 1101 %res.v3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 3 1102 %v3 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v2, <vscale x 16 x i8> %res.v3, i64 48) 1103 ret <vscale x 64 x i8> %v3 1104} 1105 1106define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { 1107; STRIDED-LABEL: ldnt1_x4_i16_z0_z4_z8_z12: 1108; STRIDED: // %bb.0: 1109; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 1110; STRIDED-NEXT: addvl sp, sp, #-17 1111; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1112; STRIDED-NEXT: mov p8.b, p0.b 1113; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1114; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1115; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1116; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1117; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1118; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1119; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1120; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1121; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1122; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1123; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1124; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 1125; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 1126; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 1127; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte 
Folded Spill 1128; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 1129; STRIDED-NEXT: ldnt1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0] 1130; STRIDED-NEXT: //APP 1131; STRIDED-NEXT: nop 1132; STRIDED-NEXT: //NO_APP 1133; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1134; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1135; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1136; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 1137; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1138; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1139; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1140; STRIDED-NEXT: mov z2.d, z8.d 1141; STRIDED-NEXT: mov z3.d, z12.d 1142; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1143; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1144; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1145; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1146; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1147; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 1148; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 1149; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 1150; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 1151; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 1152; STRIDED-NEXT: mov z1.d, z4.d 1153; STRIDED-NEXT: addvl sp, sp, #17 1154; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1155; STRIDED-NEXT: ret 1156; 1157; CONTIGUOUS-LABEL: ldnt1_x4_i16_z0_z4_z8_z12: 1158; CONTIGUOUS: // %bb.0: 1159; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1160; CONTIGUOUS-NEXT: addvl sp, sp, #-15 1161; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1162; CONTIGUOUS-NEXT: ptrue pn8.b 1163; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1164; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill 1165; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1166; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1167; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1168; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1169; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1170; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1171; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1172; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1173; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1174; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1175; CONTIGUOUS-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill 1176; CONTIGUOUS-NEXT: addvl sp, sp, #-4 1177; CONTIGUOUS-NEXT: mov p8.b, p0.b 1178; CONTIGUOUS-NEXT: ldnt1h { z0.h - z3.h }, pn8/z, [x0] 1179; CONTIGUOUS-NEXT: str z0, [sp] 1180; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 1181; CONTIGUOUS-NEXT: str z2, [sp, #2, mul vl] 1182; CONTIGUOUS-NEXT: str z3, [sp, #3, mul vl] 1183; CONTIGUOUS-NEXT: //APP 1184; CONTIGUOUS-NEXT: nop 1185; CONTIGUOUS-NEXT: //NO_APP 1186; CONTIGUOUS-NEXT: ldr z0, [sp] 1187; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 1188; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] 1189; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] 1190; CONTIGUOUS-NEXT: addvl sp, sp, #4 1191; CONTIGUOUS-NEXT: ptrue pn8.b 1192; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1193; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1194; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] 
// 16-byte Folded Reload 1195; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1196; CONTIGUOUS-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload 1197; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1198; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1199; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1200; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1201; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1202; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1203; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1204; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload 1205; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1206; CONTIGUOUS-NEXT: addvl sp, sp, #15 1207; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1208; CONTIGUOUS-NEXT: ret 1209 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") %pn, ptr %ptr) 1210 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 1211 %res.v0 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 0 1212 %v0 = call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> %res.v0, i64 0) 1213 %res.v1 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 1 1214 %v1 = call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> %v0, <vscale x 8 x i16> %res.v1, i64 8) 1215 %res.v2 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16> } %res, 2 1216 %v2 = call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> %v1, <vscale x 8 x i16> %res.v2, i64 16) 1217 %res.v3 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 3 1218 %v3 = call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> %v2, <vscale x 8 x i16> %res.v3, i64 24) 1219 ret <vscale x 32 x i16> %v3 1220} 1221 1222define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %z1, target("aarch64.svcount") %pn, ptr %ptr, i64 %index) nounwind { 1223; STRIDED-LABEL: ldnt1_x4_i16_z0_z4_z8_z12_scalar: 1224; STRIDED: // %bb.0: 1225; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 1226; STRIDED-NEXT: addvl sp, sp, #-17 1227; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1228; STRIDED-NEXT: mov p8.b, p0.b 1229; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1230; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1231; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1232; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1233; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1234; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1235; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1236; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1237; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1238; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1239; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1240; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 1241; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 1242; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 1243; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 1244; 
STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 1245; STRIDED-NEXT: ldnt1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0, x1, lsl #1] 1246; STRIDED-NEXT: //APP 1247; STRIDED-NEXT: nop 1248; STRIDED-NEXT: //NO_APP 1249; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1250; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1251; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1252; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 1253; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1254; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1255; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1256; STRIDED-NEXT: mov z2.d, z8.d 1257; STRIDED-NEXT: mov z3.d, z12.d 1258; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1259; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1260; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1261; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1262; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1263; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 1264; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 1265; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 1266; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 1267; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 1268; STRIDED-NEXT: mov z1.d, z4.d 1269; STRIDED-NEXT: addvl sp, sp, #17 1270; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1271; STRIDED-NEXT: ret 1272; 1273; CONTIGUOUS-LABEL: ldnt1_x4_i16_z0_z4_z8_z12_scalar: 1274; CONTIGUOUS: // %bb.0: 1275; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1276; CONTIGUOUS-NEXT: addvl sp, sp, #-15 1277; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1278; CONTIGUOUS-NEXT: ptrue pn8.b 1279; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1280; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill 1281; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1282; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1283; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1284; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1285; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1286; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1287; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1288; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1289; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1290; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1291; CONTIGUOUS-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill 1292; CONTIGUOUS-NEXT: addvl sp, sp, #-4 1293; CONTIGUOUS-NEXT: mov p8.b, p0.b 1294; CONTIGUOUS-NEXT: ldnt1h { z0.h - z3.h }, pn8/z, [x0, x1, lsl #1] 1295; CONTIGUOUS-NEXT: str z0, [sp] 1296; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 1297; CONTIGUOUS-NEXT: str z2, [sp, #2, mul vl] 1298; CONTIGUOUS-NEXT: str z3, [sp, #3, mul vl] 1299; CONTIGUOUS-NEXT: //APP 1300; CONTIGUOUS-NEXT: nop 1301; CONTIGUOUS-NEXT: //NO_APP 1302; CONTIGUOUS-NEXT: ldr z0, [sp] 1303; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 1304; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] 1305; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] 1306; CONTIGUOUS-NEXT: addvl sp, sp, #4 1307; CONTIGUOUS-NEXT: ptrue pn8.b 1308; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1309; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1310; CONTIGUOUS-NEXT: ldr z21, [sp, 
#3, mul vl] // 16-byte Folded Reload 1311; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1312; CONTIGUOUS-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload 1313; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1314; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1315; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1316; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1317; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1318; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1319; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1320; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload 1321; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1322; CONTIGUOUS-NEXT: addvl sp, sp, #15 1323; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1324; CONTIGUOUS-NEXT: ret 1325 %base = getelementptr i16, ptr %ptr, i64 %index 1326 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") %pn, ptr %base) 1327 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 1328 %res.v0 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 0 1329 %v0 = call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> %res.v0, i64 0) 1330 %res.v1 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 1 1331 %v1 = call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> %v0, <vscale x 8 x i16> %res.v1, i64 8) 1332 %res.v2 
= extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 2 1333 %v2 = call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> %v1, <vscale x 8 x i16> %res.v2, i64 16) 1334 %res.v3 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res, 3 1335 %v3 = call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> %v2, <vscale x 8 x i16> %res.v3, i64 24) 1336 ret <vscale x 32 x i16> %v3 1337} 1338 1339define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { 1340; STRIDED-LABEL: ldnt1_x4_i32_z0_z4_z8_z12: 1341; STRIDED: // %bb.0: 1342; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 1343; STRIDED-NEXT: addvl sp, sp, #-17 1344; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1345; STRIDED-NEXT: mov p8.b, p0.b 1346; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1347; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1348; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1349; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1350; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1351; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1352; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1353; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1354; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1355; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1356; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1357; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 1358; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 1359; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 1360; STRIDED-NEXT: str z9, [sp, 
#15, mul vl] // 16-byte Folded Spill 1361; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 1362; STRIDED-NEXT: ldnt1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0] 1363; STRIDED-NEXT: //APP 1364; STRIDED-NEXT: nop 1365; STRIDED-NEXT: //NO_APP 1366; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1367; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1368; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1369; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 1370; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1371; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1372; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1373; STRIDED-NEXT: mov z2.d, z8.d 1374; STRIDED-NEXT: mov z3.d, z12.d 1375; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1376; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1377; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1378; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1379; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1380; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 1381; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 1382; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 1383; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 1384; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 1385; STRIDED-NEXT: mov z1.d, z4.d 1386; STRIDED-NEXT: addvl sp, sp, #17 1387; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1388; STRIDED-NEXT: ret 1389; 1390; CONTIGUOUS-LABEL: ldnt1_x4_i32_z0_z4_z8_z12: 1391; CONTIGUOUS: // %bb.0: 1392; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1393; CONTIGUOUS-NEXT: addvl sp, sp, #-15 1394; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1395; CONTIGUOUS-NEXT: ptrue pn8.b 1396; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1397; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill 1398; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1399; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1400; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1401; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1402; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1403; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1404; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1405; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1406; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1407; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1408; CONTIGUOUS-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill 1409; CONTIGUOUS-NEXT: addvl sp, sp, #-4 1410; CONTIGUOUS-NEXT: mov p8.b, p0.b 1411; CONTIGUOUS-NEXT: ldnt1w { z0.s - z3.s }, pn8/z, [x0] 1412; CONTIGUOUS-NEXT: str z0, [sp] 1413; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 1414; CONTIGUOUS-NEXT: str z2, [sp, #2, mul vl] 1415; CONTIGUOUS-NEXT: str z3, [sp, #3, mul vl] 1416; CONTIGUOUS-NEXT: //APP 1417; CONTIGUOUS-NEXT: nop 1418; CONTIGUOUS-NEXT: //NO_APP 1419; CONTIGUOUS-NEXT: ldr z0, [sp] 1420; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 1421; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] 1422; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] 1423; CONTIGUOUS-NEXT: addvl sp, sp, #4 1424; CONTIGUOUS-NEXT: ptrue pn8.b 1425; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1426; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1427; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] 
// 16-byte Folded Reload 1428; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1429; CONTIGUOUS-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload 1430; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1431; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1432; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1433; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1434; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1435; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1436; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1437; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload 1438; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1439; CONTIGUOUS-NEXT: addvl sp, sp, #15 1440; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1441; CONTIGUOUS-NEXT: ret 1442 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") %pn, ptr %ptr) 1443 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 1444 %res.v0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 0 1445 %v0 = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> %res.v0, i64 0) 1446 %res.v1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 1 1447 %v1 = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v0, <vscale x 4 x i32> %res.v1, i64 4) 1448 %res.v2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 
4 x i32>, <vscale x 4 x i32> } %res, 2 1449 %v2 = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %res.v2, i64 8) 1450 %res.v3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 3 1451 %v3 = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v2, <vscale x 4 x i32> %res.v3, i64 12) 1452 ret <vscale x 16 x i32> %v3 1453} 1454 1455define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %z1, target("aarch64.svcount") %pn, ptr %ptr, i64 %index) nounwind { 1456; STRIDED-LABEL: ldnt1_x4_i32_z0_z4_z8_z12_scalar: 1457; STRIDED: // %bb.0: 1458; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 1459; STRIDED-NEXT: addvl sp, sp, #-17 1460; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1461; STRIDED-NEXT: mov p8.b, p0.b 1462; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1463; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1464; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1465; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1466; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1467; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1468; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1469; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1470; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1471; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1472; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1473; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 1474; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 1475; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 1476; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 1477; 
STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 1478; STRIDED-NEXT: ldnt1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0, x1, lsl #2] 1479; STRIDED-NEXT: //APP 1480; STRIDED-NEXT: nop 1481; STRIDED-NEXT: //NO_APP 1482; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1483; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1484; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1485; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 1486; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1487; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1488; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1489; STRIDED-NEXT: mov z2.d, z8.d 1490; STRIDED-NEXT: mov z3.d, z12.d 1491; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1492; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1493; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1494; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1495; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1496; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 1497; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 1498; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 1499; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 1500; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 1501; STRIDED-NEXT: mov z1.d, z4.d 1502; STRIDED-NEXT: addvl sp, sp, #17 1503; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1504; STRIDED-NEXT: ret 1505; 1506; CONTIGUOUS-LABEL: ldnt1_x4_i32_z0_z4_z8_z12_scalar: 1507; CONTIGUOUS: // %bb.0: 1508; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1509; CONTIGUOUS-NEXT: addvl sp, sp, #-15 1510; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1511; CONTIGUOUS-NEXT: ptrue pn8.b 1512; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1513; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill 1514; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1515; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1516; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1517; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1518; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1519; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1520; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1521; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1522; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1523; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1524; CONTIGUOUS-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill 1525; CONTIGUOUS-NEXT: addvl sp, sp, #-4 1526; CONTIGUOUS-NEXT: mov p8.b, p0.b 1527; CONTIGUOUS-NEXT: ldnt1w { z0.s - z3.s }, pn8/z, [x0, x1, lsl #2] 1528; CONTIGUOUS-NEXT: str z0, [sp] 1529; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 1530; CONTIGUOUS-NEXT: str z2, [sp, #2, mul vl] 1531; CONTIGUOUS-NEXT: str z3, [sp, #3, mul vl] 1532; CONTIGUOUS-NEXT: //APP 1533; CONTIGUOUS-NEXT: nop 1534; CONTIGUOUS-NEXT: //NO_APP 1535; CONTIGUOUS-NEXT: ldr z0, [sp] 1536; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 1537; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] 1538; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] 1539; CONTIGUOUS-NEXT: addvl sp, sp, #4 1540; CONTIGUOUS-NEXT: ptrue pn8.b 1541; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1542; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1543; CONTIGUOUS-NEXT: ldr z21, [sp, 
#3, mul vl] // 16-byte Folded Reload 1544; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1545; CONTIGUOUS-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload 1546; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1547; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1548; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1549; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1550; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1551; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1552; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1553; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload 1554; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1555; CONTIGUOUS-NEXT: addvl sp, sp, #15 1556; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1557; CONTIGUOUS-NEXT: ret 1558 %base = getelementptr i32, ptr %ptr, i64 %index 1559 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") %pn, ptr %base) 1560 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 1561 %res.v0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 0 1562 %v0 = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> %res.v0, i64 0) 1563 %res.v1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 1 1564 %v1 = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v0, <vscale x 4 x i32> %res.v1, i64 4) 1565 %res.v2 
= extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 2 1566 %v2 = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %res.v2, i64 8) 1567 %res.v3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res, 3 1568 %v3 = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v2, <vscale x 4 x i32> %res.v3, i64 12) 1569 ret <vscale x 16 x i32> %v3 1570} 1571
; ldnt1_x4_i64_z0_z4_z8_z12: four-vector, 64-bit-element load through the
; @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64 intrinsic, addressed directly by %ptr.
; The four results are inserted into one <vscale x 8 x i64> at element
; offsets 0, 2, 4 and 6 and returned.
; The inline-asm "nop" between the load and the inserts lists every Z register
; except z0, z4, z8 and z12 as clobbered, so the loaded values can only stay
; in those four registers across it; otherwise they must go through memory.
; Expected output per check prefix (see the RUN lines at the top of the file):
;   - STRIDED prefix (+sme2, -force-streaming): one strided-register
;     ldnt1d { z0.d, z4.d, z8.d, z12.d }, then moves of z4, z8, z12 into z1-z3.
;   - CONTIGUOUS prefix (+sve2p1): ldnt1d { z0.d - z3.d }, with z0-z3 stored to
;     the stack before the nop and reloaded after it.
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
1572define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { 1573; STRIDED-LABEL: ldnt1_x4_i64_z0_z4_z8_z12: 1574; STRIDED: // %bb.0: 1575; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 1576; STRIDED-NEXT: addvl sp, sp, #-17 1577; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1578; STRIDED-NEXT: mov p8.b, p0.b 1579; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1580; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1581; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1582; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1583; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1584; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1585; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1586; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1587; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1588; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1589; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1590; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 1591; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 1592; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 1593; STRIDED-NEXT: str z9, [sp, #15, 
mul vl] // 16-byte Folded Spill 1594; STRIDED-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill 1595; STRIDED-NEXT: ldnt1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0] 1596; STRIDED-NEXT: //APP 1597; STRIDED-NEXT: nop 1598; STRIDED-NEXT: //NO_APP 1599; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1600; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1601; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1602; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 1603; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1604; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1605; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1606; STRIDED-NEXT: mov z2.d, z8.d 1607; STRIDED-NEXT: mov z3.d, z12.d 1608; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1609; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1610; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1611; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1612; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1613; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 1614; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 1615; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 1616; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 1617; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 1618; STRIDED-NEXT: mov z1.d, z4.d 1619; STRIDED-NEXT: addvl sp, sp, #17 1620; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1621; STRIDED-NEXT: ret 1622; 1623; CONTIGUOUS-LABEL: ldnt1_x4_i64_z0_z4_z8_z12: 1624; CONTIGUOUS: // %bb.0: 1625; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1626; CONTIGUOUS-NEXT: addvl sp, sp, #-15 1627; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1628; CONTIGUOUS-NEXT: ptrue pn8.b 1629; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1630; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill 1631; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1632; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1633; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1634; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1635; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1636; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1637; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1638; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1639; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1640; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1641; CONTIGUOUS-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill 1642; CONTIGUOUS-NEXT: addvl sp, sp, #-4 1643; CONTIGUOUS-NEXT: mov p8.b, p0.b 1644; CONTIGUOUS-NEXT: ldnt1d { z0.d - z3.d }, pn8/z, [x0] 1645; CONTIGUOUS-NEXT: str z0, [sp] 1646; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 1647; CONTIGUOUS-NEXT: str z2, [sp, #2, mul vl] 1648; CONTIGUOUS-NEXT: str z3, [sp, #3, mul vl] 1649; CONTIGUOUS-NEXT: //APP 1650; CONTIGUOUS-NEXT: nop 1651; CONTIGUOUS-NEXT: //NO_APP 1652; CONTIGUOUS-NEXT: ldr z0, [sp] 1653; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 1654; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] 1655; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] 1656; CONTIGUOUS-NEXT: addvl sp, sp, #4 1657; CONTIGUOUS-NEXT: ptrue pn8.b 1658; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1659; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1660; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] 
// 16-byte Folded Reload 1661; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1662; CONTIGUOUS-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload 1663; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1664; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1665; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1666; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1667; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1668; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1669; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1670; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload 1671; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1672; CONTIGUOUS-NEXT: addvl sp, sp, #15 1673; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1674; CONTIGUOUS-NEXT: ret 1675 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") %pn, ptr %ptr) 1676 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 1677 %res.v0 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 0 1678 %v0 = call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> %res.v0, i64 0) 1679 %res.v1 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 1 1680 %v1 = call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> %v0, <vscale x 2 x i64> %res.v1, i64 2) 1681 %res.v2 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x 
i64>, <vscale x 2 x i64> } %res, 2 1682 %v2 = call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> %v1, <vscale x 2 x i64> %res.v2, i64 4) 1683 %res.v3 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 3 1684 %v3 = call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> %v2, <vscale x 2 x i64> %res.v3, i64 6) 1685 ret <vscale x 8 x i64> %v3 1686} 1687
; ldnt1_x4_i64_z0_z4_z8_z12_scalar: four-vector, 64-bit-element load through
; @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64 where the address is
; getelementptr i64, ptr %ptr, i64 %index rather than %ptr itself.
; Both check prefixes expect the index to be folded into the load as the
; register-offset operand [x0, x1, lsl #3].
; The inline-asm "nop" lists every Z register except z0, z4, z8 and z12 as
; clobbered. The STRIDED prefix expects ldnt1d { z0.d, z4.d, z8.d, z12.d };
; the CONTIGUOUS prefix expects ldnt1d { z0.d - z3.d } with z0-z3 stored to the
; stack before the nop and reloaded after it.
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
1688define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %z1, target("aarch64.svcount") %pn, ptr %ptr, i64 %index) nounwind { 1689; STRIDED-LABEL: ldnt1_x4_i64_z0_z4_z8_z12_scalar: 1690; STRIDED: // %bb.0: 1691; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 1692; STRIDED-NEXT: addvl sp, sp, #-17 1693; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1694; STRIDED-NEXT: mov p8.b, p0.b 1695; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1696; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1697; STRIDED-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1698; STRIDED-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1699; STRIDED-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1700; STRIDED-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1701; STRIDED-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1702; STRIDED-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1703; STRIDED-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1704; STRIDED-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1705; STRIDED-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1706; STRIDED-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill 1707; STRIDED-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill 1708; STRIDED-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill 1709; STRIDED-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill 1710; STRIDED-NEXT: 
str z8, [sp, #16, mul vl] // 16-byte Folded Spill 1711; STRIDED-NEXT: ldnt1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0, x1, lsl #3] 1712; STRIDED-NEXT: //APP 1713; STRIDED-NEXT: nop 1714; STRIDED-NEXT: //NO_APP 1715; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1716; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1717; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1718; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload 1719; STRIDED-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1720; STRIDED-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1721; STRIDED-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1722; STRIDED-NEXT: mov z2.d, z8.d 1723; STRIDED-NEXT: mov z3.d, z12.d 1724; STRIDED-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1725; STRIDED-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1726; STRIDED-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1727; STRIDED-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1728; STRIDED-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1729; STRIDED-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload 1730; STRIDED-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload 1731; STRIDED-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload 1732; STRIDED-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload 1733; STRIDED-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload 1734; STRIDED-NEXT: mov z1.d, z4.d 1735; STRIDED-NEXT: addvl sp, sp, #17 1736; STRIDED-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1737; STRIDED-NEXT: ret 1738; 1739; CONTIGUOUS-LABEL: ldnt1_x4_i64_z0_z4_z8_z12_scalar: 1740; CONTIGUOUS: // %bb.0: 1741; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1742; CONTIGUOUS-NEXT: addvl sp, sp, #-15 1743; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill 1744; CONTIGUOUS-NEXT: ptrue pn8.b 1745; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill 1746; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill 1747; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill 1748; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill 1749; CONTIGUOUS-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill 1750; CONTIGUOUS-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill 1751; CONTIGUOUS-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill 1752; CONTIGUOUS-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill 1753; CONTIGUOUS-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill 1754; CONTIGUOUS-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill 1755; CONTIGUOUS-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill 1756; CONTIGUOUS-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill 1757; CONTIGUOUS-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill 1758; CONTIGUOUS-NEXT: addvl sp, sp, #-4 1759; CONTIGUOUS-NEXT: mov p8.b, p0.b 1760; CONTIGUOUS-NEXT: ldnt1d { z0.d - z3.d }, pn8/z, [x0, x1, lsl #3] 1761; CONTIGUOUS-NEXT: str z0, [sp] 1762; CONTIGUOUS-NEXT: str z1, [sp, #1, mul vl] 1763; CONTIGUOUS-NEXT: str z2, [sp, #2, mul vl] 1764; CONTIGUOUS-NEXT: str z3, [sp, #3, mul vl] 1765; CONTIGUOUS-NEXT: //APP 1766; CONTIGUOUS-NEXT: nop 1767; CONTIGUOUS-NEXT: //NO_APP 1768; CONTIGUOUS-NEXT: ldr z0, [sp] 1769; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] 1770; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] 1771; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] 1772; CONTIGUOUS-NEXT: addvl sp, sp, #4 1773; CONTIGUOUS-NEXT: ptrue pn8.b 1774; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload 1775; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload 1776; CONTIGUOUS-NEXT: ldr z21, [sp, 
#3, mul vl] // 16-byte Folded Reload 1777; CONTIGUOUS-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload 1778; CONTIGUOUS-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload 1779; CONTIGUOUS-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload 1780; CONTIGUOUS-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload 1781; CONTIGUOUS-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload 1782; CONTIGUOUS-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload 1783; CONTIGUOUS-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload 1784; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload 1785; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload 1786; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload 1787; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload 1788; CONTIGUOUS-NEXT: addvl sp, sp, #15 1789; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1790; CONTIGUOUS-NEXT: ret 1791 %base = getelementptr i64, ptr %ptr, i64 %index 1792 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") %pn, ptr %base) 1793 call void asm sideeffect "nop", "~{z1},~{z2},~{z3},~{z5},~{z6},~{z7},~{z9},~{z10},~{z11},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"() nounwind 1794 %res.v0 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 0 1795 %v0 = call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> %res.v0, i64 0) 1796 %res.v1 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 1 1797 %v1 = call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> %v0, <vscale x 2 x i64> %res.v1, i64 2) 1798 %res.v2 = 
extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 2 1799 %v2 = call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> %v1, <vscale x 2 x i64> %res.v2, i64 4) 1800 %res.v3 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res, 3 1801 %v3 = call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> %v2, <vscale x 2 x i64> %res.v3, i64 6) 1802 ret <vscale x 8 x i64> %v3 1803} 1804
; Declarations of the intrinsics called by the test functions, in three groups:
;   - llvm.vector.insert.*: inserts a single-vector part into a wider
;     two-part or four-part scalable vector (i8, i16, i32 and i64 elements).
;   - llvm.aarch64.sve.ldnt1.pn.x2.*: returns two vectors; takes a
;     target("aarch64.svcount") predicate and a pointer.
;   - llvm.aarch64.sve.ldnt1.pn.x4.*: the same, returning four vectors.
; Only the i32 and i64 x4 forms are called in the functions just above;
; presumably the remaining declarations serve tests earlier in the file.
1805declare <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8>, <vscale x 16 x i8>, i64) 1806declare <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16>, <vscale x 8 x i16>, i64) 1807declare <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32>, <vscale x 4 x i32>, i64) 1808declare <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64>, <vscale x 2 x i64>, i64) 1809declare <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8>, <vscale x 16 x i8>, i64) 1810declare <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16>, <vscale x 8 x i16>, i64) 1811declare <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64) 1812declare <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64>, <vscale x 2 x i64>, i64) 1813declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount"), ptr) 1814declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount"), ptr) 1815declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount"), ptr) 1816declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount"), ptr) 1817declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount"), ptr) 1818declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount"), ptr) 1819declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount"), ptr) 1820declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount"), ptr) 1821