; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048

; Fixed-length <N x float> load+store round-trips: vectors that fit in a NEON
; register (64/128-bit) must use plain ldr/str; larger fixed-length vectors
; lower to SVE ld1w/st1w under a vlN-constrained predicate, splitting into
; multiple chunks when the vector exceeds the minimum SVE register size.

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
define void @load_v2f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %load = load <2 x float>, ptr %a
  store <2 x float> %load, ptr %b
  ret void
}

; Don't use SVE for 128-bit vectors.
define void @load_v4f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %load = load <4 x float>, ptr %a
  store <4 x float> %load, ptr %b
  ret void
}

define void @load_v8f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load <8 x float>, ptr %a
  store <8 x float> %load, ptr %b
  ret void
}

define void @load_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v16f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl16
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v16f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl16
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <16 x float>, ptr %a
  store <16 x float> %load, ptr %b
  ret void
}

define void @load_v32f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v32f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    mov x9, #24 // =0x18
; VBITS_GE_256-NEXT:    mov x10, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z2.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v32f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v32f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v32f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl32
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <32 x float>, ptr %a
  store <32 x float> %load, ptr %b
  ret void
}

define void @load_v64f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v64f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    mov x9, #24 // =0x18
; VBITS_GE_256-NEXT:    mov x10, #16 // =0x10
; VBITS_GE_256-NEXT:    mov x11, #48 // =0x30
; VBITS_GE_256-NEXT:    mov x12, #40 // =0x28
; VBITS_GE_256-NEXT:    mov x13, #56 // =0x38
; VBITS_GE_256-NEXT:    mov x14, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x11, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0, x13, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z4.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z5.s }, p0/z, [x0, x14, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z6.s }, p0/z, [x0, x12, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z7.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x11, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z2.s }, p0, [x1, x13, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z5.s }, p0, [x1, x14, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z6.s }, p0, [x1, x12, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z4.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z7.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v64f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_512-NEXT:    mov x9, #48 // =0x30
; VBITS_GE_512-NEXT:    mov x10, #16 // =0x10
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z2.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z3.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v64f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v64f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <64 x float>, ptr %a
  store <64 x float> %load, ptr %b
  ret void
}

attributes #0 = { "target-features"="+sve" }