; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOARG
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ARG

; Every function in this file has the same body: load two <16 x i32> vectors,
; add them, and store the sum back through %a. The functions differ only in
; the vscale_range attribute attached to them, so the expected assembly shows
; how that attribute steers fixed-length vector lowering when SVE is enabled.
;
; The file is compiled twice: once with default options and once with
; -aarch64-sve-vector-bits-min=512. Expectations that are identical for both
; invocations use the common prefix; only @func_vscale_none has separate
; expectations per invocation.

target triple = "aarch64-unknown-linux-gnu"

; No vscale_range attribute.
;  * default invocation: NEON code on four 128-bit q-register pairs.
;  * 512-bit invocation: one SVE load/add/store under a 16-lane predicate.
define void @func_vscale_none(ptr %a, ptr %b) #0 {
; CHECK-NOARG-LABEL: func_vscale_none:
; CHECK-NOARG:       // %bb.0:
; CHECK-NOARG-NEXT:    ldp q0, q3, [x1, #32]
; CHECK-NOARG-NEXT:    ldp q1, q2, [x0, #32]
; CHECK-NOARG-NEXT:    ldp q4, q6, [x1]
; CHECK-NOARG-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-NOARG-NEXT:    ldp q1, q5, [x0]
; CHECK-NOARG-NEXT:    add v2.4s, v2.4s, v3.4s
; CHECK-NOARG-NEXT:    add v1.4s, v1.4s, v4.4s
; CHECK-NOARG-NEXT:    add v3.4s, v5.4s, v6.4s
; CHECK-NOARG-NEXT:    stp q0, q2, [x0, #32]
; CHECK-NOARG-NEXT:    stp q1, q3, [x0]
; CHECK-NOARG-NEXT:    ret
;
; CHECK-ARG-LABEL: func_vscale_none:
; CHECK-ARG:       // %bb.0:
; CHECK-ARG-NEXT:    ptrue p0.s, vl16
; CHECK-ARG-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-ARG-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-ARG-NEXT:    add z0.s, z0.s, z1.s
; CHECK-ARG-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-ARG-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = add <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }

; vscale_range(1,1): the expected output is NEON code for both invocations,
; i.e. the 512-bit command-line minimum does not change the result here.
define void @func_vscale1_1(ptr %a, ptr %b) #1 {
; CHECK-LABEL: func_vscale1_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q3, [x1, #32]
; CHECK-NEXT:    ldp q1, q2, [x0, #32]
; CHECK-NEXT:    ldp q4, q6, [x1]
; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ldp q1, q5, [x0]
; CHECK-NEXT:    add v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    add v1.4s, v1.4s, v4.4s
; CHECK-NEXT:    add v3.4s, v5.4s, v6.4s
; CHECK-NEXT:    stp q0, q2, [x0, #32]
; CHECK-NEXT:    stp q1, q3, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = add <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

attributes #1 = { "target-features"="+sve" vscale_range(1,1) }

; vscale_range(2,2): the operation is split into two SVE halves, the upper one
; addressed at element offset 8 (x8, scaled by lsl #2). The predicate is an
; all-active ptrue with no lane-count pattern.
define void @func_vscale2_2(ptr %a, ptr %b) #2 {
; CHECK-LABEL: func_vscale2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x8, #8 // =0x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x1]
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    add z1.s, z2.s, z3.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; CHECK-NEXT:    st1w { z1.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = add <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

attributes #2 = { "target-features"="+sve" vscale_range(2,2) }

; vscale_range(2,4): same two-half split as @func_vscale2_2, but the predicate
; is explicitly limited to 8 lanes (vl8) instead of being all-active.
define void @func_vscale2_4(ptr %a, ptr %b) #3 {
; CHECK-LABEL: func_vscale2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    mov x8, #8 // =0x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x1]
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    add z1.s, z2.s, z3.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; CHECK-NEXT:    st1w { z1.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = add <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

attributes #3 = { "target-features"="+sve" vscale_range(2,4) }

; vscale_range(4,4): the whole <16 x i32> operation is done in a single SVE
; load/add/store under an all-active ptrue with no lane-count pattern.
define void @func_vscale4_4(ptr %a, ptr %b) #4 {
; CHECK-LABEL: func_vscale4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = add <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

attributes #4 = { "target-features"="+sve" vscale_range(4,4) }

; vscale_range(8,8): still a single SVE load/add/store, but the predicate is
; limited to 16 lanes (vl16) rather than being all-active.
define void @func_vscale8_8(ptr %a, ptr %b) #5 {
; CHECK-LABEL: func_vscale8_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = add <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

attributes #5 = { "target-features"="+sve" vscale_range(8,8) }