; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-stp-suppress=false -verify-machineinstrs -mcpu=cyclone -aarch64-enable-sink-fold=true | FileCheck %s

; Two i32 stores to %p and the element right after it are expected to be
; emitted as one store-pair of w registers.
define void @stp_int(i32 %a, i32 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp w0, w1, [x2]
; CHECK-NEXT:    ret
  store i32 %a, ptr %p, align 4
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  store i32 %b, ptr %add.ptr, align 4
  ret void
}

; Same pattern with i64 elements; a store-pair of x registers is expected.
define void @stp_long(i64 %a, i64 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_long:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x0, x1, [x2]
; CHECK-NEXT:    ret
  store i64 %a, ptr %p, align 8
  %add.ptr = getelementptr inbounds i64, ptr %p, i64 1
  store i64 %b, ptr %add.ptr, align 8
  ret void
}

; Same pattern with float elements; a store-pair of s registers is expected.
define void @stp_float(float %a, float %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp s0, s1, [x0]
; CHECK-NEXT:    ret
  store float %a, ptr %p, align 4
  %add.ptr = getelementptr inbounds float, ptr %p, i64 1
  store float %b, ptr %add.ptr, align 4
  ret void
}

; Same pattern with double elements; a store-pair of d registers is expected.
define void @stp_double(double %a, double %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d0, d1, [x0]
; CHECK-NEXT:    ret
  store double %a, ptr %p, align 8
  %add.ptr = getelementptr inbounds double, ptr %p, i64 1
  store double %b, ptr %add.ptr, align 8
  ret void
}

; Same pattern with <2 x double> elements; a store-pair of q registers is
; expected.
define void @stp_doublex2(<2 x double> %a, <2 x double> %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_doublex2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  store <2 x double> %a, ptr %p, align 16
  %add.ptr = getelementptr inbounds <2 x double>, ptr %p, i64 1
  store <2 x double> %b, ptr %add.ptr, align 16
  ret void
}

; Test the load/store optimizer---combine sturs into an stp, if appropriate
; Each stur_* function stores %a to element -1 and then %b to element -2 of
; %p, both with align 2. The expected store-pair uses a negative offset and
; lists %b's register first, since %b goes to the lower address.
define void @stur_int(i32 %a, i32 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp w1, w0, [x2, #-8]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %p, i32 -1
  store i32 %a, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %p, i32 -2
  store i32 %b, ptr %p2, align 2
  ret void
}

define void @stur_long(i64 %a, i64 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_long:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x1, x0, [x2, #-16]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i64, ptr %p, i32 -1
  store i64 %a, ptr %p1, align 2
  %p2 = getelementptr inbounds i64, ptr %p, i32 -2
  store i64 %b, ptr %p2, align 2
  ret void
}

define void @stur_float(float %a, float %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp s1, s0, [x0, #-8]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds float, ptr %p, i32 -1
  store float %a, ptr %p1, align 2
  %p2 = getelementptr inbounds float, ptr %p, i32 -2
  store float %b, ptr %p2, align 2
  ret void
}

define void @stur_double(double %a, double %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d1, d0, [x0, #-16]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds double, ptr %p, i32 -1
  store double %a, ptr %p1, align 2
  %p2 = getelementptr inbounds double, ptr %p, i32 -2
  store double %b, ptr %p2, align 2
  ret void
}

define void @stur_doublex2(<2 x double> %a, <2 x double> %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_doublex2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp q1, q0, [x0, #-32]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds <2 x double>, ptr %p, i32 -1
  store <2 x double> %a, ptr %p1, align 2
  %p2 = getelementptr inbounds <2 x double>, ptr %p, i32 -2
  store <2 x double> %b, ptr %p2, align 2
  ret void
}

; All four lanes are set to %v through constant lane indices 0-3; the expected
; code is a dup followed by one q-register store.
define void @splat_v4i32(i32 %v, ptr %p) {
; CHECK-LABEL: splat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v0.4s, w0
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
entry:
  %p17 = insertelement <4 x i32> undef, i32 %v, i32 0
  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
  store <4 x i32> %p20, ptr %p, align 4
  ret void
}

; Check that a store of a vector built by 4 insertelements that is not a
; splat vector does not get split. The first insertelement uses %v as a
; variable lane index, so the result is not a known splat.
define void @nosplat_v4i32(i32 %v, ptr %p) {
; CHECK-LABEL: nosplat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    bfi x8, x0, #2, #2
; CHECK-NEXT:    str w0, [x8]
; CHECK-NEXT:    ldr q0, [sp]
; CHECK-NEXT:    mov v0.s[1], w0
; CHECK-NEXT:    mov v0.s[2], w0
; CHECK-NEXT:    mov v0.s[3], w0
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
entry:
  %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
  store <4 x i32> %p20, ptr %p, align 4
  ret void
}

; Check that a store of a vector built by insertelements into an incoming
; vector, which is therefore not a splat vector, does not get split.
; Only lanes 1-3 are overwritten with %v; lane 0 keeps the value from %vin.
; The expected code is three lane moves followed by one q-register store.
define void @nosplat2_v4i32(i32 %v, ptr %p, <4 x i32> %vin) {
; CHECK-LABEL: nosplat2_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.s[1], w0
; CHECK-NEXT:    mov v0.s[2], w0
; CHECK-NEXT:    mov v0.s[3], w0
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
entry:
  %p18 = insertelement <4 x i32> %vin, i32 %v, i32 1
  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
  store <4 x i32> %p20, ptr %p, align 4
  ret void
}

; Read of %b to compute %tmp2 shouldn't prevent formation of stp.
; The load between the two stores reads element 2 of %p, which neither store
; writes.
define i32 @stp_int_rar_hazard(i32 %a, i32 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_int_rar_hazard:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x2, #8]
; CHECK-NEXT:    stp w0, w1, [x2]
; CHECK-NEXT:    add w0, w8, w1
; CHECK-NEXT:    ret
  store i32 %a, ptr %p, align 4
  %ld.ptr = getelementptr inbounds i32, ptr %p, i64 2
  %tmp = load i32, ptr %ld.ptr, align 4
  %tmp2 = add i32 %tmp, %b
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  store i32 %b, ptr %add.ptr, align 4
  ret i32 %tmp2
}

; Read of %b to compute %tmp2 shouldn't prevent formation of stp.
; Here the load between the two stores reads element 1 of %p, the same slot
; the second store then overwrites; the expected code keeps that load ahead
; of the store-pair. %w0 is not referenced in the body; with it present the
; expected code uses w1, w2 and x3 for %a, %b and %p.
define i32 @stp_int_rar_hazard_after(i32 %w0, i32 %a, i32 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_int_rar_hazard_after:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x3, #4]
; CHECK-NEXT:    stp w1, w2, [x3]
; CHECK-NEXT:    add w0, w8, w2
; CHECK-NEXT:    ret
  store i32 %a, ptr %p, align 4
  %ld.ptr = getelementptr inbounds i32, ptr %p, i64 1
  %tmp = load i32, ptr %ld.ptr, align 4
  %tmp2 = add i32 %tmp, %b
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  store i32 %b, ptr %add.ptr, align 4
  ret i32 %tmp2
}