; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s

define i32 @ldp_int(ptr %p) nounwind {
; CHECK-LABEL: ldp_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp w8, w9, [x0]
; CHECK-NEXT:    add w0, w9, w8
; CHECK-NEXT:    ret
  %tmp = load i32, ptr %p, align 4
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  %tmp1 = load i32, ptr %add.ptr, align 4
  %add = add nsw i32 %tmp1, %tmp
  ret i32 %add
}

define i64 @ldp_sext_int(ptr %p) nounwind {
; CHECK-LABEL: ldp_sext_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldpsw x8, x9, [x0]
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %tmp = load i32, ptr %p, align 4
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  %tmp1 = load i32, ptr %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

define i64 @ldp_half_sext_res0_int(ptr %p) nounwind {
; CHECK-LABEL: ldp_half_sext_res0_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp w8, w9, [x0]
; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
; CHECK-NEXT:    sxtw x8, w8
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %tmp = load i32, ptr %p, align 4
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  %tmp1 = load i32, ptr %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = zext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

define i64 @ldp_half_sext_res1_int(ptr %p) nounwind {
; CHECK-LABEL: ldp_half_sext_res1_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp w8, w9, [x0]
; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
; CHECK-NEXT:    sxtw x9, w9
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %tmp = load i32, ptr %p, align 4
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  %tmp1 = load i32, ptr %add.ptr, align 4
  %sexttmp = zext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}


define i64 @ldp_long(ptr %p) nounwind {
; CHECK-LABEL: ldp_long:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp x8, x9, [x0]
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %tmp = load i64, ptr %p, align 8
  %add.ptr = getelementptr inbounds i64, ptr %p, i64 1
  %tmp1 = load i64, ptr %add.ptr, align 8
  %add = add nsw i64 %tmp1, %tmp
  ret i64 %add
}

define float @ldp_float(ptr %p) nounwind {
; CHECK-LABEL: ldp_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp s0, s1, [x0]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %tmp = load float, ptr %p, align 4
  %add.ptr = getelementptr inbounds float, ptr %p, i64 1
  %tmp1 = load float, ptr %add.ptr, align 4
  %add = fadd float %tmp, %tmp1
  ret float %add
}

define double @ldp_double(ptr %p) nounwind {
; CHECK-LABEL: ldp_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp d0, d1, [x0]
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
  %tmp = load double, ptr %p, align 8
  %add.ptr = getelementptr inbounds double, ptr %p, i64 1
  %tmp1 = load double, ptr %add.ptr, align 8
  %add = fadd double %tmp, %tmp1
  ret double %add
}

define <2 x double> @ldp_doublex2(ptr %p) nounwind {
; CHECK-LABEL: ldp_doublex2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp = load <2 x double>, ptr %p, align 16
  %add.ptr = getelementptr inbounds <2 x double>, ptr %p, i64 1
  %tmp1 = load <2 x double>, ptr %add.ptr, align 16
  %add = fadd <2 x double> %tmp, %tmp1
  ret <2 x double> %add
}

; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
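; In ldur_int the two i32 loads sit at offsets -4 and -8 from %a, both within
; the unscaled (ldur) immediate range, so they are expected to merge into a
; single ldp at [x0, #-8].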
define i32 @ldur_int(ptr %a) nounwind {
; CHECK-LABEL: ldur_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i32 -1
  %tmp1 = load i32, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %a, i32 -2
  %tmp2 = load i32, ptr %p2, align 2
  %tmp3 = add i32 %tmp1, %tmp2
  ret i32 %tmp3
}

define i64 @ldur_sext_int(ptr %a) nounwind {
; CHECK-LABEL: ldur_sext_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldpsw x9, x8, [x0, #-8]
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i32 -1
  %tmp1 = load i32, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %a, i32 -2
  %tmp2 = load i32, ptr %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @ldur_half_sext_int_res0(ptr %a) nounwind {
; CHECK-LABEL: ldur_half_sext_int_res0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
; CHECK-NEXT:    sxtw x9, w9
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i32 -1
  %tmp1 = load i32, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %a, i32 -2
  %tmp2 = load i32, ptr %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @ldur_half_sext_int_res1(ptr %a) nounwind {
; CHECK-LABEL: ldur_half_sext_int_res1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
; CHECK-NEXT:    sxtw x8, w8
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i32 -1
  %tmp1 = load i32, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %a, i32 -2
  %tmp2 = load i32, ptr %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}


define i64 @ldur_long(ptr %a) nounwind ssp {
; CHECK-LABEL: ldur_long:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp x9, x8, [x0, #-16]
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i64, ptr %a, i64 -1
  %tmp1 = load i64, ptr %p1, align 2
  %p2 = getelementptr inbounds i64, ptr %a, i64 -2
  %tmp2 = load i64, ptr %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

define float @ldur_float(ptr %a) {
; CHECK-LABEL: ldur_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp s1, s0, [x0, #-8]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds float, ptr %a, i64 -1
  %tmp1 = load float, ptr %p1, align 2
  %p2 = getelementptr inbounds float, ptr %a, i64 -2
  %tmp2 = load float, ptr %p2, align 2
  %tmp3 = fadd float %tmp1, %tmp2
  ret float %tmp3
}

define double @ldur_double(ptr %a) {
; CHECK-LABEL: ldur_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp d1, d0, [x0, #-16]
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds double, ptr %a, i64 -1
  %tmp1 = load double, ptr %p1, align 2
  %p2 = getelementptr inbounds double, ptr %a, i64 -2
  %tmp2 = load double, ptr %p2, align 2
  %tmp3 = fadd double %tmp1, %tmp2
  ret double %tmp3
}
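
; The same combine applies to the 128-bit vector loads below, which are
; expected to pair into a q-register ldp at [x0, #-32].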
define <2 x double> @ldur_doublex2(ptr %a) {
; CHECK-LABEL: ldur_doublex2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0, #-32]
; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds <2 x double>, ptr %a, i64 -1
  %tmp1 = load <2 x double>, ptr %p1, align 2
  %p2 = getelementptr inbounds <2 x double>, ptr %a, i64 -2
  %tmp2 = load <2 x double>, ptr %p2, align 2
  %tmp3 = fadd <2 x double> %tmp1, %tmp2
  ret <2 x double> %tmp3
}

; Now check some boundary conditions
define i64 @pairUpBarelyIn(ptr %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyIn:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp x9, x8, [x0, #-256]
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i64, ptr %a, i64 -31
  %tmp1 = load i64, ptr %p1, align 2
  %p2 = getelementptr inbounds i64, ptr %a, i64 -32
  %tmp2 = load i64, ptr %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyInSext(ptr %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInSext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldpsw x9, x8, [x0, #-256]
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i64 -63
  %tmp1 = load i32, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %a, i64 -64
  %tmp2 = load i32, ptr %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyInHalfSextRes0(ptr %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInHalfSextRes0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp w9, w8, [x0, #-256]
; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
; CHECK-NEXT:    sxtw x9, w9
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i64 -63
  %tmp1 = load i32, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %a, i64 -64
  %tmp2 = load i32, ptr %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyInHalfSextRes1(ptr %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInHalfSextRes1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp w9, w8, [x0, #-256]
; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
; CHECK-NEXT:    sxtw x8, w8
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i64 -63
  %tmp1 = load i32, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %a, i64 -64
  %tmp2 = load i32, ptr %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}
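
; In pairUpBarelyOut the lower load is at offset -264, just outside the
; unscaled (ldur) immediate range of [-256, 255], so a separate base register
; has to be materialized and no ldp should be formed before the add.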
define i64 @pairUpBarelyOut(ptr %a) nounwind ssp {
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; CHECK-LABEL: pairUpBarelyOut:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub x8, x0, #264
; CHECK-NEXT:    ldur x9, [x0, #-256]
; CHECK-NEXT:    ldr x8, [x8]
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i64, ptr %a, i64 -32
  %tmp1 = load i64, ptr %p1, align 2
  %p2 = getelementptr inbounds i64, ptr %a, i64 -33
  %tmp2 = load i64, ptr %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyOutSext(ptr %a) nounwind ssp {
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; CHECK-LABEL: pairUpBarelyOutSext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub x8, x0, #260
; CHECK-NEXT:    ldursw x9, [x0, #-256]
; CHECK-NEXT:    ldrsw x8, [x8]
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i64 -64
  %tmp1 = load i32, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %a, i64 -65
  %tmp2 = load i32, ptr %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @pairUpNotAligned(ptr %a) nounwind ssp {
; CHECK-LABEL: pairUpNotAligned:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur x8, [x0, #-143]
; CHECK-NEXT:    ldur x9, [x0, #-135]
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i64, ptr %a, i64 -18
  %bp1 = bitcast ptr %p1 to ptr
  %bp1p1 = getelementptr inbounds i8, ptr %bp1, i64 1
  %dp1 = bitcast ptr %bp1p1 to ptr
  %tmp1 = load i64, ptr %dp1, align 1

  %p2 = getelementptr inbounds i64, ptr %a, i64 -17
  %bp2 = bitcast ptr %p2 to ptr
  %bp2p1 = getelementptr inbounds i8, ptr %bp2, i64 1
  %dp2 = bitcast ptr %bp2p1 to ptr
  %tmp2 = load i64, ptr %dp2, align 1

  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

define i64 @pairUpNotAlignedSext(ptr %a) nounwind ssp {
; CHECK-LABEL: pairUpNotAlignedSext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldursw x8, [x0, #-71]
; CHECK-NEXT:    ldursw x9, [x0, #-67]
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %a, i64 -18
  %bp1 = bitcast ptr %p1 to ptr
  %bp1p1 = getelementptr inbounds i8, ptr %bp1, i64 1
  %dp1 = bitcast ptr %bp1p1 to ptr
  %tmp1 = load i32, ptr %dp1, align 1

  %p2 = getelementptr inbounds i32, ptr %a, i64 -17
  %bp2 = bitcast ptr %p2 to ptr
  %bp2p1 = getelementptr inbounds i8, ptr %bp2, i64 1
  %dp2 = bitcast ptr %bp2p1 to ptr
  %tmp2 = load i32, ptr %dp2, align 1

  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

declare void @use-ptr(ptr)

define i64 @ldp_sext_int_pre(ptr %p) nounwind {
; CHECK-LABEL: ldp_sext_int_pre:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    add x0, x0, #8
; CHECK-NEXT:    bl "use-ptr"
; CHECK-NEXT:    ldpsw x8, x9, [x19, #8]
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i32, ptr %p, i64 2
  call void @use-ptr(ptr %ptr)
  %add.ptr = getelementptr inbounds i32, ptr %ptr, i64 0
  %tmp = load i32, ptr %add.ptr, align 4
  %add.ptr1 = getelementptr inbounds i32, ptr %ptr, i64 1
  %tmp1 = load i32, ptr %add.ptr1, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}
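
; In the post-indexed variant the pointer advance past both elements is
; expected to fold into the paired load itself (ldpsw ..., [x0], #8).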
define i64 @ldp_sext_int_post(ptr %p) nounwind {
; CHECK-LABEL: ldp_sext_int_post:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    ldpsw x19, x20, [x0], #8
; CHECK-NEXT:    bl "use-ptr"
; CHECK-NEXT:    add x0, x20, x19
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %tmp = load i32, ptr %p, align 4
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  %tmp1 = load i32, ptr %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %ptr = getelementptr inbounds i32, ptr %add.ptr, i64 1
  call void @use-ptr(ptr %ptr)
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}