; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; arm64 has a separate copy due to intrinsics
;
; Every function in this file issues two chained NEON lane loads
; (ld2lane / ld3lane / ld4lane, lane index 1). Element 0 of the first load's
; result is passed as the first vector operand of the second load, and only
; element 0 of the second result is returned.
;
; The expected assembly contains whole-register `mov vN.16b, vM.16b` copies
; that place the operands in consecutive vector registers ahead of each
; ld2/ld3/ld4 instruction. Going by the copyTuple names, checking those
; register-tuple copies is presumably the purpose of the test.
;
; The assertion lines are autogenerated (see the NOTE on the first line):
; regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.

; Pair of Q registers.
;   1st ld2lane through %a: operands splat(-1), splat(2).
;   2nd ld2lane through %b: operands <element 0 of 1st result>, splat(-1).
define <4 x i32> @copyTuple.QPair(ptr %a, ptr %b) {
; CHECK-LABEL: copyTuple.QPair:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v3.4s, #2
; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
; CHECK-NEXT:    mov v1.16b, v3.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ld2 { v0.s, v1.s }[1], [x0]
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    ld2 { v0.s, v1.s }[1], [x1]
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1
; CHECK-NEXT:    ret
entry:
  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i64 1, ptr %a)
  %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
  %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i64 1, ptr %b)
  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0
  ret <4 x i32> %vld1.fca.0.extract
}

; Triple of Q registers.
;   1st ld3lane through %a: operands splat(-1), %c, %c.
;   2nd ld3lane through %b: operands <element 0 of 1st result>, splat(-1), %c.
define <4 x i32> @copyTuple.QTriple(ptr %a, ptr %b, <4 x i32> %c) {
; CHECK-LABEL: copyTuple.QTriple:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q31_q0_q1
; CHECK-NEXT:    movi v31.2d, #0xffffffffffffffff
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    mov v2.16b, v31.16b
; CHECK-NEXT:    mov v3.16b, v0.16b
; CHECK-NEXT:    mov v4.16b, v1.16b
; CHECK-NEXT:    ld3 { v2.s, v3.s, v4.s }[1], [x0]
; CHECK-NEXT:    mov v3.16b, v31.16b
; CHECK-NEXT:    mov v4.16b, v0.16b
; CHECK-NEXT:    ld3 { v2.s, v3.s, v4.s }[1], [x1]
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)
  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, i64 1, ptr %b)
  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
  ret <4 x i32> %vld1.fca.0.extract
}

; Quad of Q registers.
;   1st ld4lane through %a: operands splat(-1), %c, %c, %c.
;   2nd ld4lane through %b: operands <element 0 of 1st result>, splat(-1), %c, %c.
define <4 x i32> @copyTuple.QQuad(ptr %a, ptr %b, <4 x i32> %c) {
; CHECK-LABEL: copyTuple.QQuad:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q31_q0_q1_q2
; CHECK-NEXT:    movi v31.2d, #0xffffffffffffffff
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    mov v2.16b, v0.16b
; CHECK-NEXT:    mov v3.16b, v31.16b
; CHECK-NEXT:    mov v4.16b, v0.16b
; CHECK-NEXT:    mov v5.16b, v1.16b
; CHECK-NEXT:    mov v6.16b, v2.16b
; CHECK-NEXT:    ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x0]
; CHECK-NEXT:    mov v4.16b, v31.16b
; CHECK-NEXT:    mov v5.16b, v0.16b
; CHECK-NEXT:    mov v6.16b, v0.16b
; CHECK-NEXT:    ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x1]
; CHECK-NEXT:    mov v0.16b, v3.16b
; CHECK-NEXT:    ret
entry:
  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)
  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %b)
  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
  ret <4 x i32> %vld1.fca.0.extract
}

; Lane-load intrinsics used above. Operand order, as the calls show: the N
; input vectors, then the i64 lane index, then the source pointer.
declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr)
declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr)
declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr)