; RUN: llc < %s -mtriple=armv8-linux-gnueabi --float-abi=hard -verify-machineinstrs \
; RUN: -asm-verbose=false | FileCheck %s

; Code generation test for the llvm.arm.neon.vld2dup / vld3dup / vld4dup
; intrinsics. Each function below makes a single intrinsic call on its pointer
; argument and returns the resulting aggregate; the directives inside each
; function pin the exact load instruction(s) expected before the `bx lr`
; return.
;
; NOTE(review): the trailing i32 operand passed to every intrinsic equals the
; element size in bytes in all calls here; it is presumably the alignment
; operand - confirm against the intrinsic definition.

; Aggregates of two, three and four 64-bit vectors, grouped by element type.
%struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

%struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

%struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

%struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

; Aggregates of two, three and four 128-bit vectors (no 64-bit-element variant
; is defined for this width in this file).
%struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

%struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

%struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

; Intrinsic declarations for the 64-bit vector forms. Every intrinsic takes a
; pointer plus an i32 and returns one of the aggregates defined above.
declare %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr, i32)

declare %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr, i32)

declare %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr, i32)

; Intrinsic declarations for the 128-bit vector forms.
declare %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr, i32)

declare %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr, i32)

declare %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr, i32)

; ---------------------------------------------------------------------------
; vld2dup, 64-bit vectors.
; For 8/16/32-bit elements a single vld2 using the `dN[]` register-list form
; on d0 and d1 is expected. The <1 x i64> variant instead expects a plain
; vld1.64 of two consecutive D registers.
; ---------------------------------------------------------------------------

; vld2dup on <4 x i16>, i32 operand 2.
define %struct.uint16x4x2_t @test_vld2_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld2_dup_u16:
; CHECK: vld2.16 {d0[], d1[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  ret %struct.uint16x4x2_t %tmp
}

; vld2dup on <2 x i32>, i32 operand 4.
define %struct.uint32x2x2_t @test_vld2_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld2_dup_u32:
; CHECK: vld2.32 {d0[], d1[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  ret %struct.uint32x2x2_t %tmp
}

; vld2dup on <1 x i64>, i32 operand 8. The expected instruction is vld1.64
; with a `:64` qualifier on the address.
define %struct.uint64x1x2_t @test_vld2_dup_u64(ptr %src) {
; CHECK-LABEL: test_vld2_dup_u64:
; CHECK: vld1.64 {d0, d1}, [r0:64]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  ret %struct.uint64x1x2_t %tmp
}

; vld2dup on <8 x i8>, i32 operand 1.
define %struct.uint8x8x2_t @test_vld2_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld2_dup_u8:
; CHECK: vld2.8 {d0[], d1[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  ret %struct.uint8x8x2_t %tmp
}

; ---------------------------------------------------------------------------
; vld3dup, 64-bit vectors: one vld3 on d0-d2, or vld1.64 for <1 x i64>.
; ---------------------------------------------------------------------------

; vld3dup on <4 x i16>, i32 operand 2.
define %struct.uint16x4x3_t @test_vld3_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld3_dup_u16:
; CHECK: vld3.16 {d0[], d1[], d2[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  ret %struct.uint16x4x3_t %tmp
}

; vld3dup on <2 x i32>, i32 operand 4.
define %struct.uint32x2x3_t @test_vld3_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld3_dup_u32:
; CHECK: vld3.32 {d0[], d1[], d2[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  ret %struct.uint32x2x3_t %tmp
}

; vld3dup on <1 x i64>, i32 operand 8.
; NOTE(review): unlike the two- and four-register 64-bit tests, the expected
; address here is a bare [r0] with no `:64` qualifier. This appears to be the
; pinned compiler output rather than an omission - confirm before changing.
define %struct.uint64x1x3_t @test_vld3_dup_u64(ptr %src) {
; CHECK-LABEL: test_vld3_dup_u64:
; CHECK: vld1.64 {d0, d1, d2}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  ret %struct.uint64x1x3_t %tmp
}

; vld3dup on <8 x i8>, i32 operand 1.
define %struct.uint8x8x3_t @test_vld3_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld3_dup_u8:
; CHECK: vld3.8 {d0[], d1[], d2[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  ret %struct.uint8x8x3_t %tmp
}

; ---------------------------------------------------------------------------
; vld4dup, 64-bit vectors: one vld4 on d0-d3, or vld1.64 for <1 x i64>.
; ---------------------------------------------------------------------------

; vld4dup on <4 x i16>, i32 operand 2.
define %struct.uint16x4x4_t @test_vld4_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld4_dup_u16:
; CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  ret %struct.uint16x4x4_t %tmp
}

; vld4dup on <2 x i32>, i32 operand 4.
define %struct.uint32x2x4_t @test_vld4_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld4_dup_u32:
; CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  ret %struct.uint32x2x4_t %tmp
}

; vld4dup on <1 x i64>, i32 operand 8. The expected instruction is vld1.64
; with a `:64` qualifier on the address.
define %struct.uint64x1x4_t @test_vld4_dup_u64(ptr %src) {
; CHECK-LABEL: test_vld4_dup_u64:
; CHECK: vld1.64 {d0, d1, d2, d3}, [r0:64]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  ret %struct.uint64x1x4_t %tmp
}

; vld4dup on <8 x i8>, i32 operand 1.
define %struct.uint8x8x4_t @test_vld4_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld4_dup_u8:
; CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  ret %struct.uint8x8x4_t %tmp
}

; ---------------------------------------------------------------------------
; vld2dup, 128-bit vectors.
; Each 128-bit test expects two consecutive loads from the same address [r0]:
; the first names the even-numbered D registers, the second the odd-numbered
; ones (presumably the low and high halves of each Q register - confirm).
; ---------------------------------------------------------------------------

; vld2dup on <8 x i16>, i32 operand 2.
define %struct.uint16x8x2_t @test_vld2q_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u16:
; CHECK: vld2.16 {d0[], d2[]}, [r0]
; CHECK-NEXT: vld2.16 {d1[], d3[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  ret %struct.uint16x8x2_t %tmp
}

; vld2dup on <4 x i32>, i32 operand 4.
define %struct.uint32x4x2_t @test_vld2q_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u32:
; CHECK: vld2.32 {d0[], d2[]}, [r0]
; CHECK-NEXT: vld2.32 {d1[], d3[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  ret %struct.uint32x4x2_t %tmp
}

; vld2dup on <16 x i8>, i32 operand 1.
define %struct.uint8x16x2_t @test_vld2q_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u8:
; CHECK: vld2.8 {d0[], d2[]}, [r0]
; CHECK-NEXT: vld2.8 {d1[], d3[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  ret %struct.uint8x16x2_t %tmp
}

; ---------------------------------------------------------------------------
; vld3dup, 128-bit vectors: d0/d2/d4 first, then d1/d3/d5.
; ---------------------------------------------------------------------------

; vld3dup on <8 x i16>, i32 operand 2.
define %struct.uint16x8x3_t @test_vld3q_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u16:
; CHECK: vld3.16 {d0[], d2[], d4[]}, [r0]
; CHECK-NEXT: vld3.16 {d1[], d3[], d5[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  ret %struct.uint16x8x3_t %tmp
}

; vld3dup on <4 x i32>, i32 operand 4.
define %struct.uint32x4x3_t @test_vld3q_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u32:
; CHECK: vld3.32 {d0[], d2[], d4[]}, [r0]
; CHECK-NEXT: vld3.32 {d1[], d3[], d5[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  ret %struct.uint32x4x3_t %tmp
}

; vld3dup on <16 x i8>, i32 operand 1.
define %struct.uint8x16x3_t @test_vld3q_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u8:
; CHECK: vld3.8 {d0[], d2[], d4[]}, [r0]
; CHECK-NEXT: vld3.8 {d1[], d3[], d5[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  ret %struct.uint8x16x3_t %tmp
}

; ---------------------------------------------------------------------------
; vld4dup, 128-bit vectors: d0/d2/d4/d6 first, then d1/d3/d5/d7.
; ---------------------------------------------------------------------------

; vld4dup on <8 x i16>, i32 operand 2.
define %struct.uint16x8x4_t @test_vld4q_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u16:
; CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
; CHECK-NEXT: vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  ret %struct.uint16x8x4_t %tmp
}

; vld4dup on <4 x i32>, i32 operand 4.
define %struct.uint32x4x4_t @test_vld4q_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u32:
; CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r0]
; CHECK-NEXT: vld4.32 {d1[], d3[], d5[], d7[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  ret %struct.uint32x4x4_t %tmp
}

; vld4dup on <16 x i8>, i32 operand 1.
define %struct.uint8x16x4_t @test_vld4q_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u8:
; CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r0]
; CHECK-NEXT: vld4.8 {d1[], d3[], d5[], d7[]}, [r0]
; CHECK-NEXT: bx lr
entry:
  %tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  ret %struct.uint8x16x4_t %tmp
}