; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs \
; RUN:     -asm-verbose=false | FileCheck %s

; Every function below loads through an llvm.arm.neon.vld{2,3,4}dup intrinsic,
; stores the loaded aggregate to %dest, and returns %src advanced by an offset.
; Two flavours exist for each element type:
;   *_update      - the offset is a constant equal to
;                   (number of vectors) * (element size in bytes); the expected
;                   load uses the `[r1]!` writeback form.
;   *_update_reg  - the offset is the i32 argument %inc; the expected load uses
;                   the `[r1], r2` register-writeback form.

; --- Aggregates of 64-bit vectors -------------------------------------------

%struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

%struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

%struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

%struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

; --- Aggregates of 128-bit vectors ------------------------------------------

%struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

%struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

%struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

; --- Intrinsic declarations: 64-bit vector results --------------------------

declare %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr, i32)

declare %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr, i32)

declare %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr, i32)

; --- Intrinsic declarations: 128-bit vector results -------------------------

declare %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr, i32)

declare %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr, i32)

declare %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr, i32)

; ============================================================================
; vld2dup, 64-bit vectors.
; The v1i64 variants are expected to come out as a plain vld1.64 of two
; registers rather than an all-lanes vld2.
; ============================================================================

define ptr @test_vld2_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u16_update:
; CHECK: vld2.16 {d16[], d17[]}, [r1]!
entry:
  %dup = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %next
}

define ptr @test_vld2_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u16_update_reg:
; CHECK: vld2.16 {d16[], d17[]}, [r1], r2
entry:
  %dup = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld2_dup_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_update:
; CHECK: vld2.32 {d16[], d17[]}, [r1]!
entry:
  %dup = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %next
}

define ptr @test_vld2_dup_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_update_reg:
; CHECK: vld2.32 {d16[], d17[]}, [r1], r2
entry:
  %dup = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld2_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u64_update:
; CHECK: vld1.64 {d16, d17}, [r1:64]!
entry:
  %dup = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %next
}

define ptr @test_vld2_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u64_update_reg:
; CHECK: vld1.64 {d16, d17}, [r1:64], r2
entry:
  %dup = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld2_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u8_update:
; CHECK: vld2.8 {d16[], d17[]}, [r1]!
entry:
  %dup = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 2
  ret ptr %next
}

define ptr @test_vld2_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u8_update_reg:
; CHECK: vld2.8 {d16[], d17[]}, [r1], r2
entry:
  %dup = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

; ============================================================================
; vld3dup, 64-bit vectors.
; The v1i64 variants are expected to come out as a three-register vld1.64.
; ============================================================================

define ptr @test_vld3_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u16_update:
; CHECK: vld3.16 {d16[], d17[], d18[]}, [r1]!
entry:
  %dup = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 6
  ret ptr %next
}

define ptr @test_vld3_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u16_update_reg:
; CHECK: vld3.16 {d16[], d17[], d18[]}, [r1], r2
entry:
  %dup = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld3_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u32_update:
; CHECK: vld3.32 {d16[], d17[], d18[]}, [r1]!
entry:
  %dup = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 12
  ret ptr %next
}

define ptr @test_vld3_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u32_update_reg:
; CHECK: vld3.32 {d16[], d17[], d18[]}, [r1], r2
entry:
  %dup = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld3_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u64_update:
; CHECK: vld1.64 {d16, d17, d18}, [r1]!
entry:
  %dup = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 24
  ret ptr %next
}

define ptr @test_vld3_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u64_update_reg:
; CHECK: vld1.64 {d16, d17, d18}, [r1], r2
entry:
  %dup = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld3_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u8_update:
; CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]!
entry:
  %dup = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 3
  ret ptr %next
}

define ptr @test_vld3_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u8_update_reg:
; CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r2
entry:
  %dup = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

; ============================================================================
; vld4dup, 64-bit vectors.
; The v1i64 variants are expected to come out as a four-register vld1.64.
; ============================================================================

define ptr @test_vld4_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u16_update:
; CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
entry:
  %dup = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %next
}

define ptr @test_vld4_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u16_update_reg:
; CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1], r2
entry:
  %dup = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld4_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u32_update:
; CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r1]!
entry:
  %dup = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %next
}

define ptr @test_vld4_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u32_update_reg:
; CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r1], r2
entry:
  %dup = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld4_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u64_update:
; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:64]!
entry:
  %dup = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 32
  ret ptr %next
}

define ptr @test_vld4_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u64_update_reg:
; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:64], r2
entry:
  %dup = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld4_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u8_update:
; CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]!
entry:
  %dup = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %next
}

define ptr @test_vld4_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u8_update_reg:
; CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r2
entry:
  %dup = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

; ============================================================================
; vld2dup, 128-bit vectors.
; Expected output is a pair of loads from the same address: the first fills
; the even-numbered D registers without writeback, the second fills the
; odd-numbered D registers and carries the address writeback.
; ============================================================================

; NOTE: the third parameter %dest0 is not referenced in the body.
define ptr @test_vld2q_dup_u16_update(ptr %dest, ptr %src, ptr %dest0) {
; CHECK-LABEL: test_vld2q_dup_u16_update:
; CHECK: vld2.16 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.16 {d17[], d19[]}, [r1]!
entry:
  %dup = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %next
}

define ptr @test_vld2q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u16_update_reg:
; CHECK: vld2.16 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.16 {d17[], d19[]}, [r1], r2
entry:
  %dup = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld2q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u32_update:
; CHECK: vld2.32 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.32 {d17[], d19[]}, [r1]!
entry:
  %dup = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %next
}

define ptr @test_vld2q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u32_update_reg:
; CHECK: vld2.32 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.32 {d17[], d19[]}, [r1], r2
entry:
  %dup = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld2q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u8_update:
; CHECK: vld2.8 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.8 {d17[], d19[]}, [r1]!
entry:
  %dup = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 2
  ret ptr %next
}

define ptr @test_vld2q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u8_update_reg:
; CHECK: vld2.8 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.8 {d17[], d19[]}, [r1], r2
entry:
  %dup = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x2_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

; ============================================================================
; vld3dup, 128-bit vectors.
; Same two-load shape as above, three registers per load.
; ============================================================================

define ptr @test_vld3q_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u16_update:
; CHECK: vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK: vld3.16 {d17[], d19[], d21[]}, [r1]!
entry:
  %dup = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 6
  ret ptr %next
}

define ptr @test_vld3q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u16_update_reg:
; CHECK: vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.16 {d17[], d19[], d21[]}, [r1], r2
entry:
  %dup = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld3q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u32_update:
; CHECK: vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK: vld3.32 {d17[], d19[], d21[]}, [r1]!
entry:
  %dup = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 12
  ret ptr %next
}

define ptr @test_vld3q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u32_update_reg:
; CHECK: vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.32 {d17[], d19[], d21[]}, [r1], r2
entry:
  %dup = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld3q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u8_update:
; CHECK: vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK: vld3.8 {d17[], d19[], d21[]}, [r1]!
entry:
  %dup = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 3
  ret ptr %next
}

define ptr @test_vld3q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u8_update_reg:
; CHECK: vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.8 {d17[], d19[], d21[]}, [r1], r2
entry:
  %dup = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x3_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

; ============================================================================
; vld4dup, 128-bit vectors.
; Same two-load shape as above, four registers per load.
; ============================================================================

define ptr @test_vld4q_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u16_update:
; CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK: vld4.16 {d17[], d19[], d21[], d23[]}, [r1]!
entry:
  %dup = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %next
}

define ptr @test_vld4q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u16_update_reg:
; CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.16 {d17[], d19[], d21[], d23[]}, [r1], r2
entry:
  %dup = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld4q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u32_update:
; CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK: vld4.32 {d17[], d19[], d21[], d23[]}, [r1]!
entry:
  %dup = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %next
}

define ptr @test_vld4q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u32_update_reg:
; CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.32 {d17[], d19[], d21[], d23[]}, [r1], r2
entry:
  %dup = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}

define ptr @test_vld4q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u8_update:
; CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK: vld4.8 {d17[], d19[], d21[], d23[]}, [r1]!
entry:
  %dup = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %next
}

define ptr @test_vld4q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u8_update_reg:
; CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.8 {d17[], d19[], d21[], d23[]}, [r1], r2
entry:
  %dup = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x4_t %dup, ptr %dest, align 8
  %next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next
}