; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=armv8-linux-gnueabi --float-abi=hard -verify-machineinstrs \
; RUN: -asm-verbose=false | FileCheck %s

; Every test below has the same shape:
;   1. call an @llvm.arm.neon.vldNdup intrinsic on %src,
;   2. store the returned aggregate to %dest,
;   3. return %src advanced either by a constant byte count or by %inc.
; The CHECK lines expect the pointer advance to appear as the writeback form
; of the load ("[r1]!" for the constant case, "[r1], r2" for the %inc case).

; ---- Aggregate types: 64-bit vectors ----------------------------------------

%struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

%struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

%struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

%struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

; ---- Aggregate types: 128-bit vectors ---------------------------------------

%struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

%struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

%struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

; ---- Intrinsic declarations: 64-bit vector results --------------------------
; Each intrinsic takes the source pointer and an i32 operand; the tests pass
; the element size in bytes for that operand.

declare %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr, i32)

declare %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr, i32)

declare %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr, i32)

; ---- Intrinsic declarations: 128-bit vector results -------------------------

declare %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr, i32)

declare %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr, i32)

declare %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr, i32)

; ==== vld2dup, 64-bit vectors =================================================

; vld2dup of <4 x i16>; returns %src + 4 bytes.
define ptr @test_vld2_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u16_update:
; CHECK: vld2.16 {d16[], d17[]}, [r1]!
; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
; CHECK-NEXT: vstr d17, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %next_src
}

; vld2dup of <4 x i16>; returns %src + %inc bytes.
define ptr @test_vld2_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u16_update_reg:
; CHECK: vld2.16 {d16[], d17[]}, [r1], r2
; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
; CHECK-NEXT: vstr d17, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld2dup of <2 x i32>; returns %src + 8 bytes.
; NOTE(review): sibling tests carry the element type in their name (u8/u16/u64);
; this one and its _reg variant omit "u32".
define ptr @test_vld2_dup_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_update:
; CHECK: vld2.32 {d16[], d17[]}, [r1]!
; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
; CHECK-NEXT: vstr d17, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %next_src
}

; vld2dup of <2 x i32>; returns %src + %inc bytes.
define ptr @test_vld2_dup_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_update_reg:
; CHECK: vld2.32 {d16[], d17[]}, [r1], r2
; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
; CHECK-NEXT: vstr d17, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld2dup of <1 x i64>; returns %src + 16 bytes.
; The expected instruction is a plain vld1.64 of two d-registers.
define ptr @test_vld2_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u64_update:
; CHECK: vld1.64 {d16, d17}, [r1:64]!
; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
; CHECK-NEXT: vstr d17, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %next_src
}

; vld2dup of <1 x i64>; returns %src + %inc bytes.
define ptr @test_vld2_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u64_update_reg:
; CHECK: vld1.64 {d16, d17}, [r1:64], r2
; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
; CHECK-NEXT: vstr d17, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld2dup of <8 x i8>; returns %src + 2 bytes.
define ptr @test_vld2_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u8_update:
; CHECK: vld2.8 {d16[], d17[]}, [r1]!
; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
; CHECK-NEXT: vstr d17, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 2
  ret ptr %next_src
}

; vld2dup of <8 x i8>; returns %src + %inc bytes.
define ptr @test_vld2_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u8_update_reg:
; CHECK: vld2.8 {d16[], d17[]}, [r1], r2
; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
; CHECK-NEXT: vstr d17, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; ==== vld3dup, 64-bit vectors =================================================

; vld3dup of <4 x i16>; returns %src + 6 bytes.
define ptr @test_vld3_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u16_update:
; CHECK: vld3.16 {d16[], d17[], d18[]}, [r1]!
; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
; CHECK-NEXT: vst1.16 {d17}, [r0:64]!
; CHECK-NEXT: vstr d18, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 6
  ret ptr %next_src
}

; vld3dup of <4 x i16>; returns %src + %inc bytes.
define ptr @test_vld3_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u16_update_reg:
; CHECK: vld3.16 {d16[], d17[], d18[]}, [r1], r2
; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
; CHECK-NEXT: vst1.16 {d17}, [r0:64]!
; CHECK-NEXT: vstr d18, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld3dup of <2 x i32>; returns %src + 12 bytes.
define ptr @test_vld3_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u32_update:
; CHECK: vld3.32 {d16[], d17[], d18[]}, [r1]!
; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
; CHECK-NEXT: vst1.32 {d17}, [r0:64]!
; CHECK-NEXT: vstr d18, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 12
  ret ptr %next_src
}

; vld3dup of <2 x i32>; returns %src + %inc bytes.
define ptr @test_vld3_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u32_update_reg:
; CHECK: vld3.32 {d16[], d17[], d18[]}, [r1], r2
; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
; CHECK-NEXT: vst1.32 {d17}, [r0:64]!
; CHECK-NEXT: vstr d18, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld3dup of <1 x i64>; returns %src + 24 bytes.
; The expected instruction is a plain vld1.64 of three d-registers.
define ptr @test_vld3_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u64_update:
; CHECK: vld1.64 {d16, d17, d18}, [r1]!
; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
; CHECK-NEXT: vst1.64 {d17}, [r0:64]!
; CHECK-NEXT: vstr d18, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 24
  ret ptr %next_src
}

; vld3dup of <1 x i64>; returns %src + %inc bytes.
define ptr @test_vld3_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u64_update_reg:
; CHECK: vld1.64 {d16, d17, d18}, [r1], r2
; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
; CHECK-NEXT: vst1.64 {d17}, [r0:64]!
; CHECK-NEXT: vstr d18, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld3dup of <8 x i8>; returns %src + 3 bytes.
define ptr @test_vld3_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u8_update:
; CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]!
; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
; CHECK-NEXT: vst1.8 {d17}, [r0:64]!
; CHECK-NEXT: vstr d18, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 3
  ret ptr %next_src
}

; vld3dup of <8 x i8>; returns %src + %inc bytes.
define ptr @test_vld3_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u8_update_reg:
; CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r2
; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
; CHECK-NEXT: vst1.8 {d17}, [r0:64]!
; CHECK-NEXT: vstr d18, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; ==== vld4dup, 64-bit vectors =================================================

; vld4dup of <4 x i16>; returns %src + 8 bytes.
define ptr @test_vld4_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u16_update:
; CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
; CHECK-NEXT: vst1.16 {d17}, [r0:64]!
; CHECK-NEXT: vst1.16 {d18}, [r0:64]!
; CHECK-NEXT: vstr d19, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %next_src
}

; vld4dup of <4 x i16>; returns %src + %inc bytes.
define ptr @test_vld4_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u16_update_reg:
; CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1], r2
; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
; CHECK-NEXT: vst1.16 {d17}, [r0:64]!
; CHECK-NEXT: vst1.16 {d18}, [r0:64]!
; CHECK-NEXT: vstr d19, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld4dup of <2 x i32>; returns %src + 16 bytes.
define ptr @test_vld4_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u32_update:
; CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r1]!
; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
; CHECK-NEXT: vst1.32 {d17}, [r0:64]!
; CHECK-NEXT: vst1.32 {d18}, [r0:64]!
; CHECK-NEXT: vstr d19, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %next_src
}

; vld4dup of <2 x i32>; returns %src + %inc bytes.
define ptr @test_vld4_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u32_update_reg:
; CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r1], r2
; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
; CHECK-NEXT: vst1.32 {d17}, [r0:64]!
; CHECK-NEXT: vst1.32 {d18}, [r0:64]!
; CHECK-NEXT: vstr d19, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld4dup of <1 x i64>; returns %src + 32 bytes.
; The expected instruction is a plain vld1.64 of four d-registers.
define ptr @test_vld4_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u64_update:
; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:64]!
; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
; CHECK-NEXT: vst1.64 {d17}, [r0:64]!
; CHECK-NEXT: vst1.64 {d18}, [r0:64]!
; CHECK-NEXT: vstr d19, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 32
  ret ptr %next_src
}

; vld4dup of <1 x i64>; returns %src + %inc bytes.
define ptr @test_vld4_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u64_update_reg:
; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:64], r2
; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
; CHECK-NEXT: vst1.64 {d17}, [r0:64]!
; CHECK-NEXT: vst1.64 {d18}, [r0:64]!
; CHECK-NEXT: vstr d19, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld4dup of <8 x i8>; returns %src + 4 bytes.
define ptr @test_vld4_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u8_update:
; CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]!
; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
; CHECK-NEXT: vst1.8 {d17}, [r0:64]!
; CHECK-NEXT: vst1.8 {d18}, [r0:64]!
; CHECK-NEXT: vstr d19, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %next_src
}

; vld4dup of <8 x i8>; returns %src + %inc bytes.
define ptr @test_vld4_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u8_update_reg:
; CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r2
; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
; CHECK-NEXT: vst1.8 {d17}, [r0:64]!
; CHECK-NEXT: vst1.8 {d18}, [r0:64]!
; CHECK-NEXT: vstr d19, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; ==== vld2dup, 128-bit vectors ================================================
; For the q-register variants the CHECK lines expect two dup loads from the
; same address (even then odd d-registers); only the second one carries the
; writeback.

; vld2dup of <8 x i16>; returns %src + 4 bytes.
; NOTE(review): %dest0 is never referenced in the body.
define ptr @test_vld2q_dup_u16_update(ptr %dest, ptr %src, ptr %dest0) {
; CHECK-LABEL: test_vld2q_dup_u16_update:
; CHECK: vld2.16 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.16 {d17[], d19[]}, [r1]!
; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %next_src
}

; vld2dup of <8 x i16>; returns %src + %inc bytes.
define ptr @test_vld2q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u16_update_reg:
; CHECK: vld2.16 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.16 {d17[], d19[]}, [r1], r2
; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld2dup of <4 x i32>; returns %src + 8 bytes.
define ptr @test_vld2q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u32_update:
; CHECK: vld2.32 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.32 {d17[], d19[]}, [r1]!
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %next_src
}

; vld2dup of <4 x i32>; returns %src + %inc bytes.
define ptr @test_vld2q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u32_update_reg:
; CHECK: vld2.32 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.32 {d17[], d19[]}, [r1], r2
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld2dup of <16 x i8>; returns %src + 2 bytes.
define ptr @test_vld2q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u8_update:
; CHECK: vld2.8 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.8 {d17[], d19[]}, [r1]!
; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 2
  ret ptr %next_src
}

; vld2dup of <16 x i8>; returns %src + %inc bytes.
define ptr @test_vld2q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u8_update_reg:
; CHECK: vld2.8 {d16[], d18[]}, [r1]
; CHECK-NEXT: vld2.8 {d17[], d19[]}, [r1], r2
; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x2_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; ==== vld3dup, 128-bit vectors ================================================

; vld3dup of <8 x i16>; returns %src + 6 bytes.
define ptr @test_vld3q_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u16_update:
; CHECK: vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.16 {d17[], d19[], d21[]}, [r1]!
; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.16 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 6
  ret ptr %next_src
}

; vld3dup of <8 x i16>; returns %src + %inc bytes.
define ptr @test_vld3q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u16_update_reg:
; CHECK: vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.16 {d17[], d19[], d21[]}, [r1], r2
; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.16 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld3dup of <4 x i32>; returns %src + 12 bytes.
define ptr @test_vld3q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u32_update:
; CHECK: vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.32 {d17[], d19[], d21[]}, [r1]!
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 12
  ret ptr %next_src
}

; vld3dup of <4 x i32>; returns %src + %inc bytes.
define ptr @test_vld3q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u32_update_reg:
; CHECK: vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.32 {d17[], d19[], d21[]}, [r1], r2
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld3dup of <16 x i8>; returns %src + 3 bytes.
define ptr @test_vld3q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u8_update:
; CHECK: vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.8 {d17[], d19[], d21[]}, [r1]!
; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.8 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 3
  ret ptr %next_src
}

; vld3dup of <16 x i8>; returns %src + %inc bytes.
define ptr @test_vld3q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u8_update_reg:
; CHECK: vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT: vld3.8 {d17[], d19[], d21[]}, [r1], r2
; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.8 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x3_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; ==== vld4dup, 128-bit vectors ================================================

; vld4dup of <8 x i16>; returns %src + 8 bytes.
define ptr @test_vld4q_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u16_update:
; CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.16 {d17[], d19[], d21[], d23[]}, [r1]!
; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.16 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.16 {d20, d21}, [r0]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %next_src
}

; vld4dup of <8 x i16>; returns %src + %inc bytes.
define ptr @test_vld4q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u16_update_reg:
; CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.16 {d17[], d19[], d21[], d23[]}, [r1], r2
; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.16 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.16 {d20, d21}, [r0]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld4dup of <4 x i32>; returns %src + 16 bytes.
define ptr @test_vld4q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u32_update:
; CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.32 {d17[], d19[], d21[], d23[]}, [r1]!
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.32 {d20, d21}, [r0]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %next_src
}

; vld4dup of <4 x i32>; returns %src + %inc bytes.
define ptr @test_vld4q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u32_update_reg:
; CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.32 {d17[], d19[], d21[], d23[]}, [r1], r2
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.32 {d20, d21}, [r0]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}

; vld4dup of <16 x i8>; returns %src + 4 bytes.
define ptr @test_vld4q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u8_update:
; CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.8 {d17[], d19[], d21[], d23[]}, [r1]!
; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.8 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.8 {d20, d21}, [r0]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %next_src
}

; vld4dup of <16 x i8>; returns %src + %inc bytes.
define ptr @test_vld4q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u8_update_reg:
; CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT: vld4.8 {d17[], d19[], d21[], d23[]}, [r1], r2
; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT: vst1.8 {d18, d19}, [r0]!
; CHECK-NEXT: vst1.8 {d20, d21}, [r0]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
entry:
  %dup = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x4_t %dup, ptr %dest, align 8
  %next_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %next_src
}