; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv8-arm-none-eabi"

; Turning a vld1 intrinsic into an llvm load is beneficial
; when the underlying object being addressed comes from a
; constant, since we get constant-folding for free.
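; A minimal sketch (hypothetical, not covered by the autogenerated checks in
; this file) of the motivating case: once the vld1 below is rewritten as a
; plain load, the load from the constant global @cst can be constant-folded
; to <i64 1, i64 2>.
;
;   @cst = internal constant <2 x i64> <i64 1, i64 2>, align 8
;
;   define <2 x i64> @vld1_from_constant() {
;     %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr @cst, i32 8)
;     ret <2 x i64> %vld1
;   }
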
; Bail out of the optimization if the alignment is not a constant.
define <2 x i64> @vld1_align(ptr %ptr, i32 %align) {
; CHECK-LABEL: @vld1_align(
; CHECK-NEXT:    [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[PTR:%.*]], i32 [[ALIGN:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[VLD1]]
;
  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 %align)
  ret <2 x i64> %vld1
}
; Bail out of the optimization if the alignment is not a power of 2.
define <2 x i64> @vld1_align_pow2(ptr %ptr) {
; CHECK-LABEL: @vld1_align_pow2(
; CHECK-NEXT:    [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[PTR:%.*]], i32 3)
; CHECK-NEXT:    ret <2 x i64> [[VLD1]]
;
  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 3)
  ret <2 x i64> %vld1
}

define <8 x i8> @vld1_8x8(ptr %ptr) {
; CHECK-LABEL: @vld1_8x8(
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    ret <8 x i8> [[TMP2]]
;
  %vld1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %ptr, i32 1)
  ret <8 x i8> %vld1
}

define <4 x i16> @vld1_4x16(ptr %ptr) {
; CHECK-LABEL: @vld1_4x16(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    ret <4 x i16> [[TMP2]]
;
  %vld1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %ptr, i32 2)
  ret <4 x i16> %vld1
}

define <2 x i32> @vld1_2x32(ptr %ptr) {
; CHECK-LABEL: @vld1_2x32(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
;
  %vld1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr %ptr, i32 4)
  ret <2 x i32> %vld1
}

define <1 x i64> @vld1_1x64(ptr %ptr) {
; CHECK-LABEL: @vld1_1x64(
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP2]]
;
  %vld1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %ptr, i32 8)
  ret <1 x i64> %vld1
}

define <8 x i16> @vld1_8x16(ptr %ptr) {
; CHECK-LABEL: @vld1_8x16(
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %vld1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %ptr, i32 2)
  ret <8 x i16> %vld1
}

define <16 x i8> @vld1_16x8(ptr %ptr) {
; CHECK-LABEL: @vld1_16x8(
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
;
  %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %ptr, i32 1)
  ret <16 x i8> %vld1
}

define <4 x i32> @vld1_4x32(ptr %ptr) {
; CHECK-LABEL: @vld1_4x32(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %vld1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %ptr, i32 4)
  ret <4 x i32> %vld1
}

define <2 x i64> @vld1_2x64(ptr %ptr) {
; CHECK-LABEL: @vld1_2x64(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 8)
  ret <2 x i64> %vld1
}

declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr, i32)
declare <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr, i32)
declare <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr, i32)
declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr, i32)
declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr, i32)
declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr, i32)
declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr, i32)
declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr, i32)