; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -S -loop-reduce < %s | FileCheck %s --check-prefix=IR
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=ASM
; Note: To update this test, run utils/update_test_checks.py and utils/update_llc_test_checks.py separately, on the opt and llc RUN lines respectively.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux-gnu"

; These tests check that the IR coming out of LSR keeps the input/output pointers as plain ptr values, without introducing casts,
; and that the scaled-index addressing mode is leveraged in the generated assembly, i.e. ld1h { z1.h }, p0/z, [x0, x8, lsl #1].
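;
; For illustration only (not matched by any CHECK line): LSR rewrites the
; element-typed GEP
;   %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
; into an i8-based GEP with an explicit byte offset, roughly
;   %tmp = shl i64 %indvar, 1                       ; hypothetical name; [[TMP0]] below
;   %uglygep = getelementptr i8, ptr %in, i64 %tmp  ; [[UGLYGEP1]] below
; and instruction selection is expected to fold that shift back into the
; scaled-index operand: [x0, x8, lsl #1] addresses x0 + (x8 << 1), so x8 holds
; an i16 element index rather than a byte offset.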
define void @ld_st_nxv8i16(ptr %in, ptr %out) {
; IR-LABEL: @ld_st_nxv8i16(
; IR-NEXT:  entry:
; IR-NEXT:    br label [[LOOP_PH:%.*]]
; IR:       loop.ph:
; IR-NEXT:    [[P_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
; IR-NEXT:    [[P_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[P_VEC_SPLATINSERT]], <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
; IR-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; IR-NEXT:    [[SCALED_VF:%.*]] = shl i64 [[VSCALE]], 3
; IR-NEXT:    br label [[LOOP:%.*]]
; IR:       loop:
; IR-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
; IR-NEXT:    [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
; IR-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
; IR-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
; IR-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
; IR-NEXT:    [[VAL:%.*]] = load <vscale x 8 x i16>, ptr [[UGLYGEP1]], align 16
; IR-NEXT:    [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
; IR-NEXT:    store <vscale x 8 x i16> [[ADDP_VEC]], ptr [[UGLYGEP]], align 16
; IR-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
; IR-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1024
; IR-NEXT:    br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
; IR:       loop.exit:
; IR-NEXT:    br label [[EXIT:%.*]]
; IR:       exit:
; IR-NEXT:    ret void
;
; ASM-LABEL: ld_st_nxv8i16:
; ASM:       // %bb.0: // %entry
; ASM-NEXT:    mov z0.h, #3 // =0x3
; ASM-NEXT:    ptrue p0.h
; ASM-NEXT:    mov x8, xzr
; ASM-NEXT:    cnth x9
; ASM-NEXT:  .LBB0_1: // %loop
; ASM-NEXT:    // =>This Inner Loop Header: Depth=1
; ASM-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; ASM-NEXT:    add z1.h, z1.h, z0.h
; ASM-NEXT:    st1h { z1.h }, p0, [x1, x8, lsl #1]
; ASM-NEXT:    add x8, x8, x9
; ASM-NEXT:    cmp x8, #1024
; ASM-NEXT:    b.ne .LBB0_1
; ASM-NEXT:  // %bb.2: // %exit
; ASM-NEXT:    ret
entry:
  br label %loop.ph

loop.ph:
  %p_vec.splatinsert = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
  %p_vec.splat = shufflevector <vscale x 8 x i16> %p_vec.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %vscale = call i64 @llvm.vscale.i64()
  %scaled_vf = shl i64 %vscale, 3
  br label %loop

loop:                                             ; preds = %loop, %loop.ph
  %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
  %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
  %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar
  %val = load <vscale x 8 x i16>, ptr %ptr.in, align 16
  %addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
  store <vscale x 8 x i16> %addp_vec, ptr %ptr.out, align 16
  %indvar.next = add nsw i64 %indvar, %scaled_vf
  %exit.cond = icmp eq i64 %indvar.next, 1024
  br i1 %exit.cond, label %loop.exit, label %loop

loop.exit:                                        ; preds = %loop
  br label %exit

exit:
  ret void
}

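; The masked variant below exercises the same LSR rewrite, but through the
; llvm.masked.load / llvm.masked.store intrinsics with an all-true predicate,
; and with a runtime trip count %n instead of a constant. The same
; [base, index, lsl #1] addressing mode should still be selected.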
define void @masked_ld_st_nxv8i16(ptr %in, ptr %out, i64 %n) {
; IR-LABEL: @masked_ld_st_nxv8i16(
; IR-NEXT:  entry:
; IR-NEXT:    br label [[LOOP_PH:%.*]]
; IR:       loop.ph:
; IR-NEXT:    [[P_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
; IR-NEXT:    [[P_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[P_VEC_SPLATINSERT]], <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
; IR-NEXT:    [[PTRUE_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
; IR-NEXT:    [[PTRUE_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i1> [[PTRUE_VEC_SPLATINSERT]], <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
; IR-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; IR-NEXT:    [[SCALED_VF:%.*]] = shl i64 [[VSCALE]], 3
; IR-NEXT:    br label [[LOOP:%.*]]
; IR:       loop:
; IR-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
; IR-NEXT:    [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
; IR-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
; IR-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
; IR-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
; IR-NEXT:    [[VAL:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[UGLYGEP1]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]], <vscale x 8 x i16> undef)
; IR-NEXT:    [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
; IR-NEXT:    call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[ADDP_VEC]], ptr [[UGLYGEP]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]])
; IR-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
; IR-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[N:%.*]], [[INDVAR_NEXT]]
; IR-NEXT:    br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
; IR:       loop.exit:
; IR-NEXT:    br label [[EXIT:%.*]]
; IR:       exit:
; IR-NEXT:    ret void
;
; ASM-LABEL: masked_ld_st_nxv8i16:
; ASM:       // %bb.0: // %entry
; ASM-NEXT:    mov z0.h, #3 // =0x3
; ASM-NEXT:    ptrue p0.h
; ASM-NEXT:    mov x8, xzr
; ASM-NEXT:    cnth x9
; ASM-NEXT:  .LBB1_1: // %loop
; ASM-NEXT:    // =>This Inner Loop Header: Depth=1
; ASM-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; ASM-NEXT:    add z1.h, z1.h, z0.h
; ASM-NEXT:    st1h { z1.h }, p0, [x1, x8, lsl #1]
; ASM-NEXT:    add x8, x8, x9
; ASM-NEXT:    cmp x2, x8
; ASM-NEXT:    b.ne .LBB1_1
; ASM-NEXT:  // %bb.2: // %exit
; ASM-NEXT:    ret
entry:
  br label %loop.ph

loop.ph:
  %p_vec.splatinsert = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
  %p_vec.splat = shufflevector <vscale x 8 x i16> %p_vec.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %ptrue_vec.splatinsert = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
  %ptrue_vec.splat = shufflevector <vscale x 8 x i1> %ptrue_vec.splatinsert, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
  %vscale = call i64 @llvm.vscale.i64()
  %scaled_vf = shl i64 %vscale, 3
  br label %loop

loop:                                             ; preds = %loop, %loop.ph
  %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
  %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
  %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar
  %val = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %ptr.in, i32 4, <vscale x 8 x i1> %ptrue_vec.splat, <vscale x 8 x i16> undef)
  %addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
  call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> %addp_vec, ptr %ptr.out, i32 4, <vscale x 8 x i1> %ptrue_vec.splat)
  %indvar.next = add nsw i64 %indvar, %scaled_vf
  %exit.cond = icmp eq i64 %indvar.next, %n
  br i1 %exit.cond, label %loop.exit, label %loop

loop.exit:                                        ; preds = %loop
  br label %exit

exit:
  ret void
}

declare i64 @llvm.vscale.i64()

declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)

declare void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16>, ptr, i32 immarg, <vscale x 8 x i1>)