; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

; Check that a call featuring a scalable-vector byval argument fed by a memcpy
; doesn't crash the compiler. The compiler previously assumed the byval type's
; size could be represented as a known constant amount.
define void @byval_caller(ptr %P) {
; CHECK-LABEL: @byval_caller(
; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[P:%.*]], i64 8, i1 false)
; CHECK-NEXT:    call void @byval_callee(ptr byval(<vscale x 1 x i8>) align 1 [[A]])
; CHECK-NEXT:    ret void
;
  %a = alloca i8
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %P, i64 8, i1 false)
  call void @byval_callee(ptr align 1 byval(<vscale x 1 x i8>) %a)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr align 4, ptr align 4, i64, i1)
declare void @byval_callee(ptr align 1 byval(<vscale x 1 x i8>))

; Check that two scalable-vector stores (overlapping, with a constant offset)
; do not crash the compiler when checking whether they can be merged into a
; single memset. The compiler previously assumed the stored values' sizes
; could be represented as a known constant amount.
define void @merge_stores_both_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_both_scalable(
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr.next
  ret void
}

; As above, but where the first store is scalable and the subsequent store(s)
; are not.
define void @merge_stores_first_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_first_scalable(
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store i8 0, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store i8 zeroinitializer, ptr %ptr.next
  ret void
}

; As above, but where the first store is not scalable and the subsequent
; store(s) are.
define void @merge_stores_second_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_second_scalable(
; CHECK-NEXT:    store i8 0, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store i8 zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr.next
  ret void
}

; Check that the call-slot optimization doesn't crash when encountering scalable types.
define void @callslotoptzn(<vscale x 4 x float> %val, ptr %out) {
; CHECK-LABEL: @callslotoptzn(
; CHECK-NEXT:    [[ALLOC:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT:    [[IDX:%.*]] = tail call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT:    [[STRIDE:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], <vscale x 4 x i32> [[IDX]]
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VAL:%.*]], <vscale x 4 x ptr> [[STRIDE]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT:    [[LI:%.*]] = load <vscale x 4 x float>, ptr [[ALLOC]], align 4
; CHECK-NEXT:    store <vscale x 4 x float> [[LI]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %alloc = alloca <vscale x 4 x float>, align 16
  %idx = tail call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
  %stride = getelementptr inbounds float, ptr %alloc, <vscale x 4 x i32> %idx
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %stride, i32 4, <vscale x 4 x i1> splat (i1 true))
  %li = load <vscale x 4 x float>, ptr %alloc, align 4
  store <vscale x 4 x float> %li, ptr %out, align 4
  ret void
}

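; Aggregate types containing scalable vectors, used by the memmove tests below.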
%0 = type { <vscale x 8 x i8> }
%1 = type { <vscale x 8 x i8>, <vscale x 8 x i8> }

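; Check that a load/store pair of a scalable vector is handled without
; crashing; the pair is kept as-is rather than being turned into a memmove.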
define void @memmove_vector(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_vector(
; CHECK-NEXT:    [[V:%.*]] = load <vscale x 8 x i8>, ptr [[A:%.*]], align 1
; CHECK-NEXT:    store <vscale x 8 x i8> [[V]], ptr [[B:%.*]], align 1
; CHECK-NEXT:    ret void
;
  %v = load <vscale x 8 x i8>, ptr %a, align 1
  store <vscale x 8 x i8> %v, ptr %b, align 1
  ret void
}

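; Check that a load/store of an aggregate wrapping a scalable vector is
; lowered to an llvm.memmove whose size is computed from vscale at runtime.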
define void @memmove_agg1(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_agg1(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 8
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[B:%.*]], ptr align 1 [[A:%.*]], i64 [[TMP2]], i1 false)
; CHECK-NEXT:    ret void
;
  %v = load %0, ptr %a, align 1
  store %0 %v, ptr %b, align 1
  ret void
}

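; As above, but with an aggregate holding two scalable vectors, doubling the
; runtime size.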
define void @memmove_agg2(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_agg2(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 16
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[B:%.*]], ptr align 1 [[A:%.*]], i64 [[TMP2]], i1 false)
; CHECK-NEXT:    ret void
;
  %v = load %1, ptr %a, align 1
  store %1 %v, ptr %b, align 1
  ret void
}

declare <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)