; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

; Check that a call featuring a scalable-vector byval argument fed by a memcpy
; doesn't crash the compiler. The pass previously assumed the byval type's size
; could be represented as a known constant amount.
define void @byval_caller(ptr %P) {
; CHECK-LABEL: @byval_caller(
; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[P:%.*]], i64 8, i1 false)
; CHECK-NEXT:    call void @byval_callee(ptr byval(<vscale x 1 x i8>) align 1 [[A]])
; CHECK-NEXT:    ret void
;
  %a = alloca i8
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %P, i64 8, i1 false)
  call void @byval_callee(ptr align 1 byval(<vscale x 1 x i8>) %a)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr align 4, ptr align 4, i64, i1)
declare void @byval_callee(ptr align 1 byval(<vscale x 1 x i8>))

; Check that two overlapping scalable-vector stores (at a constant offset from
; one another) do not crash the compiler when it checks whether they can be
; merged into a single memset. The pass previously assumed the stored values'
; sizes could be represented as a known constant amount.
define void @merge_stores_both_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_both_scalable(
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr.next
  ret void
}

; As above, but where the first store is scalable and the subsequent store is not.
define void @merge_stores_first_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_first_scalable(
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store i8 0, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store i8 zeroinitializer, ptr %ptr.next
  ret void
}

; As above, but where the first store is not scalable and the subsequent store is.
define void @merge_stores_second_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_second_scalable(
; CHECK-NEXT:    store i8 0, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store i8 zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr.next
  ret void
}

; Check that the call-slot optimization doesn't crash when encountering scalable types.
define void @callslotoptzn(<vscale x 4 x float> %val, ptr %out) {
; CHECK-LABEL: @callslotoptzn(
; CHECK-NEXT:    [[ALLOC:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT:    [[IDX:%.*]] = tail call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT:    [[STRIDE:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], <vscale x 4 x i32> [[IDX]]
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VAL:%.*]], <vscale x 4 x ptr> [[STRIDE]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT:    [[LI:%.*]] = load <vscale x 4 x float>, ptr [[ALLOC]], align 4
; CHECK-NEXT:    store <vscale x 4 x float> [[LI]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %alloc = alloca <vscale x 4 x float>, align 16
  %idx = tail call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
  %stride = getelementptr inbounds float, ptr %alloc, <vscale x 4 x i32> %idx
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %stride, i32 4, <vscale x 4 x i1> splat (i1 true))
  %li = load <vscale x 4 x float>, ptr %alloc, align 4
  store <vscale x 4 x float> %li, ptr %out, align 4
  ret void
}

%0 = type { <vscale x 8 x i8> }
%1 = type { <vscale x 8 x i8>, <vscale x 8 x i8> }

define void @memmove_vector(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_vector(
; CHECK-NEXT:    [[V:%.*]] = load <vscale x 8 x i8>, ptr [[A:%.*]], align 1
; CHECK-NEXT:    store <vscale x 8 x i8> [[V]], ptr [[B:%.*]], align 1
; CHECK-NEXT:    ret void
;
  %v = load <vscale x 8 x i8>, ptr %a, align 1
  store <vscale x 8 x i8> %v, ptr %b, align 1
  ret void
}

define void @memmove_agg1(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_agg1(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 8
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[B:%.*]], ptr align 1 [[A:%.*]], i64 [[TMP2]], i1 false)
; CHECK-NEXT:    ret void
;
  %v = load %0, ptr %a, align 1
  store %0 %v, ptr %b, align 1
  ret void
}

define void @memmove_agg2(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_agg2(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 16
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[B:%.*]], ptr align 1 [[A:%.*]], i64 [[TMP2]], i1 false)
; CHECK-NEXT:    ret void
;
  %v = load %1, ptr %a, align 1
  store %1 %v, ptr %b, align 1
  ret void
}

declare <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)