; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -o - -S -passes=load-store-vectorizer,dce %s | FileCheck %s

; Make sure LoadStoreVectorizer vectorizes the loads below.
; In order to prove that the vectorization is safe, it tries to
; match nested adds and find an expression that adds a constant
; value to an existing index and the result doesn't overflow.

target triple = "x86_64--"

; Positive case: every index is built from `add nsw` instructions, and the
; four i8 loads read bytes at indices v1 + (v0 - 1), v1 + v0, v1 + (v0 + 1)
; and v1 + (v0 + 2). The assertions below expect them to be replaced by a
; single <4 x i8> load through the first pointer (%tmp3).
define void @ld_v4i8_add_nsw(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_nsw(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP133:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP184:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP41]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
bb:
  ; Byte at index v1 + (v0 - 1).
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add nsw i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  ; Byte at index v1 + v0.
  %tmp5 = add nsw i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  ; Byte at index v1 + (v0 + 1).
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add nsw i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  ; Byte at index v1 + (v0 + 2).
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add nsw i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  ; Pack the four bytes, in load order, into one vector and store it to %dst.
  %tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

; Make sure we don't vectorize
; the loads below because the source of
; sext instructions doesn't have the nsw flag.
;
; Negative case: same shape as a chain of four adjacent byte loads, but every
; add that directly feeds a sext is a plain `add` (no nsw). The assertions
; below expect all four scalar i8 loads to remain in the output.

define void @ld_v4i8_add_not_safe(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_not_safe(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP9:%.*]] = add nsw i32 [[V0]], 1
; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[V1]], [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
; CHECK-NEXT:    [[TMP14:%.*]] = add nsw i32 [[V0]], 2
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[V1]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 1
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP8]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP13]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP18]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
bb:
  ; Index v1 + (v0 - 1); the outer add carries no nsw flag.
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  ; Index v1 + v0; no nsw flag.
  %tmp5 = add i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  ; Index v1 + (v0 + 1); the outer add carries no nsw flag.
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  ; Index v1 + (v0 + 2); the outer add carries no nsw flag.
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  ; Pack the four bytes, in load order, into one vector and store it to %dst.
  %tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}