; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer -slp-threshold=-18 < %s | FileCheck %s

; Make sure there's no SCEV assert when the indexes are for different
; sized address spaces

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

define void @slp_scev_assert(i32 %idx, i64 %tmp3) #0 {
; CHECK-LABEL: @slp_scev_assert(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = addrspacecast ptr addrspace(5) undef to ptr
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(5) undef, i32 [[IDX:%.*]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 [[TMP3:%.*]]
; CHECK-NEXT:    store i8 0, ptr addrspace(5) [[TMP2]], align 1
; CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT:    ret void
;
bb:
  %tmp = addrspacecast ptr addrspace(5) undef to ptr
  %tmp2 = getelementptr inbounds i8, ptr addrspace(5) undef, i32 %idx
  %tmp4 = getelementptr inbounds i8, ptr %tmp, i64 %tmp3
  store i8 0, ptr addrspace(5) %tmp2
  store i8 0, ptr %tmp4
  ret void
}

define void @multi_as_reduction_different_sized(ptr addrspace(3) %lds, i32 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_different_sized(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i32 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i32 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(3) %lds to ptr
  %add0 = add i32 %idx0, 2
  %add1 = add i64 %idx1, 1

  %lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
  %load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.lds.0
  %sub1 = sub i32 %load.flat.1, %load.lds.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}

; This should vectorize if using getUnderlyingObject
define void @multi_as_reduction_same_size(ptr addrspace(1) %global, i64 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_same_size(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[GLOBAL_1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[GLOBAL]], i64 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_GLOBAL_0:%.*]] = load i32, ptr addrspace(1) [[GLOBAL]], align 4
; CHECK-NEXT:    [[LOAD_GLOBAL_1:%.*]] = load i32, ptr addrspace(1) [[GLOBAL_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_GLOBAL_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_GLOBAL_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(1) %global to ptr
  %add0 = add i64 %idx0, 2
  %add1 = add i64 %idx1, 1

  %global.1 = getelementptr inbounds i32, ptr addrspace(1) %global, i64 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.global.0 = load i32, ptr addrspace(1) %global, align 4
  %load.global.1 = load i32, ptr addrspace(1) %global.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.global.0
  %sub1 = sub i32 %load.flat.1, %load.global.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}

; This should vectorize if using getUnderlyingObject
; The add is done in the same width, even though the address space size is smaller
define void @multi_as_reduction_different_sized_noncanon(ptr addrspace(3) %lds, i64 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_different_sized_noncanon(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i64 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(3) %lds to ptr
  %add0 = add i64 %idx0, 2
  %add1 = add i64 %idx1, 1

  %lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i64 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
  %load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.lds.0
  %sub1 = sub i32 %load.flat.1, %load.lds.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}

define void @slp_crash_on_addrspacecast() {
; CHECK-LABEL: @slp_crash_on_addrspacecast(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
; CHECK-NEXT:    [[P0:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
; CHECK-NEXT:    store i64 undef, ptr [[P0]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
; CHECK-NEXT:    [[P1:%.*]] = addrspacecast ptr addrspace(3) [[TMP1]] to ptr
; CHECK-NEXT:    store i64 undef, ptr [[P1]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
  %p0 = addrspacecast ptr addrspace(3) %0 to ptr
  store i64 undef, ptr %p0, align 8
  %1 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
  %p1 = addrspacecast ptr addrspace(3) %1 to ptr
  store i64 undef, ptr %p1, align 8
  ret void
}