; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer,dce -S -o - %s | FileCheck %s

; Exercises the load-store vectorizer on scalar i32 loads whose addresses are
; produced by select instructions. Each function below expects its scalar
; loads to be replaced by one vector load in the pass output.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Three loads, each through a select on the same condition %cnd. The k-th
; select chooses between element k of %a and element k of %b (k = 0, 1, 2),
; so on either arm the three addresses are consecutive i32 slots.
define void @base_case(i1 %cnd, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %out) {
; CHECK-LABEL: @base_case
; CHECK: load <3 x i32>
entry:
  %gep1 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 1
  %gep2 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 2
  %gep4 = getelementptr inbounds i32, ptr addrspace(1) %b, i64 1
  %gep5 = getelementptr inbounds i32, ptr addrspace(1) %b, i64 2
  %selected = select i1 %cnd, ptr addrspace(1) %a, ptr addrspace(1) %b
  %selected14 = select i1 %cnd, ptr addrspace(1) %gep1, ptr addrspace(1) %gep4
  %selected25 = select i1 %cnd, ptr addrspace(1) %gep2, ptr addrspace(1) %gep5
  %val0 = load i32, ptr addrspace(1) %selected, align 4
  %val1 = load i32, ptr addrspace(1) %selected14, align 4
  %val2 = load i32, ptr addrspace(1) %selected25, align 4
  ; Pack the loaded scalars into a vector and store it so the loads stay live
  ; through the dce pass that follows the vectorizer.
  %t0 = insertelement <3 x i32> undef, i32 %val0, i32 0
  %t1 = insertelement <3 x i32> %t0, i32 %val1, i32 1
  %t2 = insertelement <3 x i32> %t1, i32 %val2, i32 2
  store <3 x i32> %t2, ptr addrspace(1) %out
  ret void
}

; Two loads whose addresses are a constant-index GEP applied on top of a
; select, with the select operands built from byte offsets that are scaled by
; 4 using a mix of mul and shl. As the value names spell out, the first load
; reads at byte offset 40 (arm %a) or 44 (arm %b) past the 16x-scaled base and
; the second at 44 or 48, i.e. 4 bytes apart on either arm.
; NOTE(review): presumably the mul/shl mix is there so that adjacency has to be
; established through SCEV rather than by matching identical instructions, as
; the function name suggests - confirm against the pass implementation.
define void @scev_targeting_complex_case(i1 %cnd, ptr addrspace(1) %a, ptr addrspace(1) %b, i32 %base, ptr addrspace(1) %out) {
; CHECK-LABEL: @scev_targeting_complex_case
; CHECK: load <2 x i32>
entry:
  ; %base.x4 has its two low bits clear, so adding 1, 2 or 3 cannot carry.
  %base.x4 = shl i32 %base, 2
  %base.x4.p1 = add i32 %base.x4, 1
  %base.x4.p2 = add i32 %base.x4, 2
  %base.x4.p3 = add i32 %base.x4, 3
  %zext.x4 = zext i32 %base.x4 to i64
  %zext.x4.p1 = zext i32 %base.x4.p1 to i64
  %zext.x4.p2 = zext i32 %base.x4.p2 to i64
  %zext.x4.p3 = zext i32 %base.x4.p3 to i64
  ; Scale each index by 4 to obtain byte offsets; mul-by-4 and shl-by-2 are
  ; used interchangeably.
  %base.x16 = mul i64 %zext.x4, 4
  %base.x16.p4 = shl i64 %zext.x4.p1, 2
  %base.x16.p8 = shl i64 %zext.x4.p2, 2
  %base.x16.p12 = mul i64 %zext.x4.p3, 4
  ; i8 GEPs: the index operands are byte offsets.
  %gep.a.base.x16 = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %base.x16
  %gep.b.base.x16.p4 = getelementptr inbounds i8, ptr addrspace(1) %b, i64 %base.x16.p4
  %gep.a.base.x16.p8 = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %base.x16.p8
  %gep.b.base.x16.p12 = getelementptr inbounds i8, ptr addrspace(1) %b, i64 %base.x16.p12
  %selected.base.x16.p0.or.4 = select i1 %cnd, ptr addrspace(1) %gep.a.base.x16, ptr addrspace(1) %gep.b.base.x16.p4
  %gep.selected.base.x16.p8.or.12 = select i1 %cnd, ptr addrspace(1) %gep.a.base.x16.p8, ptr addrspace(1) %gep.b.base.x16.p12
  ; i32 GEPs after the selects: +10 elements is +40 bytes, +9 elements is +36
  ; bytes, which brings the two selected addresses to 4 bytes apart.
  %selected.base.x16.p40.or.44 = getelementptr inbounds i32, ptr addrspace(1) %selected.base.x16.p0.or.4, i64 10
  %selected.base.x16.p44.or.48 = getelementptr inbounds i32, ptr addrspace(1) %gep.selected.base.x16.p8.or.12, i64 9
  %val0 = load i32, ptr addrspace(1) %selected.base.x16.p40.or.44, align 4
  %val1 = load i32, ptr addrspace(1) %selected.base.x16.p44.or.48, align 4
  %t0 = insertelement <2 x i32> undef, i32 %val0, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 %val1, i32 1
  store <2 x i32> %t1, ptr addrspace(1) %out
  ret void
}

; Two loads through two levels of select: the outer selects use %cnd0 and
; their false operands are inner selects on %cnd1. At every level the operand
; of the second load's select is one i32 element past the matching operand of
; the first load's select (base/base+1, base+2/base+3, 4*base+5/4*base+6).
; NOTE(review): %gep.b.base.x4.p5 and %gep.b.base.x4.p6 are computed from %a
; even though their names say "b", and the %b argument is unused in this
; function - confirm whether that is intentional.
define void @nested_selects(i1 %cnd0, i1 %cnd1, ptr addrspace(1) %a, ptr addrspace(1) %b, i32 %base, ptr addrspace(1) %out) {
; CHECK-LABEL: @nested_selects
; CHECK: load <2 x i32>
entry:
  ; Note the mix of flags: +1 and +3 carry nsw, +2 and the x4 forms do not.
  %base.p1 = add nsw i32 %base, 1
  %base.p2 = add i32 %base, 2
  %base.p3 = add nsw i32 %base, 3
  %base.x4 = mul i32 %base, 4
  %base.x4.p5 = add i32 %base.x4, 5
  %base.x4.p6 = add i32 %base.x4, 6
  %sext = sext i32 %base to i64
  %sext.p1 = sext i32 %base.p1 to i64
  %sext.p2 = sext i32 %base.p2 to i64
  %sext.p3 = sext i32 %base.p3 to i64
  %sext.x4.p5 = sext i32 %base.x4.p5 to i64
  %sext.x4.p6 = sext i32 %base.x4.p6 to i64
  %gep.a.base = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %sext
  %gep.a.base.p1 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %sext.p1
  %gep.a.base.p2 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %sext.p2
  %gep.a.base.p3 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %sext.p3
  %gep.b.base.x4.p5 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %sext.x4.p5
  %gep.b.base.x4.p6 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %sext.x4.p6
  ; Inner selects (condition %cnd1) feed the false operand of the outer
  ; selects (condition %cnd0).
  %selected.1.L = select i1 %cnd1, ptr addrspace(1) %gep.a.base.p2, ptr addrspace(1) %gep.b.base.x4.p5
  %selected.1.R = select i1 %cnd1, ptr addrspace(1) %gep.a.base.p3, ptr addrspace(1) %gep.b.base.x4.p6
  %selected.0.L = select i1 %cnd0, ptr addrspace(1) %gep.a.base, ptr addrspace(1) %selected.1.L
  %selected.0.R = select i1 %cnd0, ptr addrspace(1) %gep.a.base.p1, ptr addrspace(1) %selected.1.R
  %val0 = load i32, ptr addrspace(1) %selected.0.L, align 4
  %val1 = load i32, ptr addrspace(1) %selected.0.R, align 4
  %t0 = insertelement <2 x i32> undef, i32 %val0, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 %val1, i32 1
  store <2 x i32> %t1, ptr addrspace(1) %out
  ret void
}