; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s

; Exercises the load-store vectorizer on adjacent memory accesses whose element
; types are pointers, or a mix of pointers and same-sized scalars. Per the
; datalayout below, addrspace(1) pointers are 64 bits wide and addrspace(3)
; pointers are 32 bits wide. The expectations encoded in the FileCheck
; directives are that same-width neighbours are combined into an integer vector
; access (with inttoptr / ptrtoint / bitcast converting individual elements),
; while neighbours of different widths are left as separate scalar accesses.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Declared but not called by any function visible in this file.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Two adjacent addrspace(1) pointer loads and two adjacent null-pointer stores.
; Each pair is expected to become a single <2 x i64> access, with inttoptr
; producing the two loaded pointers.
; CHECK-LABEL: @merge_v2p1i8(
; CHECK: load <2 x i64>
; CHECK: inttoptr i64 %{{[^ ]+}} to ptr addrspace(1)
; CHECK: inttoptr i64 %{{[^ ]+}} to ptr addrspace(1)
; CHECK: store <2 x i64> zeroinitializer
define amdgpu_kernel void @merge_v2p1i8(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
entry:
  %a.slot1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1
  %b.slot1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %b, i64 1

  %elt0 = load ptr addrspace(1), ptr addrspace(1) %b, align 4
  %elt1 = load ptr addrspace(1), ptr addrspace(1) %b.slot1, align 4

  store ptr addrspace(1) null, ptr addrspace(1) %a, align 4
  store ptr addrspace(1) null, ptr addrspace(1) %a.slot1, align 4

  ret void
}

; Same shape as above, but with 32-bit addrspace(3) pointers: the expected
; vector element type is i32.
; CHECK-LABEL: @merge_v2p3i8(
; CHECK: load <2 x i32>
; CHECK: inttoptr i32 %{{[^ ]+}} to ptr addrspace(3)
; CHECK: inttoptr i32 %{{[^ ]+}} to ptr addrspace(3)
; CHECK: store <2 x i32> zeroinitializer
define amdgpu_kernel void @merge_v2p3i8(ptr addrspace(3) nocapture %a, ptr addrspace(3) nocapture readonly %b) #0 {
entry:
  %a.slot1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 1
  %b.slot1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 1

  %elt0 = load ptr addrspace(3), ptr addrspace(3) %b, align 4
  %elt1 = load ptr addrspace(3), ptr addrspace(3) %b.slot1, align 4

  store ptr addrspace(3) null, ptr addrspace(3) %a, align 4
  store ptr addrspace(3) null, ptr addrspace(3) %a.slot1, align 4

  ret void
}

; An i64 load followed by a 64-bit pointer load from the next 8-byte slot.
; Expected: one <2 x i64> load, with element 1 turned back into a pointer.
; CHECK-LABEL: @merge_load_i64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to ptr addrspace(1)
define amdgpu_kernel void @merge_load_i64_ptr64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.slot1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  %elt0 = load i64, ptr addrspace(1) %a
  %elt1 = load ptr addrspace(1), ptr addrspace(1) %a.slot1

  ret void
}

; Mirror of the previous test: the pointer is loaded first, the i64 second.
; Expected: one <2 x i64> load, with element 0 turned back into a pointer.
; CHECK-LABEL: @merge_load_ptr64_i64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: inttoptr i64 [[ELT0]] to ptr addrspace(1)
define amdgpu_kernel void @merge_load_ptr64_i64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.slot1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  %elt0 = load ptr addrspace(1), ptr addrspace(1) %a
  %elt1 = load i64, ptr addrspace(1) %a.slot1

  ret void
}

; A 64-bit pointer store followed by an i64 store to the next 8-byte slot.
; Expected: the pointer is converted with ptrtoint, inserted as element 0, and
; a single <2 x i64> store is emitted.
; CHECK-LABEL: @merge_store_ptr64_i64(
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0]], i32 0
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_ptr64_i64(ptr addrspace(1) nocapture %a, ptr addrspace(1) %ptr0, i64 %val1) #0 {
entry:
  %a.slot1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  store ptr addrspace(1) %ptr0, ptr addrspace(1) %a
  store i64 %val1, ptr addrspace(1) %a.slot1

  ret void
}

; Mirror of the previous test: the i64 is stored first, the pointer second.
; Expected: the pointer is converted with ptrtoint and inserted as element 1.
; CHECK-LABEL: @merge_store_i64_ptr64(
; CHECK: [[ELT1:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_i64_ptr64(ptr addrspace(1) nocapture %a, i64 %val0, ptr addrspace(1) %ptr1) #0 {
entry:
  %a.slot1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1

  store i64 %val0, ptr addrspace(1) %a
  store ptr addrspace(1) %ptr1, ptr addrspace(1) %a.slot1

  ret void
}

; 32-bit variant: an i32 load followed by an addrspace(3) pointer load from the
; next 4-byte slot. Expected: one <2 x i32> load, element 1 becomes a pointer.
; CHECK-LABEL: @merge_load_i32_ptr32(
; CHECK: load <2 x i32>
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i32 [[ELT1]] to ptr addrspace(3)
define amdgpu_kernel void @merge_load_i32_ptr32(ptr addrspace(3) nocapture %a) #0 {
entry:
  %a.slot1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1

  %elt0 = load i32, ptr addrspace(3) %a
  %elt1 = load ptr addrspace(3), ptr addrspace(3) %a.slot1

  ret void
}

; 32-bit variant with the pointer loaded first.
; Expected: one <2 x i32> load, element 0 becomes a pointer.
; CHECK-LABEL: @merge_load_ptr32_i32(
; CHECK: load <2 x i32>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 0
; CHECK: inttoptr i32 [[ELT0]] to ptr addrspace(3)
define amdgpu_kernel void @merge_load_ptr32_i32(ptr addrspace(3) nocapture %a) #0 {
entry:
  %a.slot1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1

  %elt0 = load ptr addrspace(3), ptr addrspace(3) %a
  %elt1 = load i32, ptr addrspace(3) %a.slot1

  ret void
}

; 32-bit variant: an addrspace(3) pointer store followed by an i32 store.
; Expected: ptrtoint of the pointer is inserted as element 0 of a <2 x i32>
; that is written with a single store.
; CHECK-LABEL: @merge_store_ptr32_i32(
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint ptr addrspace(3) %ptr0 to i32
; CHECK: insertelement <2 x i32> poison, i32 [[ELT0]], i32 0
; CHECK: store <2 x i32>
define amdgpu_kernel void @merge_store_ptr32_i32(ptr addrspace(3) nocapture %a, ptr addrspace(3) %ptr0, i32 %val1) #0 {
entry:
  %a.slot1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1

  store ptr addrspace(3) %ptr0, ptr addrspace(3) %a
  store i32 %val1, ptr addrspace(3) %a.slot1

  ret void
}

; 32-bit variant with the i32 stored first and the pointer second.
; Expected: ptrtoint of the pointer is inserted as element 1.
; CHECK-LABEL: @merge_store_i32_ptr32(
; CHECK: [[ELT1:%[^ ]+]] = ptrtoint ptr addrspace(3) %ptr1 to i32
; CHECK: insertelement <2 x i32> %{{[^ ]+}}, i32 [[ELT1]], i32 1
; CHECK: store <2 x i32>
define amdgpu_kernel void @merge_store_i32_ptr32(ptr addrspace(3) nocapture %a, i32 %val0, ptr addrspace(3) %ptr1) #0 {
entry:
  %a.slot1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i32 1

  store i32 %val0, ptr addrspace(3) %a
  store ptr addrspace(3) %ptr1, ptr addrspace(3) %a.slot1

  ret void
}

; Negative test: a 32-bit addrspace(3) pointer value and an i64 value are
; stored through addrspace(1) addresses eight bytes apart. Both scalar stores
; are expected to remain.
; CHECK-LABEL: @no_merge_store_ptr32_i64(
; CHECK: store ptr addrspace(3)
; CHECK: store i64
define amdgpu_kernel void @no_merge_store_ptr32_i64(ptr addrspace(1) nocapture %a, ptr addrspace(3) %ptr0, i64 %val1) #0 {
entry:
  %a.slot1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  store ptr addrspace(3) %ptr0, ptr addrspace(1) %a
  store i64 %val1, ptr addrspace(1) %a.slot1

  ret void
}

; Negative test: an i64 store followed by a 32-bit addrspace(3) pointer store.
; Both scalar stores are expected to remain.
; NOTE(review): the GEP here strides by the 4-byte ptr addrspace(3) type, so the
; second store targets byte offset 4 and overlaps the i64 store; the sibling
; negative tests stride by i64 instead. Confirm this is intentional.
; CHECK-LABEL: @no_merge_store_i64_ptr32(
; CHECK: store i64
; CHECK: store ptr addrspace(3)
define amdgpu_kernel void @no_merge_store_i64_ptr32(ptr addrspace(1) nocapture %a, i64 %val0, ptr addrspace(3) %ptr1) #0 {
entry:
  %a.slot1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(1) %a, i64 1

  store i64 %val0, ptr addrspace(1) %a
  store ptr addrspace(3) %ptr1, ptr addrspace(1) %a.slot1

  ret void
}

; Negative test: an i64 load followed by a 32-bit addrspace(3) pointer load
; from the next 8-byte slot. Both scalar loads are expected to remain.
; CHECK-LABEL: @no_merge_load_i64_ptr32(
; CHECK: load i64,
; CHECK: load ptr addrspace(3),
define amdgpu_kernel void @no_merge_load_i64_ptr32(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.slot1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  %elt0 = load i64, ptr addrspace(1) %a
  %elt1 = load ptr addrspace(3), ptr addrspace(1) %a.slot1

  ret void
}

; Negative test: a 32-bit addrspace(3) pointer load followed by an i64 load
; eight bytes later. Both scalar loads are expected to remain.
; CHECK-LABEL: @no_merge_load_ptr32_i64(
; CHECK: load ptr addrspace(3),
; CHECK: load i64,
define amdgpu_kernel void @no_merge_load_ptr32_i64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.slot1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  %elt0 = load ptr addrspace(3), ptr addrspace(1) %a
  %elt1 = load i64, ptr addrspace(1) %a.slot1

  ret void
}

; Adjacent <2 x ptr addrspace(1)> loads and stores through noalias arguments.
; XXX - These are not merged (reason not yet determined); the directives below
; pin the current output of two separate vector loads and two separate vector
; stores.
; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
; CHECK: load <2 x ptr addrspace(1)>
; CHECK: load <2 x ptr addrspace(1)>
; CHECK: store <2 x ptr addrspace(1)>
; CHECK: store <2 x ptr addrspace(1)>
define amdgpu_kernel void @merge_v2p1i8_v2p1i8(ptr addrspace(1) nocapture noalias %a, ptr addrspace(1) nocapture readonly noalias %b) #0 {
entry:
  %a.slot1 = getelementptr inbounds <2 x ptr addrspace(1)>, ptr addrspace(1) %a, i64 1
  %b.slot1 = getelementptr inbounds <2 x ptr addrspace(1)>, ptr addrspace(1) %b, i64 1

  %vec0 = load <2 x ptr addrspace(1)>, ptr addrspace(1) %b, align 4
  %vec1 = load <2 x ptr addrspace(1)>, ptr addrspace(1) %b.slot1, align 4

  store <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %a, align 4
  store <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %a.slot1, align 4
  ret void
}

; A 64-bit pointer load followed by a double load from the next 8-byte slot.
; Expected: one <2 x i64> load; element 0 goes through inttoptr and element 1
; through a bitcast to double.
; CHECK-LABEL: @merge_load_ptr64_f64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: [[ELT0_INT:%[^ ]+]] = inttoptr i64 [[ELT0]] to ptr addrspace(1)
; CHECK: [[ELT1_INT:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: bitcast i64 [[ELT1_INT]] to double
define amdgpu_kernel void @merge_load_ptr64_f64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.slot1 = getelementptr inbounds double, ptr addrspace(1) %a, i64 1

  %elt0 = load ptr addrspace(1), ptr addrspace(1) %a
  %elt1 = load double, ptr addrspace(1) %a.slot1

  ret void
}

; Mirror of the previous test: the double is loaded first, the pointer second.
; Expected: element 0 is bitcast to double and element 1 goes through inttoptr.
; CHECK-LABEL: @merge_load_f64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: bitcast i64 [[ELT0]] to double
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to ptr addrspace(1)
define amdgpu_kernel void @merge_load_f64_ptr64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.slot1 = getelementptr inbounds double, ptr addrspace(1) %a, i64 1

  %elt0 = load double, ptr addrspace(1) %a
  %elt1 = load ptr addrspace(1), ptr addrspace(1) %a.slot1

  ret void
}

; A 64-bit pointer store followed by a double store to the next 8-byte slot.
; Expected: ptrtoint of the pointer becomes element 0, a bitcast of the double
; becomes element 1, and a single <2 x i64> store is emitted.
; CHECK-LABEL: @merge_store_ptr64_f64(
; CHECK: [[ELT0_INT:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[^ ]+]] = bitcast double %val1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_ptr64_f64(ptr addrspace(1) nocapture %a, ptr addrspace(1) %ptr0, double %val1) #0 {
entry:
  %a.slot1 = getelementptr inbounds double, ptr addrspace(1) %a, i64 1

  store ptr addrspace(1) %ptr0, ptr addrspace(1) %a
  store double %val1, ptr addrspace(1) %a.slot1

  ret void
}

; Mirror of the previous test: the double is stored first, the pointer second.
; Expected: a bitcast of the double becomes element 0 and ptrtoint of the
; pointer becomes element 1.
; CHECK-LABEL: @merge_store_f64_ptr64(
; CHECK: [[ELT0_INT:%[^ ]+]] = bitcast double %val0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_f64_ptr64(ptr addrspace(1) nocapture %a, double %val0, ptr addrspace(1) %ptr1) #0 {
entry:
  %a.slot1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1

  store double %val0, ptr addrspace(1) %a
  store ptr addrspace(1) %ptr1, ptr addrspace(1) %a.slot1

  ret void
}

; #0 is applied to every kernel above; #1 to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }