// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s

// REQUIRES: amdgpu-registered-target

// Codegen test for AMDGPU builtins on gfx1200. Each function below calls one
// builtin (or a closely related pair) and the assertions in front of it pin
// the unoptimized (-O0) LLVM IR that clang emits for that function.
//
// The assertion blocks are generated: regenerate them with
// utils/update_cc_test_checks.py instead of editing them by hand. Because the
// IR is matched instruction by instruction, even a cosmetic change to a
// function body (extra local, different control flow) requires regeneration.

typedef unsigned int uint;

// s_sleep_var: called once with a runtime operand and once with the
// constant 15.
// CHECK-LABEL: @test_s_sleep_var(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// CHECK-NEXT:    store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
// CHECK-NEXT:    call void @llvm.amdgcn.s.sleep.var(i32 [[TMP0]])
// CHECK-NEXT:    call void @llvm.amdgcn.s.sleep.var(i32 15)
// CHECK-NEXT:    ret void
//
void test_s_sleep_var(int d)
{
  __builtin_amdgcn_s_sleep_var(d);
  __builtin_amdgcn_s_sleep_var(15);
}

// permlane16_var: three runtime operands; the two trailing literal 0
// arguments are expected to appear as `i1 false` in the intrinsic call.
// CHECK-LABEL: @test_permlane16_var(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT:    store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false)
// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
// CHECK-NEXT:    ret void
//
void test_permlane16_var(global uint* out, uint a, uint b, uint c) {
  *out = __builtin_amdgcn_permlane16_var(a, b, c, 0, 0);
}

// permlanex16_var: same shape as the permlane16_var test, targeting the
// permlanex16.var intrinsic.
// CHECK-LABEL: @test_permlanex16_var(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT:    store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlanex16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false)
// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
// CHECK-NEXT:    ret void
//
void test_permlanex16_var(global uint* out, uint a, uint b, uint c) {
  *out = __builtin_amdgcn_permlanex16_var(a, b, c, 0, 0);
}

// s_barrier_signal followed by s_barrier_wait, both with the constant -1.
// The assertions expect an i32 operand for signal and an i16 operand for wait.
// CHECK-LABEL: @test_s_barrier_signal(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.signal(i32 -1)
// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.wait(i16 -1)
// CHECK-NEXT:    ret void
//
void test_s_barrier_signal()
{
  __builtin_amdgcn_s_barrier_signal(-1);
  __builtin_amdgcn_s_barrier_wait(-1);
}

// s_barrier_signal_var: the generic `void *` barrier argument is expected to
// be addrspacecast to `ptr addrspace(3)` before it reaches the intrinsic.
// CHECK-LABEL: @test_s_barrier_signal_var(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
// CHECK-NEXT:    ret void
//
void test_s_barrier_signal_var(void *bar, int a)
{
  __builtin_amdgcn_s_barrier_signal_var(bar, a);
}

// s_barrier_signal_isfirst: the i1 result selects between two pointer
// assignments via if/else, and s_barrier_wait(1) runs after the join point.
// CHECK-LABEL: @test_s_barrier_signal_isfirst(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// CHECK-NEXT:    store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 1)
// CHECK-NEXT:    br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
// CHECK:       if.then:
// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT:    br label [[IF_END:%.*]]
// CHECK:       if.else:
// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT:    br label [[IF_END]]
// CHECK:       if.end:
// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.wait(i16 1)
// CHECK-NEXT:    ret void
//
void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
{
  if(__builtin_amdgcn_s_barrier_signal_isfirst(1))
    a = b;
  else
    a = c;

  __builtin_amdgcn_s_barrier_wait(1);
}

// s_barrier_init: generic barrier pointer cast to `ptr addrspace(3)`, plus a
// runtime i32 operand.
// CHECK-LABEL: @test_s_barrier_init(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
// CHECK-NEXT:    ret void
//
void test_s_barrier_init(void *bar, int a)
{
  __builtin_amdgcn_s_barrier_init(bar, a);
}

// s_barrier_join: generic barrier pointer cast to `ptr addrspace(3)`.
// CHECK-LABEL: @test_s_barrier_join(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) [[TMP1]])
// CHECK-NEXT:    ret void
//
void test_s_barrier_join(void *bar)
{
  __builtin_amdgcn_s_barrier_join(bar);
}

// s_barrier_leave with the constant 1, expected as an i16 operand.
// CHECK-LABEL: @test_s_barrier_leave(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.leave(i16 1)
// CHECK-NEXT:    ret void
//
void test_s_barrier_leave()
{
  __builtin_amdgcn_s_barrier_leave(1);
}

// s_get_barrier_state: runtime i32 operand; the i32 result is stored to a
// local and returned.
// CHECK-LABEL: @test_s_get_barrier_state(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT:    [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.s.get.barrier.state(i32 [[TMP0]])
// CHECK-NEXT:    store i32 [[TMP1]], ptr [[STATE_ASCAST]], align 4
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
// CHECK-NEXT:    ret i32 [[TMP2]]
//
unsigned test_s_get_barrier_state(int a)
{
  unsigned State = __builtin_amdgcn_s_get_barrier_state(a);
  return State;
}

// s_get_named_barrier_state: generic barrier pointer cast to
// `ptr addrspace(3)`; the i32 result is stored to a local and returned.
// CHECK-LABEL: @test_s_get_named_barrier_state(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT:    [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT:    [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]])
// CHECK-NEXT:    store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
// CHECK-NEXT:    ret i32 [[TMP3]]
//
unsigned test_s_get_named_barrier_state(void *bar)
{
  unsigned State = __builtin_amdgcn_s_get_named_barrier_state(bar);
  return State;
}

// s_ttracedata with the constant 1, expected as an i32 operand.
// CHECK-LABEL: @test_s_ttracedata(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    call void @llvm.amdgcn.s.ttracedata(i32 1)
// CHECK-NEXT:    ret void
//
void test_s_ttracedata()
{
  __builtin_amdgcn_s_ttracedata(1);
}

// s_ttracedata_imm with the constant 1, expected as an i16 operand.
// CHECK-LABEL: @test_s_ttracedata_imm(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    call void @llvm.amdgcn.s.ttracedata.imm(i16 1)
// CHECK-NEXT:    ret void
//
void test_s_ttracedata_imm()
{
  __builtin_amdgcn_s_ttracedata_imm(1);
}

// s_prefetch_data: one call per pointer address space. The assertions expect
// a distinct intrinsic suffix for each (.p0 generic, .p1 global, .p4 constant)
// and cover both constant lengths (0, 31) and a runtime length.
// CHECK-LABEL: @test_s_prefetch_data(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT:    [[GP_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT:    [[CP_ADDR:%.*]] = alloca ptr addrspace(4), align 8, addrspace(5)
// CHECK-NEXT:    [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[FP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FP_ADDR]] to ptr
// CHECK-NEXT:    [[GP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GP_ADDR]] to ptr
// CHECK-NEXT:    [[CP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CP_ADDR]] to ptr
// CHECK-NEXT:    [[LEN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LEN_ADDR]] to ptr
// CHECK-NEXT:    store ptr [[FP:%.*]], ptr [[FP_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(1) [[GP:%.*]], ptr [[GP_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(4) [[CP:%.*]], ptr [[CP_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[LEN:%.*]], ptr [[LEN_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR_ASCAST]], align 8
// CHECK-NEXT:    call void @llvm.amdgcn.s.prefetch.data.p0(ptr [[TMP0]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[GP_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LEN_ADDR_ASCAST]], align 4
// CHECK-NEXT:    call void @llvm.amdgcn.s.prefetch.data.p1(ptr addrspace(1) [[TMP1]], i32 [[TMP2]])
// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr [[CP_ADDR_ASCAST]], align 8
// CHECK-NEXT:    call void @llvm.amdgcn.s.prefetch.data.p4(ptr addrspace(4) [[TMP3]], i32 31)
// CHECK-NEXT:    ret void
//
void test_s_prefetch_data(int *fp, global float *gp, constant char *cp, unsigned int len)
{
  __builtin_amdgcn_s_prefetch_data(fp, 0);
  __builtin_amdgcn_s_prefetch_data(gp, len);
  __builtin_amdgcn_s_prefetch_data(cp, 31);
}

// s_buffer_prefetch_data: buffer resource (`ptr addrspace(8)`) with a
// constant offset; exercised once with a runtime length and once with the
// constant 31.
// CHECK-LABEL: @test_s_buffer_prefetch_data(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[RSRC_ADDR:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
// CHECK-NEXT:    [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[RSRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RSRC_ADDR]] to ptr
// CHECK-NEXT:    [[LEN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LEN_ADDR]] to ptr
// CHECK-NEXT:    store ptr addrspace(8) [[RSRC:%.*]], ptr [[RSRC_ADDR_ASCAST]], align 16
// CHECK-NEXT:    store i32 [[LEN:%.*]], ptr [[LEN_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(8), ptr [[RSRC_ADDR_ASCAST]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[LEN_ADDR_ASCAST]], align 4
// CHECK-NEXT:    call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP0]], i32 128, i32 [[TMP1]])
// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(8), ptr [[RSRC_ADDR_ASCAST]], align 16
// CHECK-NEXT:    call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP2]], i32 0, i32 31)
// CHECK-NEXT:    ret void
//
void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int len)
{
  __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 128, len);
  __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 0, 31);
}

// ds_bpermute_fi_b32: two runtime i32 operands; the i32 result is stored
// through the global output pointer.
// CHECK-LABEL: @test_ds_bpermute_fi_b32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.ds.bpermute.fi.b32(i32 [[TMP0]], i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
// CHECK-NEXT:    ret void
//
void test_ds_bpermute_fi_b32(global int* out, int a, int b)
{
  *out = __builtin_amdgcn_ds_bpermute_fi_b32(a, b);
}