// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

// Codegen tests for AMDGPU builtins. Every function below calls one
// __builtin_amdgcn_* builtin; the FileCheck directives placed above it pin the
// LLVM IR that the call is expected to produce for each target listed above.

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

typedef unsigned long ulong;
typedef unsigned int uint;

// ---------------------------------------------------------------------------
// Half-precision builtins.
// ---------------------------------------------------------------------------

// CHECK-LABEL: @test_div_fixup_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.div.fixup.f16
void test_div_fixup_f16(global half* out, half a, half b, half c)
{
  *out = __builtin_amdgcn_div_fixuph(a, b, c);
}

// CHECK-LABEL: @test_rcp_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rcp.f16
void test_rcp_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rcph(a);
}

// The pattern below accepts either @llvm.sqrt.f16 or @llvm.amdgcn.sqrt.f16.
// CHECK-LABEL: @test_sqrt_f16
// CHECK: {{.*}}call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16
void test_sqrt_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sqrth(a);
}

// CHECK-LABEL: @test_rsq_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rsq.f16
void test_rsq_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rsqh(a);
}

// CHECK-LABEL: @test_sin_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.sin.f16
void test_sin_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sinh(a);
}

// CHECK-LABEL: @test_cos_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.cos.f16
void test_cos_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_cosh(a);
}

// The int exponent is expected to be truncated to i16 before the ldexp call.
// CHECK-LABEL: @test_ldexp_f16
// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
// CHECK: {{.*}}call{{.*}} half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
void test_ldexp_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_ldexph(a, b);
}

// CHECK-LABEL: @test_frexp_mant_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.frexp.mant.f16
void test_frexp_mant_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_frexp_manth(a);
}

// CHECK-LABEL: @test_frexp_exp_f16
// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.frexp.exp.i16.f16
void test_frexp_exp_f16(global short* out, half a)
{
  *out = __builtin_amdgcn_frexp_exph(a);
}

// CHECK-LABEL: @test_fract_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.fract.f16
void test_fract_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_fracth(a);
}

// CHECK-LABEL: @test_class_f16
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f16
void test_class_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_classh(a, b);
}

// ---------------------------------------------------------------------------
// Scalar memory builtins.
// ---------------------------------------------------------------------------

// CHECK-LABEL: @test_s_memrealtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memrealtime()
void test_s_memrealtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memrealtime();
}

// CHECK-LABEL: @test_s_dcache_wb()
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.wb()
void test_s_dcache_wb()
{
  __builtin_amdgcn_s_dcache_wb();
}

// ---------------------------------------------------------------------------
// __builtin_amdgcn_mov_dpp.
//
// The expected IR is a call to the update.dpp intrinsic with a poison first
// operand. float/double sources are bitcast to the same-width integer, while
// short/char/half sources are zero-extended to i32 and the result truncated
// back to the original width.
// ---------------------------------------------------------------------------

// CHECK-LABEL: @test_mov_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
void test_mov_dpp_int(global int* out, int src)
{
  *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}

// CHECK-LABEL: @test_mov_dpp_long
// CHECK:      %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
void test_mov_dpp_long(long x, global long *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_float
// CHECK:      %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
void test_mov_dpp_float(float x, global float *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_double
// CHECK:      %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
void test_mov_dpp_double(double x, global double *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_short
// CHECK:      %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
void test_mov_dpp_short(short x, global short *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_char
// CHECK:      %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
void test_mov_dpp_char(char x, global char *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_half
// CHECK:      %0 = load i16,
// CHECK:      %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
void test_mov_dpp_half(half *x, global half *p) {
  *p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
}

// ---------------------------------------------------------------------------
// __builtin_amdgcn_update_dpp.
//
// Same type handling as the mov_dpp tests above, but the first intrinsic
// operand is the caller-supplied value instead of poison.
// ---------------------------------------------------------------------------

// CHECK-LABEL: @test_update_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_int(global int* out, int arg1, int arg2)
{
  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// CHECK-LABEL: @test_update_dpp_long
// CHECK:      %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
void test_update_dpp_long(long x, global long *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_float
// CHECK:      %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
void test_update_dpp_float(float x, global float *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_double
// CHECK:      %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %0, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
void test_update_dpp_double(double x, global double *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_short
// CHECK:      %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
void test_update_dpp_short(short x, global short *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_char
// CHECK:      %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
void test_update_dpp_char(char x, global char *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_half
// CHECK:      %0 = load i16,
// CHECK:      %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
void test_update_dpp_half(half *x, global half *p) {
  *p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
}

// Mixed signedness of the two value operands: both are expected to be passed
// through as plain i32.
// CHECK-LABEL: @test_update_dpp_int_uint
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_int_uint(global int* out, int arg1, unsigned int arg2)
{
  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// Integer literal as the first operand.
// CHECK-LABEL: @test_update_dpp_lit_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_lit_int(global int* out, int arg1)
{
  *out = __builtin_amdgcn_update_dpp(5, arg1, 0, 0, 0, false);
}

__constant int gi = 5;

// __constant global as the first operand: the expected IR uses its value (5)
// directly.
// CHECK-LABEL: @test_update_dpp_const_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_const_int(global int* out, int arg1)
{
  *out = __builtin_amdgcn_update_dpp(gi, arg1, 0, 0, 0, false);
}

// ---------------------------------------------------------------------------
// LDS floating-point atomics (ds_fadd / ds_fmin / ds_fmax).
//
// Each test issues, in order: a relaxed and a volatile relaxed operation, one
// operation per memory order, and one per memory scope. The expected IR is an
// atomicrmw on addrspace(3) for every call. When __SPIRV__ is defined the
// pointer parameter is spelled with an explicit address_space(3) attribute
// instead of the `local` qualifier.
// ---------------------------------------------------------------------------

// CHECK-LABEL: @test_ds_fadd
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

#if !defined(__SPIRV__)
void test_ds_faddf(local float *out, float src) {
#else
void test_ds_faddf(__attribute__((address_space(3))) float *out, float src) {
#endif

  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, true);

  // Test all orders.
  // __ATOMIC_CONSUME is expected to lower to acquire.
  // NOTE(review): the last call in this group is tagged "invalid" but passes
  // __ATOMIC_SEQ_CST again; the expected IR is a second seq_cst operation.
  // Confirm whether an out-of-range order value was intended here.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  // The scope value 5 is tagged invalid; the expected IR for it carries no
  // syncscope at all.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}

// CHECK-LABEL: @test_ds_fmin
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

#if !defined(__SPIRV__)
void test_ds_fminf(local float *out, float src) {
#else
void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
#endif
  // Literal 0, 0 for order and scope: the expected IR is monotonic with no
  // syncscope.
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, true);

  // Test all orders.
  // NOTE(review): the call tagged "invalid" repeats __ATOMIC_SEQ_CST; confirm
  // whether an out-of-range order value was intended.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  // The scope value 5 is tagged invalid; the expected IR for it carries no
  // syncscope at all.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}

// CHECK-LABEL: @test_ds_fmax
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

#if !defined(__SPIRV__)
void test_ds_fmaxf(local float *out, float src) {
#else
void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
#endif
  // Literal 0, 0 for order and scope: the expected IR is monotonic with no
  // syncscope.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, true);

  // Test all orders.
  // NOTE(review): the call tagged "invalid" repeats __ATOMIC_SEQ_CST; confirm
  // whether an out-of-range order value was intended.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  // The scope value 5 is tagged invalid; the expected IR for it carries no
  // syncscope at all.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}

// ---------------------------------------------------------------------------
// Miscellaneous builtins.
// ---------------------------------------------------------------------------

// CHECK-LABEL: @test_s_memtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
void test_s_memtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memtime();
}

// CHECK-LABEL: @test_perm
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
void test_perm(global uint* out, uint a, uint b, uint s)
{
  *out = __builtin_amdgcn_perm(a, b, s);
}

// CHECK-LABEL: @test_groupstaticsize
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.groupstaticsize()
void test_groupstaticsize(global uint* out)
{
  *out = __builtin_amdgcn_groupstaticsize();
}