#include <clc/clc.h>

/*
 * VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE)
 *
 * Defines the vload2/3/4/8/16 overloads for one element type and one address
 * space.  vloadN(offset, x) returns the N consecutive elements that start at
 * x[N * offset].
 *
 * Each less_aligned_* typedef is an N-element vector type whose alignment is
 * lowered to sizeof(PRIM_TYPE), so dereferencing through it only assumes that
 * the pointer is aligned for a single element.  The address space is pasted
 * into the typedef name so that every instantiation gets a distinct name.
 *
 * vload3 reads its first two elements through the less-aligned 2-element
 * vector type and fetches the third element with a scalar access.
 */
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
  typedef PRIM_TYPE##2 less_aligned_##ADDR_SPACE##PRIM_TYPE##2 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2*) (&x[2*offset])); \
  } \
\
  typedef PRIM_TYPE##3 less_aligned_##ADDR_SPACE##PRIM_TYPE##3 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    PRIM_TYPE##2 vec = *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2*) (&x[3*offset])); \
    return (PRIM_TYPE##3)(vec.s0, vec.s1, x[offset*3+2]); \
  } \
\
  typedef PRIM_TYPE##4 less_aligned_##ADDR_SPACE##PRIM_TYPE##4 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##4*) (&x[4*offset])); \
  } \
\
  typedef PRIM_TYPE##8 less_aligned_##ADDR_SPACE##PRIM_TYPE##8 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##8*) (&x[8*offset])); \
  } \
\
  typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16*) (&x[16*offset])); \
  }

/* Instantiates VLOAD_VECTORIZE for one element type in all four address spaces. */
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
    VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
    VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
    VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
    VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global)

/* Element types that are instantiated unconditionally (no extension needed). */
#define VLOAD_TYPES() \
    VLOAD_ADDR_SPACES(char) \
    VLOAD_ADDR_SPACES(uchar) \
    VLOAD_ADDR_SPACES(short) \
    VLOAD_ADDR_SPACES(ushort) \
    VLOAD_ADDR_SPACES(int) \
    VLOAD_ADDR_SPACES(uint) \
    VLOAD_ADDR_SPACES(long) \
    VLOAD_ADDR_SPACES(ulong) \
    VLOAD_ADDR_SPACES(float)

VLOAD_TYPES()

/* double and half element types are only instantiated when the matching
 * extension macro is defined. */
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
    VLOAD_ADDR_SPACES(double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
    VLOAD_ADDR_SPACES(half)
#endif

/* vload_half are legal even without cl_khr_fp16 */
/* no vload_half for double */

/*
 * VEC_LOAD1(val, AS) assigns to `val` one half value read from
 * mem[offset] as a float, then advances `offset` by one.  `mem` and `offset`
 * are not macro parameters: they are taken from the scope in which the macro
 * is expanded (the function bodies generated by __FUNC below).
 *
 * For clang major versions below 6 the read goes through a per-address-space
 * helper that is only declared here (its definition is not in this file);
 * AS selects the helper by token pasting.  Otherwise __builtin_load_halff is
 * used and AS is ignored.
 */
#if __clang_major__ < 6
float __clc_vload_half_float_helper__constant(const __constant half *);
float __clc_vload_half_float_helper__global(const __global half *);
float __clc_vload_half_float_helper__local(const __local half *);
float __clc_vload_half_float_helper__private(const __private half *);

#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]);
#else
#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]);
#endif

/*
 * VEC_LOADn(val, AS) fills an n-component vector one component at a time.
 * The 2/4/8/16 variants recurse on the .lo half before the .hi half, and the
 * 3 variant goes s0, s1, s2, so components are filled in ascending order
 * while `offset` walks forward through memory.
 */
#define VEC_LOAD2(val, AS) \
	VEC_LOAD1(val.lo, AS) \
	VEC_LOAD1(val.hi, AS)
#define VEC_LOAD3(val, AS) \
	VEC_LOAD1(val.s0, AS) \
	VEC_LOAD1(val.s1, AS) \
	VEC_LOAD1(val.s2, AS)
#define VEC_LOAD4(val, AS) \
	VEC_LOAD2(val.lo, AS) \
	VEC_LOAD2(val.hi, AS)
#define VEC_LOAD8(val, AS) \
	VEC_LOAD4(val.lo, AS) \
	VEC_LOAD4(val.hi, AS)
#define VEC_LOAD16(val, AS) \
	VEC_LOAD8(val.lo, AS) \
	VEC_LOAD8(val.hi, AS)

/*
 * __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)
 *
 * Generates vload_half<SUFFIX> and vloada_half<SUFFIX> for address space AS.
 * Both return a TYPE whose VEC_SIZE components are loaded with
 * VEC_LOAD<VEC_SIZE>.  They differ only in how the element offset is scaled
 * before loading: vload_half multiplies it by VEC_SIZE, vloada_half by
 * OFFSET_SIZE.  The argument values come from the __CLC_BODY include below
 * (vload_half.inc), which is not part of this file.
 */
#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
  _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \
    offset *= VEC_SIZE; \
    TYPE __tmp; \
    VEC_LOAD##VEC_SIZE(__tmp, AS) \
    return __tmp; \
  } \
  _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, const AS half *mem) { \
    offset *= OFFSET_SIZE; \
    TYPE __tmp; \
    VEC_LOAD##VEC_SIZE(__tmp, AS) \
    return __tmp; \
  }

/* Forwarding layer: arguments passed to FUNC are macro-expanded before they
 * reach the ## operators in __FUNC. */
#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)

#define __CLC_BODY "vload_half.inc"
#include <clc/math/gentype.inc>

/* Drop every helper macro defined by this file. */
#undef __CLC_BODY
#undef FUNC
#undef __FUNC
#undef VEC_LOAD16
#undef VEC_LOAD8
#undef VEC_LOAD4
#undef VEC_LOAD3
#undef VEC_LOAD2
#undef VEC_LOAD1
#undef VLOAD_TYPES
#undef VLOAD_ADDR_SPACES
#undef VLOAD_VECTORIZE