# OpenCL built-in library: type conversion functions
#
# Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
# Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
# Copyright (c) 2024 Romaric Jodin <rjodin@chromium.org>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# This script generates the file convert_type.cl, which contains all of the
# OpenCL functions in the form:
#
# convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--clspv",
    action="store_true",
    help="Generate the clspv variant of the code",
)
args = parser.parse_args()

# True when the clspv flavour of the generated code was requested.
clspv = args.clspv

# Scalar type names, grouped by the properties the generators care about.
int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
float_types = ["half", "float", "double"]
types = int_types + float_types
unsigned_types = ["uchar", "ushort", "uint", "ulong"]
int64_types = ["long", "ulong"]
float64_types = ["double"]
float16_types = ["half"]

# Vector width suffixes; the empty string stands for the scalar form.
vector_sizes = ["", "2", "3", "4", "8", "16"]
half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]

saturation = ["", "_sat"]
rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]

# Signed integer type of the same width, used for select() masks.
bool_type = {
    "char": "char", "uchar": "char",
    "short": "short", "ushort": "short",
    "int": "int", "uint": "int",
    "long": "long", "ulong": "long",
    "half": "short",
    "float": "int",
    "double": "long",
}

# Unsigned integer type of the same width.
unsigned_type = {
    "char": "uchar", "uchar": "uchar",
    "short": "ushort", "ushort": "ushort",
    "int": "uint", "uint": "uint",
    "long": "ulong", "ulong": "ulong",
}

# Size of each scalar type in bytes.
sizeof_type = {
    "char": 1, "uchar": 1,
    "short": 2, "ushort": 2,
    "int": 4, "uint": 4,
    "long": 8, "ulong": 8,
    "half": 2,
    "float": 4,
    "double": 8,
}

# Spelling of the largest value of each type in the generated C code.
limit_max = {
    "char": "CHAR_MAX", "uchar": "UCHAR_MAX",
    "short": "SHRT_MAX", "ushort": "USHRT_MAX",
    "int": "INT_MAX", "uint": "UINT_MAX",
    "long": "LONG_MAX", "ulong": "ULONG_MAX",
    "half": "0x1.ffcp+15",
}

# Spelling of the smallest value of each type in the generated C code.
limit_min = {
    "char": "CHAR_MIN", "uchar": "0",
    "short": "SHRT_MIN", "ushort": "0",
    "int": "INT_MIN", "uint": "0",
    "long": "LONG_MIN", "ulong": "0",
    "half": "-0x1.ffcp+15",
}


def conditional_guard(src, dst):
    """Print the preprocessor guard a ``src`` -> ``dst`` conversion needs.

    The guard depends on whether either type is a 64-bit float, a 16-bit
    float or a 64-bit integer.  Floating-point requirements take precedence
    over the 64-bit integer one: the generated header rejects (``#error``) an
    embedded profile that supports cl_khr_fp64 without cles_khr_int64, so the
    fp64 guard alone is enough when a 64-bit integer is also involved.

    Returns True when a guard line was printed (the caller is then expected
    to print the matching ``#endif``) and False when no guard is required.
    """
    endpoints = (src, dst)
    uses_fp64 = any(name in float64_types for name in endpoints)
    uses_fp16 = any(name in float16_types for name in endpoints)
    uses_int64 = any(name in int64_types for name in endpoints)

    if uses_fp64 and uses_fp16:
        directive = "#if defined(cl_khr_fp16) && defined(cl_khr_fp64)"
    elif uses_fp64:
        directive = "#ifdef cl_khr_fp64"
    elif uses_fp16:
        directive = "#if defined cl_khr_fp16"
    elif uses_int64:
        directive = "#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)"
    else:
        return False

    print(directive)
    return True


# File prologue of the generated convert_type.cl: banner, license, include
# and the extension pragmas.
print(
    """/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!

 DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
 $ ./generate-conversion-type-cl.sh

 OpenCL type conversion functions

 Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
 Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
*/

#include <clc/clc.h>

#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

#if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
#error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
#endif

#endif

#ifdef cles_khr_int64
#pragma OPENCL EXTENSION cles_khr_int64 : enable
#endif

"""
)

#
# Default Conversions
#
# All conversions are in accordance with the OpenCL specification,
# which cites the C99 conversion rules.
#
# Casting from floating point to integer results in conversions
# with truncation, so it should be suitable for the default convert
# functions.
#
# Conversions from integer to floating-point, and floating-point to
# floating-point through casting are done with the default rounding
# mode. While C99 allows dynamically changing the rounding mode
# during runtime, it is not a supported feature in OpenCL according
# to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
#
# Therefore, we can assume for optimization purposes that the
# rounding mode is fixed to round-to-nearest-even. Platform target
# authors should ensure that the rounding-control registers remain
# in this state, and that this invariant holds.
#
# Also note, even though the OpenCL specification isn't entirely
# clear on this matter, we implement all rounding mode combinations
# even for integer-to-integer conversions. When such a conversion
# is used, the rounding mode is ignored.
#


def generate_default_conversion(src, dst, mode):
    """Print the cast-based ``convert_<dst><n><mode>`` overloads taking ``src``.

    One overload is printed for every entry of ``vector_sizes``: the scalar
    form is a plain C cast and the vector forms use
    ``__builtin_convertvector``.  The group is wrapped in the guard printed
    by ``conditional_guard`` whenever that function reports one is needed.
    """
    guarded = conditional_guard(src, dst)

    for width in vector_sizes:
        if width:
            statement = f"return __builtin_convertvector(x, {dst}{width});"
        else:
            statement = f"return ({dst})x;"
        signature = f"{dst}{width} convert_{dst}{width}{mode}({src}{width} x)"
        # The trailing "\n" plus print()'s own newline leaves one blank line
        # after each generated function.
        print(f"_CLC_DEF _CLC_OVERLOAD {signature} {{\n {statement}\n}}\n")

    if guarded:
        print("#endif")


# Do not generate default conversion for clspv as they are handled natively
if not clspv:
    for src in types:
        for dst in types:
            generate_default_conversion(src, dst, "")

for src in int_types:
    for dst in int_types:
        for mode in rounding_modes:
            # "_rte" conversions are handled natively by clspv, so they are
            # only generated for the default variant.
            if not (clspv and mode == "_rte"):
                generate_default_conversion(src, dst, mode)

#
# Saturated Conversions To Integers
#
# These functions are dependent on the unsaturated conversion functions
# generated above, and use clamp, max, min, and select to eliminate
# branching and vectorize the conversions.
#
# Again, as above, we allow all rounding modes for integer-to-integer
# conversions with saturation.
#


def generate_saturated_conversion(src, dst, size):
    """Print ``convert_<dst><size>_sat(<src><size> x)``.

    ``dst`` is expected to be an integer type.  Identical types return ``x``
    unchanged, floating-point sources convert first and then patch the
    out-of-range lanes with ``select``, and integer sources limit ``x`` to
    the destination range before delegating to the unsaturated conversion.
    """
    guarded = conditional_guard(src, dst)
    dst_vec = f"{dst}{size}"
    src_vec = f"{src}{size}"

    # Header
    print(f"_CLC_DEF _CLC_OVERLOAD\n{dst_vec} convert_{dst_vec}_sat({src_vec} x)\n{{")

    # FIXME: This is a work around for lack of select function with
    # signed third argument when the first two arguments are unsigned types.
    # We cast to the signed type for sign-extension, then do a bitcast to
    # the unsigned type.
    mask_convert = f"convert_{bool_type[dst]}{size}"
    if dst in unsigned_types:
        bool_prefix = f"as_{dst_vec}({mask_convert}"
        bool_suffix = ")"
    else:
        bool_prefix = mask_convert
        bool_suffix = ""

    # Body
    if src == dst:
        # Conversion between same types
        print(" return x;")
    elif src in float_types:
        # Conversion from float to int.  The clspv variant saturates on
        # inclusive comparisons against the limits, the default variant on
        # strict ones.
        below, above = ("<=", ">=") if clspv else ("<", ">")
        lowest = limit_min[dst]
        highest = limit_max[dst]
        print(f" {dst_vec} y = convert_{dst_vec}(x);")
        print(
            f" y = select(y, ({dst_vec}){lowest}, "
            f"{bool_prefix}(x {below} ({src_vec}){lowest}){bool_suffix});"
        )
        print(
            f" y = select(y, ({dst_vec}){highest}, "
            f"{bool_prefix}(x {above} ({src_vec}){highest}){bool_suffix});"
        )
        print(" return y;")
    else:
        # Integer to integer conversion
        src_is_unsigned = src in unsigned_types
        src_bytes = sizeof_type[src]
        dst_bytes = sizeof_type[dst]

        if src_bytes >= dst_bytes:
            if src_is_unsigned:
                # Unsigned source that is at least as wide: only the upper
                # bound can be exceeded.
                print(f" x = min(x, ({src}){limit_max[dst]});")
            elif src_bytes == dst_bytes:
                # Signed source of the same width: drop negative values.
                print(f" x = max(x, ({src})0);")
            else:
                # Wider signed source: both bounds can be exceeded.
                print(
                    f" x = clamp(x, ({src}){limit_min[dst]}, ({src}){limit_max[dst]});"
                )
        elif not src_is_unsigned and dst in unsigned_types:
            # Narrower signed source going to unsigned: drop negative values.
            print(f" x = max(x, ({src})0);")

        print(f" return convert_{dst_vec}(x);")

    # Footer
    print("}")
    if guarded:
        print("#endif")


for src in types:
    for dst in int_types:
        for size in vector_sizes:
            generate_saturated_conversion(src, dst, size)


def generate_saturated_conversion_with_rounding(src, dst, size, mode):
    """Print ``convert_<dst><size>_sat<mode>`` as a forwarder to ``_sat``.

    The generated function ignores the rounding mode and simply calls the
    saturated conversion without a rounding suffix.
    """
    guarded = conditional_guard(src, dst)
    dst_vec = f"{dst}{size}"

    print(
        "_CLC_DEF _CLC_OVERLOAD\n"
        f"{dst_vec} convert_{dst_vec}_sat{mode}({src}{size} x)\n"
        "{\n"
        f" return convert_{dst_vec}_sat(x);\n"
        "}\n"
    )

    if guarded:
        print("#endif")


for src in int_types:
    for dst in int_types:
        for size in vector_sizes:
            for mode in rounding_modes:
                generate_saturated_conversion_with_rounding(src, dst, size, mode)

#
# Conversions To/From Floating-Point With Rounding
#
# Note that we assume as above that casts from floating-point to
# integer are done with truncation, and that the default rounding
# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
# rounding rules.
#
# These functions rely on the use of abs, ceil, fabs, floor,
# nextafter, sign, rint and the above generated conversion functions.
#
# Only conversions to integers can have saturation.
#


def generate_float_conversion(src, dst, size, mode, sat):
    """Print ``convert_<dst><size><sat><mode>(<src><size> x)``.

    Parameters:
        src:  name of the source scalar type.
        dst:  name of the destination scalar type.
        size: vector width suffix ("" for the scalar form).
        mode: rounding suffix, one of the entries of ``rounding_modes``.
        sat:  "" or "_sat"; only forwarded when ``dst`` is an integer type.

    Three shapes of function body are generated:

    * integer ``dst``: ``x`` is pre-rounded with rint/ceil/floor (nothing for
      "_rtz") and then passed to ``convert_<dst><size><sat>``;
    * floating-point ``dst`` with "_rte": a direct call to the default
      conversion;
    * floating-point ``dst`` with any other mode: the default conversion
      result ``r`` is converted back to the source type as ``y``, ``y`` is
      compared against ``x`` to see which way the default conversion rounded,
      and ``r`` is moved one step with ``nextafter`` when it went the wrong
      way for the requested mode.

    The function is wrapped in the guard printed by ``conditional_guard``
    when that function reports one is needed.
    """
    # Header
    close_conditional = conditional_guard(src, dst)
    print(
        """_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
{{""".format(
            SRC=src, DST=dst, N=size, M=mode, S=sat
        )
    )

    # Perform conversion
    if dst in int_types:
        # Round in the source type first; "_rtz" emits no rounding call and
        # goes straight to the conversion below.
        if mode == "_rte":
            print(" x = rint(x);")
        elif mode == "_rtp":
            print(" x = ceil(x);")
        elif mode == "_rtn":
            print(" x = floor(x);")
        print(" return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))
    elif mode == "_rte":
        # The default conversion is used as-is for round-to-nearest-even.
        print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))
    else:
        # r: default conversion result; y: r converted back to the source
        # type so it can be compared against the original x.
        print(" {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
        if clspv:
            # NOTE(review): for a floating-point src this emits
            # convert_<float type>_sat, which this script never generates
            # (saturated conversions are only produced for integer
            # destinations) - confirm the clspv toolchain accepts it.
            print(" {SRC}{N} y = convert_{SRC}{N}_sat(r);".format(SRC=src, N=size))
        else:
            print(" {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
        if mode == "_rtz":
            # Compare magnitudes: integer sources use abs() into the unsigned
            # type of the same width, floating-point sources use fabs().
            if src in int_types:
                print(
                    " {USRC}{N} abs_x = abs(x);".format(
                        USRC=unsigned_type[src], N=size
                    )
                )
                print(
                    " {USRC}{N} abs_y = abs(y);".format(
                        USRC=unsigned_type[src], N=size
                    )
                )
            else:
                print(" {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
                print(" {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
            # When |y| > |x| the result is stepped towards zero:
            # sign(r) * -INFINITY points nextafter() at the opposite sign.
            if clspv:
                print(
                    " {BOOL}{N} c = convert_{BOOL}{N}(abs_y > abs_x);".format(
                        BOOL=bool_type[dst], N=size
                    )
                )
                # Extra trigger when x equals the source maximum; presumably
                # needed because the saturating back-conversion above cannot
                # produce a y larger than that maximum - TODO confirm.
                if sizeof_type[src] >= 4 and src in int_types:
                    print(
                        " c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
                            BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
                        )
                    )
                print(
                    " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format(
                        DST=dst, N=size, BOOL=bool_type[dst], SRC=src
                    )
                )
            else:
                print(
                    " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
                        DST=dst, N=size, BOOL=bool_type[dst]
                    )
                )
            # Integer sources of two or more bytes converted to half are
            # additionally clamped to the half limits from limit_min/limit_max.
            if dst == "half" and src in int_types and sizeof_type[src] >= 2:
                dst_max = limit_max[dst]
                # short is 16 bits signed, so the maximum value rounded to zero is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
                if src == "short":
                    dst_max = "0x1.ffcp+14"
                print(
                    " return clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});".format(
                        DST=dst, N=size, DST_MIN=limit_min[dst], DST_MAX=dst_max
                    )
                )
            else:
                print(" return sel;")
        if mode == "_rtp":
            # When y < x the result is stepped towards +INFINITY.
            print(
                " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format(
                    DST=dst, N=size, BOOL=bool_type[dst]
                )
            )
            # Only the lower half limit is enforced for this mode.
            if dst == "half" and src in int_types and sizeof_type[src] >= 2:
                print(
                    " return max(sel, ({DST}{N}){DST_MIN});".format(
                        DST=dst, N=size, DST_MIN=limit_min[dst]
                    )
                )
            else:
                print(" return sel;")
        if mode == "_rtn":
            # When y > x the result is stepped towards -INFINITY.
            if clspv:
                print(
                    " {BOOL}{N} c = convert_{BOOL}{N}(y > x);".format(
                        BOOL=bool_type[dst], N=size
                    )
                )
                # Same extra trigger as in the "_rtz" clspv path: x equal to
                # the source maximum also forces the step.
                if sizeof_type[src] >= 4 and src in int_types:
                    print(
                        " c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
                            BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
                        )
                    )
                print(
                    " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format(
                        DST=dst, N=size, BOOL=bool_type[dst], SRC=src
                    )
                )
            else:
                print(
                    " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
                        DST=dst, N=size, BOOL=bool_type[dst]
                    )
                )
            # Only the upper half limit is enforced for this mode.
            if dst == "half" and src in int_types and sizeof_type[src] >= 2:
                dst_max = limit_max[dst]
                # short is 16 bits signed, so the maximum value rounded to negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
                if src == "short":
                    dst_max = "0x1.ffcp+14"
                print(
                    " return min(sel, ({DST}{N}){DST_MAX});".format(
                        DST=dst, N=size, DST_MAX=dst_max
                    )
                )
            else:
                print(" return sel;")

    # Footer
    print("}")
    if close_conditional:
        print("#endif")


# Floating-point to integer: every rounding mode, with and without saturation.
for src in float_types:
    for dst in int_types:
        for size in vector_sizes:
            for mode in rounding_modes:
                for sat in saturation:
                    generate_float_conversion(src, dst, size, mode, sat)


# Any type to floating-point: every rounding mode, never saturated.
for src in types:
    for dst in float_types:
        for size in vector_sizes:
            for mode in rounding_modes:
                # Do not generate "_rte" conversion for clspv as they are
                # handled natively
                if clspv and mode == "_rte":
                    continue
                generate_float_conversion(src, dst, size, mode, "")