xref: /llvm-project/libclc/generic/lib/gen_convert.py (revision 0d2ef7af1956b463b87a09500bd87bd4147616d4)
1# OpenCL built-in library: type conversion functions
2#
3# Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
4# Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
5# Copyright (c) 2024 Romaric Jodin <rjodin@chromium.org>
6#
7# Permission is hereby granted, free of charge, to any person obtaining a copy
8# of this software and associated documentation files (the "Software"), to deal
9# in the Software without restriction, including without limitation the rights
10# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11# copies of the Software, and to permit persons to whom the Software is
12# furnished to do so, subject to the following conditions:
13#
14# The above copyright notice and this permission notice shall be included in
15# all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23# THE SOFTWARE.
24
25# This script generates the file convert_type.cl, which contains all of the
26# OpenCL functions in the form:
27#
28# convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
29
30import argparse
31
# Command-line handling: the only option is --clspv, a boolean switch that is
# off unless it is passed explicitly.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--clspv",
    help="Generate the clspv variant of the code",
    action="store_true",
)
args = parser.parse_args()

# Module-wide flag consulted by the generator code further down.
clspv = args.clspv
39
# Scalar OpenCL type names.  `types` lists every integer type first, followed
# by every floating-point type.
int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
float_types = ["half", "float", "double"]
types = int_types + float_types

# The unsigned integer types are exactly the integer names starting with "u".
unsigned_types = [name for name in int_types if name.startswith("u")]

# Types whose use has to be wrapped in a preprocessor guard.
int64_types = ["long", "ulong"]
float64_types = ["double"]
float16_types = ["half"]

# Vector-width suffixes; the empty string stands for the scalar type.
vector_sizes = ["", "2", "3", "4", "8", "16"]

# Pairs of (vector width, half of that width).  Not referenced by the code
# visible in this part of the file.
half_sizes = [
    ("2", ""),
    ("4", "2"),
    ("8", "4"),
    ("16", "8"),
]

# Function-name suffixes: optional saturation and explicit rounding modes.
saturation = ["", "_sat"]
rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]
64
# Signed integer type of the same width as each type; used as the type of
# comparison results handed to select().
bool_type = dict(
    char="char",
    uchar="char",
    short="short",
    ushort="short",
    int="int",
    uint="int",
    long="long",
    ulong="long",
    half="short",
    float="int",
    double="long",
)

# Unsigned counterpart of every integer type (unsigned types map to themselves).
unsigned_type = dict(
    char="uchar",
    uchar="uchar",
    short="ushort",
    ushort="ushort",
    int="uint",
    uint="uint",
    long="ulong",
    ulong="ulong",
)

# Size of each scalar type in bytes.
sizeof_type = dict(
    char=1,
    uchar=1,
    short=2,
    ushort=2,
    int=4,
    uint=4,
    long=8,
    ulong=8,
    half=2,
    float=4,
    double=8,
)

# C expression for the largest value of each type that has a saturation limit.
limit_max = dict(
    char="CHAR_MAX",
    uchar="UCHAR_MAX",
    short="SHRT_MAX",
    ushort="USHRT_MAX",
    int="INT_MAX",
    uint="UINT_MAX",
    long="LONG_MAX",
    ulong="ULONG_MAX",
    half="0x1.ffcp+15",
)

# C expression for the smallest value of each type that has a saturation limit.
limit_min = dict(
    char="CHAR_MIN",
    uchar="0",
    short="SHRT_MIN",
    ushort="0",
    int="INT_MIN",
    uint="0",
    long="LONG_MIN",
    ulong="0",
    half="-0x1.ffcp+15",
)
127
128
def conditional_guard(src, dst):
    """Print the opening preprocessor guard a src -> dst conversion needs.

    Each of the two type names is classified as a 64-bit integer
    (int64_types), a double (float64_types) or a half (float16_types); any
    other name needs no guard.

    Returns True when a guard line was printed, in which case the caller is
    expected to print the matching "#endif", and False when nothing was
    printed.
    """
    required = set()
    for type_name in (src, dst):
        # A name is put in at most one category, checked in this order.
        if type_name in int64_types:
            required.add("int64")
        elif type_name in float64_types:
            required.add("fp64")
        elif type_name in float16_types:
            required.add("fp16")

    if "fp64" in required and "fp16" in required:
        guard = "#if defined(cl_khr_fp16) && defined(cl_khr_fp64)"
    elif "fp64" in required:
        # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has
        # to be supported as well, so this guard also covers the case where
        # the other type is a 64-bit integer.
        guard = "#ifdef cl_khr_fp64"
    elif "fp16" in required:
        guard = "#if defined cl_khr_fp16"
    elif "int64" in required:
        guard = "#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)"
    else:
        return False

    print(guard)
    return True
159
160
# Text emitted once at the very top of the generated OpenCL source: the
# "autogenerated" banner with its license, the clc include, and the pragmas
# enabling the optional extensions when they are available.
# NOTE(review): the banner names convert_type.py and
# generate-conversion-type-cl.sh; confirm these still match how this
# generator is actually named and invoked.
_FILE_PREAMBLE = """/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!

   DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
   $ ./generate-conversion-type-cl.sh

   OpenCL type conversion functions

   Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
   Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
*/

#include <clc/clc.h>

#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

#if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
#error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
#endif

#endif

#ifdef cles_khr_int64
#pragma OPENCL EXTENSION cles_khr_int64 : enable
#endif

"""

print(_FILE_PREAMBLE)
212
213#
214# Default Conversions
215#
216# All conversions are in accordance with the OpenCL specification,
217# which cites the C99 conversion rules.
218#
219# Casting from floating point to integer results in conversions
220# with truncation, so it should be suitable for the default convert
221# functions.
222#
# Conversions from integer to floating-point, and from floating-point to
# floating-point, through casting are done with the default rounding
# mode. While C99 allows changing the rounding mode dynamically at
# runtime, this is not a supported feature in OpenCL according to
# Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
228#
229# Therefore, we can assume for optimization purposes that the
230# rounding mode is fixed to round-to-nearest-even. Platform target
231# authors should ensure that the rounding-control registers remain
232# in this state, and that this invariant holds.
233#
234# Also note, even though the OpenCL specification isn't entirely
235# clear on this matter, we implement all rounding mode combinations
236# even for integer-to-integer conversions. When such a conversion
237# is used, the rounding mode is ignored.
238#
239
240
def generate_default_conversion(src, dst, mode):
    """Print the cast-based convert_<dst><n><mode> overloads for src -> dst.

    One function is printed per entry of vector_sizes: the scalar overload is
    a plain C cast, every vector overload uses __builtin_convertvector.  The
    whole group is wrapped in the preprocessor guard chosen by
    conditional_guard, when one is needed.

    src, dst -- scalar type names.
    mode     -- rounding-mode suffix appended to the function name; it does
                not change the emitted body.
    """
    needs_endif = conditional_guard(src, dst)

    for width in vector_sizes:
        if width:
            dst_type = f"{dst}{width}"
            src_type = f"{src}{width}"
            result = f"__builtin_convertvector(x, {dst_type})"
        else:
            # Scalar overload: no width suffix, ordinary cast.
            dst_type = f"{dst}"
            src_type = f"{src}"
            result = f"({dst})x"

        # The trailing "\n" plus print's own newline leaves a blank line
        # after every function.
        print(
            f"_CLC_DEF _CLC_OVERLOAD {dst_type} convert_{dst_type}{mode}({src_type} x) {{\n"
            f"  return {result};\n"
            "}\n"
        )

    if needs_endif:
        print("#endif")
262
263
# The suffix-less default conversions are handled natively by clspv, so they
# are only generated for the regular build.
if not clspv:
    for src, dst in ((s, d) for s in types for d in types):
        generate_default_conversion(src, dst, "")

# Integer-to-integer conversions with an explicit rounding mode.
for src in int_types:
    for dst in int_types:
        for mode in rounding_modes:
            # "_rte" conversions are handled natively by clspv, so they are
            # left out of the clspv variant.
            if not (clspv and mode == "_rte"):
                generate_default_conversion(src, dst, mode)
278
279#
280# Saturated Conversions To Integers
281#
282# These functions are dependent on the unsaturated conversion functions
283# generated above, and use clamp, max, min, and select to eliminate
284# branching and vectorize the conversions.
285#
286# Again, as above, we allow all rounding modes for integer-to-integer
287# conversions with saturation.
288#
289
290
def generate_saturated_conversion(src, dst, size):
    """Print the convert_<dst><size>_sat(<src><size>) overload.

    The emitted function clamps the input to the range of the destination
    integer type and then calls the unsaturated convert_<dst><size>.  It is
    wrapped in the preprocessor guard chosen by conditional_guard, when one
    is needed.

    src  -- scalar source type name (integer or floating-point).
    dst  -- scalar destination type name; looked up in bool_type, limit_min
            and limit_max, and sizeof_type for integer sources.
    size -- vector-width suffix ("" for the scalar overload).

    Three bodies are possible:
      * src == dst: the argument is returned unchanged;
      * floating-point source: convert first, then overwrite out-of-range
        results with the destination limits using select();
      * integer source: clamp x with min/max/clamp where the source range
        exceeds the destination range, then convert.
    """
    close_conditional = conditional_guard(src, dst)

    src_vec = f"{src}{size}"
    dst_vec = f"{dst}{size}"

    def as_mask(condition):
        # FIXME: This is a work around for lack of select function with
        # signed third argument when the first two arguments are unsigned
        # types.  We cast to the signed type for sign-extension, then do a
        # bitcast to the unsigned type.
        mask = f"convert_{bool_type[dst]}{size}({condition})"
        if dst in unsigned_types:
            return f"as_{dst_vec}({mask})"
        return mask

    # Header
    print(f"_CLC_DEF _CLC_OVERLOAD\n{dst_vec} convert_{dst_vec}_sat({src_vec} x)\n{{")

    # Body
    if src == dst:
        # Conversion between same types
        print("  return x;")

    elif src in float_types:
        # Conversion from float to int.  The clspv variant also replaces
        # values that compare equal to a limit; the regular variant only
        # replaces values strictly beyond it.
        below, above = ("<=", ">=") if clspv else ("<", ">")
        dst_min = limit_min[dst]
        dst_max = limit_max[dst]
        too_low = as_mask(f"x {below} ({src_vec}){dst_min}")
        too_high = as_mask(f"x {above} ({src_vec}){dst_max}")

        # Every emitted line uses the same two-space indentation as the rest
        # of the generated file.
        print(f"  {dst_vec} y = convert_{dst_vec}(x);")
        print(f"  y = select(y, ({dst_vec}){dst_min}, {too_low});")
        print(f"  y = select(y, ({dst_vec}){dst_max}, {too_high});")
        print("  return y;")

    else:
        # Integer to integer conversion
        src_size = sizeof_type[src]
        dst_size = sizeof_type[dst]

        if src in unsigned_types:
            # An unsigned source can only overflow upwards, and only when it
            # is at least as wide as the destination.
            if src_size >= dst_size:
                print(f"  x = min(x, ({src}){limit_max[dst]});")
        elif src_size > dst_size:
            # Wider signed source: both ends may be out of range.
            print(f"  x = clamp(x, ({src}){limit_min[dst]}, ({src}){limit_max[dst]});")
        elif src_size == dst_size or dst in unsigned_types:
            # Signed source whose positive range fits the destination; only
            # negative values have to be raised to zero.
            print(f"  x = max(x, ({src})0);")

        print(f"  return convert_{dst_vec}(x);")

    # Footer
    print("}")
    if close_conditional:
        print("#endif")
394
395
# Saturated conversions exist for every source type, but only for integer
# destinations; one overload is generated per vector width.
for src, dst, size in (
    (s, d, n) for s in types for d in int_types for n in vector_sizes
):
    generate_saturated_conversion(src, dst, size)
400
401
def generate_saturated_conversion_with_rounding(src, dst, size, mode):
    """Print convert_<dst><size>_sat<mode>(<src><size>) as a forwarding stub.

    The emitted function simply returns convert_<dst><size>_sat(x); the
    rounding-mode suffix only appears in the function name.  The function is
    wrapped in the preprocessor guard chosen by conditional_guard, when one
    is needed.
    """
    needs_endif = conditional_guard(src, dst)

    dst_vec = f"{dst}{size}"
    # The trailing "\n" plus print's own newline leaves a blank line after
    # the function.
    print(
        "_CLC_DEF _CLC_OVERLOAD\n"
        f"{dst_vec} convert_{dst_vec}_sat{mode}({src}{size} x)\n"
        "{\n"
        f"  return convert_{dst_vec}_sat(x);\n"
        "}\n"
    )

    if needs_endif:
        print("#endif")
421
422
# Integer-to-integer saturated conversions for every vector width and every
# rounding-mode suffix.
for src, dst, size, mode in (
    (s, d, n, m)
    for s in int_types
    for d in int_types
    for n in vector_sizes
    for m in rounding_modes
):
    generate_saturated_conversion_with_rounding(src, dst, size, mode)
428
429#
430# Conversions To/From Floating-Point With Rounding
431#
432# Note that we assume as above that casts from floating-point to
433# integer are done with truncation, and that the default rounding
434# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
435# rounding rules.
436#
437# These functions rely on the use of abs, ceil, fabs, floor,
438# nextafter, sign, rint and the above generated conversion functions.
439#
440# Only conversions to integers can have saturation.
441#
442
443
def generate_float_conversion(src, dst, size, mode, sat):
    """Print convert_<dst><size><sat><mode>(<src><size>) for a float conversion.

    src, dst -- scalar type names; at least one of them is expected to be a
                floating-point type.
    size     -- vector-width suffix ("" for the scalar overload).
    mode     -- rounding-mode suffix: "_rtz", "_rte", "_rtp" or "_rtn".
    sat      -- saturation suffix ("" or "_sat"); only used when dst is an
                integer type.

    Integer destination: x is first rounded with rint/ceil/floor according to
    the mode (nothing is emitted for "_rtz") and then passed to
    convert_<dst><size><sat>.

    Floating-point destination: "_rte" forwards to the default conversion.
    For the other modes the emitted code converts x to r, converts r back to
    the source type as y, and uses select() to replace r by a neighbouring
    value obtained with nextafter() depending on how y compares to x.  When
    the destination is half and the source is an integer type of at least two
    bytes, the result is additionally limited to the half range.

    The function is wrapped in the preprocessor guard chosen by
    conditional_guard, when one is needed.
    """
    src_vec = f"{src}{size}"
    dst_vec = f"{dst}{size}"

    def wide_int_to_half():
        # Integer sources of two bytes or more get the extra half-range limit.
        return dst == "half" and src in int_types and sizeof_type[src] >= 2

    def half_upper_bound():
        bound = limit_max[dst]
        # short is 16 bits signed, so the maximum value rounded towards zero
        # or negative infinity is 0x1.ffcp+14
        # (0x1p+15 == 32768 > 0x7fff == 32767)
        if src == "short":
            bound = "0x1.ffcp+14"
        return bound

    def emit_select(neighbour, comparison):
        # Emits the "sel" declaration shared by the _rtz and _rtn paths.  The
        # clspv variant stores the condition in a variable "c" and, for
        # integer sources of four bytes or more, also sets it when x equals
        # the source type's maximum.
        bool_vec = f"{bool_type[dst]}{size}"
        condition = f"convert_{bool_vec}({comparison})"
        if clspv:
            print(f"  {bool_vec} c = {condition};")
            if sizeof_type[src] >= 4 and src in int_types:
                print(
                    f"  c = c || convert_{bool_vec}(({src_vec}){limit_max[src]} == x);"
                )
            condition = "c"
        print(f"  {dst_vec} sel = select(r, nextafter(r, {neighbour}), {condition});")

    # Header
    needs_endif = conditional_guard(src, dst)
    print(
        f"_CLC_DEF _CLC_OVERLOAD\n{dst_vec} convert_{dst_vec}{sat}{mode}({src_vec} x)\n{{"
    )

    # Perform conversion
    if dst in int_types:
        rounding_call = {"_rte": "rint", "_rtp": "ceil", "_rtn": "floor"}.get(mode)
        if rounding_call is not None:
            print(f"  x = {rounding_call}(x);")
        print(f"  return convert_{dst_vec}{sat}(x);")
    elif mode == "_rte":
        print(f"  return convert_{dst_vec}(x);")
    else:
        print(f"  {dst_vec} r = convert_{dst_vec}(x);")
        # The clspv variant converts back with the saturating conversion.
        back_suffix = "_sat" if clspv else ""
        print(f"  {src_vec} y = convert_{src_vec}{back_suffix}(r);")

        if mode == "_rtz":
            # Magnitudes are compared; integer sources use abs() with the
            # unsigned type, floating-point sources use fabs().
            if src in int_types:
                magnitude_vec = f"{unsigned_type[src]}{size}"
                magnitude_fn = "abs"
            else:
                magnitude_vec = src_vec
                magnitude_fn = "fabs"
            print(f"  {magnitude_vec} abs_x = {magnitude_fn}(x);")
            print(f"  {magnitude_vec} abs_y = {magnitude_fn}(y);")
            emit_select(f"sign(r) * ({dst_vec})-INFINITY", "abs_y > abs_x")
            if wide_int_to_half():
                print(
                    f"  return clamp(sel, ({dst_vec}){limit_min[dst]}, ({dst_vec}){half_upper_bound()});"
                )
            else:
                print("  return sel;")

        elif mode == "_rtp":
            print(
                f"  {dst_vec} sel = select(r, nextafter(r, ({dst_vec})INFINITY), convert_{bool_type[dst]}{size}(y < x));"
            )
            if wide_int_to_half():
                print(f"  return max(sel, ({dst_vec}){limit_min[dst]});")
            else:
                print("  return sel;")

        elif mode == "_rtn":
            emit_select(f"({dst_vec})-INFINITY", "y > x")
            if wide_int_to_half():
                print(f"  return min(sel, ({dst_vec}){half_upper_bound()});")
            else:
                print("  return sel;")

    # Footer
    print("}")
    if needs_endif:
        print("#endif")
577
578
# Floating-point to integer: every vector width, rounding mode and
# saturation variant.
for src, dst, size, mode, sat in (
    (s, d, n, m, suffix)
    for s in float_types
    for d in int_types
    for n in vector_sizes
    for m in rounding_modes
    for suffix in saturation
):
    generate_float_conversion(src, dst, size, mode, sat)


# Any type to floating-point: never saturated.
for src in types:
    for dst in float_types:
        for size in vector_sizes:
            for mode in rounding_modes:
                # "_rte" conversions are handled natively by clspv, so they
                # are left out of the clspv variant.
                if not (clspv and mode == "_rte"):
                    generate_float_conversion(src, dst, size, mode, "")
596