/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_ACLE_H
#define __ARM_ACLE_H

#ifndef __ARM_ACLE
#error "ACLE intrinsics support not enabled."
#endif

#include <stdint.h>

#if defined(__cplusplus)
extern "C" {
#endif

/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 8.3 Memory barriers */
#if !defined(_MSC_VER)
#define __dmb(i) __builtin_arm_dmb(i)
#define __dsb(i) __builtin_arm_dsb(i)
#define __isb(i) __builtin_arm_isb(i)
#endif
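/*
 * Usage sketch (illustrative, not part of ACLE): the argument selects the
 * barrier domain and access type; 0xF ("SY", full system, reads and writes)
 * is the common default.
 *
 *   void publish(int *data, volatile int *flag) {   // hypothetical helper
 *     *data = 42;
 *     __dmb(0xF);    // order the data store before the flag store
 *     *flag = 1;
 *   }
 */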

/* 8.4 Hints */

#if !defined(_MSC_VER)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
  __builtin_arm_wfi();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {
  __builtin_arm_wfe();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {
  __builtin_arm_sev();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {
  __builtin_arm_sevl();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {
  __builtin_arm_yield();
}
#endif

#if __ARM_32BIT_STATE
#define __dbg(t) __builtin_arm_dbg(t)
#endif

/* 8.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t __v;
  do
    __v = __builtin_arm_ldrex(__p);
  while (__builtin_arm_strex(__x, __p));
  return __v;
}
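/*
 * Usage sketch: __swp atomically stores the new value and returns the old
 * contents, so a naive, illustrative-only test-and-set spinlock looks like:
 *
 *   static volatile uint32_t lock;   // hypothetical lock word
 *   ...
 *   while (__swp(1, &lock) != 0)
 *     __yield();                     // old value 0 means the lock was free
 */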

/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)

#if __ARM_32BIT_STATE
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, 1)
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif

/* 8.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)

#if __ARM_32BIT_STATE
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, 0)
#else
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif
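/*
 * Usage sketch: __pld hints that an address will be read soon; the extended
 * form also chooses the access kind (0 = read, 1 = write), target cache
 * level and retention policy. For example, prefetching ahead in a copy loop
 * ('src', 'dst' and the distance of 64 are hypothetical):
 *
 *   for (size_t i = 0; i < n; i++) {
 *     __pld(&src[i + 64]);            // read prefetch, default policy
 *     __pldx(1, 0, 0, &dst[i + 64]);  // write prefetch, L1, normal retention
 *     dst[i] = src[i];
 *   }
 */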

/* 8.7 NOP */
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
  __builtin_arm_nop();
}

/* 9 DATA-PROCESSING INTRINSICS */
/* 9.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  __y %= 32;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (32 - __y));
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  __y %= 64;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (64 - __y));
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
  return __ror(__x, __y);
#else
  return __rorll(__x, __y);
#endif
}
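/*
 * Worked example: __ror(0x80000001U, 1) == 0xC0000000U. Rotate amounts are
 * taken modulo the operand width, so a rotation by 0 (or by 32 for the
 * 32-bit form) returns the input unchanged.
 */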


/* CLZ */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return __builtin_clz(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
  return __builtin_clzl(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return __builtin_clzll(__t);
}

/* CLS */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__cls(uint32_t __t) {
  return __builtin_arm_cls(__t);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clsl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_arm_cls(__t);
#else
  return __builtin_arm_cls64(__t);
#endif
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clsll(uint64_t __t) {
  return __builtin_arm_cls64(__t);
}

/* REV */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  return __builtin_bswap32(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_bswap32(__t);
#else
  return __builtin_bswap64(__t);
#endif
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  return __builtin_bswap64(__t);
}

/* REV16 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  return __ror(__rev(__t), 16);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rev16(__t);
#else
  return __rev16ll(__t);
#endif
}

/* REVSH */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  return __builtin_bswap16(__t);
}

/* RBIT */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  return __builtin_arm_rbit(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if __ARM_32BIT_STATE
  return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
         __builtin_arm_rbit(__t >> 32);
#else
  return __builtin_arm_rbit64(__t);
#endif
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rbit(__t);
#else
  return __rbitll(__t);
#endif
}
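/*
 * Worked examples: __rev(0x11223344U) == 0x44332211U (byte reverse),
 * __rev16(0x11223344U) == 0x22114433U (byte reverse within each halfword),
 * and __rbit(0x00000001U) == 0x80000000U (bit reverse).
 */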

/*
 * 9.3 16-bit multiplications
 */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif
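/*
 * Worked example: the suffix selects which halfword of each operand is used
 * (b = bottom 16 bits, t = top 16 bits), so with __a = 0x00030001 and
 * __b = 0x00040002, __smulbb(__a, __b) == 1 * 2 == 2 and
 * __smultt(__a, __b) == 3 * 4 == 12. The w forms multiply the full 32-bit
 * first operand by one halfword of the second.
 */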

/*
 * 9.4 Saturating intrinsics
 *
 * FIXME: Change guards to their corresponding __ARM_FEATURE flags when Q flag
 * intrinsics are implemented and the flag is enabled.
 */
/* 9.4.1 Width-specified saturation intrinsics */
#if __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif

/* 9.4.2 Saturating addition and subtraction intrinsics */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t __t, int32_t __v) {
  return __builtin_arm_qsub(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t __t) {
  return __builtin_arm_qadd(__t, __t);
}
#endif
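/*
 * Worked example: __qadd saturates instead of wrapping, so
 * __qadd(INT32_MAX, 1) == INT32_MAX (and the Q flag is set), whereas plain
 * addition would overflow. __qdbl(__t) is a saturating 2 * __t.
 */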

/* 9.4.3 Accumulating multiplications */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif


/* 9.5.4 Parallel 16-bit saturation */
#if __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif

/* 9.5.5 Packing and unpacking */
#if __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif

/* 9.5.6 Parallel selection */
#if __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif

/* 9.5.7 Parallel 8-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif
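/*
 * Usage sketch: the parallel operations set the APSR.GE flags per lane, and
 * __sel then picks each byte from its first operand where the corresponding
 * GE flag is set. A byte-wise maximum is the classic idiom (the helper name
 * is hypothetical):
 *
 *   uint8x4_t bytewise_max(uint8x4_t a, uint8x4_t b) {
 *     (void)__usub8(a, b);   // sets GE[i] when a[i] >= b[i]
 *     return __sel(a, b);    // a[i] where GE[i] is set, otherwise b[i]
 *   }
 */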

/* 9.5.8 Sum of 8-bit absolute differences */
#if __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif

/* 9.5.9 Parallel 16-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif

/* 9.5.10 Parallel 16-bit multiplications */
#if __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif
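/*
 * Worked example: __smuad is a halfword dot product, so with
 * __a = 0x00030001 (halves 3, 1) and __b = 0x00040002 (halves 4, 2),
 * __smuad(__a, __b) == 3*4 + 1*2 == 14. The x-suffixed forms swap the
 * halfwords of the second operand first, and the l-suffixed forms
 * accumulate into a 64-bit value.
 */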

/* 9.7 CRC32 intrinsics */
#if __ARM_FEATURE_CRC32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32b(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32b(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32h(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32h(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32w(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32w(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32d(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32d(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cb(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32cb(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32ch(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32ch(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cw(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32cw(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cd(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32cd(__a, __b);
}
#endif
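/*
 * Usage sketch: a buffer is typically folded one byte (or word) at a time,
 * threading the running CRC through each call. The initial value and final
 * inversion depend on the CRC variant in use; the values below follow the
 * common CRC-32 convention and are shown as an assumption, and the helper
 * name is hypothetical:
 *
 *   uint32_t crc32_bytes(const uint8_t *p, size_t n) {
 *     uint32_t crc = 0xFFFFFFFFu;
 *     while (n--)
 *       crc = __crc32b(crc, *p++);
 *     return ~crc;
 *   }
 */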

/* Armv8.3-A Javascript conversion intrinsic */
#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__jcvt(double __a) {
  return __builtin_arm_jcvt(__a);
}
#endif

/* 10.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
#define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg))
#define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg))
#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
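/*
 * Usage sketch: the sysreg argument is a string constant naming the
 * register, e.g. reading the AArch64 virtual counter (CNTVCT_EL0 is
 * readable at EL0 and is used here purely as an example):
 *
 *   uint64_t ticks = __arm_rsr64("CNTVCT_EL0");
 */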

/* Memory Tagging Extensions (MTE) Intrinsics */
#if __ARM_FEATURE_MEMORY_TAGGING
#define __arm_mte_create_random_tag(__ptr, __mask)  __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset)  __builtin_arm_addg(__ptr, __tag_offset)
#define __arm_mte_exclude_tag(__ptr, __excluded)  __builtin_arm_gmi(__ptr, __excluded)
#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr)
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
#endif
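/*
 * Usage sketch (assumes an MTE-enabled target and 'p' pointing at a
 * 16-byte-aligned tag granule; illustrative only):
 *
 *   void *tagged = __arm_mte_create_random_tag(p, 0);  // pick a random tag
 *   __arm_mte_set_tag(tagged);            // store that tag for p's granule
 *   ...
 *   void *check = __arm_mte_get_tag(p);   // reload the tag from memory
 */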

/* Transactional Memory Extension (TME) Intrinsics */
#if __ARM_FEATURE_TME

#define _TMFAILURE_REASON  0x00007fffu
#define _TMFAILURE_RTRY    0x00008000u
#define _TMFAILURE_CNCL    0x00010000u
#define _TMFAILURE_MEM     0x00020000u
#define _TMFAILURE_IMP     0x00040000u
#define _TMFAILURE_ERR     0x00080000u
#define _TMFAILURE_SIZE    0x00100000u
#define _TMFAILURE_NEST    0x00200000u
#define _TMFAILURE_DBG     0x00400000u
#define _TMFAILURE_INT     0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u

#define __tstart()        __builtin_arm_tstart()
#define __tcommit()       __builtin_arm_tcommit()
#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
#define __ttest()         __builtin_arm_ttest()

#endif /* __ARM_FEATURE_TME */
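/*
 * Usage sketch: __tstart() returns 0 when the transaction starts and a
 * failure status otherwise, so the usual shape is a bounded retry loop with
 * a non-transactional fallback (illustrative only):
 *
 *   uint64_t status = __tstart();
 *   if (status == 0) {
 *     ...                                  // transactional region
 *     __tcommit();
 *   } else if (status & _TMFAILURE_RTRY) {
 *     ...                                  // transient failure: retry or fall back
 *   }
 */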

#if defined(__cplusplus)
}
#endif

#endif /* __ARM_ACLE_H */