xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/mips/loongson-mmiintrin.h (revision 4f645668ed707e1f969c546666f8c8e45e6f8888)
1 /* Intrinsics for Loongson MultiMedia extension Instructions operations.
2 
3    Copyright (C) 2008-2019 Free Software Foundation, Inc.
4    Contributed by CodeSourcery.
5 
6    This file is part of GCC.
7 
8    GCC is free software; you can redistribute it and/or modify it
9    under the terms of the GNU General Public License as published
10    by the Free Software Foundation; either version 3, or (at your
11    option) any later version.
12 
13    GCC is distributed in the hope that it will be useful, but WITHOUT
14    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16    License for more details.
17 
18    Under Section 7 of GPL version 3, you are granted additional
19    permissions described in the GCC Runtime Library Exception, version
20    3.1, as published by the Free Software Foundation.
21 
22    You should have received a copy of the GNU General Public License and
23    a copy of the GCC Runtime Library Exception along with this program;
24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
25    <http://www.gnu.org/licenses/>.  */
26 
27 #ifndef _GCC_LOONGSON_MMIINTRIN_H
28 #define _GCC_LOONGSON_MMIINTRIN_H
29 
30 #if !defined(__mips_loongson_mmi)
31 # error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\
32  loongson-mmiintrin.h
33 #endif
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
39 #include <stdint.h>
40 
/* Vectors of unsigned bytes, halfwords and words.  Every type is 8 bytes
   wide.  The attribute is spelled __vector_size__ (reserved form, like
   __always_inline__ below) so that a user macro named vector_size cannot
   break this header.  */
typedef uint8_t uint8x8_t __attribute__ ((__vector_size__ (8)));
typedef uint16_t uint16x4_t __attribute__ ((__vector_size__ (8)));
typedef uint32_t uint32x2_t __attribute__ ((__vector_size__ (8)));
45 
/* Vectors of signed bytes, halfwords and words.  Every type is 8 bytes
   wide.  The attribute is spelled __vector_size__ (reserved form, like
   __always_inline__ below) so that a user macro named vector_size cannot
   break this header.  */
typedef int8_t int8x8_t __attribute__ ((__vector_size__ (8)));
typedef int16_t int16x4_t __attribute__ ((__vector_size__ (8)));
typedef int32_t int32x2_t __attribute__ ((__vector_size__ (8)));
50 
51 /* SIMD intrinsics.
52    Unless otherwise noted, calls to the functions below will expand into
53    precisely one machine instruction, modulo any moves required to
54    satisfy register allocation constraints.  */
55 
56 /* Pack with signed saturation.  */
57 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
58 packsswh (int32x2_t s, int32x2_t t)
59 {
60   return __builtin_loongson_packsswh (s, t);
61 }
62 
63 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
64 packsshb (int16x4_t s, int16x4_t t)
65 {
66   return __builtin_loongson_packsshb (s, t);
67 }
68 
69 /* Pack with unsigned saturation.  */
70 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
71 packushb (uint16x4_t s, uint16x4_t t)
72 {
73   return __builtin_loongson_packushb (s, t);
74 }
75 
76 /* Vector addition, treating overflow by wraparound.  */
77 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
78 paddw_u (uint32x2_t s, uint32x2_t t)
79 {
80   return __builtin_loongson_paddw_u (s, t);
81 }
82 
83 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
84 paddh_u (uint16x4_t s, uint16x4_t t)
85 {
86   return __builtin_loongson_paddh_u (s, t);
87 }
88 
89 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
90 paddb_u (uint8x8_t s, uint8x8_t t)
91 {
92   return __builtin_loongson_paddb_u (s, t);
93 }
94 
95 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
96 paddw_s (int32x2_t s, int32x2_t t)
97 {
98   return __builtin_loongson_paddw_s (s, t);
99 }
100 
101 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
102 paddh_s (int16x4_t s, int16x4_t t)
103 {
104   return __builtin_loongson_paddh_s (s, t);
105 }
106 
107 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
108 paddb_s (int8x8_t s, int8x8_t t)
109 {
110   return __builtin_loongson_paddb_s (s, t);
111 }
112 
113 /* Addition of doubleword integers, treating overflow by wraparound.  */
/* Wraparound addition of the unsigned doublewords S and T
   (PADDD builtin, unsigned flavour).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
paddd_u (uint64_t s, uint64_t t)
{
  uint64_t sum = __builtin_loongson_paddd_u (s, t);
  return sum;
}
119 
/* Wraparound addition of the signed doublewords S and T
   (PADDD builtin, signed flavour).  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
paddd_s (int64_t s, int64_t t)
{
  int64_t sum = __builtin_loongson_paddd_s (s, t);
  return sum;
}
125 
126 /* Vector addition, treating overflow by signed saturation.  */
127 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
128 paddsh (int16x4_t s, int16x4_t t)
129 {
130   return __builtin_loongson_paddsh (s, t);
131 }
132 
133 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
134 paddsb (int8x8_t s, int8x8_t t)
135 {
136   return __builtin_loongson_paddsb (s, t);
137 }
138 
139 /* Vector addition, treating overflow by unsigned saturation.  */
140 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
141 paddush (uint16x4_t s, uint16x4_t t)
142 {
143   return __builtin_loongson_paddush (s, t);
144 }
145 
146 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
147 paddusb (uint8x8_t s, uint8x8_t t)
148 {
149   return __builtin_loongson_paddusb (s, t);
150 }
151 
152 /* Logical AND NOT.  */
/* Logical AND NOT of the unsigned doublewords S and T
   (PANDN builtin, uint64_t flavour).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
pandn_ud (uint64_t s, uint64_t t)
{
  uint64_t result = __builtin_loongson_pandn_ud (s, t);
  return result;
}
158 
159 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
160 pandn_uw (uint32x2_t s, uint32x2_t t)
161 {
162   return __builtin_loongson_pandn_uw (s, t);
163 }
164 
165 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
166 pandn_uh (uint16x4_t s, uint16x4_t t)
167 {
168   return __builtin_loongson_pandn_uh (s, t);
169 }
170 
171 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
172 pandn_ub (uint8x8_t s, uint8x8_t t)
173 {
174   return __builtin_loongson_pandn_ub (s, t);
175 }
176 
/* Logical AND NOT of the signed doublewords S and T
   (PANDN builtin, int64_t flavour).  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
pandn_sd (int64_t s, int64_t t)
{
  int64_t result = __builtin_loongson_pandn_sd (s, t);
  return result;
}
182 
183 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
184 pandn_sw (int32x2_t s, int32x2_t t)
185 {
186   return __builtin_loongson_pandn_sw (s, t);
187 }
188 
189 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
190 pandn_sh (int16x4_t s, int16x4_t t)
191 {
192   return __builtin_loongson_pandn_sh (s, t);
193 }
194 
195 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
196 pandn_sb (int8x8_t s, int8x8_t t)
197 {
198   return __builtin_loongson_pandn_sb (s, t);
199 }
200 
201 /* Average.  */
202 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
203 pavgh (uint16x4_t s, uint16x4_t t)
204 {
205   return __builtin_loongson_pavgh (s, t);
206 }
207 
208 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
209 pavgb (uint8x8_t s, uint8x8_t t)
210 {
211   return __builtin_loongson_pavgb (s, t);
212 }
213 
214 /* Equality test.  */
215 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
216 pcmpeqw_u (uint32x2_t s, uint32x2_t t)
217 {
218   return __builtin_loongson_pcmpeqw_u (s, t);
219 }
220 
221 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
222 pcmpeqh_u (uint16x4_t s, uint16x4_t t)
223 {
224   return __builtin_loongson_pcmpeqh_u (s, t);
225 }
226 
227 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
228 pcmpeqb_u (uint8x8_t s, uint8x8_t t)
229 {
230   return __builtin_loongson_pcmpeqb_u (s, t);
231 }
232 
233 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
234 pcmpeqw_s (int32x2_t s, int32x2_t t)
235 {
236   return __builtin_loongson_pcmpeqw_s (s, t);
237 }
238 
239 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
240 pcmpeqh_s (int16x4_t s, int16x4_t t)
241 {
242   return __builtin_loongson_pcmpeqh_s (s, t);
243 }
244 
245 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
246 pcmpeqb_s (int8x8_t s, int8x8_t t)
247 {
248   return __builtin_loongson_pcmpeqb_s (s, t);
249 }
250 
251 /* Greater-than test.  */
252 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
253 pcmpgtw_u (uint32x2_t s, uint32x2_t t)
254 {
255   return __builtin_loongson_pcmpgtw_u (s, t);
256 }
257 
258 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
259 pcmpgth_u (uint16x4_t s, uint16x4_t t)
260 {
261   return __builtin_loongson_pcmpgth_u (s, t);
262 }
263 
264 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
265 pcmpgtb_u (uint8x8_t s, uint8x8_t t)
266 {
267   return __builtin_loongson_pcmpgtb_u (s, t);
268 }
269 
270 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
271 pcmpgtw_s (int32x2_t s, int32x2_t t)
272 {
273   return __builtin_loongson_pcmpgtw_s (s, t);
274 }
275 
276 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
277 pcmpgth_s (int16x4_t s, int16x4_t t)
278 {
279   return __builtin_loongson_pcmpgth_s (s, t);
280 }
281 
282 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
283 pcmpgtb_s (int8x8_t s, int8x8_t t)
284 {
285   return __builtin_loongson_pcmpgtb_s (s, t);
286 }
287 
288 /* Extract halfword.  */
289 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
290 pextrh_u (uint16x4_t s, int field /* 0--3.  */)
291 {
292   return __builtin_loongson_pextrh_u (s, field);
293 }
294 
295 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
296 pextrh_s (int16x4_t s, int field /* 0--3.  */)
297 {
298   return __builtin_loongson_pextrh_s (s, field);
299 }
300 
301 /* Insert halfword.  */
302 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
303 pinsrh_0_u (uint16x4_t s, uint16x4_t t)
304 {
305   return __builtin_loongson_pinsrh_0_u (s, t);
306 }
307 
308 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
309 pinsrh_1_u (uint16x4_t s, uint16x4_t t)
310 {
311   return __builtin_loongson_pinsrh_1_u (s, t);
312 }
313 
314 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
315 pinsrh_2_u (uint16x4_t s, uint16x4_t t)
316 {
317   return __builtin_loongson_pinsrh_2_u (s, t);
318 }
319 
320 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
321 pinsrh_3_u (uint16x4_t s, uint16x4_t t)
322 {
323   return __builtin_loongson_pinsrh_3_u (s, t);
324 }
325 
326 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
327 pinsrh_0_s (int16x4_t s, int16x4_t t)
328 {
329   return __builtin_loongson_pinsrh_0_s (s, t);
330 }
331 
332 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
333 pinsrh_1_s (int16x4_t s, int16x4_t t)
334 {
335   return __builtin_loongson_pinsrh_1_s (s, t);
336 }
337 
338 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
339 pinsrh_2_s (int16x4_t s, int16x4_t t)
340 {
341   return __builtin_loongson_pinsrh_2_s (s, t);
342 }
343 
344 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
345 pinsrh_3_s (int16x4_t s, int16x4_t t)
346 {
347   return __builtin_loongson_pinsrh_3_s (s, t);
348 }
349 
350 /* Multiply and add.  */
351 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
352 pmaddhw (int16x4_t s, int16x4_t t)
353 {
354   return __builtin_loongson_pmaddhw (s, t);
355 }
356 
357 /* Maximum of signed halfwords.  */
358 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
359 pmaxsh (int16x4_t s, int16x4_t t)
360 {
361   return __builtin_loongson_pmaxsh (s, t);
362 }
363 
364 /* Maximum of unsigned bytes.  */
365 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
366 pmaxub (uint8x8_t s, uint8x8_t t)
367 {
368   return __builtin_loongson_pmaxub (s, t);
369 }
370 
371 /* Minimum of signed halfwords.  */
372 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
373 pminsh (int16x4_t s, int16x4_t t)
374 {
375   return __builtin_loongson_pminsh (s, t);
376 }
377 
378 /* Minimum of unsigned bytes.  */
379 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
380 pminub (uint8x8_t s, uint8x8_t t)
381 {
382   return __builtin_loongson_pminub (s, t);
383 }
384 
385 /* Move byte mask.  */
386 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
387 pmovmskb_u (uint8x8_t s)
388 {
389   return __builtin_loongson_pmovmskb_u (s);
390 }
391 
392 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
393 pmovmskb_s (int8x8_t s)
394 {
395   return __builtin_loongson_pmovmskb_s (s);
396 }
397 
398 /* Multiply unsigned integers and store high result.  */
399 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
400 pmulhuh (uint16x4_t s, uint16x4_t t)
401 {
402   return __builtin_loongson_pmulhuh (s, t);
403 }
404 
405 /* Multiply signed integers and store high result.  */
406 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
407 pmulhh (int16x4_t s, int16x4_t t)
408 {
409   return __builtin_loongson_pmulhh (s, t);
410 }
411 
412 /* Multiply signed integers and store low result.  */
413 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
414 pmullh (int16x4_t s, int16x4_t t)
415 {
416   return __builtin_loongson_pmullh (s, t);
417 }
418 
419 /* Multiply unsigned word integers.  */
420 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
421 pmuluw (uint32x2_t s, uint32x2_t t)
422 {
423   return __builtin_loongson_pmuluw (s, t);
424 }
425 
426 /* Absolute difference.  */
427 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
428 pasubub (uint8x8_t s, uint8x8_t t)
429 {
430   return __builtin_loongson_pasubub (s, t);
431 }
432 
433 /* Sum of unsigned byte integers.  */
434 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
435 biadd (uint8x8_t s)
436 {
437   return __builtin_loongson_biadd (s);
438 }
439 
440 /* Sum of absolute differences.
441    Note that this intrinsic expands into two machine instructions:
442    PASUBUB followed by BIADD.  */
443 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
444 psadbh (uint8x8_t s, uint8x8_t t)
445 {
446   return __builtin_loongson_psadbh (s, t);
447 }
448 
449 /* Shuffle halfwords.  */
450 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
451 pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
452 {
453   return __builtin_loongson_pshufh_u (s, order);
454 }
455 
456 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
457 pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
458 {
459   return __builtin_loongson_pshufh_s (s, order);
460 }
461 
462 /* Shift left logical.  */
463 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
464 psllh_u (uint16x4_t s, uint8_t amount)
465 {
466   return __builtin_loongson_psllh_u (s, amount);
467 }
468 
469 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
470 psllh_s (int16x4_t s, uint8_t amount)
471 {
472   return __builtin_loongson_psllh_s (s, amount);
473 }
474 
475 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
476 psllw_u (uint32x2_t s, uint8_t amount)
477 {
478   return __builtin_loongson_psllw_u (s, amount);
479 }
480 
481 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
482 psllw_s (int32x2_t s, uint8_t amount)
483 {
484   return __builtin_loongson_psllw_s (s, amount);
485 }
486 
487 /* Shift right logical.  */
488 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
489 psrlh_u (uint16x4_t s, uint8_t amount)
490 {
491   return __builtin_loongson_psrlh_u (s, amount);
492 }
493 
494 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
495 psrlh_s (int16x4_t s, uint8_t amount)
496 {
497   return __builtin_loongson_psrlh_s (s, amount);
498 }
499 
500 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
501 psrlw_u (uint32x2_t s, uint8_t amount)
502 {
503   return __builtin_loongson_psrlw_u (s, amount);
504 }
505 
506 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
507 psrlw_s (int32x2_t s, uint8_t amount)
508 {
509   return __builtin_loongson_psrlw_s (s, amount);
510 }
511 
512 /* Shift right arithmetic.  */
513 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
514 psrah_u (uint16x4_t s, uint8_t amount)
515 {
516   return __builtin_loongson_psrah_u (s, amount);
517 }
518 
519 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
520 psrah_s (int16x4_t s, uint8_t amount)
521 {
522   return __builtin_loongson_psrah_s (s, amount);
523 }
524 
525 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
526 psraw_u (uint32x2_t s, uint8_t amount)
527 {
528   return __builtin_loongson_psraw_u (s, amount);
529 }
530 
531 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
532 psraw_s (int32x2_t s, uint8_t amount)
533 {
534   return __builtin_loongson_psraw_s (s, amount);
535 }
536 
537 /* Vector subtraction, treating overflow by wraparound.  */
538 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
539 psubw_u (uint32x2_t s, uint32x2_t t)
540 {
541   return __builtin_loongson_psubw_u (s, t);
542 }
543 
544 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
545 psubh_u (uint16x4_t s, uint16x4_t t)
546 {
547   return __builtin_loongson_psubh_u (s, t);
548 }
549 
550 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
551 psubb_u (uint8x8_t s, uint8x8_t t)
552 {
553   return __builtin_loongson_psubb_u (s, t);
554 }
555 
556 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
557 psubw_s (int32x2_t s, int32x2_t t)
558 {
559   return __builtin_loongson_psubw_s (s, t);
560 }
561 
562 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
563 psubh_s (int16x4_t s, int16x4_t t)
564 {
565   return __builtin_loongson_psubh_s (s, t);
566 }
567 
568 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
569 psubb_s (int8x8_t s, int8x8_t t)
570 {
571   return __builtin_loongson_psubb_s (s, t);
572 }
573 
574 /* Subtraction of doubleword integers, treating overflow by wraparound.  */
/* Wraparound subtraction on the unsigned doublewords S and T
   (PSUBD builtin, unsigned flavour).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
psubd_u (uint64_t s, uint64_t t)
{
  uint64_t diff = __builtin_loongson_psubd_u (s, t);
  return diff;
}
580 
/* Wraparound subtraction on the signed doublewords S and T
   (PSUBD builtin, signed flavour).  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
psubd_s (int64_t s, int64_t t)
{
  int64_t diff = __builtin_loongson_psubd_s (s, t);
  return diff;
}
586 
587 /* Vector subtraction, treating overflow by signed saturation.  */
588 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
589 psubsh (int16x4_t s, int16x4_t t)
590 {
591   return __builtin_loongson_psubsh (s, t);
592 }
593 
594 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
595 psubsb (int8x8_t s, int8x8_t t)
596 {
597   return __builtin_loongson_psubsb (s, t);
598 }
599 
600 /* Vector subtraction, treating overflow by unsigned saturation.  */
601 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
602 psubush (uint16x4_t s, uint16x4_t t)
603 {
604   return __builtin_loongson_psubush (s, t);
605 }
606 
607 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
608 psubusb (uint8x8_t s, uint8x8_t t)
609 {
610   return __builtin_loongson_psubusb (s, t);
611 }
612 
613 /* Unpack high data.  */
614 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
615 punpckhwd_u (uint32x2_t s, uint32x2_t t)
616 {
617   return __builtin_loongson_punpckhwd_u (s, t);
618 }
619 
620 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
621 punpckhhw_u (uint16x4_t s, uint16x4_t t)
622 {
623   return __builtin_loongson_punpckhhw_u (s, t);
624 }
625 
626 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
627 punpckhbh_u (uint8x8_t s, uint8x8_t t)
628 {
629   return __builtin_loongson_punpckhbh_u (s, t);
630 }
631 
632 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
633 punpckhwd_s (int32x2_t s, int32x2_t t)
634 {
635   return __builtin_loongson_punpckhwd_s (s, t);
636 }
637 
638 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
639 punpckhhw_s (int16x4_t s, int16x4_t t)
640 {
641   return __builtin_loongson_punpckhhw_s (s, t);
642 }
643 
644 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
645 punpckhbh_s (int8x8_t s, int8x8_t t)
646 {
647   return __builtin_loongson_punpckhbh_s (s, t);
648 }
649 
650 /* Unpack low data.  */
651 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
652 punpcklwd_u (uint32x2_t s, uint32x2_t t)
653 {
654   return __builtin_loongson_punpcklwd_u (s, t);
655 }
656 
657 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
658 punpcklhw_u (uint16x4_t s, uint16x4_t t)
659 {
660   return __builtin_loongson_punpcklhw_u (s, t);
661 }
662 
663 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
664 punpcklbh_u (uint8x8_t s, uint8x8_t t)
665 {
666   return __builtin_loongson_punpcklbh_u (s, t);
667 }
668 
669 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
670 punpcklwd_s (int32x2_t s, int32x2_t t)
671 {
672   return __builtin_loongson_punpcklwd_s (s, t);
673 }
674 
675 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
676 punpcklhw_s (int16x4_t s, int16x4_t t)
677 {
678   return __builtin_loongson_punpcklhw_s (s, t);
679 }
680 
681 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
682 punpcklbh_s (int8x8_t s, int8x8_t t)
683 {
684   return __builtin_loongson_punpcklbh_s (s, t);
685 }
686 
687 #ifdef __cplusplus
688 }
689 #endif
690 
691 #endif
692