Lines Matching defs:src0

136 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with
147 /// \param src0
151 #define _tile_dpbssd(dst, src0, src1) \
152 __builtin_ia32_tdpbssd((dst), (src0), (src1))
155 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with
166 /// \param src0
170 #define _tile_dpbsud(dst, src0, src1) \
171 __builtin_ia32_tdpbsud((dst), (src0), (src1))
174 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with
185 /// \param src0
189 #define _tile_dpbusd(dst, src0, src1) \
190 __builtin_ia32_tdpbusd((dst), (src0), (src1))
193 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with
204 /// \param src0
208 #define _tile_dpbuud(dst, src0, src1) \
209 __builtin_ia32_tdpbuud((dst), (src0), (src1))
211 /// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and
222 /// \param src0
226 #define _tile_dpbf16ps(dst, src0, src1) \
227 __builtin_ia32_tdpbf16ps((dst), (src0), (src1))
346 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with
357 /// \param src0
362 static __inline__ void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
364 dst->tile = _tile_dpbssd_internal(src0.row, src1.col, src0.col, dst->tile,
365 src0.tile, src1.tile);
369 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with
380 /// \param src0
385 static __inline__ void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
387 dst->tile = _tile_dpbsud_internal(src0.row, src1.col, src0.col, dst->tile,
388 src0.tile, src1.tile);
392 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with
403 /// \param src0
408 static __inline__ void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
410 dst->tile = _tile_dpbusd_internal(src0.row, src1.col, src0.col, dst->tile,
411 src0.tile, src1.tile);
415 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with
426 /// \param src0
431 static __inline__ void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
433 dst->tile = _tile_dpbuud_internal(src0.row, src1.col, src0.col, dst->tile,
434 src0.tile, src1.tile);
467 /// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and
478 /// \param src0
483 static __inline__ void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0,
485 dst->tile = _tile_dpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
486 src0.tile, src1.tile);