/* xref: /dpdk/lib/pipeline/rte_swx_pipeline_internal.h (revision f12c41bf4074efb438fc21ab7db13f011f5a1e84) */
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
5 #define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
6 
7 #include <inttypes.h>
8 #include <string.h>
9 #include <sys/queue.h>
10 
11 #include <rte_byteorder.h>
12 #include <rte_common.h>
13 #include <rte_cycles.h>
14 #include <rte_prefetch.h>
15 #include <rte_meter.h>
16 
17 #include <rte_swx_table_selector.h>
18 #include <rte_swx_table_learner.h>
19 #include <rte_swx_pipeline.h>
20 #include <rte_swx_ctl.h>
21 
/* Compile-time trace verbosity; 0 (the default) disables tracing. */
#ifndef TRACE_LEVEL
#define TRACE_LEVEL 0
#endif

/* TRACE() expands to printf() when tracing is enabled and to nothing
 * otherwise, so disabled tracing has zero run-time cost.
 */
#if TRACE_LEVEL
#define TRACE(...) printf(__VA_ARGS__)
#else
#define TRACE(...)
#endif
31 
/*
 * Environment.
 */

/* 64-bit network <-> host byte order conversion helpers. */
#define ntoh64(x) rte_be_to_cpu_64(x)
#define hton64(x) rte_cpu_to_be_64(x)
37 
/*
 * Struct.
 */

/* Description of a single field of a struct type. */
struct field {
	char name[RTE_SWX_NAME_SIZE];
	uint32_t n_bits; /* Field size (bits). */
	uint32_t offset; /* Field offset within its struct -- presumably in bits; TODO confirm unit. */
	int var_size;    /* Non-zero when the field has variable size. */
};

/* Struct type: a named, ordered collection of fields. */
struct struct_type {
	TAILQ_ENTRY(struct_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct field *fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	uint32_t n_bits;      /* Total struct size (bits). */
	uint32_t n_bits_min;  /* Minimum struct size (bits); presumably differs from n_bits only when var_size is set. */
	int var_size;         /* Non-zero when the struct contains a variable-size field. */
};

/* List of registered struct types. */
TAILQ_HEAD(struct_type_tailq, struct_type);
59 
/*
 * Input port.
 */

/* Input port type: a name plus the operations table implementing it. */
struct port_in_type {
	TAILQ_ENTRY(port_in_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_in_ops ops;
};

TAILQ_HEAD(port_in_type_tailq, port_in_type);

/* Input port instance. */
struct port_in {
	TAILQ_ENTRY(port_in) node;
	struct port_in_type *type;
	void *obj; /* Opaque port object handed to the type's ops callbacks. */
	uint32_t id;
};

TAILQ_HEAD(port_in_tailq, port_in);

/* Run-time view of an input port: the RX callback plus its opaque object. */
struct port_in_runtime {
	rte_swx_port_in_pkt_rx_t pkt_rx;
	void *obj;
};
84 
/*
 * Output port.
 */

/* Output port type: a name plus the operations table implementing it. */
struct port_out_type {
	TAILQ_ENTRY(port_out_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_out_ops ops;
};

TAILQ_HEAD(port_out_type_tailq, port_out_type);

/* Output port instance. */
struct port_out {
	TAILQ_ENTRY(port_out) node;
	struct port_out_type *type;
	void *obj; /* Opaque port object handed to the type's ops callbacks. */
	uint32_t id;
};

TAILQ_HEAD(port_out_tailq, port_out);

/* Run-time view of an output port: TX/clone/flush callbacks plus the
 * opaque object they operate on.
 */
struct port_out_runtime {
	rte_swx_port_out_pkt_tx_t pkt_tx;
	rte_swx_port_out_pkt_fast_clone_tx_t pkt_fast_clone_tx;
	rte_swx_port_out_pkt_clone_tx_t pkt_clone_tx;
	rte_swx_port_out_flush_t flush;
	void *obj;
};
112 
/*
 * Packet mirroring.
 */

/* Configuration of one packet mirroring session. */
struct mirroring_session {
	uint32_t port_id;           /* Output port the mirror copies are sent to. */
	int fast_clone;             /* Non-zero: presumably selects the fast clone TX path; verify against usage. */
	uint32_t truncation_length; /* Maximum length of the mirrored copy. */
};
121 
/*
 * Extern object.
 */

/* Member function of an extern object type. */
struct extern_type_member_func {
	TAILQ_ENTRY(extern_type_member_func) node;
	char name[RTE_SWX_NAME_SIZE];
	rte_swx_extern_type_member_func_t func;
	uint32_t id;
};

TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);

/* Extern object type: constructor/destructor, mailbox layout and the set
 * of member functions callable through the extern instruction.
 */
struct extern_type {
	TAILQ_ENTRY(extern_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type; /* Layout of the per-object mailbox. */
	rte_swx_extern_type_constructor_t constructor;
	rte_swx_extern_type_destructor_t destructor;
	struct extern_type_member_func_tailq funcs;
	uint32_t n_funcs;
};

TAILQ_HEAD(extern_type_tailq, extern_type);

/* Extern object instance. */
struct extern_obj {
	TAILQ_ENTRY(extern_obj) node;
	char name[RTE_SWX_NAME_SIZE];
	struct extern_type *type;
	void *obj;          /* Object handle -- presumably produced by type->constructor. */
	uint32_t struct_id; /* ID of the mailbox struct among the pipeline structs. */
	uint32_t id;
};

TAILQ_HEAD(extern_obj_tailq, extern_obj);

/* Maximum number of member functions per extern type. */
#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
#endif

/* Run-time state of an extern object: object handle, mailbox buffer and
 * member function table indexed by function ID.
 */
struct extern_obj_runtime {
	void *obj;
	uint8_t *mailbox;
	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
};
166 
/*
 * Extern function.
 */

/* Free-standing extern function with its own mailbox struct. */
struct extern_func {
	TAILQ_ENTRY(extern_func) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type; /* Layout of the function's mailbox. */
	rte_swx_extern_func_t func;
	uint32_t struct_id; /* ID of the mailbox struct among the pipeline structs. */
	uint32_t id;
};

TAILQ_HEAD(extern_func_tailq, extern_func);

/* Run-time state of an extern function: mailbox buffer plus the callback. */
struct extern_func_runtime {
	uint8_t *mailbox;
	rte_swx_extern_func_t func;
};
185 
/*
 * Hash function.
 */

/* Registered hash function, usable by the hash instruction. */
struct hash_func {
	TAILQ_ENTRY(hash_func) node;
	char name[RTE_SWX_NAME_SIZE];
	rte_swx_hash_func_t func;
	uint32_t id;
};

TAILQ_HEAD(hash_func_tailq, hash_func);

/* Run-time view: just the callback. */
struct hash_func_runtime {
	rte_swx_hash_func_t func;
};
201 
/*
 * Header.
 */

/* Packet header declared within the pipeline. */
struct header {
	TAILQ_ENTRY(header) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st; /* Layout of the header fields. */
	uint32_t struct_id;     /* ID of this header's struct among the pipeline structs. */
	uint32_t id;
};

TAILQ_HEAD(header_tailq, header);

/* Run-time state of an extracted/generated header. */
struct header_runtime {
	uint8_t *ptr0;    /* Start of the header storage -- presumably; verify against usage. */
	uint32_t n_bytes;
};

/* Run-time state of an emitted header. */
struct header_out_runtime {
	uint8_t *ptr0;
	uint8_t *ptr;
	uint32_t n_bytes;
};
225 
226 /*
227  * Instruction.
228  */
229 
230 /* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
231  * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
232  * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
233  * when transferred to packet meta-data and in NBO when transferred to packet
234  * headers.
235  */
236 
237 /* Notation conventions:
238  *    -Header field: H = h.header.field (dst/src)
239  *    -Meta-data field: M = m.field (dst/src)
240  *    -Extern object mailbox field: E = e.field (dst/src)
241  *    -Extern function mailbox field: F = f.field (dst/src)
242  *    -Table action data field: T = t.field (src only)
243  *    -Immediate value: I = 32-bit unsigned value (src only)
244  */
245 
/* One enumerator per instruction variant. The suffix letters encode the
 * operand kinds resolved at instruction translation time (see the notation
 * conventions above).
 */
enum instruction_type {
	/* rx m.port_in */
	INSTR_RX,

	/* tx port_out
	 * port_out = MI
	 */
	INSTR_TX,   /* port_out = M */
	INSTR_TX_I, /* port_out = I */
	INSTR_DROP,

	/*
	 * mirror slot_id session_id
	 * slot_id = MEFT
	 * session_id = MEFT
	 */
	INSTR_MIRROR,

	/* recirculate
	 */
	INSTR_RECIRCULATE,

	/* recircid m.recirc_pass_id
	 * Read the internal recirculation pass ID into the specified meta-data field.
	 */
	INSTR_RECIRCID,

	/* extract h.header */
	INSTR_HDR_EXTRACT,
	INSTR_HDR_EXTRACT2,
	INSTR_HDR_EXTRACT3,
	INSTR_HDR_EXTRACT4,
	INSTR_HDR_EXTRACT5,
	INSTR_HDR_EXTRACT6,
	INSTR_HDR_EXTRACT7,
	INSTR_HDR_EXTRACT8,

	/* extract h.header m.last_field_size */
	INSTR_HDR_EXTRACT_M,

	/* lookahead h.header */
	INSTR_HDR_LOOKAHEAD,

	/* emit h.header */
	INSTR_HDR_EMIT,
	INSTR_HDR_EMIT_TX,
	INSTR_HDR_EMIT2_TX,
	INSTR_HDR_EMIT3_TX,
	INSTR_HDR_EMIT4_TX,
	INSTR_HDR_EMIT5_TX,
	INSTR_HDR_EMIT6_TX,
	INSTR_HDR_EMIT7_TX,
	INSTR_HDR_EMIT8_TX,

	/* validate h.header */
	INSTR_HDR_VALIDATE,

	/* invalidate h.header */
	INSTR_HDR_INVALIDATE,

	/* mov dst src
	 * dst = src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_MOV,    /* dst = MEF, src = MEFT */
	INSTR_MOV_MH, /* dst = MEF, src = H */
	INSTR_MOV_HM, /* dst = H, src = MEFT */
	INSTR_MOV_HH, /* dst = H, src = H */
	INSTR_MOV_I,  /* dst = HMEF, src = I */

	/* dma h.header t.field
	 * memcpy(h.header, t.field, sizeof(h.header))
	 */
	INSTR_DMA_HT,
	INSTR_DMA_HT2,
	INSTR_DMA_HT3,
	INSTR_DMA_HT4,
	INSTR_DMA_HT5,
	INSTR_DMA_HT6,
	INSTR_DMA_HT7,
	INSTR_DMA_HT8,

	/* add dst src
	 * dst += src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
	INSTR_ALU_ADD_HH, /* dst = H, src = H */
	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
	INSTR_ALU_ADD_HI, /* dst = H, src = I */

	/* sub dst src
	 * dst -= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
	INSTR_ALU_SUB_HH, /* dst = H, src = H */
	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
	INSTR_ALU_SUB_HI, /* dst = H, src = I */

	/* ckadd dst src
	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
	 * dst = H, src = {H, h.header}, '+ = 1's complement addition operator
	 */
	INSTR_ALU_CKADD_FIELD,    /* src = H */
	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 bytes. */
	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with sizeof(header) any 4-byte multiple. */

	/* cksub dst src
	 * dst = dst '- src
	 * dst = H, src = H, '- = 1's complement subtraction operator
	 */
	INSTR_ALU_CKSUB_FIELD,

	/* and dst src
	 * dst &= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
	INSTR_ALU_AND_HH, /* dst = H, src = H */
	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */

	/* or dst src
	 * dst |= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_OR_HH, /* dst = H, src = H */
	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */

	/* xor dst src
	 * dst ^= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_XOR_HH, /* dst = H, src = H */
	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */

	/* shl dst src
	 * dst <<= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHL_HH, /* dst = H, src = H */
	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHL_HI, /* dst = H, src = I */

	/* shr dst src
	 * dst >>= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHR_HH, /* dst = H, src = H */
	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHR_HI, /* dst = H, src = I */

	/* regprefetch REGARRAY index
	 * prefetch REGARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_REGPREFETCH_RH, /* index = H */
	INSTR_REGPREFETCH_RM, /* index = MEFT */
	INSTR_REGPREFETCH_RI, /* index = I */

	/* regrd dst REGARRAY index
	 * dst = REGARRAY[index]
	 * dst = HMEF, index = HMEFTI
	 */
	INSTR_REGRD_HRH, /* dst = H, index = H */
	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
	INSTR_REGRD_HRI, /* dst = H, index = I */
	INSTR_REGRD_MRH, /* dst = MEF, index = H */
	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
	INSTR_REGRD_MRI, /* dst = MEF, index = I */

	/* regwr REGARRAY index src
	 * REGARRAY[index] = src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGWR_RHH, /* index = H, src = H */
	INSTR_REGWR_RHM, /* index = H, src = MEFT */
	INSTR_REGWR_RHI, /* index = H, src = I */
	INSTR_REGWR_RMH, /* index = MEFT, src = H */
	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGWR_RMI, /* index = MEFT, src = I */
	INSTR_REGWR_RIH, /* index = I, src = H */
	INSTR_REGWR_RIM, /* index = I, src = MEFT */
	INSTR_REGWR_RII, /* index = I, src = I */

	/* regadd REGARRAY index src
	 * REGARRAY[index] += src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGADD_RHH, /* index = H, src = H */
	INSTR_REGADD_RHM, /* index = H, src = MEFT */
	INSTR_REGADD_RHI, /* index = H, src = I */
	INSTR_REGADD_RMH, /* index = MEFT, src = H */
	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGADD_RMI, /* index = MEFT, src = I */
	INSTR_REGADD_RIH, /* index = I, src = H */
	INSTR_REGADD_RIM, /* index = I, src = MEFT */
	INSTR_REGADD_RII, /* index = I, src = I */

	/* metprefetch METARRAY index
	 * prefetch METARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_METPREFETCH_H, /* index = H */
	INSTR_METPREFETCH_M, /* index = MEFT */
	INSTR_METPREFETCH_I, /* index = I */

	/* meter METARRAY index length color_in color_out
	 * color_out = meter(METARRAY[index], length, color_in)
	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
	 */
	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */

	/* table TABLE */
	INSTR_TABLE,
	INSTR_TABLE_AF,
	INSTR_SELECTOR,
	INSTR_LEARNER,
	INSTR_LEARNER_AF,

	/* learn ACTION_NAME [ m.action_first_arg ] m.timeout_id */
	INSTR_LEARNER_LEARN,

	/* rearm [ m.timeout_id ] */
	INSTR_LEARNER_REARM,
	INSTR_LEARNER_REARM_NEW,

	/* forget */
	INSTR_LEARNER_FORGET,

	/* extern e.obj.func */
	INSTR_EXTERN_OBJ,

	/* extern f.func */
	INSTR_EXTERN_FUNC,

	/* hash HASH_FUNC_NAME dst src_first src_last
	 * Compute hash value over range of struct fields.
	 * dst = M
	 * src_first = HMEFT
	 * src_last = HMEFT
	 * src_first and src_last must be fields within the same struct
	 */
	INSTR_HASH_FUNC,

	/* jmp LABEL
	 * Unconditional jump
	 */
	INSTR_JMP,

	/* jmpv LABEL h.header
	 * Jump if header is valid
	 */
	INSTR_JMP_VALID,

	/* jmpnv LABEL h.header
	 * Jump if header is invalid
	 */
	INSTR_JMP_INVALID,

	/* jmph LABEL
	 * Jump if table lookup hit
	 */
	INSTR_JMP_HIT,

	/* jmpnh LABEL
	 * Jump if table lookup miss
	 */
	INSTR_JMP_MISS,

	/* jmpa LABEL ACTION
	 * Jump if action run
	 */
	INSTR_JMP_ACTION_HIT,

	/* jmpna LABEL ACTION
	 * Jump if action not run
	 */
	INSTR_JMP_ACTION_MISS,

	/* jmpeq LABEL a b
	 * Jump if a is equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_EQ_HH, /* a = H, b = H */
	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmpneq LABEL a b
	 * Jump if a is not equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_NEQ_HH, /* a = H, b = H */
	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmplt LABEL a b
	 * Jump if a is less than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
	INSTR_JMP_LT_HH, /* a = H, b = H */
	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
	INSTR_JMP_LT_HI, /* a = H, b = I */

	/* jmpgt LABEL a b
	 * Jump if a is greater than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
	INSTR_JMP_GT_HH, /* a = H, b = H */
	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
	INSTR_JMP_GT_HI, /* a = H, b = I */

	/* return
	 * Return from action
	 */
	INSTR_RETURN,

	/* Start of custom instructions. */
	INSTR_CUSTOM_0,
};
605 
/* Location of a single instruction operand: which struct it lives in and
 * where within that struct.
 */
struct instr_operand {
	uint8_t struct_id;
	uint8_t n_bits; /* Operand size (bits). */
	uint8_t offset; /* Byte offset within the struct (used as a byte index by the operand readers). */
	uint8_t pad;
};
612 
/* Arguments of the RX/TX and header extract/emit instructions. */
struct instr_io {
	struct {
		union {
			/* Port ID taken from a meta-data field (offset/n_bits
			 * locate the field) -- presumably; verify against the
			 * instruction executors.
			 */
			struct {
				uint8_t offset;
				uint8_t n_bits;
				uint8_t pad[2];
			};

			/* Immediate port ID (e.g. INSTR_TX_I). */
			uint32_t val;
		};
	} io;

	/* Up to 8 headers handled by one (fused) extract/emit instruction. */
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
		uint8_t n_bytes[8];
	} hdr;
};
632 
/* Argument of the header validate/invalidate instructions. */
struct instr_hdr_validity {
	uint8_t header_id;
};

/* Argument of the table/selector/learner instructions. */
struct instr_table {
	uint8_t table_id;
};

/* Arguments of the learn instruction:
 * learn ACTION_NAME [ m.action_first_arg ] m.timeout_id
 */
struct instr_learn {
	uint8_t action_id;
	uint8_t mf_first_arg_offset;  /* Meta-data offset of the first action argument. */
	uint8_t mf_timeout_id_offset; /* Meta-data offset of the timeout ID field. */
	uint8_t mf_timeout_id_n_bits; /* Size of the timeout ID field (bits). */
};

/* Argument of the extern object member function call instruction. */
struct instr_extern_obj {
	uint8_t ext_obj_id;
	uint8_t func_id;
};

/* Argument of the extern function call instruction. */
struct instr_extern_func {
	uint8_t ext_func_id;
};

/* Arguments of the hash instruction: destination meta-data field and the
 * source byte range (fields src_first through src_last of one struct).
 */
struct instr_hash_func {
	uint8_t hash_func_id;

	struct {
		uint8_t offset;
		uint8_t n_bits;
	} dst;

	struct {
		uint8_t struct_id;
		uint16_t offset;
		uint16_t n_bytes;
	} src;
};

/* Generic dst/src instruction arguments (mov, ALU, mirror); src is either
 * an operand location or an immediate value.
 */
struct instr_dst_src {
	struct instr_operand dst;
	union {
		struct instr_operand src;
		uint64_t src_val;
	};
};
679 
/* Arguments of the register array instructions (regprefetch/regrd/regwr/
 * regadd); index and dst/src are each either an operand or an immediate.
 */
struct instr_regarray {
	uint8_t regarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val;
	};

	union {
		struct instr_operand dstsrc;
		uint64_t dstsrc_val;
	};
};

/* Arguments of the meter instructions; index and color_in are each either
 * an operand or an immediate, length and color_out are always operands.
 */
struct instr_meter {
	uint8_t metarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val;
	};

	struct instr_operand length;

	union {
		struct instr_operand color_in;
		uint32_t color_in_val;
	};

	struct instr_operand color_out;
};

/* Arguments of the (fused) DMA instruction: up to 8 header copies from
 * table action data.
 */
struct instr_dma {
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
	} dst;

	struct {
		uint8_t offset[8];
	} src;

	uint16_t n_bytes[8];
};
726 
/* Arguments of the jump instructions: resolved target plus the operands
 * (or header/action ID) the condition is evaluated on.
 */
struct instr_jmp {
	struct instruction *ip; /* Jump target. */

	union {
		struct instr_operand a;
		uint8_t header_id; /* For jmpv/jmpnv. */
		uint8_t action_id; /* For jmpa/jmpna. */
	};

	union {
		struct instr_operand b;
		uint64_t b_val; /* Immediate right-hand side. */
	};
};

/* One pipeline instruction: type tag plus the matching argument layout. */
struct instruction {
	enum instruction_type type;
	union {
		struct instr_io io;
		struct instr_dst_src mirror;
		struct instr_hdr_validity valid;
		struct instr_dst_src mov;
		struct instr_regarray regarray;
		struct instr_meter meter;
		struct instr_dma dma;
		struct instr_dst_src alu;
		struct instr_table table;
		struct instr_learn learn;
		struct instr_extern_obj ext_obj;
		struct instr_extern_func ext_func;
		struct instr_hash_func hash_func;
		struct instr_jmp jmp;
	};
};

/* Per-instruction translation-time meta-data (labels and jump wiring). */
struct instruction_data {
	char label[RTE_SWX_NAME_SIZE];
	char jmp_label[RTE_SWX_NAME_SIZE];
	uint32_t n_users; /* user = jmp instruction to this instruction. */
	int invalid;
};

/* Instruction executor function prototype. */
typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
770 
/*
 * Action.
 */

/* Action executor function prototype. */
typedef void
(*action_func_t)(struct rte_swx_pipeline *p);

/* Action: a named instruction sequence with an optional argument struct. */
struct action {
	TAILQ_ENTRY(action) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st; /* Action argument struct type -- presumably NULL when the action takes no arguments. */
	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	uint32_t n_instructions;
	uint32_t id;
};

TAILQ_HEAD(action_tailq, action);
789 
/*
 * Table.
 */

/* Table type: match type plus the operations table implementing it. */
struct table_type {
	TAILQ_ENTRY(table_type) node;
	char name[RTE_SWX_NAME_SIZE];
	enum rte_swx_table_match_type match_type;
	struct rte_swx_table_ops ops;
};

TAILQ_HEAD(table_type_tailq, table_type);

/* One match field of a table, with its individual match type. */
struct match_field {
	enum rte_swx_table_match_type match_type;
	struct field *field;
};

/* Regular (non-learner) match-action table. */
struct table {
	TAILQ_ENTRY(table) node;
	char name[RTE_SWX_NAME_SIZE];
	char args[RTE_SWX_NAME_SIZE];
	struct table_type *type; /* NULL when n_fields == 0. */

	/* Match. */
	struct match_field *fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	struct header *header; /* Only valid when n_fields > 0. */

	/* Action. */
	struct action **actions; /* Array of n_actions elements. */
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const;
	uint32_t action_data_size_max;
	int *action_is_for_table_entries; /* Per-action flag, parallel to actions[]. */
	int *action_is_for_default_entry; /* Per-action flag, parallel to actions[]. */

	uint32_t size; /* Maximum number of entries -- presumably; verify against table create. */
	uint32_t id;
};

TAILQ_HEAD(table_tailq, table);

/* Per-thread run-time state of a table: lookup callback, its mailbox and
 * the lookup key pointer.
 */
struct table_runtime {
	rte_swx_table_lookup_t func;
	void *mailbox;
	uint8_t **key;
};

/* Table statistics counters. */
struct table_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t *n_pkts_action; /* Per-action packet counters. */
};
844 
/*
 * Selector.
 */

/* Selector table: maps a group ID plus a hash of the selector fields to a
 * member ID (see rte_swx_table_selector.h).
 */
struct selector {
	TAILQ_ENTRY(selector) node;
	char name[RTE_SWX_NAME_SIZE];

	struct field *group_id_field;     /* Meta-data field providing the group ID. */
	struct field **selector_fields;   /* Fields hashed to pick a group member. */
	uint32_t n_selector_fields;
	struct header *selector_header;   /* Header owning the selector fields, if any -- presumably. */
	struct field *member_id_field;    /* Meta-data field receiving the member ID. */

	uint32_t n_groups_max;
	uint32_t n_members_per_group_max;

	uint32_t id;
};

TAILQ_HEAD(selector_tailq, selector);

/* Per-thread run-time state of a selector. */
struct selector_runtime {
	void *mailbox;
	uint8_t **group_id_buffer;
	uint8_t **selector_buffer;
	uint8_t **member_id_buffer;
};

/* Selector statistics counters. */
struct selector_statistics {
	uint64_t n_pkts;
};
876 
/*
 * Learner table.
 */

/* Learner table: a match-action table whose entries can be added/updated
 * from the data path (learn/rearm/forget instructions).
 */
struct learner {
	TAILQ_ENTRY(learner) node;
	char name[RTE_SWX_NAME_SIZE];

	/* Match. */
	struct field **fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	struct header *header; /* Header owning the match fields, if any -- presumably. */

	/* Action. */
	struct action **actions; /* Array of n_actions elements. */
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const;
	uint32_t action_data_size_max;
	int *action_is_for_table_entries; /* Per-action flag, parallel to actions[]. */
	int *action_is_for_default_entry; /* Per-action flag, parallel to actions[]. */

	uint32_t size;
	uint32_t timeout[RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX]; /* Key timeout values, indexed by timeout ID. */
	uint32_t n_timeouts;
	uint32_t id;
};

TAILQ_HEAD(learner_tailq, learner);

/* Per-thread run-time state of a learner table. */
struct learner_runtime {
	void *mailbox;
	uint8_t **key;
};

/* Learner table statistics counters. */
struct learner_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
	uint64_t n_pkts_rearm;
	uint64_t n_pkts_forget;
	uint64_t *n_pkts_action; /* Per-action packet counters. */
};
919 
/*
 * Register array.
 */

/* Register array declaration. */
struct regarray {
	TAILQ_ENTRY(regarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint64_t init_val; /* Initial value of every register. */
	uint32_t size;     /* Number of registers. */
	uint32_t id;
};

TAILQ_HEAD(regarray_tailq, regarray);

/* Run-time state: the register storage plus an index mask. */
struct regarray_runtime {
	uint64_t *regarray;
	uint32_t size_mask; /* Index mask; name suggests power-of-2 size -- TODO confirm. */
};
937 
/*
 * Meter array.
 */

/* trTCM meter profile shared by meters; reference-counted via n_users. */
struct meter_profile {
	TAILQ_ENTRY(meter_profile) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_meter_trtcm_params params;
	struct rte_meter_trtcm_profile profile;
	uint32_t n_users; /* Number of meters currently using this profile. */
};

TAILQ_HEAD(meter_profile_tailq, meter_profile);

/* Meter array declaration. */
struct metarray {
	TAILQ_ENTRY(metarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint32_t size; /* Number of meters. */
	uint32_t id;
};

TAILQ_HEAD(metarray_tailq, metarray);

/* One meter instance; explicit padding -- presumably for alignment/cache
 * line sizing; verify before changing.
 */
struct meter {
	struct rte_meter_trtcm m;
	struct meter_profile *profile;
	enum rte_color color_mask;
	uint8_t pad[20];

	/* Per-color statistics counters. */
	uint64_t n_pkts[RTE_COLORS];
	uint64_t n_bytes[RTE_COLORS];
};

/* Run-time state: the meter storage plus an index mask. */
struct metarray_runtime {
	struct meter *metarray;
	uint32_t size_mask; /* Index mask; name suggests power-of-2 size -- TODO confirm. */
};
974 
/*
 * Pipeline.
 */

/* Per-thread pipeline execution state: the packet being processed and all
 * run-time views the instruction executors operate on.
 */
struct thread {
	/* Packet. */
	struct rte_swx_pkt pkt;
	uint8_t *ptr; /* Current parse/emit position within the packet -- presumably; verify against executors. */
	uint32_t *mirroring_slots;
	uint64_t mirroring_slots_mask; /* Bit mask of mirroring slots in use -- presumably. */
	int recirculate; /* Non-zero when the packet is to be recirculated. */
	uint32_t recirc_pass_id; /* Recirculation pass counter (read by INSTR_RECIRCID). */

	/* Structures. */
	uint8_t **structs; /* Base pointers indexed by struct_id; operands are resolved through this. */

	/* Packet headers. */
	struct header_runtime *headers; /* Extracted or generated headers. */
	struct header_out_runtime *headers_out; /* Emitted headers. */
	uint8_t *header_storage;
	uint8_t *header_out_storage;
	uint64_t valid_headers; /* Bit mask, one bit per header ID (see HEADER_VALID). */
	uint32_t n_headers_out;

	/* Packet meta-data. */
	uint8_t *metadata;

	/* Tables. */
	struct table_runtime *tables;
	struct selector_runtime *selectors;
	struct learner_runtime *learners;
	struct rte_swx_table_state *table_state;
	uint64_t action_id; /* Action selected by the latest table lookup. */
	int hit; /* 0 = Miss, 1 = Hit. */
	uint32_t learner_id;
	uint64_t time;

	/* Extern objects and functions. */
	struct extern_obj_runtime *extern_objs;
	struct extern_func_runtime *extern_funcs;

	/* Instructions. */
	struct instruction *ip;  /* Next instruction to execute. */
	struct instruction *ret; /* Return target for actions (see INSTR_RETURN) -- presumably. */
};
1019 
/* 64-bit mask bit test/set/clear helpers; pos must be < 64. */
#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))

/* Non-zero when the given header ID is currently valid for the thread. */
#define HEADER_VALID(thread, header_id) \
	MASK64_BIT_GET((thread)->valid_headers, header_id)
1026 
1027 static inline uint64_t
1028 instr_operand_hbo(struct thread *t, const struct instr_operand *x)
1029 {
1030 	uint8_t *x_struct = t->structs[x->struct_id];
1031 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
1032 	uint64_t x64 = *x64_ptr;
1033 	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
1034 
1035 	return x64 & x64_mask;
1036 }
1037 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read an instruction operand stored in Network Byte Order (e.g. a header
 * field) and return its value in host order, right-aligned within 64 bits.
 */
static inline uint64_t
instr_operand_nbo(struct thread *t, const struct instr_operand *x)
{
	uint8_t *x_struct = t->structs[x->struct_id];
	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
	uint64_t x64 = *x64_ptr;

	/* Byte-swap, then shift out the bits beyond the operand width. */
	return ntoh64(x64) >> (64 - x->n_bits);
}

#else

/* On big endian hosts NBO equals HBO, so reuse the HBO reader. */
#define instr_operand_nbo instr_operand_hbo

#endif
1055 
/* Generic ALU operation: dst = dst <operator> src, with both operands in
 * Host Byte Order. Only the n_bits-wide destination field is updated; the
 * bits of the destination 64-bit word outside the field mask are preserved.
 */
#define ALU(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
1074 
1075 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1076 
/* ALU variant: dst in Host Byte Order, src a Network Byte Order header
 * field (byte-swapped and right-aligned before the operation).
 */
#define ALU_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
1094 
/* ALU variant: dst a Network Byte Order header field, src in Host Byte
 * Order. The dst field is converted to host order for the operation and
 * the result is converted back to NBO before being written out.
 */
#define ALU_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}
1114 
/* Faster ALU_HM variant that aligns src into the dst field's NBO position
 * and skips the final result mask. NOTE(review): appears valid only when
 * the operation cannot carry outside the dst field -- confirm which
 * instructions select this variant before reusing it.
 */
#define ALU_HM_FAST(thread, ip, operator)  \
{                                                                                 \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
	uint64_t dst64 = *dst64_ptr;                                              \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
	uint64_t dst = dst64 & dst64_mask;                                        \
										  \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
	uint64_t src64 = *src64_ptr;                                              \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
										  \
	uint64_t result = dst operator src;                                       \
										  \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
}
1133 
/* ALU variant: both dst and src are Network Byte Order header fields. Both
 * are converted to host order for the operation; the result is converted
 * back to NBO before being written out.
 */
#define ALU_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}
1152 
/*
 * Fast version of ALU_HH: both operands are kept in their stored (network
 * byte order) layout, with the source merely realigned through shifts to the
 * destination field width, so no byte swaps are performed. NOTE(review): only
 * equivalent to ALU_HH for operand/operator combinations where this is safe -
 * confirm against the instruction translator that selects the _FAST variant.
 */
#define ALU_HH_FAST(thread, ip, operator)  \
{                                                                                             \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
	uint64_t dst64 = *dst64_ptr;                                                          \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
	uint64_t dst = dst64 & dst64_mask;                                                    \
											      \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
	uint64_t src64 = *src64_ptr;                                                          \
	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
											      \
	uint64_t result = dst operator src;                                                   \
											      \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
}
1170 
1171 #else
1172 
1173 #define ALU_MH ALU
1174 #define ALU_HM ALU
1175 #define ALU_HM_FAST ALU
1176 #define ALU_HH ALU
1177 #define ALU_HH_FAST ALU
1178 
1179 #endif
1180 
/*
 * ALU with host byte order destination and immediate (instruction-encoded)
 * source value: the destination field is read-modified-written within its
 * enclosing 64-bit word, with all the other bits of that word preserved.
 */
#define ALU_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
1195 
1196 #define ALU_MI ALU_I
1197 
1198 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1199 
/*
 * ALU with network byte order (header) destination and immediate source: the
 * destination is converted to host byte order for the operation and the
 * result is converted back into the field's stored layout.
 */
#define ALU_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}
1215 
1216 #else
1217 
1218 #define ALU_HI ALU_I
1219 
1220 #endif
1221 
/*
 * Move (copy) between two host byte order fields: the source value is masked
 * to its own width, then written into the destination field within its
 * enclosing 64-bit word, preserving the word's other bits.
 */
#define MOV(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1237 
1238 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1239 
/*
 * Move with host byte order (meta-data/action-data) destination and network
 * byte order (header) source: the source is converted to host byte order
 * before being written into the destination field.
 */
#define MOV_MH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1254 
/*
 * Move with network byte order (header) destination and host byte order
 * source: the source is converted into the destination field's stored layout
 * before the write.
 */
#define MOV_HM(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}
1271 
/*
 * Move between two network byte order (header) fields: the source is
 * realigned from its own width to the destination width through shifts only,
 * keeping the stored layout, so no byte swap is needed.
 */
#define MOV_HH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
									       \
	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}
1287 
1288 #else
1289 
1290 #define MOV_MH MOV
1291 #define MOV_HM MOV
1292 #define MOV_HH MOV
1293 
1294 #endif
1295 
/*
 * Move an immediate (instruction-encoded) value into a host byte order
 * destination field, preserving the other bits of the enclosing 64-bit word.
 */
#define MOV_I(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint64_t src = (ip)->mov.src_val;                                      \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1307 
/*
 * Conditional jump comparing two host byte order fields: the thread's
 * instruction pointer is set to the jump target when the comparison holds, or
 * advanced to the next instruction otherwise.
 */
#define JMP_CMP(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1324 
1325 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1326 
/*
 * Conditional jump comparing a host byte order first operand against a
 * network byte order (header) second operand, which is converted to host
 * byte order before the comparison.
 */
#define JMP_CMP_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1342 
/*
 * Conditional jump comparing a network byte order (header) first operand,
 * converted to host byte order, against a host byte order second operand.
 */
#define JMP_CMP_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1358 
/*
 * Conditional jump comparing two network byte order (header) fields: both
 * operands are converted to host byte order before the comparison.
 */
#define JMP_CMP_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1373 
/*
 * Fast version of JMP_CMP_HH: both operands are left-aligned in their stored
 * (network byte order) layout instead of being byte-swapped. NOTE(review):
 * only equivalent to JMP_CMP_HH for operators preserved by this alignment -
 * confirm against the instruction translator that selects the _FAST variant.
 */
#define JMP_CMP_HH_FAST(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1388 
1389 #else
1390 
1391 #define JMP_CMP_MH JMP_CMP
1392 #define JMP_CMP_HM JMP_CMP
1393 #define JMP_CMP_HH JMP_CMP
1394 #define JMP_CMP_HH_FAST JMP_CMP
1395 
1396 #endif
1397 
/*
 * Conditional jump comparing a host byte order field against an immediate
 * (instruction-encoded) value.
 */
#define JMP_CMP_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1410 
1411 #define JMP_CMP_MI JMP_CMP_I
1412 
1413 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1414 
/*
 * Conditional jump comparing a network byte order (header) field, converted
 * to host byte order, against an immediate (instruction-encoded) value.
 */
#define JMP_CMP_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1426 
1427 #else
1428 
1429 #define JMP_CMP_HI JMP_CMP_I
1430 
1431 #endif
1432 
/*
 * Read an n_bits-wide (1 <= n_bits <= 64, or the shift is undefined) field
 * stored in host byte order at the given byte offset within the thread's
 * meta-data. Evaluates to the masked field value (GCC statement expression).
 */
#define METADATA_READ(thread, offset, n_bits)                                  \
({                                                                             \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
	(m64 & m64_mask);                                                      \
})
1440 
/*
 * Write the n_bits (1 <= n_bits <= 64, or the shift is undefined) least
 * significant bits of value into the thread meta-data field at the given byte
 * offset, preserving the other bits of the enclosing 64-bit word.
 */
#define METADATA_WRITE(thread, offset, n_bits, value)                          \
{                                                                              \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
									       \
	uint64_t m_new = value;                                                \
									       \
	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
}
1451 
1452 #ifndef RTE_SWX_PIPELINE_THREADS_MAX
1453 #define RTE_SWX_PIPELINE_THREADS_MAX 16
1454 #endif
1455 
1456 #ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
1457 #define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 256
1458 #endif
1459 
/*
 * Pipeline instance: the configuration-time object lists (TAILQs), the
 * run-time structures derived from them, the per-thread contexts and assorted
 * counts/state.
 */
struct rte_swx_pipeline {
	/* Configuration-time object lists. */
	struct struct_type_tailq struct_types;
	struct port_in_type_tailq port_in_types;
	struct port_in_tailq ports_in;
	struct port_out_type_tailq port_out_types;
	struct port_out_tailq ports_out;
	struct extern_type_tailq extern_types;
	struct extern_obj_tailq extern_objs;
	struct extern_func_tailq extern_funcs;
	struct hash_func_tailq hash_funcs;
	struct header_tailq headers;
	struct struct_type *metadata_st;
	uint32_t metadata_struct_id;
	struct action_tailq actions;
	struct table_type_tailq table_types;
	struct table_tailq tables;
	struct selector_tailq selectors;
	struct learner_tailq learners;
	struct regarray_tailq regarrays;
	struct meter_profile_tailq meter_profiles;
	struct metarray_tailq metarrays;

	/* Run-time structures. NOTE(review): presumably populated by the
	 * pipeline build step (see build_done below) - confirm against the
	 * build code.
	 */
	struct port_in_runtime *in;
	struct port_out_runtime *out;
	struct mirroring_session *mirroring_sessions;
	struct instruction **action_instructions;
	action_func_t *action_funcs;
	struct rte_swx_table_state *table_state;
	struct table_statistics *table_stats;
	struct selector_statistics *selector_stats;
	struct learner_statistics *learner_stats;
	struct hash_func_runtime *hash_func_runtime;
	struct regarray_runtime *regarray_runtime;
	struct metarray_runtime *metarray_runtime;
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	instr_exec_t *instruction_table;
	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
	void *lib;

	/* Counts and state. Note: n_ports_in is used as a power-of-2 mask by
	 * pipeline_port_inc(), as are n_mirroring_slots/n_mirroring_sessions
	 * by __instr_mirror_exec().
	 */
	uint32_t n_structs;
	uint32_t n_ports_in;
	uint32_t n_ports_out;
	uint32_t n_mirroring_slots;
	uint32_t n_mirroring_sessions;
	uint32_t n_extern_objs;
	uint32_t n_extern_funcs;
	uint32_t n_hash_funcs;
	uint32_t n_actions;
	uint32_t n_tables;
	uint32_t n_selectors;
	uint32_t n_learners;
	uint32_t n_regarrays;
	uint32_t n_metarrays;
	uint32_t n_headers;
	uint32_t thread_id; /* Current thread, see thread_yield(). */
	uint32_t port_id; /* Current input port, see pipeline_port_inc(). */
	uint32_t n_instructions;
	int build_done;
	int numa_node;
};
1521 
1522 /*
1523  * Instruction.
1524  */
1525 static inline void
1526 pipeline_port_inc(struct rte_swx_pipeline *p)
1527 {
1528 	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
1529 }
1530 
/* Point the thread's instruction pointer at the pipeline's first instruction. */
static inline void
thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
{
	t->ip = p->instructions;
}
1536 
/* Set the thread's instruction pointer to the given instruction. */
static inline void
thread_ip_set(struct thread *t, struct instruction *ip)
{
	t->ip = ip;
}
1542 
1543 static inline void
1544 thread_ip_action_call(struct rte_swx_pipeline *p,
1545 		      struct thread *t,
1546 		      uint32_t action_id)
1547 {
1548 	t->ret = t->ip + 1;
1549 	t->ip = p->action_instructions[action_id];
1550 }
1551 
1552 static inline void
1553 thread_ip_inc(struct rte_swx_pipeline *p);
1554 
1555 static inline void
1556 thread_ip_inc(struct rte_swx_pipeline *p)
1557 {
1558 	struct thread *t = &p->threads[p->thread_id];
1559 
1560 	t->ip++;
1561 }
1562 
/* Advance the thread's instruction pointer by cond positions (typically 0 or 1). */
static inline void
thread_ip_inc_cond(struct thread *t, int cond)
{
	t->ip += cond;
}
1568 
1569 static inline void
1570 thread_yield(struct rte_swx_pipeline *p)
1571 {
1572 	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
1573 }
1574 
1575 static inline void
1576 thread_yield_cond(struct rte_swx_pipeline *p, int cond)
1577 {
1578 	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
1579 }
1580 
1581 /*
1582  * rx.
1583  */
/*
 * The rx instruction: make a packet available to the current thread.
 *
 * When the current packet is flagged for recirculation, it is re-injected
 * into the pipeline (headers and output header list reset, pass counter
 * incremented) without touching the input port. Otherwise a new packet is
 * read from the current input port and the port ID is written into the
 * meta-data field specified by the instruction.
 *
 * @param p Pipeline instance.
 * @param t Current thread.
 * @param ip The rx instruction.
 * @return 1 when a packet is available for processing, 0 otherwise (per the
 *	TRACE below, pkt_rx() reports the number of packets received: 0 or 1).
 */
static inline int
__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct port_in_runtime *port = &p->in[p->port_id];
	struct rte_swx_pkt *pkt = &t->pkt;
	int pkt_received;

	/* Recirculation: keep the current packet. */
	if (t->recirculate) {
		TRACE("[Thread %2u] rx - recirculate (pass %u)\n",
		      p->thread_id,
		      t->recirc_pass_id + 1);

		/* Packet. */
		t->ptr = &pkt->pkt[pkt->offset];
		t->mirroring_slots_mask = 0;
		t->recirculate = 0;
		t->recirc_pass_id++;

		/* Headers. */
		t->valid_headers = 0;
		t->n_headers_out = 0;

		/* Tables. */
		t->table_state = p->table_state;

		return 1;
	}

	/* Packet. */
	pkt_received = port->pkt_rx(port->obj, pkt);
	t->ptr = &pkt->pkt[pkt->offset];
	rte_prefetch0(t->ptr);

	TRACE("[Thread %2u] rx %s from port %u\n",
	      p->thread_id,
	      pkt_received ? "1 pkt" : "0 pkts",
	      p->port_id);

	t->mirroring_slots_mask = 0;
	t->recirc_pass_id = 0;

	/* Headers. */
	t->valid_headers = 0;
	t->n_headers_out = 0;

	/* Meta-data. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);

	/* Tables. */
	t->table_state = p->table_state;

	/* Thread. */
	pipeline_port_inc(p);

	return pkt_received;
}
1641 
1642 static inline void
1643 instr_rx_exec(struct rte_swx_pipeline *p)
1644 {
1645 	struct thread *t = &p->threads[p->thread_id];
1646 	struct instruction *ip = t->ip;
1647 	int pkt_received;
1648 
1649 	/* Packet. */
1650 	pkt_received = __instr_rx_exec(p, t, ip);
1651 
1652 	/* Thread. */
1653 	thread_ip_inc_cond(t, pkt_received);
1654 	thread_yield(p);
1655 }
1656 
1657 /*
1658  * tx.
1659  */
/*
 * Build the final packet headers in front of the payload, handling three
 * cases from cheapest to most general:
 *  1. A single output header already sitting right before the payload (no
 *     header change or header decapsulation): only the packet offset and
 *     length need adjusting.
 *  2. Exactly two output headers with the inner one already in place: copy
 *     the outer header just in front of it (header encapsulation, optionally
 *     after a prior decapsulation).
 *  3. Anything else: linearize all output headers into the thread's scratch
 *     storage, then copy them as one block in front of the payload.
 */
static inline void
emit_handler(struct thread *t)
{
	struct header_out_runtime *h0 = &t->headers_out[0];
	struct header_out_runtime *h1 = &t->headers_out[1];
	uint32_t offset = 0, i;

	/* No header change or header decapsulation. */
	if ((t->n_headers_out == 1) &&
	    (h0->ptr + h0->n_bytes == t->ptr)) {
		TRACE("Emit handler: no header change or header decap.\n");

		t->pkt.offset -= h0->n_bytes;
		t->pkt.length += h0->n_bytes;

		return;
	}

	/* Header encapsulation (optionally, with prior header decapsulation). */
	if ((t->n_headers_out == 2) &&
	    (h1->ptr + h1->n_bytes == t->ptr) &&
	    (h0->ptr == h0->ptr0)) {
		/* Note: shadows the function-level offset, unused on this path. */
		uint32_t offset;

		TRACE("Emit handler: header encapsulation.\n");

		offset = h0->n_bytes + h1->n_bytes;
		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
		t->pkt.offset -= offset;
		t->pkt.length += offset;

		return;
	}

	/* For any other case. */
	TRACE("Emit handler: complex case.\n");

	for (i = 0; i < t->n_headers_out; i++) {
		struct header_out_runtime *h = &t->headers_out[i];

		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
		offset += h->n_bytes;
	}

	if (offset) {
		memcpy(t->ptr - offset, t->header_out_storage, offset);
		t->pkt.offset -= offset;
		t->pkt.length += offset;
	}
}
1710 
/*
 * Transmit a clone of the packet to every mirroring session armed in the
 * thread's slot mask, clearing each slot bit as it is serviced. Per session,
 * either the fast clone or the (potentially truncated) regular clone path of
 * the session's output port is used.
 */
static inline void
mirroring_handler(struct rte_swx_pipeline *p, struct thread *t, struct rte_swx_pkt *pkt)
{
	uint64_t slots_mask = t->mirroring_slots_mask, slot_mask;
	uint32_t slot_id;

	for (slot_id = 0, slot_mask = 1LLU ; slots_mask; slot_id++, slot_mask <<= 1)
		if (slot_mask & slots_mask) {
			struct port_out_runtime *port;
			struct mirroring_session *session;
			uint32_t port_id, session_id;

			session_id = t->mirroring_slots[slot_id];
			session = &p->mirroring_sessions[session_id];

			port_id = session->port_id;
			port = &p->out[port_id];

			if (session->fast_clone)
				port->pkt_fast_clone_tx(port->obj, pkt);
			else
				port->pkt_clone_tx(port->obj, pkt, session->truncation_length);

			slots_mask &= ~slot_mask;
		}
}
1737 
1738 static inline void
1739 __instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
1740 {
1741 	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
1742 	struct port_out_runtime *port = &p->out[port_id];
1743 	struct rte_swx_pkt *pkt = &t->pkt;
1744 
1745 	/* Recirculation: keep the current packet. */
1746 	if (t->recirculate) {
1747 		TRACE("[Thread %2u]: tx 1 pkt - recirculate\n",
1748 		      p->thread_id);
1749 
1750 		/* Headers. */
1751 		emit_handler(t);
1752 
1753 		/* Packet. */
1754 		mirroring_handler(p, t, pkt);
1755 
1756 		return;
1757 	}
1758 
1759 	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
1760 	      p->thread_id,
1761 	      (uint32_t)port_id);
1762 
1763 	/* Headers. */
1764 	emit_handler(t);
1765 
1766 	/* Packet. */
1767 	mirroring_handler(p, t, pkt);
1768 	port->pkt_tx(port->obj, pkt);
1769 }
1770 
1771 static inline void
1772 __instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
1773 {
1774 	uint64_t port_id = ip->io.io.val;
1775 	struct port_out_runtime *port = &p->out[port_id];
1776 	struct rte_swx_pkt *pkt = &t->pkt;
1777 
1778 	/* Recirculation: keep the current packet. */
1779 	if (t->recirculate) {
1780 		TRACE("[Thread %2u]: tx (i) 1 pkt - recirculate\n",
1781 		      p->thread_id);
1782 
1783 		/* Headers. */
1784 		emit_handler(t);
1785 
1786 		/* Packet. */
1787 		mirroring_handler(p, t, pkt);
1788 
1789 		return;
1790 	}
1791 
1792 	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
1793 	      p->thread_id,
1794 	      (uint32_t)port_id);
1795 
1796 	/* Headers. */
1797 	emit_handler(t);
1798 
1799 	/* Packet. */
1800 	mirroring_handler(p, t, pkt);
1801 	port->pkt_tx(port->obj, pkt);
1802 }
1803 
1804 static inline void
1805 __instr_drop_exec(struct rte_swx_pipeline *p,
1806 		  struct thread *t,
1807 		  const struct instruction *ip __rte_unused)
1808 {
1809 	uint64_t port_id = p->n_ports_out - 1;
1810 	struct port_out_runtime *port = &p->out[port_id];
1811 	struct rte_swx_pkt *pkt = &t->pkt;
1812 
1813 	TRACE("[Thread %2u]: drop 1 pkt\n",
1814 	      p->thread_id);
1815 
1816 	/* Headers. */
1817 	emit_handler(t);
1818 
1819 	/* Packet. */
1820 	mirroring_handler(p, t, pkt);
1821 	port->pkt_tx(port->obj, pkt);
1822 }
1823 
1824 static inline void
1825 __instr_mirror_exec(struct rte_swx_pipeline *p,
1826 		    struct thread *t,
1827 		    const struct instruction *ip)
1828 {
1829 	uint64_t slot_id = instr_operand_hbo(t, &ip->mirror.dst);
1830 	uint64_t session_id = instr_operand_hbo(t, &ip->mirror.src);
1831 
1832 	slot_id &= p->n_mirroring_slots - 1;
1833 	session_id &= p->n_mirroring_sessions - 1;
1834 
1835 	TRACE("[Thread %2u]: mirror pkt (slot = %u, session = %u)\n",
1836 	      p->thread_id,
1837 	      (uint32_t)slot_id,
1838 	      (uint32_t)session_id);
1839 
1840 	t->mirroring_slots[slot_id] = session_id;
1841 	t->mirroring_slots_mask |= 1LLU << slot_id;
1842 }
1843 
/*
 * The recirculate instruction: flag the current packet so that the next rx
 * instruction re-injects it into the pipeline instead of reading a new packet
 * from the input port. Note: p is only referenced by TRACE(), hence the
 * __rte_unused marking.
 */
static inline void
__instr_recirculate_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip __rte_unused)
{
	TRACE("[Thread %2u]: recirculate\n",
	      p->thread_id);

	t->recirculate = 1;
}
1854 
/*
 * The recircid instruction: write the current recirculation pass ID into the
 * meta-data field specified by the instruction. Note: p is only referenced by
 * TRACE(), hence the __rte_unused marking.
 */
static inline void
__instr_recircid_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u]: recircid (pass %u)\n",
	      p->thread_id,
	      t->recirc_pass_id);

	/* Meta-data. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, t->recirc_pass_id);
}
1867 
1868 /*
1869  * extract.
1870  */
/*
 * Extract n_extract fixed-size headers from the packet front: each header
 * struct pointer is mapped directly onto the packet data (zero-copy), the
 * header is marked valid, and the packet offset/length/pointer are advanced
 * past the extracted bytes. The header IDs, struct IDs and byte counts are
 * all encoded in the instruction.
 */
static inline void
__instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip,
			      uint32_t n_extract)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;
	uint32_t i;

	for (i = 0; i < n_extract; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];
		uint32_t n_bytes = ip->io.hdr.n_bytes[i];

		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
		      p->thread_id,
		      header_id,
		      n_bytes);

		/* Headers. */
		t->structs[struct_id] = ptr;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);

		/* Packet. */
		offset += n_bytes;
		length -= n_bytes;
		ptr += n_bytes;
	}

	/* Headers. */
	t->valid_headers = valid_headers;

	/* Packet. */
	t->pkt.offset = offset;
	t->pkt.length = length;
	t->ptr = ptr;
}
1911 
/* Extract exactly one fixed-size header. */
static inline void
__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	__instr_hdr_extract_many_exec(p, t, ip, 1);
}

/*
 * Fused extract wrappers: per the TRACE messages, N consecutive extract
 * instructions get fused into a single instruction that extracts all N
 * headers in one go (N = 2..8 below).
 */
static inline void
__instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 2);
}

static inline void
__instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 3);
}

static inline void
__instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 4);
}

static inline void
__instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 5);
}

static inline void
__instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 6);
}

static inline void
__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 7);
}

static inline void
__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 8);
}
1989 
/*
 * Extract a variable-size header: the size of its fixed part is given by the
 * instruction (n_bytes), while the size of the variable trailing part
 * (n_bytes_last) is read at run-time from the meta-data field designated by
 * the instruction.
 */
static inline void
__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;

	/* Variable-size part length, read from meta-data at run-time. */
	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];
	uint32_t n_bytes = ip->io.hdr.n_bytes[0];

	struct header_runtime *h = &t->headers[header_id];

	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
	      p->thread_id,
	      header_id,
	      n_bytes,
	      n_bytes_last);

	/* Total number of bytes consumed from the packet. */
	n_bytes += n_bytes_last;

	/* Headers: map the header struct onto the packet data in place, mark
	 * the header as valid and record its actual size.
	 */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	h->n_bytes = n_bytes;

	/* Packet: advance past the extracted bytes. NOTE(review): assumes the
	 * remaining packet length covers n_bytes — presumably guaranteed by
	 * the program; confirm.
	 */
	t->pkt.offset = offset + n_bytes;
	t->pkt.length = length - n_bytes;
	t->ptr = ptr + n_bytes;
}
2025 
2026 static inline void
2027 __instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
2028 			   struct thread *t,
2029 			   const struct instruction *ip)
2030 {
2031 	uint64_t valid_headers = t->valid_headers;
2032 	uint8_t *ptr = t->ptr;
2033 
2034 	uint32_t header_id = ip->io.hdr.header_id[0];
2035 	uint32_t struct_id = ip->io.hdr.struct_id[0];
2036 
2037 	TRACE("[Thread %2u]: lookahead header %u\n",
2038 	      p->thread_id,
2039 	      header_id);
2040 
2041 	/* Headers. */
2042 	t->structs[struct_id] = ptr;
2043 	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
2044 }
2045 
2046 /*
2047  * emit.
2048  */
/*
 * Emit up to n_emit headers into the per-thread output header list. Invalid
 * headers are skipped; headers that are contiguous in memory are coalesced
 * into a single output descriptor (headers_out entry) so they can later be
 * handled as one chunk.
 */
static inline void
__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip,
			   uint32_t n_emit)
{
	uint64_t valid_headers = t->valid_headers;
	uint32_t n_headers_out = t->n_headers_out;
	struct header_out_runtime *ho = NULL;
	uint8_t *ho_ptr = NULL;
	uint32_t ho_nbytes = 0, i;

	for (i = 0; i < n_emit; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];

		struct header_runtime *hi = &t->headers[header_id];
		uint8_t *hi_ptr0 = hi->ptr0;
		uint32_t n_bytes = hi->n_bytes;

		uint8_t *hi_ptr = t->structs[struct_id];

		/* Skip headers that are not valid for this packet. */
		if (!MASK64_BIT_GET(valid_headers, header_id)) {
			TRACE("[Thread %2u]: emit header %u (invalid)\n",
			      p->thread_id,
			      header_id);

			continue;
		}

		TRACE("[Thread %2u]: emit header %u (valid)\n",
		      p->thread_id,
		      header_id);

		/* Headers. */
		if (!ho) {
			/* First valid header of this instruction: either start
			 * the output list or resume from its current tail
			 * descriptor, so coalescing can continue across
			 * consecutive emit instructions.
			 */
			if (!n_headers_out) {
				ho = &t->headers_out[0];

				ho->ptr0 = hi_ptr0;
				ho->ptr = hi_ptr;

				ho_ptr = hi_ptr;
				ho_nbytes = n_bytes;

				n_headers_out = 1;

				continue;
			} else {
				ho = &t->headers_out[n_headers_out - 1];

				ho_ptr = ho->ptr;
				ho_nbytes = ho->n_bytes;
			}
		}

		/* Coalesce with the current output descriptor when this header
		 * immediately follows it in memory; otherwise close the current
		 * descriptor (commit its byte count) and open a new one.
		 */
		if (ho_ptr + ho_nbytes == hi_ptr) {
			ho_nbytes += n_bytes;
		} else {
			ho->n_bytes = ho_nbytes;

			ho++;
			ho->ptr0 = hi_ptr0;
			ho->ptr = hi_ptr;

			ho_ptr = hi_ptr;
			ho_nbytes = n_bytes;

			n_headers_out++;
		}
	}

	/* Commit the byte count of the last touched output descriptor. */
	if (ho)
		ho->n_bytes = ho_nbytes;
	t->n_headers_out = n_headers_out;
}
2125 
/* Emit a single header. */
static inline void
__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
		      struct thread *t,
		      const struct instruction *ip)
{
	__instr_hdr_emit_many_exec(p, t, ip, 1);
}
2133 
/*
 * Fused execution of 1 .. 8 consecutive "emit" instructions followed by a
 * "tx" instruction: emit the headers, then transmit the packet. The fused
 * instruction count in each TRACE message includes the trailing tx.
 */
static inline void
__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 1);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 2);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 3);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 4);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 5);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 6);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 7);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 8);
	__instr_tx_exec(p, t, ip);
}
2221 
2222 /*
2223  * validate.
2224  */
2225 static inline void
2226 __instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
2227 			  struct thread *t,
2228 			  const struct instruction *ip)
2229 {
2230 	uint32_t header_id = ip->valid.header_id;
2231 
2232 	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
2233 
2234 	/* Headers. */
2235 	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
2236 }
2237 
2238 /*
2239  * invalidate.
2240  */
2241 static inline void
2242 __instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
2243 			    struct thread *t,
2244 			    const struct instruction *ip)
2245 {
2246 	uint32_t header_id = ip->valid.header_id;
2247 
2248 	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
2249 
2250 	/* Headers. */
2251 	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
2252 }
2253 
2254 /*
2255  * learn.
2256  */
/*
 * learn: add a new entry to the current learner table, with the action ID
 * taken from the instruction, the action arguments read from meta-data
 * starting at mf_first_arg_offset, and the entry timeout ID read from
 * meta-data as well.
 */
static inline void
__instr_learn_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip)
{
	uint64_t action_id = ip->learn.action_id;
	uint32_t mf_first_arg_offset = ip->learn.mf_first_arg_offset;
	uint32_t timeout_id = METADATA_READ(t, ip->learn.mf_timeout_id_offset,
		ip->learn.mf_timeout_id_n_bits);
	uint32_t learner_id = t->learner_id;

	/* Learner table states are stored after the regular and the selector
	 * table states, hence the n_tables + n_selectors offset.
	 */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];
	uint32_t status;

	/* Table. */
	status = rte_swx_table_learner_add(ts->obj,
					   l->mailbox,
					   t->time,
					   action_id,
					   &t->metadata[mf_first_arg_offset],
					   timeout_id);

	TRACE("[Thread %2u] learner %u learn %s\n",
	      p->thread_id,
	      learner_id,
	      status ? "ok" : "error");

	/* status also indexes the per-outcome packet counters. NOTE(review):
	 * confirm this matches the rte_swx_table_learner_add() return value
	 * convention and the n_pkts_learn[] index semantics.
	 */
	stats->n_pkts_learn[status] += 1;
}
2288 
2289 /*
2290  * rearm.
2291  */
/*
 * rearm: restart the expiration timer of the current learner table entry
 * (per the learner mailbox state), keeping its existing timeout.
 */
static inline void
__instr_rearm_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip __rte_unused)
{
	uint32_t learner_id = t->learner_id;

	/* Learner table states follow the regular and selector table states. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_rearm(ts->obj, l->mailbox, t->time);

	TRACE("[Thread %2u] learner %u rearm\n",
	      p->thread_id,
	      learner_id);

	stats->n_pkts_rearm += 1;
}
2312 
/*
 * rearm (with new timeout): restart the expiration timer of the current
 * learner table entry, switching it to the timeout ID read from meta-data.
 */
static inline void
__instr_rearm_new_exec(struct rte_swx_pipeline *p,
		       struct thread *t,
		       const struct instruction *ip)
{
	/* New timeout ID, read from meta-data at run-time. */
	uint32_t timeout_id = METADATA_READ(t, ip->learn.mf_timeout_id_offset,
		ip->learn.mf_timeout_id_n_bits);
	uint32_t learner_id = t->learner_id;

	/* Learner table states follow the regular and selector table states. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_rearm_new(ts->obj, l->mailbox, t->time, timeout_id);

	TRACE("[Thread %2u] learner %u rearm with timeout ID %u\n",
	      p->thread_id,
	      learner_id,
	      timeout_id);

	stats->n_pkts_rearm += 1;
}
2336 
2337 /*
2338  * forget.
2339  */
/*
 * forget: delete the current learner table entry (per the learner mailbox
 * state).
 */
static inline void
__instr_forget_exec(struct rte_swx_pipeline *p,
		    struct thread *t,
		    const struct instruction *ip __rte_unused)
{
	uint32_t learner_id = t->learner_id;

	/* Learner table states follow the regular and selector table states. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_delete(ts->obj, l->mailbox);

	TRACE("[Thread %2u] learner %u forget\n",
	      p->thread_id,
	      learner_id);

	stats->n_pkts_forget += 1;
}
2360 
2361 /*
2362  * extern.
2363  */
2364 static inline uint32_t
2365 __instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
2366 			struct thread *t,
2367 			const struct instruction *ip)
2368 {
2369 	uint32_t obj_id = ip->ext_obj.ext_obj_id;
2370 	uint32_t func_id = ip->ext_obj.func_id;
2371 	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
2372 	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
2373 	uint32_t done;
2374 
2375 	TRACE("[Thread %2u] extern obj %u member func %u\n",
2376 	      p->thread_id,
2377 	      obj_id,
2378 	      func_id);
2379 
2380 	done = func(obj->obj, obj->mailbox);
2381 
2382 	return done;
2383 }
2384 
2385 static inline uint32_t
2386 __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
2387 			 struct thread *t,
2388 			 const struct instruction *ip)
2389 {
2390 	uint32_t ext_func_id = ip->ext_func.ext_func_id;
2391 	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
2392 	rte_swx_extern_func_t func = ext_func->func;
2393 	uint32_t done;
2394 
2395 	TRACE("[Thread %2u] extern func %u\n",
2396 	      p->thread_id,
2397 	      ext_func_id);
2398 
2399 	done = func(ext_func->mailbox);
2400 
2401 	return done;
2402 }
2403 
2404 /*
2405  * hash.
2406  */
2407 static inline void
2408 __instr_hash_func_exec(struct rte_swx_pipeline *p,
2409 		       struct thread *t,
2410 		       const struct instruction *ip)
2411 {
2412 	uint32_t hash_func_id = ip->hash_func.hash_func_id;
2413 	uint32_t dst_offset = ip->hash_func.dst.offset;
2414 	uint32_t n_dst_bits = ip->hash_func.dst.n_bits;
2415 	uint32_t src_struct_id = ip->hash_func.src.struct_id;
2416 	uint32_t src_offset = ip->hash_func.src.offset;
2417 	uint32_t n_src_bytes = ip->hash_func.src.n_bytes;
2418 
2419 	struct hash_func_runtime *func = &p->hash_func_runtime[hash_func_id];
2420 	uint8_t *src_ptr = t->structs[src_struct_id];
2421 	uint32_t result;
2422 
2423 	TRACE("[Thread %2u] hash %u\n",
2424 	      p->thread_id,
2425 	      hash_func_id);
2426 
2427 	result = func->func(&src_ptr[src_offset], n_src_bytes, 0);
2428 	METADATA_WRITE(t, dst_offset, n_dst_bits, result);
2429 }
2430 
2431 /*
2432  * mov.
2433  */
/*
 * mov: dst = src. The suffix encodes the operand kinds per the naming used
 * throughout this file (first letter = destination, second = source):
 * m = meta-data, h = header field, i = immediate value; the plain variant
 * covers the remaining case — see the MOV* macros for the exact handling.
 */
static inline void
__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
		 struct thread *t,
		 const struct instruction *ip)
{
	TRACE("[Thread %2u] mov\n", p->thread_id);

	MOV(t, ip);
}

static inline void
__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);

	MOV_MH(t, ip);
}

static inline void
__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);

	MOV_HM(t, ip);
}

static inline void
__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);

	MOV_HH(t, ip);
}

static inline void
__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
		   struct thread *t,
		   const struct instruction *ip)
{
	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);

	MOV_I(t, ip);
}
2483 
2484 /*
2485  * dma.
2486  */
/*
 * Fused execution of n_dma "dma" instructions: copy chunks of action data
 * (struct ID 0) into their destination headers and mark those headers as
 * valid. When a destination header is currently invalid, the copy goes to
 * the header's own buffer (ptr0), which then becomes the header location.
 */
static inline void
__instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip,
			 uint32_t n_dma)
{
	uint8_t *action_data = t->structs[0];
	uint64_t valid_headers = t->valid_headers;
	uint32_t i;

	for (i = 0; i < n_dma; i++) {
		uint32_t header_id = ip->dma.dst.header_id[i];
		uint32_t struct_id = ip->dma.dst.struct_id[i];
		uint32_t offset = ip->dma.src.offset[i];
		uint32_t n_bytes = ip->dma.n_bytes[i];

		struct header_runtime *h = &t->headers[header_id];
		uint8_t *h_ptr0 = h->ptr0;
		uint8_t *h_ptr = t->structs[struct_id];

		/* Valid header: overwrite it in place. Invalid header: write
		 * into the header's own buffer instead.
		 */
		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
			h_ptr : h_ptr0;
		void *src = &action_data[offset];

		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);

		/* Headers. */
		memcpy(dst, src, n_bytes);
		t->structs[struct_id] = dst;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	}

	t->valid_headers = valid_headers;
}
2521 
/* Single "dma" instruction, plus fused execution of 2 .. 8 consecutive
 * "dma" instructions via __instr_dma_ht_many_exec().
 */
static inline void
__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	__instr_dma_ht_many_exec(p, t, ip, 1);
}

static inline void
__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 2);
}

static inline void
__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 3);
}

static inline void
__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 4);
}

static inline void
__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 5);
}

static inline void
__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 6);
}

static inline void
__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 7);
}

static inline void
__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 8);
}
2583 
2584 /*
2585  * alu.
2586  */
/*
 * add: dst += src. The suffix encodes the operand kinds (first letter =
 * destination, second = source): m = meta-data, h = header field,
 * i = immediate; see the ALU* macros for the exact operand handling.
 */
static inline void
__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] add\n", p->thread_id);

	ALU(t, ip, +);
}

static inline void
__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mh)\n", p->thread_id);

	ALU_MH(t, ip, +);
}

static inline void
__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hm)\n", p->thread_id);

	ALU_HM(t, ip, +);
}

static inline void
__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hh)\n", p->thread_id);

	ALU_HH(t, ip, +);
}

static inline void
__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mi)\n", p->thread_id);

	ALU_MI(t, ip, +);
}

static inline void
__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hi)\n", p->thread_id);

	ALU_HI(t, ip, +);
}
2646 
/*
 * sub: dst -= src. Operand-kind suffixes as for the add instruction above
 * (m = meta-data, h = header field, i = immediate).
 */
static inline void
__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] sub\n", p->thread_id);

	ALU(t, ip, -);
}

static inline void
__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);

	ALU_MH(t, ip, -);
}

static inline void
__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);

	ALU_HM(t, ip, -);
}

static inline void
__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);

	ALU_HH(t, ip, -);
}

static inline void
__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);

	ALU_MI(t, ip, -);
}

static inline void
__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);

	ALU_HI(t, ip, -);
}
2706 
/*
 * shl: dst <<= src. Operand-kind suffixes as for the add instruction above
 * (m = meta-data, h = header field, i = immediate).
 */
static inline void
__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shl\n", p->thread_id);

	ALU(t, ip, <<);
}

static inline void
__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);

	ALU_MH(t, ip, <<);
}

static inline void
__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);

	ALU_HM(t, ip, <<);
}

static inline void
__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);

	ALU_HH(t, ip, <<);
}

static inline void
__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);

	ALU_MI(t, ip, <<);
}

static inline void
__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);

	ALU_HI(t, ip, <<);
}
2766 
/*
 * shr: dst >>= src. Operand-kind suffixes as for the add instruction above
 * (m = meta-data, h = header field, i = immediate).
 */
static inline void
__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shr\n", p->thread_id);

	ALU(t, ip, >>);
}

static inline void
__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);

	ALU_MH(t, ip, >>);
}

static inline void
__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);

	ALU_HM(t, ip, >>);
}

static inline void
__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);

	ALU_HH(t, ip, >>);
}

static inline void
__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);

	ALU_MI(t, ip, >>);
}

static inline void
__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);

	ALU_HI(t, ip, >>);
}
2827 
/*
 * and: dst &= src. Operand-kind suffixes as for the add instruction above.
 * The hm/hh variants use the _FAST macros — presumably possible because
 * bitwise operations are byte-order agnostic; see the macro definitions.
 */
static inline void
__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] and\n", p->thread_id);

	ALU(t, ip, &);
}

static inline void
__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (mh)\n", p->thread_id);

	ALU_MH(t, ip, &);
}

static inline void
__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, &);
}

static inline void
__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, &);
}

static inline void
__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] and (i)\n", p->thread_id);

	ALU_I(t, ip, &);
}
2877 
/*
 * or: dst |= src. Operand-kind suffixes and _FAST variant usage as for the
 * and instruction above.
 */
static inline void
__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] or\n", p->thread_id);

	ALU(t, ip, |);
}

static inline void
__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (mh)\n", p->thread_id);

	ALU_MH(t, ip, |);
}

static inline void
__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, |);
}

static inline void
__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, |);
}

static inline void
__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u] or (i)\n", p->thread_id);

	ALU_I(t, ip, |);
}
2927 
/*
 * xor: dst ^= src. Operand-kind suffixes and _FAST variant usage as for the
 * and instruction above.
 */
static inline void
__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] xor\n", p->thread_id);

	ALU(t, ip, ^);
}

static inline void
__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);

	ALU_MH(t, ip, ^);
}

static inline void
__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, ^);
}

static inline void
__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, ^);
}

static inline void
__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (i)\n", p->thread_id);

	ALU_I(t, ip, ^);
}
2977 
/*
 * ckadd (field): incrementally update a 16-bit ones' complement checksum
 * (e.g. the IPv4 header checksum) by adding a source field of up to 64 bits
 * (cf. RFC 1071/1624 ones' complement arithmetic).
 *
 * NOTE(review): the source field is read as a 64-bit load through a cast
 * pointer — assumes the platform tolerates this access; confirm.
 */
static inline void
__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* The first input (r) is a 16-bit number. The second and the third
	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
	 * three numbers (output r) is a 34-bit number.
	 */
	r += (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is an 18-bit
	 * number. In the worst case scenario, the sum of the two numbers is a
	 * 19-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
	 * therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3035 
/*
 * cksub (field): incrementally update a 16-bit ones' complement checksum
 * (e.g. the IPv4 header checksum) by subtracting a source field of up to
 * 64 bits (cf. RFC 1071/1624 ones' complement arithmetic).
 *
 * NOTE(review): the source field is read as a 64-bit load through a cast
 * pointer — assumes the platform tolerates this access; confirm.
 */
static inline void
__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
	 * the following sequence of operations in 2's complement arithmetic:
	 *    a '- b = (a - b) % 0xFFFF.
	 *
	 * In order to prevent an underflow for the below subtraction, in which
	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
	 * minuend), we first add a multiple of the 0xFFFF modulus to the
	 * minuend. The number we add to the minuend needs to be a 34-bit number
	 * or higher, so for readability reasons we picked the 36-bit multiple.
	 * We are effectively turning the 16-bit minuend into a 36-bit number:
	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
	 */
	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */

	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
	 * result (the output r) is a 36-bit number.
	 */
	r -= (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is a 20-bit
	 * number. Their sum is a 21-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3105 
/*
 * ckadd (struct, 20-byte source) instruction: compute the 16-bit 1's
 * complement checksum of a 20-byte source struct and fold it into the
 * 16-bit dst checksum field. The 20-byte size matches an IPv4 header
 * without options — presumably the hot case this unrolled version targets.
 * Two accumulators (r0, r1) are used, likely so the additions can proceed
 * in parallel; the bit-width comments below prove no accumulator overflow.
 */
static inline void
__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
				struct thread *t,
				const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint32_t *src32_ptr;
	uint64_t r0, r1;

	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);

	/* Structs: dst is the 16-bit checksum field; src is read as 32-bit words. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Initialize the result with destination 1's complement. */
	r0 = dst;
	r0 = ~r0 & 0xFFFF;

	r0 += src32_ptr[0]; /* The output r0 is a 33-bit number. */
	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
	r0 += src32_ptr[2]; /* The output r0 is a 34-bit number. */
	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */

	/* The first input is a 16-bit number. The second input is a 19-bit
	 * number. Their sum is a 20-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* Apply 1's complement to the result. */
	r0 = ~r0 & 0xFFFF;
	r0 = r0 ? r0 : 0xFFFF;

	*dst16_ptr = (uint16_t)r0;
}
3159 
/*
 * ckadd (struct) instruction: compute the 16-bit 1's complement checksum
 * over a variable-size source header and fold it into the 16-bit dst
 * checksum field. The src header ID is packed into ip->alu.src.n_bits by
 * the instruction translator. Dispatches to the unrolled 20-byte version
 * for the common (IPv4 header without options) size.
 *
 * NOTE(review): the loop consumes the source in 32-bit words, so it
 * assumes n_src_header_bytes is a multiple of 4 — presumably enforced at
 * instruction translation time; confirm.
 */
static inline void
__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip)
{
	uint32_t src_header_id = ip->alu.src.n_bits; /* The src header ID is stored here. */
	uint32_t n_src_header_bytes = t->headers[src_header_id].n_bytes;
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint32_t *src32_ptr;
	uint64_t r;
	uint32_t i;

	if (n_src_header_bytes == 20) {
		__instr_alu_ckadd_struct20_exec(p, t, ip);
		return;
	}

	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);

	/* Structs: dst is the 16-bit checksum field; src is read as 32-bit words. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* The max number of 32-bit words in a 32K-byte header is 2^13.
	 * Therefore, in the worst case scenario, a 45-bit number is added to a
	 * 16-bit number (the input r), so the output r is 46-bit number.
	 */
	for (i = 0; i < n_src_header_bytes / 4; i++, src32_ptr++)
		r += *src32_ptr;

	/* The first input is a 16-bit number. The second input is a 30-bit
	 * number. Their sum is a 31-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 15-bit number (0 .. 0x7FFF). The sum is a 17-bit number (0 .. 0x17FFE).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x17FFE), the output r is (0 .. 0x7FFF). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3222 
3223 /*
3224  * Register array.
3225  */
3226 static inline uint64_t *
3227 instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
3228 {
3229 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3230 	return r->regarray;
3231 }
3232 
3233 static inline uint64_t
3234 instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3235 {
3236 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3237 
3238 	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
3239 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
3240 	uint64_t idx64 = *idx64_ptr;
3241 	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
3242 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
3243 
3244 	return idx;
3245 }
3246 
3247 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3248 
3249 static inline uint64_t
3250 instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3251 {
3252 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3253 
3254 	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
3255 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
3256 	uint64_t idx64 = *idx64_ptr;
3257 	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;
3258 
3259 	return idx;
3260 }
3261 
3262 #else
3263 
3264 #define instr_regarray_idx_nbo instr_regarray_idx_hbo
3265 
3266 #endif
3267 
3268 static inline uint64_t
3269 instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3270 {
3271 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3272 
3273 	uint64_t idx = ip->regarray.idx_val & r->size_mask;
3274 
3275 	return idx;
3276 }
3277 
3278 static inline uint64_t
3279 instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
3280 {
3281 	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
3282 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
3283 	uint64_t src64 = *src64_ptr;
3284 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3285 	uint64_t src = src64 & src64_mask;
3286 
3287 	return src;
3288 }
3289 
3290 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3291 
3292 static inline uint64_t
3293 instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
3294 {
3295 	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
3296 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
3297 	uint64_t src64 = *src64_ptr;
3298 	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);
3299 
3300 	return src;
3301 }
3302 
3303 #else
3304 
3305 #define instr_regarray_src_nbo instr_regarray_src_hbo
3306 
3307 #endif
3308 
3309 static inline void
3310 instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
3311 {
3312 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
3313 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
3314 	uint64_t dst64 = *dst64_ptr;
3315 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3316 
3317 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3318 
3319 }
3320 
3321 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3322 
3323 static inline void
3324 instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
3325 {
3326 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
3327 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
3328 	uint64_t dst64 = *dst64_ptr;
3329 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3330 
3331 	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
3332 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3333 }
3334 
3335 #else
3336 
3337 #define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
3338 
3339 #endif
3340 
3341 static inline void
3342 __instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
3343 			    struct thread *t,
3344 			    const struct instruction *ip)
3345 {
3346 	uint64_t *regarray, idx;
3347 
3348 	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
3349 
3350 	regarray = instr_regarray_regarray(p, ip);
3351 	idx = instr_regarray_idx_nbo(p, t, ip);
3352 	rte_prefetch0(&regarray[idx]);
3353 }
3354 
3355 static inline void
3356 __instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
3357 			    struct thread *t,
3358 			    const struct instruction *ip)
3359 {
3360 	uint64_t *regarray, idx;
3361 
3362 	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
3363 
3364 	regarray = instr_regarray_regarray(p, ip);
3365 	idx = instr_regarray_idx_hbo(p, t, ip);
3366 	rte_prefetch0(&regarray[idx]);
3367 }
3368 
3369 static inline void
3370 __instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
3371 			    struct thread *t __rte_unused,
3372 			    const struct instruction *ip)
3373 {
3374 	uint64_t *regarray, idx;
3375 
3376 	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
3377 
3378 	regarray = instr_regarray_regarray(p, ip);
3379 	idx = instr_regarray_idx_imm(p, ip);
3380 	rte_prefetch0(&regarray[idx]);
3381 }
3382 
3383 static inline void
3384 __instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
3385 		       struct thread *t,
3386 		       const struct instruction *ip)
3387 {
3388 	uint64_t *regarray, idx;
3389 
3390 	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
3391 
3392 	regarray = instr_regarray_regarray(p, ip);
3393 	idx = instr_regarray_idx_nbo(p, t, ip);
3394 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3395 }
3396 
3397 static inline void
3398 __instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
3399 		       struct thread *t,
3400 		       const struct instruction *ip)
3401 {
3402 	uint64_t *regarray, idx;
3403 
3404 	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
3405 
3406 	/* Structs. */
3407 	regarray = instr_regarray_regarray(p, ip);
3408 	idx = instr_regarray_idx_hbo(p, t, ip);
3409 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3410 }
3411 
3412 static inline void
3413 __instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3414 {
3415 	uint64_t *regarray, idx;
3416 
3417 	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
3418 
3419 	regarray = instr_regarray_regarray(p, ip);
3420 	idx = instr_regarray_idx_nbo(p, t, ip);
3421 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3422 }
3423 
3424 static inline void
3425 __instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3426 {
3427 	uint64_t *regarray, idx;
3428 
3429 	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
3430 
3431 	regarray = instr_regarray_regarray(p, ip);
3432 	idx = instr_regarray_idx_hbo(p, t, ip);
3433 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3434 }
3435 
3436 static inline void
3437 __instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3438 {
3439 	uint64_t *regarray, idx;
3440 
3441 	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
3442 
3443 	regarray = instr_regarray_regarray(p, ip);
3444 	idx = instr_regarray_idx_imm(p, ip);
3445 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3446 }
3447 
3448 static inline void
3449 __instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3450 {
3451 	uint64_t *regarray, idx;
3452 
3453 	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
3454 
3455 	regarray = instr_regarray_regarray(p, ip);
3456 	idx = instr_regarray_idx_imm(p, ip);
3457 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3458 }
3459 
3460 static inline void
3461 __instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3462 {
3463 	uint64_t *regarray, idx, src;
3464 
3465 	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
3466 
3467 	regarray = instr_regarray_regarray(p, ip);
3468 	idx = instr_regarray_idx_nbo(p, t, ip);
3469 	src = instr_regarray_src_nbo(t, ip);
3470 	regarray[idx] = src;
3471 }
3472 
3473 static inline void
3474 __instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3475 {
3476 	uint64_t *regarray, idx, src;
3477 
3478 	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
3479 
3480 	regarray = instr_regarray_regarray(p, ip);
3481 	idx = instr_regarray_idx_nbo(p, t, ip);
3482 	src = instr_regarray_src_hbo(t, ip);
3483 	regarray[idx] = src;
3484 }
3485 
3486 static inline void
3487 __instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3488 {
3489 	uint64_t *regarray, idx, src;
3490 
3491 	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
3492 
3493 	regarray = instr_regarray_regarray(p, ip);
3494 	idx = instr_regarray_idx_hbo(p, t, ip);
3495 	src = instr_regarray_src_nbo(t, ip);
3496 	regarray[idx] = src;
3497 }
3498 
3499 static inline void
3500 __instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3501 {
3502 	uint64_t *regarray, idx, src;
3503 
3504 	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
3505 
3506 	regarray = instr_regarray_regarray(p, ip);
3507 	idx = instr_regarray_idx_hbo(p, t, ip);
3508 	src = instr_regarray_src_hbo(t, ip);
3509 	regarray[idx] = src;
3510 }
3511 
3512 static inline void
3513 __instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3514 {
3515 	uint64_t *regarray, idx, src;
3516 
3517 	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
3518 
3519 	regarray = instr_regarray_regarray(p, ip);
3520 	idx = instr_regarray_idx_nbo(p, t, ip);
3521 	src = ip->regarray.dstsrc_val;
3522 	regarray[idx] = src;
3523 }
3524 
3525 static inline void
3526 __instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3527 {
3528 	uint64_t *regarray, idx, src;
3529 
3530 	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
3531 
3532 	regarray = instr_regarray_regarray(p, ip);
3533 	idx = instr_regarray_idx_hbo(p, t, ip);
3534 	src = ip->regarray.dstsrc_val;
3535 	regarray[idx] = src;
3536 }
3537 
3538 static inline void
3539 __instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3540 {
3541 	uint64_t *regarray, idx, src;
3542 
3543 	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
3544 
3545 	regarray = instr_regarray_regarray(p, ip);
3546 	idx = instr_regarray_idx_imm(p, ip);
3547 	src = instr_regarray_src_nbo(t, ip);
3548 	regarray[idx] = src;
3549 }
3550 
3551 static inline void
3552 __instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3553 {
3554 	uint64_t *regarray, idx, src;
3555 
3556 	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
3557 
3558 	regarray = instr_regarray_regarray(p, ip);
3559 	idx = instr_regarray_idx_imm(p, ip);
3560 	src = instr_regarray_src_hbo(t, ip);
3561 	regarray[idx] = src;
3562 }
3563 
3564 static inline void
3565 __instr_regwr_rii_exec(struct rte_swx_pipeline *p,
3566 		       struct thread *t __rte_unused,
3567 		       const struct instruction *ip)
3568 {
3569 	uint64_t *regarray, idx, src;
3570 
3571 	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
3572 
3573 	regarray = instr_regarray_regarray(p, ip);
3574 	idx = instr_regarray_idx_imm(p, ip);
3575 	src = ip->regarray.dstsrc_val;
3576 	regarray[idx] = src;
3577 }
3578 
3579 static inline void
3580 __instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3581 {
3582 	uint64_t *regarray, idx, src;
3583 
3584 	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
3585 
3586 	regarray = instr_regarray_regarray(p, ip);
3587 	idx = instr_regarray_idx_nbo(p, t, ip);
3588 	src = instr_regarray_src_nbo(t, ip);
3589 	regarray[idx] += src;
3590 }
3591 
3592 static inline void
3593 __instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3594 {
3595 	uint64_t *regarray, idx, src;
3596 
3597 	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
3598 
3599 	regarray = instr_regarray_regarray(p, ip);
3600 	idx = instr_regarray_idx_nbo(p, t, ip);
3601 	src = instr_regarray_src_hbo(t, ip);
3602 	regarray[idx] += src;
3603 }
3604 
3605 static inline void
3606 __instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3607 {
3608 	uint64_t *regarray, idx, src;
3609 
3610 	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
3611 
3612 	regarray = instr_regarray_regarray(p, ip);
3613 	idx = instr_regarray_idx_hbo(p, t, ip);
3614 	src = instr_regarray_src_nbo(t, ip);
3615 	regarray[idx] += src;
3616 }
3617 
3618 static inline void
3619 __instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3620 {
3621 	uint64_t *regarray, idx, src;
3622 
3623 	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
3624 
3625 	regarray = instr_regarray_regarray(p, ip);
3626 	idx = instr_regarray_idx_hbo(p, t, ip);
3627 	src = instr_regarray_src_hbo(t, ip);
3628 	regarray[idx] += src;
3629 }
3630 
3631 static inline void
3632 __instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3633 {
3634 	uint64_t *regarray, idx, src;
3635 
3636 	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
3637 
3638 	regarray = instr_regarray_regarray(p, ip);
3639 	idx = instr_regarray_idx_nbo(p, t, ip);
3640 	src = ip->regarray.dstsrc_val;
3641 	regarray[idx] += src;
3642 }
3643 
3644 static inline void
3645 __instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3646 {
3647 	uint64_t *regarray, idx, src;
3648 
3649 	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
3650 
3651 	regarray = instr_regarray_regarray(p, ip);
3652 	idx = instr_regarray_idx_hbo(p, t, ip);
3653 	src = ip->regarray.dstsrc_val;
3654 	regarray[idx] += src;
3655 }
3656 
3657 static inline void
3658 __instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3659 {
3660 	uint64_t *regarray, idx, src;
3661 
3662 	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
3663 
3664 	regarray = instr_regarray_regarray(p, ip);
3665 	idx = instr_regarray_idx_imm(p, ip);
3666 	src = instr_regarray_src_nbo(t, ip);
3667 	regarray[idx] += src;
3668 }
3669 
3670 static inline void
3671 __instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3672 {
3673 	uint64_t *regarray, idx, src;
3674 
3675 	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
3676 
3677 	regarray = instr_regarray_regarray(p, ip);
3678 	idx = instr_regarray_idx_imm(p, ip);
3679 	src = instr_regarray_src_hbo(t, ip);
3680 	regarray[idx] += src;
3681 }
3682 
3683 static inline void
3684 __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
3685 			struct thread *t __rte_unused,
3686 			const struct instruction *ip)
3687 {
3688 	uint64_t *regarray, idx, src;
3689 
3690 	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
3691 
3692 	regarray = instr_regarray_regarray(p, ip);
3693 	idx = instr_regarray_idx_imm(p, ip);
3694 	src = ip->regarray.dstsrc_val;
3695 	regarray[idx] += src;
3696 }
3697 
3698 /*
3699  * metarray.
3700  */
3701 static inline struct meter *
3702 instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3703 {
3704 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3705 
3706 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3707 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3708 	uint64_t idx64 = *idx64_ptr;
3709 	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
3710 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
3711 
3712 	return &r->metarray[idx];
3713 }
3714 
3715 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3716 
3717 static inline struct meter *
3718 instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3719 {
3720 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3721 
3722 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3723 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3724 	uint64_t idx64 = *idx64_ptr;
3725 	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
3726 
3727 	return &r->metarray[idx];
3728 }
3729 
3730 #else
3731 
3732 #define instr_meter_idx_nbo instr_meter_idx_hbo
3733 
3734 #endif
3735 
3736 static inline struct meter *
3737 instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3738 {
3739 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3740 
3741 	uint64_t idx =  ip->meter.idx_val & r->size_mask;
3742 
3743 	return &r->metarray[idx];
3744 }
3745 
3746 static inline uint32_t
3747 instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
3748 {
3749 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3750 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3751 	uint64_t src64 = *src64_ptr;
3752 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
3753 	uint64_t src = src64 & src64_mask;
3754 
3755 	return (uint32_t)src;
3756 }
3757 
3758 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3759 
3760 static inline uint32_t
3761 instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
3762 {
3763 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3764 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3765 	uint64_t src64 = *src64_ptr;
3766 	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
3767 
3768 	return (uint32_t)src;
3769 }
3770 
3771 #else
3772 
3773 #define instr_meter_length_nbo instr_meter_length_hbo
3774 
3775 #endif
3776 
3777 static inline enum rte_color
3778 instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
3779 {
3780 	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
3781 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
3782 	uint64_t src64 = *src64_ptr;
3783 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
3784 	uint64_t src = src64 & src64_mask;
3785 
3786 	return (enum rte_color)src;
3787 }
3788 
3789 static inline void
3790 instr_meter_color_out_hbo_set(struct thread *t,
3791 			      const struct instruction *ip,
3792 			      enum rte_color color_out)
3793 {
3794 	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
3795 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
3796 	uint64_t dst64 = *dst64_ptr;
3797 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
3798 
3799 	uint64_t src = (uint64_t)color_out;
3800 
3801 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3802 }
3803 
3804 static inline void
3805 __instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
3806 			   struct thread *t,
3807 			   const struct instruction *ip)
3808 {
3809 	struct meter *m;
3810 
3811 	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
3812 
3813 	m = instr_meter_idx_nbo(p, t, ip);
3814 	rte_prefetch0(m);
3815 }
3816 
3817 static inline void
3818 __instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
3819 			   struct thread *t,
3820 			   const struct instruction *ip)
3821 {
3822 	struct meter *m;
3823 
3824 	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
3825 
3826 	m = instr_meter_idx_hbo(p, t, ip);
3827 	rte_prefetch0(m);
3828 }
3829 
3830 static inline void
3831 __instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
3832 			   struct thread *t __rte_unused,
3833 			   const struct instruction *ip)
3834 {
3835 	struct meter *m;
3836 
3837 	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
3838 
3839 	m = instr_meter_idx_imm(p, ip);
3840 	rte_prefetch0(m);
3841 }
3842 
3843 static inline void
3844 __instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3845 {
3846 	struct meter *m;
3847 	uint64_t time, n_pkts, n_bytes;
3848 	uint32_t length;
3849 	enum rte_color color_in, color_out;
3850 
3851 	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
3852 
3853 	m = instr_meter_idx_nbo(p, t, ip);
3854 	rte_prefetch0(m->n_pkts);
3855 	time = rte_get_tsc_cycles();
3856 	length = instr_meter_length_nbo(t, ip);
3857 	color_in = instr_meter_color_in_hbo(t, ip);
3858 
3859 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3860 		&m->profile->profile,
3861 		time,
3862 		length,
3863 		color_in);
3864 
3865 	color_out &= m->color_mask;
3866 
3867 	n_pkts = m->n_pkts[color_out];
3868 	n_bytes = m->n_bytes[color_out];
3869 
3870 	instr_meter_color_out_hbo_set(t, ip, color_out);
3871 
3872 	m->n_pkts[color_out] = n_pkts + 1;
3873 	m->n_bytes[color_out] = n_bytes + length;
3874 }
3875 
3876 static inline void
3877 __instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3878 {
3879 	struct meter *m;
3880 	uint64_t time, n_pkts, n_bytes;
3881 	uint32_t length;
3882 	enum rte_color color_in, color_out;
3883 
3884 	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
3885 
3886 	m = instr_meter_idx_nbo(p, t, ip);
3887 	rte_prefetch0(m->n_pkts);
3888 	time = rte_get_tsc_cycles();
3889 	length = instr_meter_length_nbo(t, ip);
3890 	color_in = (enum rte_color)ip->meter.color_in_val;
3891 
3892 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3893 		&m->profile->profile,
3894 		time,
3895 		length,
3896 		color_in);
3897 
3898 	color_out &= m->color_mask;
3899 
3900 	n_pkts = m->n_pkts[color_out];
3901 	n_bytes = m->n_bytes[color_out];
3902 
3903 	instr_meter_color_out_hbo_set(t, ip, color_out);
3904 
3905 	m->n_pkts[color_out] = n_pkts + 1;
3906 	m->n_bytes[color_out] = n_bytes + length;
3907 }
3908 
3909 static inline void
3910 __instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3911 {
3912 	struct meter *m;
3913 	uint64_t time, n_pkts, n_bytes;
3914 	uint32_t length;
3915 	enum rte_color color_in, color_out;
3916 
3917 	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
3918 
3919 	m = instr_meter_idx_nbo(p, t, ip);
3920 	rte_prefetch0(m->n_pkts);
3921 	time = rte_get_tsc_cycles();
3922 	length = instr_meter_length_hbo(t, ip);
3923 	color_in = instr_meter_color_in_hbo(t, ip);
3924 
3925 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3926 		&m->profile->profile,
3927 		time,
3928 		length,
3929 		color_in);
3930 
3931 	color_out &= m->color_mask;
3932 
3933 	n_pkts = m->n_pkts[color_out];
3934 	n_bytes = m->n_bytes[color_out];
3935 
3936 	instr_meter_color_out_hbo_set(t, ip, color_out);
3937 
3938 	m->n_pkts[color_out] = n_pkts + 1;
3939 	m->n_bytes[color_out] = n_bytes + length;
3940 }
3941 
3942 static inline void
3943 __instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3944 {
3945 	struct meter *m;
3946 	uint64_t time, n_pkts, n_bytes;
3947 	uint32_t length;
3948 	enum rte_color color_in, color_out;
3949 
3950 	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
3951 
3952 	m = instr_meter_idx_nbo(p, t, ip);
3953 	rte_prefetch0(m->n_pkts);
3954 	time = rte_get_tsc_cycles();
3955 	length = instr_meter_length_hbo(t, ip);
3956 	color_in = (enum rte_color)ip->meter.color_in_val;
3957 
3958 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3959 		&m->profile->profile,
3960 		time,
3961 		length,
3962 		color_in);
3963 
3964 	color_out &= m->color_mask;
3965 
3966 	n_pkts = m->n_pkts[color_out];
3967 	n_bytes = m->n_bytes[color_out];
3968 
3969 	instr_meter_color_out_hbo_set(t, ip, color_out);
3970 
3971 	m->n_pkts[color_out] = n_pkts + 1;
3972 	m->n_bytes[color_out] = n_bytes + length;
3973 }
3974 
3975 static inline void
3976 __instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3977 {
3978 	struct meter *m;
3979 	uint64_t time, n_pkts, n_bytes;
3980 	uint32_t length;
3981 	enum rte_color color_in, color_out;
3982 
3983 	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
3984 
3985 	m = instr_meter_idx_hbo(p, t, ip);
3986 	rte_prefetch0(m->n_pkts);
3987 	time = rte_get_tsc_cycles();
3988 	length = instr_meter_length_nbo(t, ip);
3989 	color_in = instr_meter_color_in_hbo(t, ip);
3990 
3991 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3992 		&m->profile->profile,
3993 		time,
3994 		length,
3995 		color_in);
3996 
3997 	color_out &= m->color_mask;
3998 
3999 	n_pkts = m->n_pkts[color_out];
4000 	n_bytes = m->n_bytes[color_out];
4001 
4002 	instr_meter_color_out_hbo_set(t, ip, color_out);
4003 
4004 	m->n_pkts[color_out] = n_pkts + 1;
4005 	m->n_bytes[color_out] = n_bytes + length;
4006 }
4007 
4008 static inline void
4009 __instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4010 {
4011 	struct meter *m;
4012 	uint64_t time, n_pkts, n_bytes;
4013 	uint32_t length;
4014 	enum rte_color color_in, color_out;
4015 
4016 	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
4017 
4018 	m = instr_meter_idx_hbo(p, t, ip);
4019 	rte_prefetch0(m->n_pkts);
4020 	time = rte_get_tsc_cycles();
4021 	length = instr_meter_length_nbo(t, ip);
4022 	color_in = (enum rte_color)ip->meter.color_in_val;
4023 
4024 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4025 		&m->profile->profile,
4026 		time,
4027 		length,
4028 		color_in);
4029 
4030 	color_out &= m->color_mask;
4031 
4032 	n_pkts = m->n_pkts[color_out];
4033 	n_bytes = m->n_bytes[color_out];
4034 
4035 	instr_meter_color_out_hbo_set(t, ip, color_out);
4036 
4037 	m->n_pkts[color_out] = n_pkts + 1;
4038 	m->n_bytes[color_out] = n_bytes + length;
4039 }
4040 
4041 static inline void
4042 __instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4043 {
4044 	struct meter *m;
4045 	uint64_t time, n_pkts, n_bytes;
4046 	uint32_t length;
4047 	enum rte_color color_in, color_out;
4048 
4049 	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
4050 
4051 	m = instr_meter_idx_hbo(p, t, ip);
4052 	rte_prefetch0(m->n_pkts);
4053 	time = rte_get_tsc_cycles();
4054 	length = instr_meter_length_hbo(t, ip);
4055 	color_in = instr_meter_color_in_hbo(t, ip);
4056 
4057 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4058 		&m->profile->profile,
4059 		time,
4060 		length,
4061 		color_in);
4062 
4063 	color_out &= m->color_mask;
4064 
4065 	n_pkts = m->n_pkts[color_out];
4066 	n_bytes = m->n_bytes[color_out];
4067 
4068 	instr_meter_color_out_hbo_set(t, ip, color_out);
4069 
4070 	m->n_pkts[color_out] = n_pkts + 1;
4071 	m->n_bytes[color_out] = n_bytes + length;
4072 }
4073 
4074 static inline void
4075 __instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4076 {
4077 	struct meter *m;
4078 	uint64_t time, n_pkts, n_bytes;
4079 	uint32_t length;
4080 	enum rte_color color_in, color_out;
4081 
4082 	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
4083 
4084 	m = instr_meter_idx_hbo(p, t, ip);
4085 	rte_prefetch0(m->n_pkts);
4086 	time = rte_get_tsc_cycles();
4087 	length = instr_meter_length_hbo(t, ip);
4088 	color_in = (enum rte_color)ip->meter.color_in_val;
4089 
4090 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4091 		&m->profile->profile,
4092 		time,
4093 		length,
4094 		color_in);
4095 
4096 	color_out &= m->color_mask;
4097 
4098 	n_pkts = m->n_pkts[color_out];
4099 	n_bytes = m->n_bytes[color_out];
4100 
4101 	instr_meter_color_out_hbo_set(t, ip, color_out);
4102 
4103 	m->n_pkts[color_out] = n_pkts + 1;
4104 	m->n_bytes[color_out] = n_bytes + length;
4105 }
4106 
4107 static inline void
4108 __instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4109 {
4110 	struct meter *m;
4111 	uint64_t time, n_pkts, n_bytes;
4112 	uint32_t length;
4113 	enum rte_color color_in, color_out;
4114 
4115 	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
4116 
4117 	m = instr_meter_idx_imm(p, ip);
4118 	rte_prefetch0(m->n_pkts);
4119 	time = rte_get_tsc_cycles();
4120 	length = instr_meter_length_nbo(t, ip);
4121 	color_in = instr_meter_color_in_hbo(t, ip);
4122 
4123 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4124 		&m->profile->profile,
4125 		time,
4126 		length,
4127 		color_in);
4128 
4129 	color_out &= m->color_mask;
4130 
4131 	n_pkts = m->n_pkts[color_out];
4132 	n_bytes = m->n_bytes[color_out];
4133 
4134 	instr_meter_color_out_hbo_set(t, ip, color_out);
4135 
4136 	m->n_pkts[color_out] = n_pkts + 1;
4137 	m->n_bytes[color_out] = n_bytes + length;
4138 }
4139 
4140 static inline void
4141 __instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4142 {
4143 	struct meter *m;
4144 	uint64_t time, n_pkts, n_bytes;
4145 	uint32_t length;
4146 	enum rte_color color_in, color_out;
4147 
4148 	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
4149 
4150 	m = instr_meter_idx_imm(p, ip);
4151 	rte_prefetch0(m->n_pkts);
4152 	time = rte_get_tsc_cycles();
4153 	length = instr_meter_length_nbo(t, ip);
4154 	color_in = (enum rte_color)ip->meter.color_in_val;
4155 
4156 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4157 		&m->profile->profile,
4158 		time,
4159 		length,
4160 		color_in);
4161 
4162 	color_out &= m->color_mask;
4163 
4164 	n_pkts = m->n_pkts[color_out];
4165 	n_bytes = m->n_bytes[color_out];
4166 
4167 	instr_meter_color_out_hbo_set(t, ip, color_out);
4168 
4169 	m->n_pkts[color_out] = n_pkts + 1;
4170 	m->n_bytes[color_out] = n_bytes + length;
4171 }
4172 
4173 static inline void
4174 __instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4175 {
4176 	struct meter *m;
4177 	uint64_t time, n_pkts, n_bytes;
4178 	uint32_t length;
4179 	enum rte_color color_in, color_out;
4180 
4181 	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
4182 
4183 	m = instr_meter_idx_imm(p, ip);
4184 	rte_prefetch0(m->n_pkts);
4185 	time = rte_get_tsc_cycles();
4186 	length = instr_meter_length_hbo(t, ip);
4187 	color_in = instr_meter_color_in_hbo(t, ip);
4188 
4189 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4190 		&m->profile->profile,
4191 		time,
4192 		length,
4193 		color_in);
4194 
4195 	color_out &= m->color_mask;
4196 
4197 	n_pkts = m->n_pkts[color_out];
4198 	n_bytes = m->n_bytes[color_out];
4199 
4200 	instr_meter_color_out_hbo_set(t, ip, color_out);
4201 
4202 	m->n_pkts[color_out] = n_pkts + 1;
4203 	m->n_bytes[color_out] = n_bytes + length;
4204 }
4205 
4206 static inline void
4207 __instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4208 {
4209 	struct meter *m;
4210 	uint64_t time, n_pkts, n_bytes;
4211 	uint32_t length;
4212 	enum rte_color color_in, color_out;
4213 
4214 	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
4215 
4216 	m = instr_meter_idx_imm(p, ip);
4217 	rte_prefetch0(m->n_pkts);
4218 	time = rte_get_tsc_cycles();
4219 	length = instr_meter_length_hbo(t, ip);
4220 	color_in = (enum rte_color)ip->meter.color_in_val;
4221 
4222 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4223 		&m->profile->profile,
4224 		time,
4225 		length,
4226 		color_in);
4227 
4228 	color_out &= m->color_mask;
4229 
4230 	n_pkts = m->n_pkts[color_out];
4231 	n_bytes = m->n_bytes[color_out];
4232 
4233 	instr_meter_color_out_hbo_set(t, ip, color_out);
4234 
4235 	m->n_pkts[color_out] = n_pkts + 1;
4236 	m->n_bytes[color_out] = n_bytes + length;
4237 }
4238 
4239 #endif
4240