xref: /dpdk/lib/pipeline/rte_swx_pipeline_internal.h (revision 1c839246f934340e8dfb8fd71bc436f81541a587)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
5 #define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
6 
7 #include <inttypes.h>
8 #include <string.h>
9 #include <sys/queue.h>
10 
11 #include <rte_byteorder.h>
12 #include <rte_common.h>
13 #include <rte_cycles.h>
14 #include <rte_prefetch.h>
15 #include <rte_meter.h>
16 
17 #include <rte_swx_table_selector.h>
18 #include <rte_swx_table_learner.h>
19 #include <rte_swx_pipeline.h>
20 #include <rte_swx_ctl.h>
21 
/* Compile-time trace verbosity level (0 = tracing disabled). */
#ifndef TRACE_LEVEL
#define TRACE_LEVEL 0
#endif

/* TRACE() expands to printf() when tracing is enabled and to nothing
 * otherwise, so trace calls cost nothing in production builds.
 */
#if TRACE_LEVEL
#define TRACE(...) printf(__VA_ARGS__)
#else
#define TRACE(...)
#endif

/*
 * Environment.
 */
/* 64-bit network <-> host byte order conversion helpers. */
#define ntoh64(x) rte_be_to_cpu_64(x)
#define hton64(x) rte_cpu_to_be_64(x)
37 
38 /*
39  * Struct.
40  */
/* Single field of a struct type. */
struct field {
	char name[RTE_SWX_NAME_SIZE]; /* Field name. */
	uint32_t n_bits; /* Field size in bits. */
	uint32_t offset; /* Field offset within its parent struct. */
	int var_size; /* Non-zero when this field has a variable size. */
};

/* Struct type: a named, ordered set of fields. Used for header and
 * meta-data layouts, extern mailboxes and action argument data.
 */
struct struct_type {
	TAILQ_ENTRY(struct_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct field *fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	uint32_t n_bits; /* Total struct size in bits. */
	uint32_t n_bits_min; /* NOTE(review): presumably the minimum size when var_size is set -- confirm. */
	int var_size; /* Non-zero when any field has a variable size. */
};

TAILQ_HEAD(struct_type_tailq, struct_type);
59 
60 /*
61  * Input port.
62  */
/* Input port type: name plus the operations used to drive ports of this type. */
struct port_in_type {
	TAILQ_ENTRY(port_in_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_in_ops ops;
};

TAILQ_HEAD(port_in_type_tailq, port_in_type);

/* Input port instance. */
struct port_in {
	TAILQ_ENTRY(port_in) node;
	struct port_in_type *type; /* Port type. */
	void *obj; /* Opaque per-port object. */
	uint32_t id; /* Port ID. */
};

TAILQ_HEAD(port_in_tailq, port_in);

/* Data path state of an input port: RX callback plus its opaque argument. */
struct port_in_runtime {
	rte_swx_port_in_pkt_rx_t pkt_rx;
	void *obj;
};
84 
85 /*
86  * Output port.
87  */
/* Output port type: name plus the operations used to drive ports of this type. */
struct port_out_type {
	TAILQ_ENTRY(port_out_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_out_ops ops;
};

TAILQ_HEAD(port_out_type_tailq, port_out_type);

/* Output port instance. */
struct port_out {
	TAILQ_ENTRY(port_out) node;
	struct port_out_type *type; /* Port type. */
	void *obj; /* Opaque per-port object. */
	uint32_t id; /* Port ID. */
};

TAILQ_HEAD(port_out_tailq, port_out);

/* Data path state of an output port: TX and flush callbacks plus their
 * opaque argument.
 */
struct port_out_runtime {
	rte_swx_port_out_pkt_tx_t pkt_tx;
	rte_swx_port_out_flush_t flush;
	void *obj;
};
110 
111 /*
112  * Extern object.
113  */
/* Member function of an extern object type. */
struct extern_type_member_func {
	TAILQ_ENTRY(extern_type_member_func) node;
	char name[RTE_SWX_NAME_SIZE];
	rte_swx_extern_type_member_func_t func;
	uint32_t id; /* Member function ID within its type. */
};

TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);

/* Extern object type: constructor/destructor, mailbox layout and the list
 * of member functions.
 */
struct extern_type {
	TAILQ_ENTRY(extern_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type;
	rte_swx_extern_type_constructor_t constructor;
	rte_swx_extern_type_destructor_t destructor;
	struct extern_type_member_func_tailq funcs;
	uint32_t n_funcs;
};

TAILQ_HEAD(extern_type_tailq, extern_type);

/* Extern object instance. */
struct extern_obj {
	TAILQ_ENTRY(extern_obj) node;
	char name[RTE_SWX_NAME_SIZE];
	struct extern_type *type;
	void *obj; /* Opaque object instance. */
	uint32_t struct_id; /* ID of the mailbox struct. */
	uint32_t id;
};

TAILQ_HEAD(extern_obj_tailq, extern_obj);

/* Maximum number of member functions per extern object type. */
#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
#endif

/* Data path state of an extern object: the object itself, its mailbox and
 * a flat function table indexed by member function ID.
 */
struct extern_obj_runtime {
	void *obj;
	uint8_t *mailbox;
	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
};
155 
156 /*
157  * Extern function.
158  */
/* Extern function: a free function with its own mailbox struct type. */
struct extern_func {
	TAILQ_ENTRY(extern_func) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type;
	rte_swx_extern_func_t func;
	uint32_t struct_id; /* ID of the mailbox struct. */
	uint32_t id;
};

TAILQ_HEAD(extern_func_tailq, extern_func);

/* Data path state of an extern function: callback plus its mailbox. */
struct extern_func_runtime {
	uint8_t *mailbox;
	rte_swx_extern_func_t func;
};
174 
175 /*
176  * Header.
177  */
/* Packet header declared by the pipeline program. */
struct header {
	TAILQ_ENTRY(header) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st; /* Header layout. */
	uint32_t struct_id;
	uint32_t id;
};

TAILQ_HEAD(header_tailq, header);

/* Per-thread state of an extracted or generated header. */
struct header_runtime {
	uint8_t *ptr0; /* Header storage base pointer. */
	uint32_t n_bytes;
};

/* Per-thread state of a header queued for emission. */
struct header_out_runtime {
	uint8_t *ptr0; /* Storage base pointer. */
	uint8_t *ptr; /* Current write pointer. */
	uint32_t n_bytes;
};
198 
199 /*
200  * Instruction.
201  */
202 
203 /* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
204  * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
205  * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
206  * when transferred to packet meta-data and in NBO when transferred to packet
207  * headers.
208  */
209 
210 /* Notation conventions:
211  *    -Header field: H = h.header.field (dst/src)
212  *    -Meta-data field: M = m.field (dst/src)
213  *    -Extern object mailbox field: E = e.field (dst/src)
214  *    -Extern function mailbox field: F = f.field (dst/src)
215  *    -Table action data field: T = t.field (src only)
216  *    -Immediate value: I = 32-bit unsigned value (src only)
217  */
218 
/*
 * Instruction opcodes. The suffixes encode the operand kinds resolved at
 * instruction translation time (H, M, E, F, T, I -- see the notation
 * conventions above), so that each opcode maps to a specialized executor.
 */
enum instruction_type {
	/* rx m.port_in */
	INSTR_RX,

	/* tx port_out
	 * port_out = MI
	 */
	INSTR_TX,   /* port_out = M */
	INSTR_TX_I, /* port_out = I */
	INSTR_DROP,

	/* extract h.header */
	INSTR_HDR_EXTRACT,
	INSTR_HDR_EXTRACT2,
	INSTR_HDR_EXTRACT3,
	INSTR_HDR_EXTRACT4,
	INSTR_HDR_EXTRACT5,
	INSTR_HDR_EXTRACT6,
	INSTR_HDR_EXTRACT7,
	INSTR_HDR_EXTRACT8,

	/* extract h.header m.last_field_size */
	INSTR_HDR_EXTRACT_M,

	/* lookahead h.header */
	INSTR_HDR_LOOKAHEAD,

	/* emit h.header */
	INSTR_HDR_EMIT,
	INSTR_HDR_EMIT_TX,
	INSTR_HDR_EMIT2_TX,
	INSTR_HDR_EMIT3_TX,
	INSTR_HDR_EMIT4_TX,
	INSTR_HDR_EMIT5_TX,
	INSTR_HDR_EMIT6_TX,
	INSTR_HDR_EMIT7_TX,
	INSTR_HDR_EMIT8_TX,

	/* validate h.header */
	INSTR_HDR_VALIDATE,

	/* invalidate h.header */
	INSTR_HDR_INVALIDATE,

	/* mov dst src
	 * dst = src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_MOV,    /* dst = MEF, src = MEFT */
	INSTR_MOV_MH, /* dst = MEF, src = H */
	INSTR_MOV_HM, /* dst = H, src = MEFT */
	INSTR_MOV_HH, /* dst = H, src = H */
	INSTR_MOV_I,  /* dst = HMEF, src = I */

	/* dma h.header t.field
	 * memcpy(h.header, t.field, sizeof(h.header))
	 */
	INSTR_DMA_HT,
	INSTR_DMA_HT2,
	INSTR_DMA_HT3,
	INSTR_DMA_HT4,
	INSTR_DMA_HT5,
	INSTR_DMA_HT6,
	INSTR_DMA_HT7,
	INSTR_DMA_HT8,

	/* add dst src
	 * dst += src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
	INSTR_ALU_ADD_HH, /* dst = H, src = H */
	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
	INSTR_ALU_ADD_HI, /* dst = H, src = I */

	/* sub dst src
	 * dst -= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
	INSTR_ALU_SUB_HH, /* dst = H, src = H */
	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
	INSTR_ALU_SUB_HI, /* dst = H, src = I */

	/* ckadd dst src
	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
	 * dst = H, src = {H, h.header}, '+ = 1's complement addition operator
	 */
	INSTR_ALU_CKADD_FIELD,    /* src = H */
	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 bytes. */
	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with sizeof(header) any 4-byte multiple. */

	/* cksub dst src
	 * dst = dst '- src
	 * dst = H, src = H, '- = 1's complement subtraction operator
	 */
	INSTR_ALU_CKSUB_FIELD,

	/* and dst src
	 * dst &= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
	INSTR_ALU_AND_HH, /* dst = H, src = H */
	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */

	/* or dst src
	 * dst |= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_OR_HH, /* dst = H, src = H */
	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */

	/* xor dst src
	 * dst ^= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_XOR_HH, /* dst = H, src = H */
	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */

	/* shl dst src
	 * dst <<= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHL_HH, /* dst = H, src = H */
	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHL_HI, /* dst = H, src = I */

	/* shr dst src
	 * dst >>= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHR_HH, /* dst = H, src = H */
	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHR_HI, /* dst = H, src = I */

	/* regprefetch REGARRAY index
	 * prefetch REGARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_REGPREFETCH_RH, /* index = H */
	INSTR_REGPREFETCH_RM, /* index = MEFT */
	INSTR_REGPREFETCH_RI, /* index = I */

	/* regrd dst REGARRAY index
	 * dst = REGARRAY[index]
	 * dst = HMEF, index = HMEFTI
	 */
	INSTR_REGRD_HRH, /* dst = H, index = H */
	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
	INSTR_REGRD_HRI, /* dst = H, index = I */
	INSTR_REGRD_MRH, /* dst = MEF, index = H */
	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
	INSTR_REGRD_MRI, /* dst = MEF, index = I */

	/* regwr REGARRAY index src
	 * REGARRAY[index] = src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGWR_RHH, /* index = H, src = H */
	INSTR_REGWR_RHM, /* index = H, src = MEFT */
	INSTR_REGWR_RHI, /* index = H, src = I */
	INSTR_REGWR_RMH, /* index = MEFT, src = H */
	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGWR_RMI, /* index = MEFT, src = I */
	INSTR_REGWR_RIH, /* index = I, src = H */
	INSTR_REGWR_RIM, /* index = I, src = MEFT */
	INSTR_REGWR_RII, /* index = I, src = I */

	/* regadd REGARRAY index src
	 * REGARRAY[index] += src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGADD_RHH, /* index = H, src = H */
	INSTR_REGADD_RHM, /* index = H, src = MEFT */
	INSTR_REGADD_RHI, /* index = H, src = I */
	INSTR_REGADD_RMH, /* index = MEFT, src = H */
	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGADD_RMI, /* index = MEFT, src = I */
	INSTR_REGADD_RIH, /* index = I, src = H */
	INSTR_REGADD_RIM, /* index = I, src = MEFT */
	INSTR_REGADD_RII, /* index = I, src = I */

	/* metprefetch METARRAY index
	 * prefetch METARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_METPREFETCH_H, /* index = H */
	INSTR_METPREFETCH_M, /* index = MEFT */
	INSTR_METPREFETCH_I, /* index = I */

	/* meter METARRAY index length color_in color_out
	 * color_out = meter(METARRAY[index], length, color_in)
	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
	 */
	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */

	/* table TABLE */
	INSTR_TABLE,
	INSTR_TABLE_AF,
	INSTR_SELECTOR,
	INSTR_LEARNER,
	INSTR_LEARNER_AF,

	/* learn LEARNER ACTION_NAME [ m.action_first_arg ] */
	INSTR_LEARNER_LEARN,

	/* forget */
	INSTR_LEARNER_FORGET,

	/* extern e.obj.func */
	INSTR_EXTERN_OBJ,

	/* extern f.func */
	INSTR_EXTERN_FUNC,

	/* jmp LABEL
	 * Unconditional jump
	 */
	INSTR_JMP,

	/* jmpv LABEL h.header
	 * Jump if header is valid
	 */
	INSTR_JMP_VALID,

	/* jmpnv LABEL h.header
	 * Jump if header is invalid
	 */
	INSTR_JMP_INVALID,

	/* jmph LABEL
	 * Jump if table lookup hit
	 */
	INSTR_JMP_HIT,

	/* jmpnh LABEL
	 * Jump if table lookup miss
	 */
	INSTR_JMP_MISS,

	/* jmpa LABEL ACTION
	 * Jump if action run
	 */
	INSTR_JMP_ACTION_HIT,

	/* jmpna LABEL ACTION
	 * Jump if action not run
	 */
	INSTR_JMP_ACTION_MISS,

	/* jmpeq LABEL a b
	 * Jump if a is equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_EQ_HH, /* a = H, b = H */
	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmpneq LABEL a b
	 * Jump if a is not equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_NEQ_HH, /* a = H, b = H */
	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmplt LABEL a b
	 * Jump if a is less than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
	INSTR_JMP_LT_HH, /* a = H, b = H */
	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
	INSTR_JMP_LT_HI, /* a = H, b = I */

	/* jmpgt LABEL a b
	 * Jump if a is greater than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
	INSTR_JMP_GT_HH, /* a = H, b = H */
	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
	INSTR_JMP_GT_HI, /* a = H, b = I */

	/* return
	 * Return from action
	 */
	INSTR_RETURN,

	/* Start of custom instructions. */
	INSTR_CUSTOM_0,
};
549 
/* Generic instruction operand: location (struct ID + byte offset) and size
 * in bits.
 */
struct instr_operand {
	uint8_t struct_id;
	uint8_t n_bits;
	uint8_t offset;
	uint8_t pad;
};

/* Operands of the header extract/emit (I/O) instructions; up to 8 headers
 * can be processed by a single instruction.
 */
struct instr_io {
	struct {
		union {
			struct {
				uint8_t offset;
				uint8_t n_bits;
				uint8_t pad[2];
			};

			uint32_t val; /* All of the above as a single word. */
		};
	} io;

	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
		uint8_t n_bytes[8];
	} hdr;
};
576 
/* Operand of the header validate/invalidate instructions. */
struct instr_hdr_validity {
	uint8_t header_id;
};

/* Operand of the table/selector/learner lookup instructions. */
struct instr_table {
	uint8_t table_id;
};

/* Operands of the learn instruction. */
struct instr_learn {
	uint8_t action_id;
	uint8_t mf_offset; /* Meta-data offset of the first action argument. */
};

/* Operands of the extern object member function call instruction. */
struct instr_extern_obj {
	uint8_t ext_obj_id;
	uint8_t func_id;
};

/* Operand of the extern function call instruction. */
struct instr_extern_func {
	uint8_t ext_func_id;
};
598 
/* Operands of the two-operand (mov/ALU) instructions. The *_I opcode
 * flavors use the immediate source value instead of a source field.
 */
struct instr_dst_src {
	struct instr_operand dst;
	union {
		struct instr_operand src;
		uint64_t src_val; /* Immediate source value. */
	};
};

/* Operands of the register array instructions. */
struct instr_regarray {
	uint8_t regarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val; /* Immediate index value. */
	};

	union {
		struct instr_operand dstsrc;
		uint64_t dstsrc_val; /* Immediate destination/source value. */
	};
};

/* Operands of the meter instructions. */
struct instr_meter {
	uint8_t metarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val; /* Immediate index value. */
	};

	struct instr_operand length;

	union {
		struct instr_operand color_in;
		uint32_t color_in_val; /* Immediate input color value. */
	};

	struct instr_operand color_out;
};

/* Operands of the header DMA (dma h.header t.field) instructions; up to 8
 * header copies per instruction.
 */
struct instr_dma {
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
	} dst;

	struct {
		uint8_t offset[8];
	} src;

	uint16_t n_bytes[8];
};

/* Operands of the jump instructions: target instruction pointer plus the
 * optional comparison operands.
 */
struct instr_jmp {
	struct instruction *ip; /* Jump target. */

	union {
		struct instr_operand a;
		uint8_t header_id; /* For jmpv/jmpnv. */
		uint8_t action_id; /* For jmpa/jmpna. */
	};

	union {
		struct instr_operand b;
		uint64_t b_val; /* Immediate second comparison operand. */
	};
};
668 
/* Instruction: opcode plus the operand encoding specific to that opcode. */
struct instruction {
	enum instruction_type type;
	union {
		struct instr_io io;
		struct instr_hdr_validity valid;
		struct instr_dst_src mov;
		struct instr_regarray regarray;
		struct instr_meter meter;
		struct instr_dma dma;
		struct instr_dst_src alu;
		struct instr_table table;
		struct instr_learn learn;
		struct instr_extern_obj ext_obj;
		struct instr_extern_func ext_func;
		struct instr_jmp jmp;
	};
};

/* Translation-time bookkeeping attached to each instruction. */
struct instruction_data {
	char label[RTE_SWX_NAME_SIZE]; /* Label defined on this instruction, if any. */
	char jmp_label[RTE_SWX_NAME_SIZE]; /* Label this instruction jumps to, if any. */
	uint32_t n_users; /* user = jmp instruction to this instruction. */
	int invalid; /* Non-zero when the instruction has been invalidated. */
};

/* Executor callback for a single instruction. */
typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
695 
696 /*
697  * Action.
698  */
/* Action function callback. */
typedef void
(*action_func_t)(struct rte_swx_pipeline *p);

/* Action: a named instruction sequence with an optional argument struct. */
struct action {
	TAILQ_ENTRY(action) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st; /* Action argument struct type, if any. */
	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
	struct instruction *instructions; /* Array of n_instructions instructions. */
	struct instruction_data *instruction_data; /* Parallel array to instructions. */
	uint32_t n_instructions;
	uint32_t id;
};

TAILQ_HEAD(action_tailq, action);
714 
715 /*
716  * Table.
717  */
/* Table type: match type plus the table operations implementing it. */
struct table_type {
	TAILQ_ENTRY(table_type) node;
	char name[RTE_SWX_NAME_SIZE];
	enum rte_swx_table_match_type match_type;
	struct rte_swx_table_ops ops;
};

TAILQ_HEAD(table_type_tailq, table_type);

/* Match field: the field plus its individual match type. */
struct match_field {
	enum rte_swx_table_match_type match_type;
	struct field *field;
};

/* Table: match fields plus the set of permitted actions. */
struct table {
	TAILQ_ENTRY(table) node;
	char name[RTE_SWX_NAME_SIZE];
	char args[RTE_SWX_NAME_SIZE];
	struct table_type *type; /* NULL when n_fields == 0. */

	/* Match. */
	struct match_field *fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	struct header *header; /* Only valid when n_fields > 0. */

	/* Action. */
	struct action **actions; /* Array of n_actions elements. */
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const;
	uint32_t action_data_size_max;
	int *action_is_for_table_entries; /* Per-action flag, parallel to actions. */
	int *action_is_for_default_entry; /* Per-action flag, parallel to actions. */

	uint32_t size;
	uint32_t id;
};

TAILQ_HEAD(table_tailq, table);

/* Per-thread table lookup state. */
struct table_runtime {
	rte_swx_table_lookup_t func;
	void *mailbox;
	uint8_t **key;
};

/* Table statistics counters. */
struct table_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t *n_pkts_action; /* Per-action packet counters. */
};
769 
770 /*
771  * Selector.
772  */
/* Selector table: maps a group ID plus selector fields to a member ID. */
struct selector {
	TAILQ_ENTRY(selector) node;
	char name[RTE_SWX_NAME_SIZE];

	struct field *group_id_field; /* Input: group ID. */
	struct field **selector_fields; /* Input: array of n_selector_fields fields. */
	uint32_t n_selector_fields;
	struct header *selector_header;
	struct field *member_id_field; /* Output: selected member ID. */

	uint32_t n_groups_max;
	uint32_t n_members_per_group_max;

	uint32_t id;
};

TAILQ_HEAD(selector_tailq, selector);

/* Per-thread selector lookup state. */
struct selector_runtime {
	void *mailbox;
	uint8_t **group_id_buffer;
	uint8_t **selector_buffer;
	uint8_t **member_id_buffer;
};

/* Selector statistics counters. */
struct selector_statistics {
	uint64_t n_pkts;
};
801 
802 /*
803  * Learner table.
804  */
/* Learner table: like a regular table, but entries can also be added
 * (learn instruction) or removed (forget instruction) from the data path.
 */
struct learner {
	TAILQ_ENTRY(learner) node;
	char name[RTE_SWX_NAME_SIZE];

	/* Match. */
	struct field **fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	struct header *header;

	/* Action. */
	struct action **actions; /* Array of n_actions elements. */
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const;
	uint32_t action_data_size_max;
	int *action_is_for_table_entries; /* Per-action flag, parallel to actions. */
	int *action_is_for_default_entry; /* Per-action flag, parallel to actions. */

	uint32_t size;
	uint32_t timeout; /* Entry timeout. NOTE(review): unit not visible here -- confirm. */
	uint32_t id;
};

TAILQ_HEAD(learner_tailq, learner);

/* Per-thread learner table lookup state. */
struct learner_runtime {
	void *mailbox;
	uint8_t **key;
};

/* Learner table statistics counters. */
struct learner_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
	uint64_t n_pkts_forget;
	uint64_t *n_pkts_action; /* Per-action packet counters. */
};
842 
843 /*
844  * Register array.
845  */
/* Register array declaration. */
struct regarray {
	TAILQ_ENTRY(regarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint64_t init_val; /* Initial value of every register. */
	uint32_t size; /* Number of registers. */
	uint32_t id;
};

TAILQ_HEAD(regarray_tailq, regarray);

/* Data path state of a register array. */
struct regarray_runtime {
	uint64_t *regarray;
	uint32_t size_mask; /* NOTE(review): presumably size is a power of two and this is size - 1 -- confirm. */
};
860 
861 /*
862  * Meter array.
863  */
/* Meter profile: trTCM parameters shared by multiple meters. */
struct meter_profile {
	TAILQ_ENTRY(meter_profile) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_meter_trtcm_params params;
	struct rte_meter_trtcm_profile profile;
	uint32_t n_users; /* Number of meters currently using this profile. */
};

TAILQ_HEAD(meter_profile_tailq, meter_profile);

/* Meter array declaration. */
struct metarray {
	TAILQ_ENTRY(metarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint32_t size; /* Number of meters. */
	uint32_t id;
};

TAILQ_HEAD(metarray_tailq, metarray);

/* Single meter: trTCM run-time state, profile and per-color statistics. */
struct meter {
	struct rte_meter_trtcm m;
	struct meter_profile *profile;
	enum rte_color color_mask;
	uint8_t pad[20]; /* NOTE(review): presumably pads the struct for alignment -- confirm size math. */

	uint64_t n_pkts[RTE_COLORS]; /* Per-output-color packet counters. */
	uint64_t n_bytes[RTE_COLORS]; /* Per-output-color byte counters. */
};

/* Data path state of a meter array. */
struct metarray_runtime {
	struct meter *metarray;
	uint32_t size_mask; /* NOTE(review): presumably size is a power of two and this is size - 1 -- confirm. */
};
897 
898 /*
899  * Pipeline.
900  */
/* Per-thread pipeline execution state: the current packet and everything
 * the instruction executors read and write while processing it.
 */
struct thread {
	/* Packet. */
	struct rte_swx_pkt pkt;
	uint8_t *ptr; /* NOTE(review): presumably the current position within the packet -- confirm. */

	/* Structures. */
	uint8_t **structs; /* Struct base pointers, indexed by struct ID. */

	/* Packet headers. */
	struct header_runtime *headers; /* Extracted or generated headers. */
	struct header_out_runtime *headers_out; /* Emitted headers. */
	uint8_t *header_storage;
	uint8_t *header_out_storage;
	uint64_t valid_headers; /* Bit mask: bit N is set when header N is valid. */
	uint32_t n_headers_out;

	/* Packet meta-data. */
	uint8_t *metadata;

	/* Tables. */
	struct table_runtime *tables;
	struct selector_runtime *selectors;
	struct learner_runtime *learners;
	struct rte_swx_table_state *table_state;
	uint64_t action_id; /* ID of the most recently run action. */
	int hit; /* 0 = Miss, 1 = Hit. */
	uint32_t learner_id;
	uint64_t time;

	/* Extern objects and functions. */
	struct extern_obj_runtime *extern_objs;
	struct extern_func_runtime *extern_funcs;

	/* Instructions. */
	struct instruction *ip; /* Instruction pointer. */
	struct instruction *ret; /* Instruction to return to (see INSTR_RETURN). */
};

/* 64-bit mask helpers. SET/CLR evaluate to the new mask value; they do not
 * modify their argument.
 */
#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))

/* Non-zero when the given header is currently valid for this thread. */
#define HEADER_VALID(thread, header_id) \
	MASK64_BIT_GET((thread)->valid_headers, header_id)
945 
946 static inline uint64_t
947 instr_operand_hbo(struct thread *t, const struct instr_operand *x)
948 {
949 	uint8_t *x_struct = t->structs[x->struct_id];
950 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
951 	uint64_t x64 = *x64_ptr;
952 	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
953 
954 	return x64 & x64_mask;
955 }
956 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read an instruction operand stored in Network Byte Order (e.g. a header
 * field): byte-swap the 64-bit word and shift it down so that only the
 * operand's n_bits bits remain.
 */
static inline uint64_t
instr_operand_nbo(struct thread *t, const struct instr_operand *x)
{
	uint8_t *x_struct = t->structs[x->struct_id];
	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
	uint64_t x64 = *x64_ptr;

	return ntoh64(x64) >> (64 - x->n_bits);
}

#else

/* On big endian hosts NBO equals HBO, so reuse the HBO reader. */
#define instr_operand_nbo instr_operand_hbo

#endif
974 
/* Generic ALU operation dst = dst <operator> src with both operands in
 * Host Byte Order. Only the dst.n_bits destination bits are updated; the
 * remaining bits of the destination 64-bit word are preserved.
 */
#define ALU(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
993 
994 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
995 
996 #define ALU_MH(thread, ip, operator)  \
997 {                                                                              \
998 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
999 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
1000 	uint64_t dst64 = *dst64_ptr;                                           \
1001 	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
1002 	uint64_t dst = dst64 & dst64_mask;                                     \
1003 									       \
1004 	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
1005 	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
1006 	uint64_t src64 = *src64_ptr;                                           \
1007 	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
1008 									       \
1009 	uint64_t result = dst operator src;                                    \
1010 									       \
1011 	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
1012 }
1013 
1014 #define ALU_HM(thread, ip, operator)  \
1015 {                                                                              \
1016 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
1017 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
1018 	uint64_t dst64 = *dst64_ptr;                                           \
1019 	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
1020 	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
1021 									       \
1022 	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
1023 	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
1024 	uint64_t src64 = *src64_ptr;                                           \
1025 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
1026 	uint64_t src = src64 & src64_mask;                                     \
1027 									       \
1028 	uint64_t result = dst operator src;                                    \
1029 	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
1030 									       \
1031 	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
1032 }
1033 
1034 #define ALU_HM_FAST(thread, ip, operator)  \
1035 {                                                                                 \
1036 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
1037 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
1038 	uint64_t dst64 = *dst64_ptr;                                              \
1039 	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
1040 	uint64_t dst = dst64 & dst64_mask;                                        \
1041 										  \
1042 	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
1043 	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
1044 	uint64_t src64 = *src64_ptr;                                              \
1045 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
1046 	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
1047 										  \
1048 	uint64_t result = dst operator src;                                       \
1049 										  \
1050 	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
1051 }
1052 
1053 #define ALU_HH(thread, ip, operator)  \
1054 {                                                                              \
1055 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
1056 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
1057 	uint64_t dst64 = *dst64_ptr;                                           \
1058 	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
1059 	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
1060 									       \
1061 	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
1062 	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
1063 	uint64_t src64 = *src64_ptr;                                           \
1064 	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
1065 									       \
1066 	uint64_t result = dst operator src;                                    \
1067 	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
1068 									       \
1069 	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
1070 }
1071 
/*
 * ALU with both the destination and the source being header fields (stored in
 * network byte order). Fast variant: operates directly on the raw 64-bit
 * words without any byte swap; the source is aligned to the destination field
 * by a shift pair instead. NOTE(review): presumably only selected by the
 * instruction translator for operator/field-size combinations where skipping
 * the byte swaps is safe (e.g. bit-wise operators) -- confirm.
 */
#define ALU_HH_FAST(thread, ip, operator)  \
{                                                                                             \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
	uint64_t dst64 = *dst64_ptr;                                                          \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
	uint64_t dst = dst64 & dst64_mask;                                                    \
											      \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
	uint64_t src64 = *src64_ptr;                                                          \
	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
											      \
	uint64_t result = dst operator src;                                                   \
											      \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
}
1089 
#else

/*
 * Big endian host: header fields are already in host byte order, so all the
 * header-field ALU variants collapse to the generic ALU macro.
 */
#define ALU_MH ALU
#define ALU_HM ALU
#define ALU_HM_FAST ALU
#define ALU_HH ALU
#define ALU_HH_FAST ALU

#endif
1099 
/*
 * ALU with the source being an immediate value embedded into the instruction
 * ((ip)->alu.src_val) and the destination being a field in host byte order.
 * Destination bits outside the field are preserved.
 */
#define ALU_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}

/* Destination meta-data field (host byte order), source immediate value. */
#define ALU_MI ALU_I
1116 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * ALU with the destination being a header field (network byte order) and the
 * source being an immediate value: the destination is swapped to host byte
 * order before the operation and back to network byte order afterwards.
 */
#define ALU_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}

#else

/* Big endian host: no byte swap needed, reuse the immediate-value ALU. */
#define ALU_HI ALU_I

#endif
1140 
/*
 * mov: copy the source field into the destination field, both in host byte
 * order. The source is masked to its width; destination bits outside the
 * destination field are preserved.
 */
#define MOV(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1156 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * mov variants on a little endian host. Naming convention (observed from the
 * byte swaps below): first suffix letter is the destination, second is the
 * source; M = meta-data/action field (host byte order), H = header field
 * (network byte order).
 */

/* Destination in host byte order, source in network byte order. */
#define MOV_MH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}

/* Destination in network byte order, source in host byte order. */
#define MOV_HM(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}

/* Both fields in network byte order: realign with shifts, no byte swap. */
#define MOV_HH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
									       \
	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}

#else

/* Big endian host: every field is already in host byte order. */
#define MOV_MH MOV
#define MOV_HM MOV
#define MOV_HH MOV

#endif
1214 
/*
 * mov with the source being an immediate value embedded into the instruction
 * ((ip)->mov.src_val). Destination bits outside the field are preserved.
 */
#define MOV_I(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint64_t src = (ip)->mov.src_val;                                      \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1226 
/*
 * Conditional jump comparing two fields, both in host byte order. When
 * "a operator b" holds, the thread instruction pointer is set to the jump
 * target ((ip)->jmp.ip), otherwise it advances to the next instruction.
 */
#define JMP_CMP(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1243 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * Conditional jump variants on a little endian host. Suffix letters name the
 * two operands (a, b): M = meta-data/action field (host byte order),
 * H = header field (network byte order, swapped with ntoh64 before compare).
 */

/* Operand a in host byte order, operand b in network byte order. */
#define JMP_CMP_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* Operand a in network byte order, operand b in host byte order. */
#define JMP_CMP_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* Both operands in network byte order, compared in host byte order. */
#define JMP_CMP_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/*
 * Fast variant: compares the raw (unswapped) words left-aligned via shifts.
 * NOTE(review): the shifted words preserve equality but not numeric ordering
 * on a little endian host, so this is presumably only selected for the
 * equality/inequality operators -- confirm against the translator.
 */
#define JMP_CMP_HH_FAST(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#else

/* Big endian host: every field is already in host byte order. */
#define JMP_CMP_MH JMP_CMP
#define JMP_CMP_HM JMP_CMP
#define JMP_CMP_HH JMP_CMP
#define JMP_CMP_HH_FAST JMP_CMP

#endif
1316 
/*
 * Conditional jump comparing a field (host byte order) against an immediate
 * value embedded into the instruction ((ip)->jmp.b_val).
 */
#define JMP_CMP_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* Operand a is a meta-data field (host byte order), operand b immediate. */
#define JMP_CMP_MI JMP_CMP_I

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Operand a is a header field: swap to host byte order before comparing. */
#define JMP_CMP_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#else

/* Big endian host: header fields need no byte swap. */
#define JMP_CMP_HI JMP_CMP_I

#endif
1351 
/*
 * Read an n_bits-wide field located at the given byte offset within the
 * thread meta-data, returned right-aligned within a uint64_t. n_bits must be
 * in [1, 64]: a shift by 64 (n_bits == 0) would be undefined behavior.
 * NOTE(review): the 64-bit access can touch up to 7 bytes past the field;
 * the meta-data buffer presumably guarantees this headroom -- confirm.
 */
#define METADATA_READ(thread, offset, n_bits)                                  \
({                                                                             \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
	(m64 & m64_mask);                                                      \
})

/*
 * Write an n_bits-wide field (n_bits in [1, 64]) at the given byte offset
 * within the thread meta-data, preserving the bits outside the field.
 */
#define METADATA_WRITE(thread, offset, n_bits, value)                          \
{                                                                              \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
									       \
	uint64_t m_new = value;                                                \
									       \
	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
}
1370 
/* Number of pipeline threads. Must be a power of two: thread_yield() wraps
 * the thread ID with a bit-wise AND mask. */
#ifndef RTE_SWX_PIPELINE_THREADS_MAX
#define RTE_SWX_PIPELINE_THREADS_MAX 16
#endif

/* Default size of the pipeline instruction dispatch table. */
#ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
#define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 256
#endif
1378 
/*
 * Pipeline object: the lists of configuration objects registered before
 * build, the run-time structures derived from them at build time, and the
 * per-pipeline execution state.
 */
struct rte_swx_pipeline {
	/* Configuration object lists. */
	struct struct_type_tailq struct_types;
	struct port_in_type_tailq port_in_types;
	struct port_in_tailq ports_in;
	struct port_out_type_tailq port_out_types;
	struct port_out_tailq ports_out;
	struct extern_type_tailq extern_types;
	struct extern_obj_tailq extern_objs;
	struct extern_func_tailq extern_funcs;
	struct header_tailq headers;
	struct struct_type *metadata_st;
	uint32_t metadata_struct_id;
	struct action_tailq actions;
	struct table_type_tailq table_types;
	struct table_tailq tables;
	struct selector_tailq selectors;
	struct learner_tailq learners;
	struct regarray_tailq regarrays;
	struct meter_profile_tailq meter_profiles;
	struct metarray_tailq metarrays;

	/* Run-time structures. */
	struct port_in_runtime *in;
	struct port_out_runtime *out;
	struct instruction **action_instructions;
	action_func_t *action_funcs;
	struct rte_swx_table_state *table_state;
	struct table_statistics *table_stats;
	struct selector_statistics *selector_stats;
	struct learner_statistics *learner_stats;
	struct regarray_runtime *regarray_runtime;
	struct metarray_runtime *metarray_runtime;
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	instr_exec_t *instruction_table;
	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
	void *lib;

	/* Object counts and execution state. */
	uint32_t n_structs;
	uint32_t n_ports_in;
	uint32_t n_ports_out;
	uint32_t n_extern_objs;
	uint32_t n_extern_funcs;
	uint32_t n_actions;
	uint32_t n_tables;
	uint32_t n_selectors;
	uint32_t n_learners;
	uint32_t n_regarrays;
	uint32_t n_metarrays;
	uint32_t n_headers;
	uint32_t thread_id; /* Current thread (round-robin, see thread_yield). */
	uint32_t port_id; /* Current input port (round-robin). */
	uint32_t n_instructions;
	int build_done;
	int numa_node;
};
1434 
1435 /*
1436  * Instruction.
1437  */
/*
 * Advance the current input port ID in round-robin order. Relies on
 * p->n_ports_in being a power of two (wrap-around via bit-wise AND mask);
 * presumably enforced at pipeline build time -- confirm.
 */
static inline void
pipeline_port_inc(struct rte_swx_pipeline *p)
{
	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
}
1443 
/* Point the thread instruction pointer back to the first instruction. */
static inline void
thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
{
	t->ip = p->instructions;
}
1449 
/* Set the thread instruction pointer to the given instruction. */
static inline void
thread_ip_set(struct thread *t, struct instruction *ip)
{
	t->ip = ip;
}
1455 
/*
 * Call into the instruction sequence of the given action: save the next
 * instruction as the return address and jump to the action's first
 * instruction.
 */
static inline void
thread_ip_action_call(struct rte_swx_pipeline *p,
		      struct thread *t,
		      uint32_t action_id)
{
	t->ret = t->ip + 1;
	t->ip = p->action_instructions[action_id];
}
1464 
1465 static inline void
1466 thread_ip_inc(struct rte_swx_pipeline *p);
1467 
1468 static inline void
1469 thread_ip_inc(struct rte_swx_pipeline *p)
1470 {
1471 	struct thread *t = &p->threads[p->thread_id];
1472 
1473 	t->ip++;
1474 }
1475 
/*
 * Branchless conditional advance: move the instruction pointer forward by
 * one only when cond is non-zero (cond is expected to be 0 or 1).
 */
static inline void
thread_ip_inc_cond(struct thread *t, int cond)
{
	t->ip += cond;
}
1481 
/*
 * Move on to the next pipeline thread in round-robin order. Relies on
 * RTE_SWX_PIPELINE_THREADS_MAX being a power of two (AND-mask wrap-around).
 */
static inline void
thread_yield(struct rte_swx_pipeline *p)
{
	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}

/* Branchless conditional yield: advance to the next thread only when cond is
 * non-zero (cond is expected to be 0 or 1).
 */
static inline void
thread_yield_cond(struct rte_swx_pipeline *p, int cond)
{
	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}
1493 
1494 /*
1495  * rx.
1496  */
/*
 * rx: attempt to receive one packet from the current input port into the
 * current thread. Returns non-zero when a packet was received, 0 otherwise.
 * Regardless of the outcome, the header state of the thread is reset, the
 * input port ID is written to the meta-data field designated by the
 * instruction, the table state is refreshed and the input port is advanced
 * in round-robin order.
 */
static inline int
__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct port_in_runtime *port = &p->in[p->port_id];
	struct rte_swx_pkt *pkt = &t->pkt;
	int pkt_received;

	/* Packet. */
	pkt_received = port->pkt_rx(port->obj, pkt);
	t->ptr = &pkt->pkt[pkt->offset];
	rte_prefetch0(t->ptr);

	TRACE("[Thread %2u] rx %s from port %u\n",
	      p->thread_id,
	      pkt_received ? "1 pkt" : "0 pkts",
	      p->port_id);

	/* Headers. */
	t->valid_headers = 0;
	t->n_headers_out = 0;

	/* Meta-data. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);

	/* Tables. */
	t->table_state = p->table_state;

	/* Thread. */
	pipeline_port_inc(p);

	return pkt_received;
}
1529 
/*
 * rx instruction handler: when no packet was received, the instruction
 * pointer is not advanced, so this thread retries the same rx instruction on
 * its next turn; either way, execution always yields to the next thread.
 */
static inline void
instr_rx_exec(struct rte_swx_pipeline *p)
{
	struct thread *t = &p->threads[p->thread_id];
	struct instruction *ip = t->ip;
	int pkt_received;

	/* Packet. */
	pkt_received = __instr_rx_exec(p, t, ip);

	/* Thread. */
	thread_ip_inc_cond(t, pkt_received);
	thread_yield(p);
}
1544 
1545 /*
1546  * tx.
1547  */
1548 static inline void
1549 emit_handler(struct thread *t)
1550 {
1551 	struct header_out_runtime *h0 = &t->headers_out[0];
1552 	struct header_out_runtime *h1 = &t->headers_out[1];
1553 	uint32_t offset = 0, i;
1554 
1555 	/* No header change or header decapsulation. */
1556 	if ((t->n_headers_out == 1) &&
1557 	    (h0->ptr + h0->n_bytes == t->ptr)) {
1558 		TRACE("Emit handler: no header change or header decap.\n");
1559 
1560 		t->pkt.offset -= h0->n_bytes;
1561 		t->pkt.length += h0->n_bytes;
1562 
1563 		return;
1564 	}
1565 
1566 	/* Header encapsulation (optionally, with prior header decapsulation). */
1567 	if ((t->n_headers_out == 2) &&
1568 	    (h1->ptr + h1->n_bytes == t->ptr) &&
1569 	    (h0->ptr == h0->ptr0)) {
1570 		uint32_t offset;
1571 
1572 		TRACE("Emit handler: header encapsulation.\n");
1573 
1574 		offset = h0->n_bytes + h1->n_bytes;
1575 		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
1576 		t->pkt.offset -= offset;
1577 		t->pkt.length += offset;
1578 
1579 		return;
1580 	}
1581 
1582 	/* For any other case. */
1583 	TRACE("Emit handler: complex case.\n");
1584 
1585 	for (i = 0; i < t->n_headers_out; i++) {
1586 		struct header_out_runtime *h = &t->headers_out[i];
1587 
1588 		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
1589 		offset += h->n_bytes;
1590 	}
1591 
1592 	if (offset) {
1593 		memcpy(t->ptr - offset, t->header_out_storage, offset);
1594 		t->pkt.offset -= offset;
1595 		t->pkt.length += offset;
1596 	}
1597 }
1598 
/*
 * tx: transmit the current packet to the output port whose ID is read at
 * run-time from the meta-data field designated by the instruction. The
 * emitted headers are written in front of the payload first.
 */
static inline void
__instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	port->pkt_tx(port->obj, pkt);
}
1616 
1617 static inline void
1618 __instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
1619 {
1620 	uint64_t port_id = ip->io.io.val;
1621 	struct port_out_runtime *port = &p->out[port_id];
1622 	struct rte_swx_pkt *pkt = &t->pkt;
1623 
1624 	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
1625 	      p->thread_id,
1626 	      (uint32_t)port_id);
1627 
1628 	/* Headers. */
1629 	emit_handler(t);
1630 
1631 	/* Packet. */
1632 	port->pkt_tx(port->obj, pkt);
1633 }
1634 
/*
 * drop: transmit the current packet to the last output port
 * (p->n_ports_out - 1), which serves as the drop port.
 */
static inline void
__instr_drop_exec(struct rte_swx_pipeline *p,
		  struct thread *t,
		  const struct instruction *ip __rte_unused)
{
	uint64_t port_id = p->n_ports_out - 1;
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	TRACE("[Thread %2u]: drop 1 pkt\n",
	      p->thread_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	port->pkt_tx(port->obj, pkt);
}
1653 
1654 /*
1655  * extract.
1656  */
/*
 * Extract n_extract headers from the current packet position: for each
 * header, point its struct at the packet buffer (zero-copy), mark it valid,
 * and advance the packet offset/length/pointer past it. Header sizes are the
 * fixed sizes stored in the instruction; no bounds check is performed here.
 */
static inline void
__instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip,
			      uint32_t n_extract)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;
	uint32_t i;

	for (i = 0; i < n_extract; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];
		uint32_t n_bytes = ip->io.hdr.n_bytes[i];

		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
		      p->thread_id,
		      header_id,
		      n_bytes);

		/* Headers. */
		t->structs[struct_id] = ptr;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);

		/* Packet. */
		offset += n_bytes;
		length -= n_bytes;
		ptr += n_bytes;
	}

	/* Headers. */
	t->valid_headers = valid_headers;

	/* Packet. */
	t->pkt.offset = offset;
	t->pkt.length = length;
	t->ptr = ptr;
}
1697 
/* Extract a single header. */
static inline void
__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	__instr_hdr_extract_many_exec(p, t, ip, 1);
}
1705 
/*
 * Fused extract variants: a single instruction standing in for N consecutive
 * extract instructions (as the TRACE messages state), avoiding per-header
 * instruction dispatch overhead.
 */
static inline void
__instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 2);
}

static inline void
__instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 3);
}

static inline void
__instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 4);
}

static inline void
__instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 5);
}

static inline void
__instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 6);
}

static inline void
__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 7);
}

static inline void
__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 8);
}
1775 
/*
 * Extract a variable-size header: the size of the variable trailing part is
 * read at run-time from the meta-data field designated by the instruction
 * and added to the fixed part size taken from the instruction. The total
 * size is recorded in the header run-time state for later emit.
 */
static inline void
__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;

	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];
	uint32_t n_bytes = ip->io.hdr.n_bytes[0];

	struct header_runtime *h = &t->headers[header_id];

	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
	      p->thread_id,
	      header_id,
	      n_bytes,
	      n_bytes_last);

	n_bytes += n_bytes_last;

	/* Headers. */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	h->n_bytes = n_bytes;

	/* Packet. */
	t->pkt.offset = offset + n_bytes;
	t->pkt.length = length - n_bytes;
	t->ptr = ptr + n_bytes;
}
1811 
/*
 * lookahead: map the header onto the current packet position and mark it
 * valid WITHOUT consuming any packet bytes (packet offset, length and
 * pointer are left unchanged).
 */
static inline void
__instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;

	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];

	TRACE("[Thread %2u]: lookahead header %u\n",
	      p->thread_id,
	      header_id);

	/* Headers. */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
}
1831 
1832 /*
1833  * emit.
1834  */
/*
 * Emit n_emit headers: append them to the thread's output header list
 * (t->headers_out), merging each header into the previous output entry when
 * it is physically contiguous with it in memory. Invalid headers are
 * silently skipped. The actual copy in front of the packet payload is
 * deferred to emit_handler() at tx time.
 */
static inline void
__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip,
			   uint32_t n_emit)
{
	uint64_t valid_headers = t->valid_headers;
	uint32_t n_headers_out = t->n_headers_out;
	struct header_out_runtime *ho = &t->headers_out[n_headers_out - 1];
	uint8_t *ho_ptr = NULL;
	uint32_t ho_nbytes = 0, first = 1, i;

	for (i = 0; i < n_emit; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];

		struct header_runtime *hi = &t->headers[header_id];
		uint8_t *hi_ptr0 = hi->ptr0;
		uint32_t n_bytes = hi->n_bytes;

		uint8_t *hi_ptr = t->structs[struct_id];

		/* Skip headers that are not currently valid. */
		if (!MASK64_BIT_GET(valid_headers, header_id))
			continue;

		TRACE("[Thread %2u]: emit header %u\n",
		      p->thread_id,
		      header_id);

		/* Headers. */
		if (first) {
			first = 0;

			/* First emitted header: either start a brand new
			 * output list or continue from the current last
			 * output entry.
			 */
			if (!t->n_headers_out) {
				ho = &t->headers_out[0];

				ho->ptr0 = hi_ptr0;
				ho->ptr = hi_ptr;

				ho_ptr = hi_ptr;
				ho_nbytes = n_bytes;

				n_headers_out = 1;

				continue;
			} else {
				ho_ptr = ho->ptr;
				ho_nbytes = ho->n_bytes;
			}
		}

		/* Merge into the current output entry when contiguous in
		 * memory, otherwise open a new output entry.
		 */
		if (ho_ptr + ho_nbytes == hi_ptr) {
			ho_nbytes += n_bytes;
		} else {
			ho->n_bytes = ho_nbytes;

			ho++;
			ho->ptr0 = hi_ptr0;
			ho->ptr = hi_ptr;

			ho_ptr = hi_ptr;
			ho_nbytes = n_bytes;

			n_headers_out++;
		}
	}

	/* Flush the byte count of the last output entry. */
	ho->n_bytes = ho_nbytes;
	t->n_headers_out = n_headers_out;
}
1905 
/* Emit a single header. */
static inline void
__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
		      struct thread *t,
		      const struct instruction *ip)
{
	__instr_hdr_emit_many_exec(p, t, ip, 1);
}
1913 
/*
 * Fused emit + tx variants: a single instruction standing in for N emit
 * instructions followed by a tx instruction (as the TRACE messages state),
 * avoiding per-instruction dispatch overhead on the transmit path.
 */
static inline void
__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 1);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 2);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 3);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 4);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 5);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 6);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 7);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 8);
	__instr_tx_exec(p, t, ip);
}
2001 
2002 /*
2003  * validate.
2004  */
/* Mark the header identified by ip->valid.header_id as valid by setting its
 * bit in the thread's valid_headers bit mask. No header data is touched.
 */
static inline void
__instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
			  struct thread *t,
			  const struct instruction *ip)
{
	uint32_t header_id = ip->valid.header_id;

	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);

	/* Headers. */
	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
}
2017 
2018 /*
2019  * invalidate.
2020  */
/* Mark the header identified by ip->valid.header_id as invalid by clearing
 * its bit in the thread's valid_headers bit mask. No header data is touched.
 */
static inline void
__instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint32_t header_id = ip->valid.header_id;

	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);

	/* Headers. */
	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
}
2033 
2034 /*
2035  * learn.
2036  */
/* learn: add an entry to the learner table of the current thread context.
 * The learner ID is read from t->learner_id (presumably set by the most
 * recent learner table lookup — confirm); the entry's action comes from the
 * instruction and its key from the thread metadata starting at mf_offset.
 *
 * NOTE(review): the TRACE below prints "ok" for a non-zero status, and the
 * same status value is used to index the two-element n_pkts_learn[] counter
 * array; confirm both against the return value convention documented for
 * rte_swx_table_learner_add().
 */
static inline void
__instr_learn_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip)
{
	uint64_t action_id = ip->learn.action_id;
	uint32_t mf_offset = ip->learn.mf_offset;
	uint32_t learner_id = t->learner_id;
	/* Learner table states are stored after the n_tables regular table
	 * states and the n_selectors selector table states.
	 */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];
	uint32_t status;

	/* Table. */
	status = rte_swx_table_learner_add(ts->obj,
					   l->mailbox,
					   t->time,
					   action_id,
					   &t->metadata[mf_offset]);

	TRACE("[Thread %2u] learner %u learn %s\n",
	      p->thread_id,
	      learner_id,
	      status ? "ok" : "error");

	stats->n_pkts_learn[status] += 1;
}
2065 
2066 /*
2067  * forget.
2068  */
/* forget: delete the current entry from the learner table of the current
 * thread context (t->learner_id), using the per-thread learner mailbox to
 * identify the entry.
 */
static inline void
__instr_forget_exec(struct rte_swx_pipeline *p,
		    struct thread *t,
		    const struct instruction *ip __rte_unused)
{
	uint32_t learner_id = t->learner_id;
	/* Learner table states are stored after the n_tables regular table
	 * states and the n_selectors selector table states.
	 */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_delete(ts->obj, l->mailbox);

	TRACE("[Thread %2u] learner %u forget\n",
	      p->thread_id,
	      learner_id);

	stats->n_pkts_forget += 1;
}
2089 
2090 /*
2091  * extern.
2092  */
2093 static inline uint32_t
2094 __instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
2095 			struct thread *t,
2096 			const struct instruction *ip)
2097 {
2098 	uint32_t obj_id = ip->ext_obj.ext_obj_id;
2099 	uint32_t func_id = ip->ext_obj.func_id;
2100 	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
2101 	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
2102 	uint32_t done;
2103 
2104 	TRACE("[Thread %2u] extern obj %u member func %u\n",
2105 	      p->thread_id,
2106 	      obj_id,
2107 	      func_id);
2108 
2109 	done = func(obj->obj, obj->mailbox);
2110 
2111 	return done;
2112 }
2113 
2114 static inline uint32_t
2115 __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
2116 			 struct thread *t,
2117 			 const struct instruction *ip)
2118 {
2119 	uint32_t ext_func_id = ip->ext_func.ext_func_id;
2120 	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
2121 	rte_swx_extern_func_t func = ext_func->func;
2122 	uint32_t done;
2123 
2124 	TRACE("[Thread %2u] extern func %u\n",
2125 	      p->thread_id,
2126 	      ext_func_id);
2127 
2128 	done = func(ext_func->mailbox);
2129 
2130 	return done;
2131 }
2132 
2133 /*
2134  * mov.
2135  */
/* "mov" instruction variants. The suffix selects the MOV_* macro
 * specialization for the operand kinds: none/mh/hm/hh (presumably
 * m = metadata/struct field and h = header field — confirm against the
 * MOV macro definitions); "i" moves the immediate value ip->mov.src_val.
 */
static inline void
__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
		 struct thread *t,
		 const struct instruction *ip)
{
	TRACE("[Thread %2u] mov\n", p->thread_id);

	MOV(t, ip);
}

static inline void
__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);

	MOV_MH(t, ip);
}

static inline void
__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);

	MOV_HM(t, ip);
}

static inline void
__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);

	MOV_HH(t, ip);
}

static inline void
__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
		   struct thread *t,
		   const struct instruction *ip)
{
	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);

	MOV_I(t, ip);
}
2185 
2186 /*
2187  * dma.
2188  */
/* Copy ("DMA") n_dma chunks of action data into packet headers and mark each
 * destination header valid. The source is always struct 0 (named action_data
 * here); each chunk's destination header, struct slot, source offset and
 * byte count come from the per-instruction dma arrays.
 */
static inline void
__instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip,
			 uint32_t n_dma)
{
	uint8_t *action_data = t->structs[0];
	uint64_t valid_headers = t->valid_headers;
	uint32_t i;

	for (i = 0; i < n_dma; i++) {
		uint32_t header_id = ip->dma.dst.header_id[i];
		uint32_t struct_id = ip->dma.dst.struct_id[i];
		uint32_t offset = ip->dma.src.offset[i];
		uint32_t n_bytes = ip->dma.n_bytes[i];

		struct header_runtime *h = &t->headers[header_id];
		uint8_t *h_ptr0 = h->ptr0;
		uint8_t *h_ptr = t->structs[struct_id];

		/* Write to the header's current buffer when it is already
		 * valid, otherwise to its default buffer (ptr0), which then
		 * becomes the header's current struct pointer below.
		 */
		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
			h_ptr : h_ptr0;
		void *src = &action_data[offset];

		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);

		/* Headers. */
		memcpy(dst, src, n_bytes);
		t->structs[struct_id] = dst;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	}

	/* Commit the updated validity mask once, after all copies. */
	t->valid_headers = valid_headers;
}
2223 
/* dma instruction wrappers: copy N chunks (N = 1 .. 8) of action data into
 * headers. The N >= 2 forms are fused versions of N consecutive dma
 * instructions (per the TRACE messages).
 */
static inline void
__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	__instr_dma_ht_many_exec(p, t, ip, 1);
}

static inline void
__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 2);
}

static inline void
__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 3);
}

static inline void
__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 4);
}

static inline void
__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 5);
}

static inline void
__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 6);
}

static inline void
__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 7);
}

static inline void
__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 8);
}
2285 
2286 /*
2287  * alu.
2288  */
/* "add" instruction variants. The suffix selects the ALU_* macro
 * specialization for the operand kinds: none/mh/hm/hh (presumably
 * m = metadata/struct field and h = header field — confirm against the ALU
 * macro definitions); mi/hi take an immediate source operand.
 */
static inline void
__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] add\n", p->thread_id);

	ALU(t, ip, +);
}

static inline void
__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mh)\n", p->thread_id);

	ALU_MH(t, ip, +);
}

static inline void
__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hm)\n", p->thread_id);

	ALU_HM(t, ip, +);
}

static inline void
__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hh)\n", p->thread_id);

	ALU_HH(t, ip, +);
}

static inline void
__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mi)\n", p->thread_id);

	ALU_MI(t, ip, +);
}

static inline void
__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hi)\n", p->thread_id);

	ALU_HI(t, ip, +);
}
2348 
/* "sub" instruction variants (operand-kind suffixes: mh/hm/hh/mi/hi). */
static inline void
__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] sub\n", p->thread_id);

	ALU(t, ip, -);
}

static inline void
__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);

	ALU_MH(t, ip, -);
}

static inline void
__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);

	ALU_HM(t, ip, -);
}

static inline void
__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);

	ALU_HH(t, ip, -);
}

static inline void
__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);

	ALU_MI(t, ip, -);
}

static inline void
__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);

	ALU_HI(t, ip, -);
}
2408 
/* "shl" (shift left) instruction variants (operand-kind suffixes:
 * mh/hm/hh/mi/hi).
 */
static inline void
__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shl\n", p->thread_id);

	ALU(t, ip, <<);
}

static inline void
__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);

	ALU_MH(t, ip, <<);
}

static inline void
__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);

	ALU_HM(t, ip, <<);
}

static inline void
__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);

	ALU_HH(t, ip, <<);
}

static inline void
__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);

	ALU_MI(t, ip, <<);
}

static inline void
__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);

	ALU_HI(t, ip, <<);
}
2468 
/* "shr" (shift right) instruction variants (operand-kind suffixes:
 * mh/hm/hh/mi/hi).
 */
static inline void
__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shr\n", p->thread_id);

	ALU(t, ip, >>);
}

static inline void
__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);

	ALU_MH(t, ip, >>);
}

static inline void
__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);

	ALU_HM(t, ip, >>);
}

static inline void
__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);

	ALU_HH(t, ip, >>);
}

static inline void
__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);

	ALU_MI(t, ip, >>);
}

static inline void
__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);

	ALU_HI(t, ip, >>);
}
2529 
/* "and" (bitwise AND) instruction variants (operand-kind suffixes:
 * mh/hm/hh/i). The hm/hh forms use the *_FAST macro specializations.
 */
static inline void
__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] and\n", p->thread_id);

	ALU(t, ip, &);
}

static inline void
__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (mh)\n", p->thread_id);

	ALU_MH(t, ip, &);
}

static inline void
__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, &);
}

static inline void
__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, &);
}

static inline void
__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] and (i)\n", p->thread_id);

	ALU_I(t, ip, &);
}
2579 
/* "or" (bitwise OR) instruction variants (operand-kind suffixes:
 * mh/hm/hh/i). The hm/hh forms use the *_FAST macro specializations.
 */
static inline void
__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] or\n", p->thread_id);

	ALU(t, ip, |);
}

static inline void
__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (mh)\n", p->thread_id);

	ALU_MH(t, ip, |);
}

static inline void
__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, |);
}

static inline void
__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, |);
}

static inline void
__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u] or (i)\n", p->thread_id);

	ALU_I(t, ip, |);
}
2629 
/* "xor" (bitwise XOR) instruction variants (operand-kind suffixes:
 * mh/hm/hh/i). The hm/hh forms use the *_FAST macro specializations.
 */
static inline void
__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] xor\n", p->thread_id);

	ALU(t, ip, ^);
}

static inline void
__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);

	ALU_MH(t, ip, ^);
}

static inline void
__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, ^);
}

static inline void
__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, ^);
}

static inline void
__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (i)\n", p->thread_id);

	ALU_I(t, ip, ^);
}
2679 
/* ckadd (field): fold a source field into the 16-bit ones' complement
 * checksum stored at the destination, using incremental checksum update
 * arithmetic (cf. RFC 1071 / RFC 1624).
 * Assumes 1 <= ip->alu.src.n_bits <= 64, otherwise the mask shift below is
 * undefined behavior — presumably guaranteed by the instruction translator;
 * confirm.
 */
static inline void
__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* The first input (r) is a 16-bit number. The second and the third
	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
	 * three numbers (output r) is a 34-bit number.
	 */
	r += (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is an 18-bit
	 * number. In the worst case scenario, the sum of the two numbers is a
	 * 19-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
	 * therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	/* Zero is encoded as 0xFFFF (ones' complement negative zero). */
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2737 
/* cksub (field): subtract a source field from the 16-bit ones' complement
 * checksum stored at the destination, using incremental checksum update
 * arithmetic (cf. RFC 1071 / RFC 1624).
 * Assumes 1 <= ip->alu.src.n_bits <= 64, otherwise the mask shift below is
 * undefined behavior — presumably guaranteed by the instruction translator;
 * confirm.
 */
static inline void
__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
	 * the following sequence of operations in 2's complement arithmetic:
	 *    a '- b = (a - b) % 0xFFFF.
	 *
	 * In order to prevent an underflow for the below subtraction, in which
	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
	 * minuend), we first add a multiple of the 0xFFFF modulus to the
	 * minuend. The number we add to the minuend needs to be a 34-bit number
	 * or higher, so for readability reasons we picked the 36-bit multiple.
	 * We are effectively turning the 16-bit minuend into a 36-bit number:
	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
	 */
	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */

	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
	 * result (the output r) is a 36-bit number.
	 */
	r -= (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is a 20-bit
	 * number. Their sum is a 21-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	/* Zero is encoded as 0xFFFF (ones' complement negative zero). */
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2807 
/* ckadd over a fixed 20-byte source struct, processed as five 32-bit words
 * with the loop fully unrolled (20 bytes is the size of an IPv4 header
 * without options — presumably the motivating case; confirm). The result is
 * folded into the 16-bit ones' complement checksum at the destination.
 */
static inline void
__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
				struct thread *t,
				const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint32_t *src32_ptr;
	uint64_t r0, r1;

	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Initialize the result with destination 1's complement. */
	r0 = dst;
	r0 = ~r0 & 0xFFFF;

	/* Two accumulators (r0, r1) are used to expose instruction-level
	 * parallelism in the unrolled word sum.
	 */
	r0 += src32_ptr[0]; /* The output r0 is a 33-bit number. */
	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
	r0 += src32_ptr[2]; /* The output r0 is a 34-bit number. */
	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */

	/* The first input is a 16-bit number. The second input is a 19-bit
	 * number. Their sum is a 20-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* Apply 1's complement to the result. */
	r0 = ~r0 & 0xFFFF;
	/* Zero is encoded as 0xFFFF (ones' complement negative zero). */
	r0 = r0 ? r0 : 0xFFFF;

	*dst16_ptr = (uint16_t)r0;
}
2861 
/* ckadd over a whole source header: sum the header as 32-bit words and fold
 * the result into the 16-bit ones' complement checksum at the destination.
 * Note: the word loop covers n_src_header_bytes / 4 words, so any remainder
 * bytes beyond a multiple of 4 are not included in the sum — presumably
 * header sizes here are always 4-byte multiples; confirm.
 * The common 20-byte case is dispatched to the unrolled variant.
 */
static inline void
__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip)
{
	uint32_t src_header_id = ip->alu.src.n_bits; /* The src header ID is stored here. */
	uint32_t n_src_header_bytes = t->headers[src_header_id].n_bytes;
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint32_t *src32_ptr;
	uint64_t r;
	uint32_t i;

	if (n_src_header_bytes == 20) {
		__instr_alu_ckadd_struct20_exec(p, t, ip);
		return;
	}

	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* The max number of 32-bit words in a 32K-byte header is 2^13.
	 * Therefore, in the worst case scenario, a 45-bit number is added to a
	 * 16-bit number (the input r), so the output r is 46-bit number.
	 */
	for (i = 0; i < n_src_header_bytes / 4; i++, src32_ptr++)
		r += *src32_ptr;

	/* The first input is a 16-bit number. The second input is a 30-bit
	 * number. Their sum is a 31-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 15-bit number (0 .. 0x7FFF). The sum is a 17-bit number (0 .. 0x17FFE).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x17FFE), the output r is (0 .. 0x7FFF). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	/* Zero is encoded as 0xFFFF (ones' complement negative zero). */
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2924 
2925 /*
2926  * Register array.
2927  */
2928 static inline uint64_t *
2929 instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
2930 {
2931 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
2932 	return r->regarray;
2933 }
2934 
2935 static inline uint64_t
2936 instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2937 {
2938 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
2939 
2940 	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
2941 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
2942 	uint64_t idx64 = *idx64_ptr;
2943 	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
2944 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
2945 
2946 	return idx;
2947 }
2948 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read a register array index from a network byte order field: byte-swap,
 * right-align the n_bits-wide value, then clip it to the array size.
 */
static inline uint64_t
instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];

	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;

	return idx;
}

#else

/* On big endian hosts the NBO and HBO index reads coincide. */
#define instr_regarray_idx_nbo instr_regarray_idx_hbo

#endif
2969 
2970 static inline uint64_t
2971 instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
2972 {
2973 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
2974 
2975 	uint64_t idx = ip->regarray.idx_val & r->size_mask;
2976 
2977 	return idx;
2978 }
2979 
2980 static inline uint64_t
2981 instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
2982 {
2983 	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
2984 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
2985 	uint64_t src64 = *src64_ptr;
2986 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
2987 	uint64_t src = src64 & src64_mask;
2988 
2989 	return src;
2990 }
2991 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read a register array source operand from a network byte order field:
 * byte-swap, then right-align the n_bits-wide value.
 */
static inline uint64_t
instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);

	return src;
}

#else

/* On big endian hosts the NBO and HBO source reads coincide. */
#define instr_regarray_src_nbo instr_regarray_src_hbo

#endif
3010 
3011 static inline void
3012 instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
3013 {
3014 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
3015 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
3016 	uint64_t dst64 = *dst64_ptr;
3017 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3018 
3019 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3020 
3021 }
3022 
3023 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3024 
3025 static inline void
3026 instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
3027 {
3028 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
3029 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
3030 	uint64_t dst64 = *dst64_ptr;
3031 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3032 
3033 	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
3034 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3035 }
3036 
3037 #else
3038 
3039 #define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
3040 
3041 #endif
3042 
3043 static inline void
3044 __instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
3045 			    struct thread *t,
3046 			    const struct instruction *ip)
3047 {
3048 	uint64_t *regarray, idx;
3049 
3050 	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
3051 
3052 	regarray = instr_regarray_regarray(p, ip);
3053 	idx = instr_regarray_idx_nbo(p, t, ip);
3054 	rte_prefetch0(&regarray[idx]);
3055 }
3056 
3057 static inline void
3058 __instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
3059 			    struct thread *t,
3060 			    const struct instruction *ip)
3061 {
3062 	uint64_t *regarray, idx;
3063 
3064 	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
3065 
3066 	regarray = instr_regarray_regarray(p, ip);
3067 	idx = instr_regarray_idx_hbo(p, t, ip);
3068 	rte_prefetch0(&regarray[idx]);
3069 }
3070 
3071 static inline void
3072 __instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
3073 			    struct thread *t __rte_unused,
3074 			    const struct instruction *ip)
3075 {
3076 	uint64_t *regarray, idx;
3077 
3078 	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
3079 
3080 	regarray = instr_regarray_regarray(p, ip);
3081 	idx = instr_regarray_idx_imm(p, ip);
3082 	rte_prefetch0(&regarray[idx]);
3083 }
3084 
3085 static inline void
3086 __instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
3087 		       struct thread *t,
3088 		       const struct instruction *ip)
3089 {
3090 	uint64_t *regarray, idx;
3091 
3092 	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
3093 
3094 	regarray = instr_regarray_regarray(p, ip);
3095 	idx = instr_regarray_idx_nbo(p, t, ip);
3096 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3097 }
3098 
3099 static inline void
3100 __instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
3101 		       struct thread *t,
3102 		       const struct instruction *ip)
3103 {
3104 	uint64_t *regarray, idx;
3105 
3106 	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
3107 
3108 	/* Structs. */
3109 	regarray = instr_regarray_regarray(p, ip);
3110 	idx = instr_regarray_idx_hbo(p, t, ip);
3111 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3112 }
3113 
3114 static inline void
3115 __instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3116 {
3117 	uint64_t *regarray, idx;
3118 
3119 	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
3120 
3121 	regarray = instr_regarray_regarray(p, ip);
3122 	idx = instr_regarray_idx_nbo(p, t, ip);
3123 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3124 }
3125 
3126 static inline void
3127 __instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3128 {
3129 	uint64_t *regarray, idx;
3130 
3131 	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
3132 
3133 	regarray = instr_regarray_regarray(p, ip);
3134 	idx = instr_regarray_idx_hbo(p, t, ip);
3135 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3136 }
3137 
3138 static inline void
3139 __instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3140 {
3141 	uint64_t *regarray, idx;
3142 
3143 	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
3144 
3145 	regarray = instr_regarray_regarray(p, ip);
3146 	idx = instr_regarray_idx_imm(p, ip);
3147 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3148 }
3149 
3150 static inline void
3151 __instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3152 {
3153 	uint64_t *regarray, idx;
3154 
3155 	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
3156 
3157 	regarray = instr_regarray_regarray(p, ip);
3158 	idx = instr_regarray_idx_imm(p, ip);
3159 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3160 }
3161 
3162 static inline void
3163 __instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3164 {
3165 	uint64_t *regarray, idx, src;
3166 
3167 	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
3168 
3169 	regarray = instr_regarray_regarray(p, ip);
3170 	idx = instr_regarray_idx_nbo(p, t, ip);
3171 	src = instr_regarray_src_nbo(t, ip);
3172 	regarray[idx] = src;
3173 }
3174 
3175 static inline void
3176 __instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3177 {
3178 	uint64_t *regarray, idx, src;
3179 
3180 	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
3181 
3182 	regarray = instr_regarray_regarray(p, ip);
3183 	idx = instr_regarray_idx_nbo(p, t, ip);
3184 	src = instr_regarray_src_hbo(t, ip);
3185 	regarray[idx] = src;
3186 }
3187 
3188 static inline void
3189 __instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3190 {
3191 	uint64_t *regarray, idx, src;
3192 
3193 	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
3194 
3195 	regarray = instr_regarray_regarray(p, ip);
3196 	idx = instr_regarray_idx_hbo(p, t, ip);
3197 	src = instr_regarray_src_nbo(t, ip);
3198 	regarray[idx] = src;
3199 }
3200 
3201 static inline void
3202 __instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3203 {
3204 	uint64_t *regarray, idx, src;
3205 
3206 	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
3207 
3208 	regarray = instr_regarray_regarray(p, ip);
3209 	idx = instr_regarray_idx_hbo(p, t, ip);
3210 	src = instr_regarray_src_hbo(t, ip);
3211 	regarray[idx] = src;
3212 }
3213 
3214 static inline void
3215 __instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3216 {
3217 	uint64_t *regarray, idx, src;
3218 
3219 	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
3220 
3221 	regarray = instr_regarray_regarray(p, ip);
3222 	idx = instr_regarray_idx_nbo(p, t, ip);
3223 	src = ip->regarray.dstsrc_val;
3224 	regarray[idx] = src;
3225 }
3226 
3227 static inline void
3228 __instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3229 {
3230 	uint64_t *regarray, idx, src;
3231 
3232 	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
3233 
3234 	regarray = instr_regarray_regarray(p, ip);
3235 	idx = instr_regarray_idx_hbo(p, t, ip);
3236 	src = ip->regarray.dstsrc_val;
3237 	regarray[idx] = src;
3238 }
3239 
3240 static inline void
3241 __instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3242 {
3243 	uint64_t *regarray, idx, src;
3244 
3245 	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
3246 
3247 	regarray = instr_regarray_regarray(p, ip);
3248 	idx = instr_regarray_idx_imm(p, ip);
3249 	src = instr_regarray_src_nbo(t, ip);
3250 	regarray[idx] = src;
3251 }
3252 
3253 static inline void
3254 __instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3255 {
3256 	uint64_t *regarray, idx, src;
3257 
3258 	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
3259 
3260 	regarray = instr_regarray_regarray(p, ip);
3261 	idx = instr_regarray_idx_imm(p, ip);
3262 	src = instr_regarray_src_hbo(t, ip);
3263 	regarray[idx] = src;
3264 }
3265 
3266 static inline void
3267 __instr_regwr_rii_exec(struct rte_swx_pipeline *p,
3268 		       struct thread *t __rte_unused,
3269 		       const struct instruction *ip)
3270 {
3271 	uint64_t *regarray, idx, src;
3272 
3273 	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
3274 
3275 	regarray = instr_regarray_regarray(p, ip);
3276 	idx = instr_regarray_idx_imm(p, ip);
3277 	src = ip->regarray.dstsrc_val;
3278 	regarray[idx] = src;
3279 }
3280 
3281 static inline void
3282 __instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3283 {
3284 	uint64_t *regarray, idx, src;
3285 
3286 	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
3287 
3288 	regarray = instr_regarray_regarray(p, ip);
3289 	idx = instr_regarray_idx_nbo(p, t, ip);
3290 	src = instr_regarray_src_nbo(t, ip);
3291 	regarray[idx] += src;
3292 }
3293 
3294 static inline void
3295 __instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3296 {
3297 	uint64_t *regarray, idx, src;
3298 
3299 	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
3300 
3301 	regarray = instr_regarray_regarray(p, ip);
3302 	idx = instr_regarray_idx_nbo(p, t, ip);
3303 	src = instr_regarray_src_hbo(t, ip);
3304 	regarray[idx] += src;
3305 }
3306 
3307 static inline void
3308 __instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3309 {
3310 	uint64_t *regarray, idx, src;
3311 
3312 	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
3313 
3314 	regarray = instr_regarray_regarray(p, ip);
3315 	idx = instr_regarray_idx_hbo(p, t, ip);
3316 	src = instr_regarray_src_nbo(t, ip);
3317 	regarray[idx] += src;
3318 }
3319 
3320 static inline void
3321 __instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3322 {
3323 	uint64_t *regarray, idx, src;
3324 
3325 	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
3326 
3327 	regarray = instr_regarray_regarray(p, ip);
3328 	idx = instr_regarray_idx_hbo(p, t, ip);
3329 	src = instr_regarray_src_hbo(t, ip);
3330 	regarray[idx] += src;
3331 }
3332 
3333 static inline void
3334 __instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3335 {
3336 	uint64_t *regarray, idx, src;
3337 
3338 	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
3339 
3340 	regarray = instr_regarray_regarray(p, ip);
3341 	idx = instr_regarray_idx_nbo(p, t, ip);
3342 	src = ip->regarray.dstsrc_val;
3343 	regarray[idx] += src;
3344 }
3345 
3346 static inline void
3347 __instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3348 {
3349 	uint64_t *regarray, idx, src;
3350 
3351 	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
3352 
3353 	regarray = instr_regarray_regarray(p, ip);
3354 	idx = instr_regarray_idx_hbo(p, t, ip);
3355 	src = ip->regarray.dstsrc_val;
3356 	regarray[idx] += src;
3357 }
3358 
3359 static inline void
3360 __instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3361 {
3362 	uint64_t *regarray, idx, src;
3363 
3364 	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
3365 
3366 	regarray = instr_regarray_regarray(p, ip);
3367 	idx = instr_regarray_idx_imm(p, ip);
3368 	src = instr_regarray_src_nbo(t, ip);
3369 	regarray[idx] += src;
3370 }
3371 
3372 static inline void
3373 __instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3374 {
3375 	uint64_t *regarray, idx, src;
3376 
3377 	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
3378 
3379 	regarray = instr_regarray_regarray(p, ip);
3380 	idx = instr_regarray_idx_imm(p, ip);
3381 	src = instr_regarray_src_hbo(t, ip);
3382 	regarray[idx] += src;
3383 }
3384 
3385 static inline void
3386 __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
3387 			struct thread *t __rte_unused,
3388 			const struct instruction *ip)
3389 {
3390 	uint64_t *regarray, idx, src;
3391 
3392 	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
3393 
3394 	regarray = instr_regarray_regarray(p, ip);
3395 	idx = instr_regarray_idx_imm(p, ip);
3396 	src = ip->regarray.dstsrc_val;
3397 	regarray[idx] += src;
3398 }
3399 
3400 /*
3401  * metarray.
3402  */
3403 static inline struct meter *
3404 instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3405 {
3406 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3407 
3408 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3409 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3410 	uint64_t idx64 = *idx64_ptr;
3411 	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
3412 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
3413 
3414 	return &r->metarray[idx];
3415 }
3416 
3417 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3418 
3419 static inline struct meter *
3420 instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3421 {
3422 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3423 
3424 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3425 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3426 	uint64_t idx64 = *idx64_ptr;
3427 	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
3428 
3429 	return &r->metarray[idx];
3430 }
3431 
3432 #else
3433 
3434 #define instr_meter_idx_nbo instr_meter_idx_hbo
3435 
3436 #endif
3437 
3438 static inline struct meter *
3439 instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3440 {
3441 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3442 
3443 	uint64_t idx =  ip->meter.idx_val & r->size_mask;
3444 
3445 	return &r->metarray[idx];
3446 }
3447 
3448 static inline uint32_t
3449 instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
3450 {
3451 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3452 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3453 	uint64_t src64 = *src64_ptr;
3454 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
3455 	uint64_t src = src64 & src64_mask;
3456 
3457 	return (uint32_t)src;
3458 }
3459 
3460 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3461 
3462 static inline uint32_t
3463 instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
3464 {
3465 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3466 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3467 	uint64_t src64 = *src64_ptr;
3468 	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
3469 
3470 	return (uint32_t)src;
3471 }
3472 
3473 #else
3474 
3475 #define instr_meter_length_nbo instr_meter_length_hbo
3476 
3477 #endif
3478 
3479 static inline enum rte_color
3480 instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
3481 {
3482 	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
3483 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
3484 	uint64_t src64 = *src64_ptr;
3485 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
3486 	uint64_t src = src64 & src64_mask;
3487 
3488 	return (enum rte_color)src;
3489 }
3490 
3491 static inline void
3492 instr_meter_color_out_hbo_set(struct thread *t,
3493 			      const struct instruction *ip,
3494 			      enum rte_color color_out)
3495 {
3496 	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
3497 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
3498 	uint64_t dst64 = *dst64_ptr;
3499 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
3500 
3501 	uint64_t src = (uint64_t)color_out;
3502 
3503 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3504 }
3505 
3506 static inline void
3507 __instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
3508 			   struct thread *t,
3509 			   const struct instruction *ip)
3510 {
3511 	struct meter *m;
3512 
3513 	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
3514 
3515 	m = instr_meter_idx_nbo(p, t, ip);
3516 	rte_prefetch0(m);
3517 }
3518 
3519 static inline void
3520 __instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
3521 			   struct thread *t,
3522 			   const struct instruction *ip)
3523 {
3524 	struct meter *m;
3525 
3526 	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
3527 
3528 	m = instr_meter_idx_hbo(p, t, ip);
3529 	rte_prefetch0(m);
3530 }
3531 
3532 static inline void
3533 __instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
3534 			   struct thread *t __rte_unused,
3535 			   const struct instruction *ip)
3536 {
3537 	struct meter *m;
3538 
3539 	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
3540 
3541 	m = instr_meter_idx_imm(p, ip);
3542 	rte_prefetch0(m);
3543 }
3544 
/*
 * meter (hhm): meter index from header field (NBO), packet length from header
 * field (NBO), input color from meta-data field (HBO). Runs the trTCM
 * color-aware check, masks the result, writes the output color back and
 * updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3577 
/*
 * meter (hhi): meter index from header field (NBO), packet length from header
 * field (NBO), input color from instruction immediate. Runs the trTCM
 * color-aware check, masks the result, writes the output color back and
 * updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3610 
/*
 * meter (hmm): meter index from header field (NBO), packet length from
 * meta-data field (HBO), input color from meta-data field (HBO). Runs the
 * trTCM color-aware check, masks the result, writes the output color back
 * and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3643 
/*
 * meter (hmi): meter index from header field (NBO), packet length from
 * meta-data field (HBO), input color from instruction immediate. Runs the
 * trTCM color-aware check, masks the result, writes the output color back
 * and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3676 
/*
 * meter (mhm): meter index from meta-data field (HBO), packet length from
 * header field (NBO), input color from meta-data field (HBO). Runs the trTCM
 * color-aware check, masks the result, writes the output color back and
 * updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3709 
/*
 * meter (mhi): meter index from meta-data field (HBO), packet length from
 * header field (NBO), input color from instruction immediate. Runs the trTCM
 * color-aware check, masks the result, writes the output color back and
 * updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3742 
/*
 * meter (mmm): meter index, packet length and input color all from meta-data
 * fields (HBO). Runs the trTCM color-aware check, masks the result, writes
 * the output color back and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3775 
/*
 * meter (mmi): meter index and packet length from meta-data fields (HBO),
 * input color from instruction immediate. Runs the trTCM color-aware check,
 * masks the result, writes the output color back and updates the per-color
 * packet/byte counters.
 */
static inline void
__instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3808 
/*
 * meter (ihm): meter index from instruction immediate, packet length from
 * header field (NBO), input color from meta-data field (HBO). Runs the trTCM
 * color-aware check, masks the result, writes the output color back and
 * updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3841 
/*
 * meter (ihi): meter index and input color from instruction immediates,
 * packet length from header field (NBO). Runs the trTCM color-aware check,
 * masks the result, writes the output color back and updates the per-color
 * packet/byte counters.
 */
static inline void
__instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3874 
/*
 * meter (imm): meter index from instruction immediate, packet length and
 * input color from meta-data fields (HBO). Runs the trTCM color-aware check,
 * masks the result, writes the output color back and updates the per-color
 * packet/byte counters.
 */
static inline void
__instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3907 
/*
 * meter (imi): meter index and input color from instruction immediates,
 * packet length from meta-data field (HBO). Runs the trTCM color-aware
 * check, masks the result, writes the output color back and updates the
 * per-color packet/byte counters.
 */
static inline void
__instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	/* Touch the stats counters early; they are read and written below. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Restrict the result to the meter's configured color mask. */
	color_out &= m->color_mask;

	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	/* Per-color statistics update. */
	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3940 
3941 #endif
3942