xref: /dpdk/lib/pipeline/rte_swx_pipeline_internal.h (revision 770ebc060eb56731ae6ea0769ae10fda8eec5e89)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
5 #define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
6 
7 #include <inttypes.h>
8 #include <string.h>
9 #include <sys/queue.h>
10 
11 #include <rte_byteorder.h>
12 #include <rte_common.h>
13 #include <rte_cycles.h>
14 #include <rte_prefetch.h>
15 #include <rte_meter.h>
16 
17 #include <rte_swx_table_selector.h>
18 #include <rte_swx_table_learner.h>
19 #include <rte_swx_pipeline.h>
20 #include <rte_swx_ctl.h>
21 
/* Compile-time trace support: when TRACE_LEVEL is 0 (the default), TRACE()
 * expands to nothing, so tracing has zero run-time cost in release builds.
 */
#ifndef TRACE_LEVEL
#define TRACE_LEVEL 0
#endif

#if TRACE_LEVEL
#define TRACE(...) printf(__VA_ARGS__)
#else
#define TRACE(...)
#endif

/*
 * Environment.
 */
/* 64-bit network (big endian) <-> host byte order conversion shorthands. */
#define ntoh64(x) rte_be_to_cpu_64(x)
#define hton64(x) rte_cpu_to_be_64(x)
37 
38 /*
39  * Struct.
40  */
struct field {
	char name[RTE_SWX_NAME_SIZE];
	uint32_t n_bits; /* Field size in bits. */
	uint32_t offset; /* Bit offset of this field within its parent struct. */
	int var_size; /* Non-zero when the field has a variable size. */
};

/* User-registered struct type: an ordered set of fields. Used for packet
 * headers, packet meta-data, action data and extern mailboxes.
 */
struct struct_type {
	TAILQ_ENTRY(struct_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct field *fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	uint32_t n_bits; /* Total struct size in bits (maximum, for var-size structs). */
	uint32_t n_bits_min; /* Minimum struct size in bits; equals n_bits when !var_size. */
	int var_size; /* Non-zero when any field is variable-size. */
};

TAILQ_HEAD(struct_type_tailq, struct_type);
59 
60 /*
61  * Input port.
62  */
struct port_in_type {
	TAILQ_ENTRY(port_in_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_in_ops ops; /* Driver callbacks for this port type. */
};

TAILQ_HEAD(port_in_type_tailq, port_in_type);

/* Input port instance: a port type plus its driver-private object. */
struct port_in {
	TAILQ_ENTRY(port_in) node;
	struct port_in_type *type;
	void *obj; /* Driver-private port object, passed back to ops callbacks. */
	uint32_t id;
};

TAILQ_HEAD(port_in_tailq, port_in);

/* Flattened per-port data used on the fast path (no list traversal). */
struct port_in_runtime {
	rte_swx_port_in_pkt_rx_t pkt_rx;
	void *obj;
};
84 
85 /*
86  * Output port.
87  */
struct port_out_type {
	TAILQ_ENTRY(port_out_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_out_ops ops; /* Driver callbacks for this port type. */
};

TAILQ_HEAD(port_out_type_tailq, port_out_type);

/* Output port instance: a port type plus its driver-private object. */
struct port_out {
	TAILQ_ENTRY(port_out) node;
	struct port_out_type *type;
	void *obj; /* Driver-private port object, passed back to ops callbacks. */
	uint32_t id;
};

TAILQ_HEAD(port_out_tailq, port_out);

/* Flattened per-port data used on the fast path (no list traversal). */
struct port_out_runtime {
	rte_swx_port_out_pkt_tx_t pkt_tx;
	rte_swx_port_out_pkt_fast_clone_tx_t pkt_fast_clone_tx; /* TX of a fast (shallow) clone. */
	rte_swx_port_out_pkt_clone_tx_t pkt_clone_tx; /* TX of a full packet clone. */
	rte_swx_port_out_flush_t flush;
	void *obj;
};
112 
113 /*
114  * Packet mirroring.
115  */
/* Packet mirroring session: destination port and clone parameters. */
struct mirroring_session {
	uint32_t port_id; /* Output port the mirror copy is sent to. */
	int fast_clone; /* Non-zero = use the fast (shallow) clone TX path. */
	uint32_t truncation_length; /* Mirror copy truncated to this many bytes. */
};
121 
122 /*
123  * Extern object.
124  */
struct extern_type_member_func {
	TAILQ_ENTRY(extern_type_member_func) node;
	char name[RTE_SWX_NAME_SIZE];
	rte_swx_extern_type_member_func_t func;
	uint32_t id; /* Index into extern_obj_runtime::funcs[]. */
};

TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);

/* Extern object type: constructor/destructor, member functions and the
 * struct type used as the mailbox for pipeline <-> extern data exchange.
 */
struct extern_type {
	TAILQ_ENTRY(extern_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type;
	rte_swx_extern_type_constructor_t constructor;
	rte_swx_extern_type_destructor_t destructor;
	struct extern_type_member_func_tailq funcs;
	uint32_t n_funcs;
};

TAILQ_HEAD(extern_type_tailq, extern_type);

/* Extern object instance of a given extern type. */
struct extern_obj {
	TAILQ_ENTRY(extern_obj) node;
	char name[RTE_SWX_NAME_SIZE];
	struct extern_type *type;
	void *obj; /* Object created by type->constructor. */
	uint32_t struct_id; /* ID of the mailbox struct within thread->structs[]. */
	uint32_t id;
};

TAILQ_HEAD(extern_obj_tailq, extern_obj);

#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
#endif

/* Flattened per-object data used on the fast path. */
struct extern_obj_runtime {
	void *obj;
	uint8_t *mailbox;
	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
};
166 
167 /*
168  * Extern function.
169  */
/* Free-standing extern function with its own mailbox struct type. */
struct extern_func {
	TAILQ_ENTRY(extern_func) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type;
	rte_swx_extern_func_t func;
	uint32_t struct_id; /* ID of the mailbox struct within thread->structs[]. */
	uint32_t id;
};

TAILQ_HEAD(extern_func_tailq, extern_func);

/* Flattened per-function data used on the fast path. */
struct extern_func_runtime {
	uint8_t *mailbox;
	rte_swx_extern_func_t func;
};
185 
186 /*
187  * Hash function.
188  */
/* User-registered hash function, usable by the hash instruction. */
struct hash_func {
	TAILQ_ENTRY(hash_func) node;
	char name[RTE_SWX_NAME_SIZE];
	rte_swx_hash_func_t func;
	uint32_t id;
};

TAILQ_HEAD(hash_func_tailq, hash_func);

/* Flattened per-function data used on the fast path. */
struct hash_func_runtime {
	rte_swx_hash_func_t func;
};
201 
202 /*
203  * Header.
204  */
/* Packet header declaration: a named instance of a struct type. */
struct header {
	TAILQ_ENTRY(header) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st;
	uint32_t struct_id; /* ID of this header within thread->structs[]. */
	uint32_t id; /* Bit position within thread->valid_headers. */
};

TAILQ_HEAD(header_tailq, header);

/* Per-thread state of an extracted or generated header. */
struct header_runtime {
	uint8_t *ptr0; /* Header storage base address. */
	uint32_t n_bytes; /* Current header size in bytes. */
};

/* Per-thread state of an emitted header. */
struct header_out_runtime {
	uint8_t *ptr0; /* Emit storage base address. */
	uint8_t *ptr; /* Current write position within the emit storage. */
	uint32_t n_bytes;
};
225 
226 /*
227  * Instruction.
228  */
229 
230 /* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
231  * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
232  * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
233  * when transferred to packet meta-data and in NBO when transferred to packet
234  * headers.
235  */
236 
237 /* Notation conventions:
238  *    -Header field: H = h.header.field (dst/src)
239  *    -Meta-data field: M = m.field (dst/src)
240  *    -Extern object mailbox field: E = e.field (dst/src)
241  *    -Extern function mailbox field: F = f.field (dst/src)
242  *    -Table action data field: T = t.field (src only)
243  *    -Immediate value: I = 32-bit unsigned value (src only)
244  */
245 
/* Opcode of every pipeline/action instruction. Many logical instructions have
 * several specialized opcodes (suffixes such as _MH, _HM, _HH, _I) selected at
 * translation time based on the operand kinds (see notation conventions above),
 * so that the fast path never has to test operand kinds at run-time.
 */
enum instruction_type {
	/* rx m.port_in */
	INSTR_RX,

	/* tx port_out
	 * port_out = MI
	 */
	INSTR_TX,   /* port_out = M */
	INSTR_TX_I, /* port_out = I */
	INSTR_DROP, /* Packet drop. */

	/*
	 * mirror slot_id session_id
	 * slot_id = MEFT
	 * session_id = MEFT
	 */
	INSTR_MIRROR,

	/* recirculate
	 */
	INSTR_RECIRCULATE,

	/* recircid m.recirc_pass_id
	 * Read the internal recirculation pass ID into the specified meta-data field.
	 */
	INSTR_RECIRCID,

	/* extract h.header */
	INSTR_HDR_EXTRACT,
	INSTR_HDR_EXTRACT2, /* Extract 2 consecutive headers with one opcode. */
	INSTR_HDR_EXTRACT3,
	INSTR_HDR_EXTRACT4,
	INSTR_HDR_EXTRACT5,
	INSTR_HDR_EXTRACT6,
	INSTR_HDR_EXTRACT7,
	INSTR_HDR_EXTRACT8,

	/* extract h.header m.last_field_size */
	INSTR_HDR_EXTRACT_M,

	/* lookahead h.header */
	INSTR_HDR_LOOKAHEAD,

	/* emit h.header */
	INSTR_HDR_EMIT,
	INSTR_HDR_EMIT_TX, /* Emit fused with tx. */
	INSTR_HDR_EMIT2_TX, /* Emit 2 consecutive headers fused with tx, etc. */
	INSTR_HDR_EMIT3_TX,
	INSTR_HDR_EMIT4_TX,
	INSTR_HDR_EMIT5_TX,
	INSTR_HDR_EMIT6_TX,
	INSTR_HDR_EMIT7_TX,
	INSTR_HDR_EMIT8_TX,

	/* validate h.header */
	INSTR_HDR_VALIDATE,

	/* invalidate h.header */
	INSTR_HDR_INVALIDATE,

	/* mov dst src
	 * dst = src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_MOV,     /* dst = MEF, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
	INSTR_MOV_MH,  /* dst = MEF, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
	INSTR_MOV_HM,  /* dst = H, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
	INSTR_MOV_HH,  /* dst = H, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
	INSTR_MOV_DMA, /* dst = HMEF, src = HMEF; size(dst) = size(src) > 64 bits, NBO format. */
	INSTR_MOV_128, /* dst = HMEF, src = HMEF; size(dst) = size(src) = 128 bits, NBO format. */
	INSTR_MOV_I,   /* dst = HMEF, src = I; size(dst) <= 64 bits. */

	/* dma h.header t.field
	 * memcpy(h.header, t.field, sizeof(h.header))
	 */
	INSTR_DMA_HT,
	INSTR_DMA_HT2, /* DMA to 2 consecutive headers with one opcode, etc. */
	INSTR_DMA_HT3,
	INSTR_DMA_HT4,
	INSTR_DMA_HT5,
	INSTR_DMA_HT6,
	INSTR_DMA_HT7,
	INSTR_DMA_HT8,

	/* add dst src
	 * dst += src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
	INSTR_ALU_ADD_HH, /* dst = H, src = H */
	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
	INSTR_ALU_ADD_HI, /* dst = H, src = I */

	/* sub dst src
	 * dst -= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
	INSTR_ALU_SUB_HH, /* dst = H, src = H */
	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
	INSTR_ALU_SUB_HI, /* dst = H, src = I */

	/* ckadd dst src
	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
	 * dst = H, src = {H, h.header}, '+ = 1's complement addition operator
	 */
	INSTR_ALU_CKADD_FIELD,    /* src = H */
	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 bytes. */
	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with sizeof(header) any 4-byte multiple. */

	/* cksub dst src
	 * dst = dst '- src
	 * dst = H, src = H, '- = 1's complement subtraction operator
	 */
	INSTR_ALU_CKSUB_FIELD,

	/* and dst src
	 * dst &= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
	INSTR_ALU_AND_HH, /* dst = H, src = H */
	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */

	/* or dst src
	 * dst |= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_OR_HH, /* dst = H, src = H */
	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */

	/* xor dst src
	 * dst ^= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_XOR_HH, /* dst = H, src = H */
	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */

	/* shl dst src
	 * dst <<= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHL_HH, /* dst = H, src = H */
	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHL_HI, /* dst = H, src = I */

	/* shr dst src
	 * dst >>= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHR_HH, /* dst = H, src = H */
	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHR_HI, /* dst = H, src = I */

	/* regprefetch REGARRAY index
	 * prefetch REGARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_REGPREFETCH_RH, /* index = H */
	INSTR_REGPREFETCH_RM, /* index = MEFT */
	INSTR_REGPREFETCH_RI, /* index = I */

	/* regrd dst REGARRAY index
	 * dst = REGARRAY[index]
	 * dst = HMEF, index = HMEFTI
	 */
	INSTR_REGRD_HRH, /* dst = H, index = H */
	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
	INSTR_REGRD_HRI, /* dst = H, index = I */
	INSTR_REGRD_MRH, /* dst = MEF, index = H */
	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
	INSTR_REGRD_MRI, /* dst = MEF, index = I */

	/* regwr REGARRAY index src
	 * REGARRAY[index] = src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGWR_RHH, /* index = H, src = H */
	INSTR_REGWR_RHM, /* index = H, src = MEFT */
	INSTR_REGWR_RHI, /* index = H, src = I */
	INSTR_REGWR_RMH, /* index = MEFT, src = H */
	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGWR_RMI, /* index = MEFT, src = I */
	INSTR_REGWR_RIH, /* index = I, src = H */
	INSTR_REGWR_RIM, /* index = I, src = MEFT */
	INSTR_REGWR_RII, /* index = I, src = I */

	/* regadd REGARRAY index src
	 * REGARRAY[index] += src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGADD_RHH, /* index = H, src = H */
	INSTR_REGADD_RHM, /* index = H, src = MEFT */
	INSTR_REGADD_RHI, /* index = H, src = I */
	INSTR_REGADD_RMH, /* index = MEFT, src = H */
	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGADD_RMI, /* index = MEFT, src = I */
	INSTR_REGADD_RIH, /* index = I, src = H */
	INSTR_REGADD_RIM, /* index = I, src = MEFT */
	INSTR_REGADD_RII, /* index = I, src = I */

	/* metprefetch METARRAY index
	 * prefetch METARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_METPREFETCH_H, /* index = H */
	INSTR_METPREFETCH_M, /* index = MEFT */
	INSTR_METPREFETCH_I, /* index = I */

	/* meter METARRAY index length color_in color_out
	 * color_out = meter(METARRAY[index], length, color_in)
	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
	 */
	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */

	/* table TABLE */
	INSTR_TABLE, /* Regular table lookup. */
	INSTR_TABLE_AF, /* Table lookup with action function (AF) dispatch. */
	INSTR_SELECTOR, /* Selector (group/member) table lookup. */
	INSTR_LEARNER, /* Learner table lookup. */
	INSTR_LEARNER_AF, /* Learner table lookup with action function dispatch. */

	/* learn ACTION_NAME [ m.action_first_arg ] m.timeout_id */
	INSTR_LEARNER_LEARN,

	/* rearm [ m.timeout_id ] */
	INSTR_LEARNER_REARM, /* Re-arm with the current timeout. */
	INSTR_LEARNER_REARM_NEW, /* Re-arm with a new timeout ID. */

	/* forget */
	INSTR_LEARNER_FORGET,

	/* extern e.obj.func */
	INSTR_EXTERN_OBJ,

	/* extern f.func */
	INSTR_EXTERN_FUNC,

	/* hash HASH_FUNC_NAME dst src_first src_last
	 * Compute hash value over range of struct fields.
	 * dst = M
	 * src_first = HMEFT
	 * src_last = HMEFT
	 * src_first and src_last must be fields within the same struct
	 */
	INSTR_HASH_FUNC,

	/* jmp LABEL
	 * Unconditional jump
	 */
	INSTR_JMP,

	/* jmpv LABEL h.header
	 * Jump if header is valid
	 */
	INSTR_JMP_VALID,

	/* jmpnv LABEL h.header
	 * Jump if header is invalid
	 */
	INSTR_JMP_INVALID,

	/* jmph LABEL
	 * Jump if table lookup hit
	 */
	INSTR_JMP_HIT,

	/* jmpnh LABEL
	 * Jump if table lookup miss
	 */
	INSTR_JMP_MISS,

	/* jmpa LABEL ACTION
	 * Jump if action run
	 */
	INSTR_JMP_ACTION_HIT,

	/* jmpna LABEL ACTION
	 * Jump if action not run
	 */
	INSTR_JMP_ACTION_MISS,

	/* jmpeq LABEL a b
	 * Jump if a is equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_EQ_HH, /* a = H, b = H */
	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmpneq LABEL a b
	 * Jump if a is not equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_NEQ_HH, /* a = H, b = H */
	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmplt LABEL a b
	 * Jump if a is less than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
	INSTR_JMP_LT_HH, /* a = H, b = H */
	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
	INSTR_JMP_LT_HI, /* a = H, b = I */

	/* jmpgt LABEL a b
	 * Jump if a is greater than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
	INSTR_JMP_GT_HH, /* a = H, b = H */
	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
	INSTR_JMP_GT_HI, /* a = H, b = I */

	/* return
	 * Return from action
	 */
	INSTR_RETURN,

	/* Start of custom instructions. Must stay last. */
	INSTR_CUSTOM_0,
};
607 
/* Generic instruction operand: locates a bit-field within one of the
 * thread's structs (header, meta-data, action data or mailbox).
 */
struct instr_operand {
	uint8_t struct_id; /* Index into thread->structs[]. */
	uint8_t n_bits; /* Operand size in bits. */
	uint8_t offset; /* Byte offset within the struct. */
	uint8_t pad;
};
614 
/* Operands of the rx/tx/extract/emit instructions. */
struct instr_io {
	struct {
		union {
			struct {
				uint8_t offset; /* Port ID field: byte offset within meta-data. */
				uint8_t n_bits; /* Port ID field: size in bits. */
				uint8_t pad[2];
			};

			uint32_t val; /* Immediate port ID (overlays the field above). */
		};
	} io;

	/* Up to 8 headers handled by a single (multi-)extract/emit opcode. */
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
		uint8_t n_bytes[8];
	} hdr;
};
634 
/* Operands of the validate/invalidate instructions. */
struct instr_hdr_validity {
	uint8_t header_id;
	uint8_t struct_id;
};

/* Operand of the table/selector/learner lookup instructions. */
struct instr_table {
	uint8_t table_id;
};

/* Operands of the learn instruction. */
struct instr_learn {
	uint8_t action_id;
	uint8_t mf_first_arg_offset; /* Meta-data offset of the first action argument. */
	uint8_t mf_timeout_id_offset; /* Meta-data offset of the timeout ID field. */
	uint8_t mf_timeout_id_n_bits; /* Size in bits of the timeout ID field. */
};

/* Operands of the extern object member function call instruction. */
struct instr_extern_obj {
	uint8_t ext_obj_id;
	uint8_t func_id;
};

/* Operand of the extern function call instruction. */
struct instr_extern_func {
	uint8_t ext_func_id;
};

/* Operands of the hash instruction. */
struct instr_hash_func {
	uint8_t hash_func_id;

	struct {
		uint8_t offset; /* Destination meta-data field byte offset. */
		uint8_t n_bits; /* Destination meta-data field size in bits. */
	} dst;

	struct {
		uint8_t struct_id; /* Struct holding the source field range. */
		uint16_t offset; /* Byte offset of the first source field. */
		uint16_t n_bytes; /* Total size in bytes of the source field range. */
	} src;
};
674 
/* Operands of the mov/ALU instructions: destination field plus either a
 * source field or an immediate source value (discriminated by the opcode).
 */
struct instr_dst_src {
	struct instr_operand dst;
	union {
		struct instr_operand src;
		uint64_t src_val; /* Immediate source (INSTR_*_I opcodes). */
	};
};

/* Operands of the register array instructions. */
struct instr_regarray {
	uint8_t regarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val; /* Immediate index (INSTR_*_RI* opcodes). */
	};

	union {
		struct instr_operand dstsrc;
		uint64_t dstsrc_val; /* Immediate src value for regwr/regadd. */
	};
};

/* Operands of the meter array instructions. */
struct instr_meter {
	uint8_t metarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val; /* Immediate index (INSTR_METER_I* opcodes). */
	};

	struct instr_operand length;

	union {
		struct instr_operand color_in;
		uint32_t color_in_val; /* Immediate input color (INSTR_METER_**I opcodes). */
	};

	struct instr_operand color_out;
};

/* Operands of the dma instruction: up to 8 header <- action-data copies. */
struct instr_dma {
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
	} dst;

	struct {
		uint8_t offset[8]; /* Byte offset within the action data. */
	} src;

	uint16_t n_bytes[8];
};

/* Operands of the jump instructions: target plus optional comparison
 * operands, a header ID (jmpv/jmpnv) or an action ID (jmpa/jmpna).
 */
struct instr_jmp {
	struct instruction *ip; /* Jump target. */

	union {
		struct instr_operand a;
		uint8_t header_id;
		uint8_t action_id;
	};

	union {
		struct instr_operand b;
		uint64_t b_val; /* Immediate comparison operand (INSTR_JMP_*_I opcodes). */
	};
};
744 
/* One translated instruction: opcode plus opcode-specific operands.
 * The operand union member to read is determined by the type field.
 */
struct instruction {
	enum instruction_type type;
	union {
		struct instr_io io;
		struct instr_dst_src mirror;
		struct instr_hdr_validity valid;
		struct instr_dst_src mov;
		struct instr_regarray regarray;
		struct instr_meter meter;
		struct instr_dma dma;
		struct instr_dst_src alu;
		struct instr_table table;
		struct instr_learn learn;
		struct instr_extern_obj ext_obj;
		struct instr_extern_func ext_func;
		struct instr_hash_func hash_func;
		struct instr_jmp jmp;
	};
};
764 
/* Per-instruction translation-time book-keeping (not used on the fast path). */
struct instruction_data {
	char label[RTE_SWX_NAME_SIZE]; /* Label attached to this instruction, if any. */
	char jmp_label[RTE_SWX_NAME_SIZE]; /* Label this instruction jumps to, if any. */
	uint32_t n_users; /* user = jmp instruction to this instruction. */
	int invalid; /* Non-zero = instruction removed by the optimizer. */
};

/* Fast-path executor for one instruction type. */
typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
773 
774 /*
775  * Action.
776  */
/* Action function: executes the full instruction list of one action. */
typedef void
(*action_func_t)(struct rte_swx_pipeline *p);

/* User-registered action: argument struct type plus translated instructions. */
struct action {
	TAILQ_ENTRY(action) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st; /* Action argument struct type; NULL when no arguments. */
	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	uint32_t n_instructions;
	uint32_t id;
};

TAILQ_HEAD(action_tailq, action);
792 
793 /*
794  * Table.
795  */
/* User-registered table type: match type plus table driver callbacks. */
struct table_type {
	TAILQ_ENTRY(table_type) node;
	char name[RTE_SWX_NAME_SIZE];
	enum rte_swx_table_match_type match_type;
	struct rte_swx_table_ops ops;
};

TAILQ_HEAD(table_type_tailq, table_type);

/* One match field of a table key. */
struct match_field {
	enum rte_swx_table_match_type match_type;
	struct field *field;
};

/* Regular match-action table. */
struct table {
	TAILQ_ENTRY(table) node;
	char name[RTE_SWX_NAME_SIZE];
	char args[RTE_SWX_NAME_SIZE]; /* Table creation arguments string. */
	struct table_type *type; /* NULL when n_fields == 0. */

	/* Match. */
	struct match_field *fields;
	uint32_t n_fields;
	struct header *header; /* Only valid when n_fields > 0. */

	/* Action. */
	struct action **actions; /* Array of n_actions elements. */
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const; /* Non-zero = default action cannot be changed. */
	uint32_t action_data_size_max; /* Max action argument struct size over all actions. */
	int *action_is_for_table_entries; /* Per action: allowed for regular entries? */
	int *action_is_for_default_entry; /* Per action: allowed as default action? */

	uint32_t size; /* Maximum number of entries. */
	uint32_t id;
};

TAILQ_HEAD(table_tailq, table);

/* Per-thread table lookup state. */
struct table_runtime {
	rte_swx_table_lookup_t func;
	void *mailbox;
	uint8_t **key; /* Pointer to the key location within the thread structs. */
};

/* Per-table statistics counters. */
struct table_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t *n_pkts_action; /* One counter per table action. */
};
847 
848 /*
849  * Selector.
850  */
/* Selector table: maps a group ID to one member ID picked by hashing the
 * selector fields (e.g. for load balancing across group members).
 */
struct selector {
	TAILQ_ENTRY(selector) node;
	char name[RTE_SWX_NAME_SIZE];

	struct field *group_id_field; /* Meta-data input: group ID. */
	struct field **selector_fields; /* Fields hashed to select the member. */
	uint32_t n_selector_fields;
	struct header *selector_header; /* Non-NULL when selector fields live in a header. */
	struct field *member_id_field; /* Meta-data output: selected member ID. */

	uint32_t n_groups_max;
	uint32_t n_members_per_group_max;

	uint32_t id;
};

TAILQ_HEAD(selector_tailq, selector);

/* Per-thread selector lookup state. */
struct selector_runtime {
	void *mailbox;
	uint8_t **group_id_buffer;
	uint8_t **selector_buffer;
	uint8_t **member_id_buffer;
};

/* Per-selector statistics counters. */
struct selector_statistics {
	uint64_t n_pkts;
};
879 
880 /*
881  * Learner table.
882  */
/* Learner table: match-action table whose entries are added/updated from
 * the data path (learn/rearm/forget instructions) and expire on timeout.
 */
struct learner {
	TAILQ_ENTRY(learner) node;
	char name[RTE_SWX_NAME_SIZE];

	/* Match. */
	struct field **fields; /* Array of n_fields key fields. */
	uint32_t n_fields;
	struct header *header; /* Non-NULL when the key fields live in a header. */

	/* Action. */
	struct action **actions; /* Array of n_actions elements. */
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const; /* Non-zero = default action cannot be changed. */
	uint32_t action_data_size_max; /* Max action argument struct size over all actions. */
	int *action_is_for_table_entries; /* Per action: allowed for regular entries? */
	int *action_is_for_default_entry; /* Per action: allowed as default action? */

	uint32_t size; /* Maximum number of entries. */
	uint32_t timeout[RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX]; /* Timeouts in seconds. */
	uint32_t n_timeouts;
	uint32_t id;
};

TAILQ_HEAD(learner_tailq, learner);

/* Per-thread learner lookup state. */
struct learner_runtime {
	void *mailbox;
	uint8_t **key; /* Pointer to the key location within the thread structs. */
};

/* Per-learner statistics counters. */
struct learner_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
	uint64_t n_pkts_rearm;
	uint64_t n_pkts_forget;
	uint64_t *n_pkts_action; /* One counter per learner action. */
};
922 
923 /*
924  * Register array.
925  */
/* Register array: array of 64-bit registers accessed by the reg* instructions. */
struct regarray {
	TAILQ_ENTRY(regarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint64_t init_val; /* Initial value for every register. */
	uint32_t size; /* Number of registers; presumably a power of 2 (see size_mask) — confirm. */
	uint32_t id;
};

TAILQ_HEAD(regarray_tailq, regarray);

/* Fast-path register array state. */
struct regarray_runtime {
	uint64_t *regarray;
	uint32_t size_mask; /* Index mask, i.e. size - 1. */
};
940 
941 /*
942  * Meter array.
943  */
/* Meter profile: trTCM (RFC 4115-style two-rate three-color marker) parameters
 * shared by any number of meters.
 */
struct meter_profile {
	TAILQ_ENTRY(meter_profile) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_meter_trtcm_params params;
	struct rte_meter_trtcm_profile profile;
	uint32_t n_users; /* Number of meters currently using this profile. */
};

TAILQ_HEAD(meter_profile_tailq, meter_profile);

/* Meter array declaration. */
struct metarray {
	TAILQ_ENTRY(metarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint32_t size; /* Number of meters; presumably a power of 2 (see size_mask) — confirm. */
	uint32_t id;
};

TAILQ_HEAD(metarray_tailq, metarray);

/* One meter instance plus its per-color statistics. */
struct meter {
	struct rte_meter_trtcm m;
	struct meter_profile *profile;
	enum rte_color color_mask; /* Output color mask. */
	uint8_t pad[20]; /* Explicit padding; presumably for cache-line alignment — confirm. */

	uint64_t n_pkts[RTE_COLORS];
	uint64_t n_bytes[RTE_COLORS];
};

/* Fast-path meter array state. */
struct metarray_runtime {
	struct meter *metarray;
	uint32_t size_mask; /* Index mask, i.e. size - 1. */
};
977 
978 /*
979  * Pipeline.
980  */
/* Per-thread run-time state: everything needed to process one packet. */
struct thread {
	/* Packet. */
	struct rte_swx_pkt pkt;
	uint8_t *ptr; /* Current parse/emit position within the packet buffer. */
	uint32_t *mirroring_slots; /* Mirror session ID per mirroring slot. */
	uint64_t mirroring_slots_mask; /* Bit i set = mirroring slot i is in use. */
	int recirculate; /* Non-zero = recirculate the current packet. */
	uint32_t recirc_pass_id; /* Current recirculation pass (read by recircid). */

	/* Structures. */
	uint8_t **structs; /* Struct base pointer per struct ID (headers, meta-data, mailboxes). */

	/* Packet headers. */
	struct header_runtime *headers; /* Extracted or generated headers. */
	struct header_out_runtime *headers_out; /* Emitted headers. */
	uint8_t *header_storage;
	uint8_t *header_out_storage;
	uint64_t valid_headers; /* Bit i set = header with ID i is valid (see HEADER_VALID). */
	uint32_t n_headers_out;

	/* Packet meta-data. */
	uint8_t *metadata;

	/* Tables. */
	struct table_runtime *tables;
	struct selector_runtime *selectors;
	struct learner_runtime *learners;
	struct rte_swx_table_state *table_state;
	uint64_t action_id; /* Action selected by the latest table lookup. */
	int hit; /* 0 = Miss, 1 = Hit. */
	uint32_t learner_id; /* Learner used by the latest lookup (for learn/rearm/forget). */
	uint64_t time;

	/* Extern objects and functions. */
	struct extern_obj_runtime *extern_objs;
	struct extern_func_runtime *extern_funcs;

	/* Instructions. */
	struct instruction *ip; /* Instruction pointer. */
	struct instruction *ret; /* Return address for the action currently executing. */
};

/* Single-bit get/set/clear within a 64-bit mask; pos must be in [0, 63]. */
#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))

/* Non-zero when the given header is currently valid for the given thread. */
#define HEADER_VALID(thread, header_id) \
	MASK64_BIT_GET((thread)->valid_headers, header_id)
1029 
1030 static inline uint64_t
1031 instr_operand_hbo(struct thread *t, const struct instr_operand *x)
1032 {
1033 	uint8_t *x_struct = t->structs[x->struct_id];
1034 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
1035 	uint64_t x64 = *x64_ptr;
1036 	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
1037 
1038 	return x64 & x64_mask;
1039 }
1040 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read an instruction operand stored in Network Byte Order (NBO), i.e. a
 * header field: byte-swap the 64-bit load, then right-align the top n_bits.
 * Assumes 1 <= n_bits <= 64 (a zero n_bits would shift by 64, which is UB).
 */
static inline uint64_t
instr_operand_nbo(struct thread *t, const struct instr_operand *x)
{
	uint8_t *x_struct = t->structs[x->struct_id];
	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
	uint64_t x64 = *x64_ptr;

	return ntoh64(x64) >> (64 - x->n_bits);
}

#else

/* On big endian hosts NBO equals HBO, so no dedicated reader is needed. */
#define instr_operand_nbo instr_operand_hbo

#endif
1058 
/* ALU operation with both operands in Host Byte Order (HBO):
 * dst = dst <operator> src, where dst and src are bit-fields located through
 * the alu operands of the current instruction. Only the dst bits are updated;
 * the surrounding bits of the destination 64-bit word are preserved.
 * Assumes 1 <= n_bits <= 64 for both operands (zero would shift by 64: UB).
 */
#define ALU(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
1077 
1078 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1079 
/* ALU operation with HBO destination (meta-data/extern/action data) and NBO
 * source (header field): the source is byte-swapped and right-aligned before
 * the operation; the result is written back in HBO.
 */
#define ALU_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
1097 
/* ALU operation with NBO destination (header field) and HBO source: both
 * operands are brought to HBO for the operation, then the result is shifted
 * back into field position and byte-swapped before the write-back.
 */
#define ALU_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}
1117 
/* Fast variant of ALU_HM: keeps the NBO destination in place and instead
 * converts the HBO source into destination-field NBO position, saving the
 * result shift and swap of ALU_HM. NOTE(review): presumably only correct for
 * operators and operand sizes where operating in shifted position is
 * equivalent (selected at translation time) — confirm against the caller.
 */
#define ALU_HM_FAST(thread, ip, operator)  \
{                                                                                 \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
	uint64_t dst64 = *dst64_ptr;                                              \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
	uint64_t dst = dst64 & dst64_mask;                                        \
										  \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
	uint64_t src64 = *src64_ptr;                                              \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
										  \
	uint64_t result = dst operator src;                                       \
										  \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
}
1136 
/*
 * ALU with both operands in network byte order (header fields): both are
 * converted to host byte order with ntoh64() and right-aligned to their bit
 * widths, the operator is applied, and the result is shifted back to the
 * destination field's layout and converted with hton64() before being merged
 * into the destination word. Assumes 1 <= n_bits <= 64 for both operands.
 */
#define ALU_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}
1155 
/*
 * ALU with both operands in network byte order, fast path: no byte swaps at
 * all. The source is re-aligned from its own bit width to the destination's
 * bit width purely with shifts, and the operator is applied on the raw
 * big-endian representation. Presumably only selected for operators whose
 * result is byte-order agnostic (e.g. bitwise ops) — confirm against the
 * instruction translation code. Assumes 1 <= n_bits <= 64 for both operands.
 */
#define ALU_HH_FAST(thread, ip, operator)  \
{                                                                                             \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
	uint64_t dst64 = *dst64_ptr;                                                          \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
	uint64_t dst = dst64 & dst64_mask;                                                    \
											      \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
	uint64_t src64 = *src64_ptr;                                                          \
	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
											      \
	uint64_t result = dst operator src;                                                   \
											      \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
}
1173 
1174 #else
1175 
1176 #define ALU_MH ALU
1177 #define ALU_HM ALU
1178 #define ALU_HM_FAST ALU
1179 #define ALU_HH ALU
1180 #define ALU_HH_FAST ALU
1181 
1182 #endif
1183 
/*
 * ALU with host-byte-order destination and immediate source: the source value
 * comes straight from the instruction word ((ip)->alu.src_val), the operator
 * is applied, and the result is masked to the destination's bit width before
 * being merged back. Assumes 1 <= dst.n_bits <= 64.
 */
#define ALU_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}

/* Meta-data destination + immediate source: identical to the generic case. */
#define ALU_MI ALU_I
1200 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * ALU with network-byte-order destination and immediate source (little-endian
 * hosts only): the destination is converted to host order with ntoh64() and
 * right-aligned, the operator is applied with the immediate, and the result
 * is shifted back and converted with hton64() before the merge. On big-endian
 * hosts the header field is already in host order, so ALU_I is used directly.
 */
#define ALU_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}

#else

#define ALU_HI ALU_I

#endif
1224 
/*
 * Move (copy) a source field into a destination field, both read/written in
 * host byte order: the source is masked to its bit width, then masked again
 * to the destination's bit width and merged into the destination word.
 * Assumes 1 <= n_bits <= 64 for both fields.
 */
#define MOV(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1240 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * Move with host-byte-order (meta-data) destination and network-byte-order
 * (header) source: the source is byte-swapped to host order and right-aligned
 * to its bit width before the masked merge into the destination.
 */
#define MOV_MH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}

/*
 * Move with network-byte-order (header) destination and host-byte-order
 * (meta-data) source: the masked source is byte-swapped with hton64() and
 * right-shifted into the destination field's big-endian layout. The shift
 * guarantees the result fits inside dst64_mask, so no additional masking is
 * needed before the merge.
 */
#define MOV_HM(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}

/*
 * Move between two network-byte-order (header) fields: no byte swap needed;
 * the left shift drops the bits above the source width and the right shift
 * re-aligns the value to the destination width, all on the raw big-endian
 * representation.
 */
#define MOV_HH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
									       \
	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}

#else

/* On big-endian hosts, header and meta-data share the same byte order. */
#define MOV_MH MOV
#define MOV_HM MOV
#define MOV_HH MOV

#endif
1298 
/*
 * Move an immediate value ((ip)->mov.src_val) into a host-byte-order
 * destination field, masked to the destination's bit width.
 */
#define MOV_I(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint64_t src = (ip)->mov.src_val;                                      \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1310 
/*
 * Conditional jump: compare two host-byte-order fields (each masked to its
 * bit width) with the given operator; on true, set the thread's instruction
 * pointer to the jump target, otherwise fall through to the next instruction.
 */
#define JMP_CMP(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1327 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * Conditional jump, meta-data (host order) vs. header (network order):
 * the header operand is byte-swapped and right-aligned before the compare.
 */
#define JMP_CMP_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/*
 * Conditional jump, header (network order) vs. meta-data (host order):
 * mirror image of JMP_CMP_MH.
 */
#define JMP_CMP_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/*
 * Conditional jump, both operands in network byte order: both are swapped to
 * host order and right-aligned before the compare.
 */
#define JMP_CMP_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/*
 * Conditional jump, both operands in network byte order, fast path: no byte
 * swaps; each operand is left-aligned (MSB-justified) by shifting. Presumably
 * only selected for operators unaffected by the missing byte swap (e.g.
 * equality/inequality) — confirm against the instruction translation code.
 */
#define JMP_CMP_HH_FAST(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#else

/* On big-endian hosts, header and meta-data share the same byte order. */
#define JMP_CMP_MH JMP_CMP
#define JMP_CMP_HM JMP_CMP
#define JMP_CMP_HH JMP_CMP
#define JMP_CMP_HH_FAST JMP_CMP

#endif
1400 
/*
 * Conditional jump against an immediate ((ip)->jmp.b_val): operand a is a
 * host-byte-order field masked to its bit width.
 */
#define JMP_CMP_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* Meta-data operand + immediate: identical to the generic case. */
#define JMP_CMP_MI JMP_CMP_I
1415 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * Conditional jump, header (network order) operand vs. immediate: the header
 * operand is byte-swapped and right-aligned before the compare. On big-endian
 * hosts JMP_CMP_I is used directly.
 */
#define JMP_CMP_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#else

#define JMP_CMP_HI JMP_CMP_I

#endif
1435 
/*
 * Read the n_bits least significant bits of the 64-bit word at the given
 * byte offset inside the thread's meta-data. Uses a GCC/Clang statement
 * expression to yield a value. Assumes 1 <= n_bits <= 64.
 * NOTE(review): the uint64_t load through a cast byte pointer presumably
 * relies on the meta-data buffer being suitably aligned — confirm.
 */
#define METADATA_READ(thread, offset, n_bits)                                  \
({                                                                             \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
	(m64 & m64_mask);                                                      \
})
1443 
/*
 * Write the n_bits least significant bits of value into the 64-bit word at
 * the given byte offset inside the thread's meta-data, leaving the remaining
 * bits of that word untouched (read-modify-write). Assumes 1 <= n_bits <= 64.
 */
#define METADATA_WRITE(thread, offset, n_bits, value)                          \
{                                                                              \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
									       \
	uint64_t m_new = value;                                                \
									       \
	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
}
1454 
/* Number of packet-processing threads per pipeline. Must be a power of 2:
 * thread_yield() below wraps the thread ID with (THREADS_MAX - 1) as a mask.
 */
#ifndef RTE_SWX_PIPELINE_THREADS_MAX
#define RTE_SWX_PIPELINE_THREADS_MAX 16
#endif

/* Maximum number of entries in the instruction dispatch table. */
#ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
#define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 256
#endif
1462 
/* Pipeline instance: build-time object registries (TAILQ lists populated
 * through the public API) plus the run-time arrays derived from them when
 * the pipeline is built.
 */
struct rte_swx_pipeline {
	char name[RTE_SWX_NAME_SIZE];

	/* Build-time object registries. */
	struct struct_type_tailq struct_types;
	struct port_in_type_tailq port_in_types;
	struct port_in_tailq ports_in;
	struct port_out_type_tailq port_out_types;
	struct port_out_tailq ports_out;
	struct extern_type_tailq extern_types;
	struct extern_obj_tailq extern_objs;
	struct extern_func_tailq extern_funcs;
	struct hash_func_tailq hash_funcs;
	struct header_tailq headers;
	struct struct_type *metadata_st;
	uint32_t metadata_struct_id;
	struct action_tailq actions;
	struct table_type_tailq table_types;
	struct table_tailq tables;
	struct selector_tailq selectors;
	struct learner_tailq learners;
	struct regarray_tailq regarrays;
	struct meter_profile_tailq meter_profiles;
	struct metarray_tailq metarrays;

	/* Run-time data (arrays indexed by object ID, built at build time). */
	struct port_in_runtime *in;
	struct port_out_runtime *out;
	struct mirroring_session *mirroring_sessions;
	struct instruction **action_instructions;
	action_func_t *action_funcs;
	struct rte_swx_table_state *table_state;
	struct table_statistics *table_stats;
	struct selector_statistics *selector_stats;
	struct learner_statistics *learner_stats;
	struct hash_func_runtime *hash_func_runtime;
	struct regarray_runtime *regarray_runtime;
	struct metarray_runtime *metarray_runtime;
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	instr_exec_t *instruction_table;
	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
	void *lib;	/* Handle for the compiled/loaded pipeline code. */

	/* Object counts and interpreter state. */
	uint32_t n_structs;
	uint32_t n_ports_in;
	uint32_t n_ports_out;
	uint32_t n_mirroring_slots;
	uint32_t n_mirroring_sessions;
	uint32_t n_extern_objs;
	uint32_t n_extern_funcs;
	uint32_t n_hash_funcs;
	uint32_t n_actions;
	uint32_t n_tables;
	uint32_t n_selectors;
	uint32_t n_learners;
	uint32_t n_regarrays;
	uint32_t n_metarrays;
	uint32_t n_headers;
	uint32_t thread_id;	/* Currently executing thread. */
	uint32_t port_id;	/* Next input port to poll. */
	uint32_t n_instructions;
	int build_done;
	int numa_node;
};
1526 
1527 /*
1528  * Instruction.
1529  */
/* Advance to the next input port, wrapping around. NOTE(review): the mask
 * only wraps correctly if n_ports_in is a power of 2 — presumably enforced
 * at pipeline build time; confirm in the build code.
 */
static inline void
pipeline_port_inc(struct rte_swx_pipeline *p)
{
	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
}
1535 
/* Point the thread's instruction pointer at the pipeline's first instruction. */
static inline void
thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
{
	t->ip = p->instructions;
}
1541 
/* Set the thread's instruction pointer to an arbitrary instruction. */
static inline void
thread_ip_set(struct thread *t, struct instruction *ip)
{
	t->ip = ip;
}
1547 
/* Call an action: save the return address (the instruction after the call)
 * and jump to the action's first instruction.
 */
static inline void
thread_ip_action_call(struct rte_swx_pipeline *p,
		      struct thread *t,
		      uint32_t action_id)
{
	t->ret = t->ip + 1;
	t->ip = p->action_instructions[action_id];
}
1556 
static inline void
thread_ip_inc(struct rte_swx_pipeline *p);

/* Advance the current thread's instruction pointer by one instruction. */
static inline void
thread_ip_inc(struct rte_swx_pipeline *p)
{
	struct thread *t = &p->threads[p->thread_id];

	t->ip++;
}
1567 
/* Branchless conditional increment: advance the instruction pointer by
 * cond (expected 0 or 1) instructions.
 */
static inline void
thread_ip_inc_cond(struct thread *t, int cond)
{
	t->ip += cond;
}
1573 
/* Round-robin to the next thread; relies on RTE_SWX_PIPELINE_THREADS_MAX
 * being a power of 2 so the mask wraps correctly.
 */
static inline void
thread_yield(struct rte_swx_pipeline *p)
{
	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}
1579 
/* Branchless conditional yield: switch to the next thread only when cond is
 * non-zero (expected 0 or 1).
 */
static inline void
thread_yield_cond(struct rte_swx_pipeline *p, int cond)
{
	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}
1585 
1586 /*
1587  * rx.
1588  */
/* Execute the rx instruction: either keep the current packet (recirculation)
 * or receive a new packet from the current input port. Resets the per-packet
 * thread state (headers, mirroring, table state) and stores the input port ID
 * into the meta-data field named by the instruction. Returns non-zero when a
 * packet is available for processing, 0 otherwise.
 */
static inline int
__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct port_in_runtime *port = &p->in[p->port_id];
	struct rte_swx_pkt *pkt = &t->pkt;
	int pkt_received;

	/* Recirculation: keep the current packet. */
	if (t->recirculate) {
		TRACE("[Thread %2u] rx - recirculate (pass %u)\n",
		      p->thread_id,
		      t->recirc_pass_id + 1);

		/* Packet. */
		t->ptr = &pkt->pkt[pkt->offset];
		t->mirroring_slots_mask = 0;
		t->recirculate = 0;
		t->recirc_pass_id++;

		/* Headers. */
		t->valid_headers = 0;
		t->n_headers_out = 0;

		/* Tables. */
		t->table_state = p->table_state;

		return 1;
	}

	/* Packet. */
	pkt_received = port->pkt_rx(port->obj, pkt);
	t->ptr = &pkt->pkt[pkt->offset];
	rte_prefetch0(t->ptr);

	TRACE("[Thread %2u] rx %s from port %u\n",
	      p->thread_id,
	      pkt_received ? "1 pkt" : "0 pkts",
	      p->port_id);

	t->mirroring_slots_mask = 0;
	t->recirc_pass_id = 0;

	/* Headers. */
	t->valid_headers = 0;
	t->n_headers_out = 0;

	/* Meta-data. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);

	/* Tables. */
	t->table_state = p->table_state;

	/* Thread. */
	pipeline_port_inc(p);

	return pkt_received;
}
1646 
/* Top-level rx handler: run __instr_rx_exec() for the current thread, advance
 * past the rx instruction only if a packet was received (otherwise retry rx
 * on the next pass), then yield to the next thread.
 */
static inline void
instr_rx_exec(struct rte_swx_pipeline *p)
{
	struct thread *t = &p->threads[p->thread_id];
	struct instruction *ip = t->ip;
	int pkt_received;

	/* Packet. */
	pkt_received = __instr_rx_exec(p, t, ip);

	/* Thread. */
	thread_ip_inc_cond(t, pkt_received);
	thread_yield(p);
}
1661 
1662 /*
1663  * tx.
1664  */
1665 static inline void
1666 emit_handler(struct thread *t)
1667 {
1668 	struct header_out_runtime *h0 = &t->headers_out[0];
1669 	struct header_out_runtime *h1 = &t->headers_out[1];
1670 	uint32_t offset = 0, i;
1671 
1672 	/* No header change or header decapsulation. */
1673 	if ((t->n_headers_out == 1) &&
1674 	    (h0->ptr + h0->n_bytes == t->ptr)) {
1675 		TRACE("Emit handler: no header change or header decap.\n");
1676 
1677 		t->pkt.offset -= h0->n_bytes;
1678 		t->pkt.length += h0->n_bytes;
1679 
1680 		return;
1681 	}
1682 
1683 	/* Header encapsulation (optionally, with prior header decapsulation). */
1684 	if ((t->n_headers_out == 2) &&
1685 	    (h1->ptr + h1->n_bytes == t->ptr) &&
1686 	    (h0->ptr == h0->ptr0)) {
1687 		uint32_t offset;
1688 
1689 		TRACE("Emit handler: header encapsulation.\n");
1690 
1691 		offset = h0->n_bytes + h1->n_bytes;
1692 		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
1693 		t->pkt.offset -= offset;
1694 		t->pkt.length += offset;
1695 
1696 		return;
1697 	}
1698 
1699 	/* For any other case. */
1700 	TRACE("Emit handler: complex case.\n");
1701 
1702 	for (i = 0; i < t->n_headers_out; i++) {
1703 		struct header_out_runtime *h = &t->headers_out[i];
1704 
1705 		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
1706 		offset += h->n_bytes;
1707 	}
1708 
1709 	if (offset) {
1710 		memcpy(t->ptr - offset, t->header_out_storage, offset);
1711 		t->pkt.offset -= offset;
1712 		t->pkt.length += offset;
1713 	}
1714 }
1715 
1716 static inline void
1717 mirroring_handler(struct rte_swx_pipeline *p, struct thread *t, struct rte_swx_pkt *pkt)
1718 {
1719 	uint64_t slots_mask = t->mirroring_slots_mask, slot_mask;
1720 	uint32_t slot_id;
1721 
1722 	for (slot_id = 0, slot_mask = 1LLU ; slots_mask; slot_id++, slot_mask <<= 1)
1723 		if (slot_mask & slots_mask) {
1724 			struct port_out_runtime *port;
1725 			struct mirroring_session *session;
1726 			uint32_t port_id, session_id;
1727 
1728 			session_id = t->mirroring_slots[slot_id];
1729 			session = &p->mirroring_sessions[session_id];
1730 
1731 			port_id = session->port_id;
1732 			port = &p->out[port_id];
1733 
1734 			if (session->fast_clone)
1735 				port->pkt_fast_clone_tx(port->obj, pkt);
1736 			else
1737 				port->pkt_clone_tx(port->obj, pkt, session->truncation_length);
1738 
1739 			slots_mask &= ~slot_mask;
1740 		}
1741 }
1742 
/* Execute the tx instruction: read the output port ID from the meta-data
 * field named by the instruction, flush the emitted headers, service any
 * mirroring sessions, and transmit the packet — unless recirculation is
 * pending, in which case the packet is kept for another pipeline pass.
 */
static inline void
__instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	/* Recirculation: keep the current packet. */
	if (t->recirculate) {
		TRACE("[Thread %2u]: tx 1 pkt - recirculate\n",
		      p->thread_id);

		/* Headers. */
		emit_handler(t);

		/* Packet. */
		mirroring_handler(p, t, pkt);

		return;
	}

	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	mirroring_handler(p, t, pkt);
	port->pkt_tx(port->obj, pkt);
}
1775 
/* Execute the tx instruction with an immediate output port ID (taken from
 * the instruction word instead of meta-data); otherwise identical to
 * __instr_tx_exec().
 */
static inline void
__instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = ip->io.io.val;
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	/* Recirculation: keep the current packet. */
	if (t->recirculate) {
		TRACE("[Thread %2u]: tx (i) 1 pkt - recirculate\n",
		      p->thread_id);

		/* Headers. */
		emit_handler(t);

		/* Packet. */
		mirroring_handler(p, t, pkt);

		return;
	}

	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	mirroring_handler(p, t, pkt);
	port->pkt_tx(port->obj, pkt);
}
1808 
/* Execute the drop instruction: transmit the packet to the last output port,
 * which by convention is the drop port. Emitted headers and mirroring are
 * still honored before the drop.
 */
static inline void
__instr_drop_exec(struct rte_swx_pipeline *p,
		  struct thread *t,
		  const struct instruction *ip __rte_unused)
{
	uint64_t port_id = p->n_ports_out - 1;
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	TRACE("[Thread %2u]: drop 1 pkt\n",
	      p->thread_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	mirroring_handler(p, t, pkt);
	port->pkt_tx(port->obj, pkt);
}
1828 
1829 static inline void
1830 __instr_mirror_exec(struct rte_swx_pipeline *p,
1831 		    struct thread *t,
1832 		    const struct instruction *ip)
1833 {
1834 	uint64_t slot_id = instr_operand_hbo(t, &ip->mirror.dst);
1835 	uint64_t session_id = instr_operand_hbo(t, &ip->mirror.src);
1836 
1837 	slot_id &= p->n_mirroring_slots - 1;
1838 	session_id &= p->n_mirroring_sessions - 1;
1839 
1840 	TRACE("[Thread %2u]: mirror pkt (slot = %u, session = %u)\n",
1841 	      p->thread_id,
1842 	      (uint32_t)slot_id,
1843 	      (uint32_t)session_id);
1844 
1845 	t->mirroring_slots[slot_id] = session_id;
1846 	t->mirroring_slots_mask |= 1LLU << slot_id;
1847 }
1848 
/* Execute the recirculate instruction: flag the packet for another pipeline
 * pass; the flag is consumed by the rx/tx handlers.
 */
static inline void
__instr_recirculate_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip __rte_unused)
{
	TRACE("[Thread %2u]: recirculate\n",
	      p->thread_id);

	t->recirculate = 1;
}
1859 
/* Execute the recircid instruction: store the current recirculation pass ID
 * into the meta-data field named by the instruction.
 */
static inline void
__instr_recircid_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u]: recircid (pass %u)\n",
	      p->thread_id,
	      t->recirc_pass_id);

	/* Meta-data. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, t->recirc_pass_id);
}
1872 
1873 /*
1874  * extract.
1875  */
1876 static inline void
1877 __instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
1878 			      struct thread *t,
1879 			      const struct instruction *ip,
1880 			      uint32_t n_extract)
1881 {
1882 	uint64_t valid_headers = t->valid_headers;
1883 	uint8_t *ptr = t->ptr;
1884 	uint32_t offset = t->pkt.offset;
1885 	uint32_t length = t->pkt.length;
1886 	uint32_t i;
1887 
1888 	for (i = 0; i < n_extract; i++) {
1889 		uint32_t header_id = ip->io.hdr.header_id[i];
1890 		uint32_t struct_id = ip->io.hdr.struct_id[i];
1891 		uint32_t n_bytes = ip->io.hdr.n_bytes[i];
1892 
1893 		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
1894 		      p->thread_id,
1895 		      header_id,
1896 		      n_bytes);
1897 
1898 		/* Headers. */
1899 		t->structs[struct_id] = ptr;
1900 		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
1901 
1902 		/* Packet. */
1903 		offset += n_bytes;
1904 		length -= n_bytes;
1905 		ptr += n_bytes;
1906 	}
1907 
1908 	/* Headers. */
1909 	t->valid_headers = valid_headers;
1910 
1911 	/* Packet. */
1912 	t->pkt.offset = offset;
1913 	t->pkt.length = length;
1914 	t->ptr = ptr;
1915 }
1916 
/* Execute the extract instruction for a single fixed-size header. */
static inline void
__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	__instr_hdr_extract_many_exec(p, t, ip, 1);
}
1924 
1925 static inline void
1926 __instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
1927 			  struct thread *t,
1928 			  const struct instruction *ip)
1929 {
1930 	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
1931 
1932 	__instr_hdr_extract_many_exec(p, t, ip, 2);
1933 }
1934 
1935 static inline void
1936 __instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
1937 			  struct thread *t,
1938 			  const struct instruction *ip)
1939 {
1940 	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
1941 
1942 	__instr_hdr_extract_many_exec(p, t, ip, 3);
1943 }
1944 
1945 static inline void
1946 __instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
1947 			  struct thread *t,
1948 			  const struct instruction *ip)
1949 {
1950 	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
1951 
1952 	__instr_hdr_extract_many_exec(p, t, ip, 4);
1953 }
1954 
1955 static inline void
1956 __instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
1957 			  struct thread *t,
1958 			  const struct instruction *ip)
1959 {
1960 	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
1961 
1962 	__instr_hdr_extract_many_exec(p, t, ip, 5);
1963 }
1964 
1965 static inline void
1966 __instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
1967 			  struct thread *t,
1968 			  const struct instruction *ip)
1969 {
1970 	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
1971 
1972 	__instr_hdr_extract_many_exec(p, t, ip, 6);
1973 }
1974 
/* Fused handler for 7 consecutive header extract instructions. */
static inline void
__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 7);
}
1984 
/* Fused handler for 8 consecutive header extract instructions. */
static inline void
__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 8);
}
1994 
/* Extract a variable-size header: the size of its fixed part (in bytes) comes
 * from the instruction, while the size of its trailing variable part is read
 * at run-time from the meta-data field designated by the instruction. The
 * header is mapped in place onto the packet buffer and marked valid, and the
 * packet walk state (offset/length/ptr) is advanced past it.
 *
 * Note: p is only dereferenced inside TRACE(), which compiles to nothing when
 * TRACE_LEVEL is 0, hence the __rte_unused marker.
 */
static inline void
__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;

	/* Size of the variable trailing part, read from meta-data. */
	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];
	uint32_t n_bytes = ip->io.hdr.n_bytes[0];

	struct header_runtime *h = &t->headers[header_id];

	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
	      p->thread_id,
	      header_id,
	      n_bytes,
	      n_bytes_last);

	/* Total header size = fixed part + variable part. */
	n_bytes += n_bytes_last;

	/* Headers. */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	h->n_bytes = n_bytes;

	/* Packet: consume the extracted bytes. */
	t->pkt.offset = offset + n_bytes;
	t->pkt.length = length - n_bytes;
	t->ptr = ptr + n_bytes;
}
2030 
/* Look ahead at the next header: map the header structure onto the current
 * packet position and mark it valid, but do NOT advance the packet walk state
 * (offset/length/ptr stay untouched), unlike extract.
 */
static inline void
__instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;

	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];

	TRACE("[Thread %2u]: lookahead header %u\n",
	      p->thread_id,
	      header_id);

	/* Headers. */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
}
2050 
2051 /*
2052  * emit.
2053  */
/* Emit up to n_emit headers. Invalid headers are skipped. Valid headers are
 * appended to the thread's headers_out[] list; a header whose bytes are
 * contiguous in memory with the previous output entry is coalesced into that
 * entry (its byte count is grown) instead of opening a new one, so the later
 * TX stage can copy fewer, larger chunks.
 */
static inline void
__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip,
			   uint32_t n_emit)
{
	uint64_t valid_headers = t->valid_headers;
	uint32_t n_headers_out = t->n_headers_out;
	struct header_out_runtime *ho = NULL;	/* Current output entry; NULL until first valid header. */
	uint8_t *ho_ptr = NULL;
	uint32_t ho_nbytes = 0, i;

	for (i = 0; i < n_emit; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];

		struct header_runtime *hi = &t->headers[header_id];
		uint8_t *hi_ptr0 = hi->ptr0;
		uint32_t n_bytes = hi->n_bytes;

		uint8_t *hi_ptr = t->structs[struct_id];

		/* Skip headers that are not currently valid. */
		if (!MASK64_BIT_GET(valid_headers, header_id)) {
			TRACE("[Thread %2u]: emit header %u (invalid)\n",
			      p->thread_id,
			      header_id);

			continue;
		}

		TRACE("[Thread %2u]: emit header %u (valid)\n",
		      p->thread_id,
		      header_id);

		/* Headers. */
		if (!ho) {
			/* First valid header of this instruction: either start the
			 * output list or resume appending to its last entry.
			 */
			if (!n_headers_out) {
				ho = &t->headers_out[0];

				ho->ptr0 = hi_ptr0;
				ho->ptr = hi_ptr;

				ho_ptr = hi_ptr;
				ho_nbytes = n_bytes;

				n_headers_out = 1;

				continue;
			} else {
				ho = &t->headers_out[n_headers_out - 1];

				ho_ptr = ho->ptr;
				ho_nbytes = ho->n_bytes;
			}
		}

		/* Coalesce when this header immediately follows the current output
		 * entry in memory; otherwise close the entry and open a new one.
		 */
		if (ho_ptr + ho_nbytes == hi_ptr) {
			ho_nbytes += n_bytes;
		} else {
			ho->n_bytes = ho_nbytes;

			ho++;
			ho->ptr0 = hi_ptr0;
			ho->ptr = hi_ptr;

			ho_ptr = hi_ptr;
			ho_nbytes = n_bytes;

			n_headers_out++;
		}
	}

	/* Flush the byte count of the last (still open) output entry. */
	if (ho)
		ho->n_bytes = ho_nbytes;
	t->n_headers_out = n_headers_out;
}
2130 
/* Emit a single header. */
static inline void
__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
		      struct thread *t,
		      const struct instruction *ip)
{
	__instr_hdr_emit_many_exec(p, t, ip, 1);
}
2138 
/* Fused emit + tx: emit 1 header, then transmit the packet. */
static inline void
__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 1);
	__instr_tx_exec(p, t, ip);
}
2149 
/* Fused emit + tx: emit 2 headers, then transmit the packet. */
static inline void
__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 2);
	__instr_tx_exec(p, t, ip);
}
2160 
/* Fused emit + tx: emit 3 headers, then transmit the packet. */
static inline void
__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 3);
	__instr_tx_exec(p, t, ip);
}
2171 
/* Fused emit + tx: emit 4 headers, then transmit the packet. */
static inline void
__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 4);
	__instr_tx_exec(p, t, ip);
}
2182 
/* Fused emit + tx: emit 5 headers, then transmit the packet. */
static inline void
__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 5);
	__instr_tx_exec(p, t, ip);
}
2193 
/* Fused emit + tx: emit 6 headers, then transmit the packet. */
static inline void
__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 6);
	__instr_tx_exec(p, t, ip);
}
2204 
/* Fused emit + tx: emit 7 headers, then transmit the packet. */
static inline void
__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 7);
	__instr_tx_exec(p, t, ip);
}
2215 
/* Fused emit + tx: emit 8 headers, then transmit the packet. */
static inline void
__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 8);
	__instr_tx_exec(p, t, ip);
}
2226 
2227 /*
2228  * validate.
2229  */
/* Mark a header valid. A freshly validated header is backed by its default
 * storage (h->ptr0); a header that is already valid keeps whatever storage it
 * currently points to (see the in-line comment below).
 */
static inline void
__instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
			  struct thread *t,
			  const struct instruction *ip)
{
	uint32_t header_id = ip->valid.header_id;
	uint32_t struct_id = ip->valid.struct_id;
	uint64_t valid_headers = t->valid_headers;
	struct header_runtime *h = &t->headers[header_id];

	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);

	/* If this header is already valid, then its associated t->structs[] element is also valid
	 * and therefore it should not be modified. It could point to the packet buffer (in case of
	 * extracted header) and setting it to the default location (h->ptr0) would be incorrect.
	 */
	if (MASK64_BIT_GET(valid_headers, header_id))
		return;

	/* Headers. */
	t->structs[struct_id] = h->ptr0;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
}
2253 
2254 /*
2255  * invalidate.
2256  */
/* Mark a header invalid by clearing its bit in the thread's valid-header mask.
 * The associated t->structs[] pointer is deliberately left untouched.
 */
static inline void
__instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint32_t header_id = ip->valid.header_id;

	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);

	/* Headers. */
	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
}
2269 
2270 /*
2271  * learn.
2272  */
/* Execute the "learn" instruction: add an entry for the current lookup key to
 * the learner table most recently looked up by this thread (t->learner_id).
 * The action arguments start at the given meta-data offset and the entry
 * timeout ID is read from meta-data as well. The per-learner statistics
 * counter indexed by the returned status is incremented.
 *
 * NOTE(review): the trace labels non-zero status as "ok" and zero as "error";
 * confirm this matches the return convention of rte_swx_table_learner_add()
 * and the n_pkts_learn[] index mapping used by the ctl API.
 */
static inline void
__instr_learn_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip)
{
	uint64_t action_id = ip->learn.action_id;
	uint32_t mf_first_arg_offset = ip->learn.mf_first_arg_offset;
	uint32_t timeout_id = METADATA_READ(t, ip->learn.mf_timeout_id_offset,
		ip->learn.mf_timeout_id_n_bits);
	uint32_t learner_id = t->learner_id;
	/* Learner table states are stored after all regular tables and selectors. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];
	uint32_t status;

	/* Table. */
	status = rte_swx_table_learner_add(ts->obj,
					   l->mailbox,
					   t->time,
					   action_id,
					   &t->metadata[mf_first_arg_offset],
					   timeout_id);

	TRACE("[Thread %2u] learner %u learn %s\n",
	      p->thread_id,
	      learner_id,
	      status ? "ok" : "error");

	stats->n_pkts_learn[status] += 1;
}
2304 
2305 /*
2306  * rearm.
2307  */
/* Execute the "rearm" instruction: restart the expiration timer of the current
 * learner table entry with its existing timeout.
 */
static inline void
__instr_rearm_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip __rte_unused)
{
	uint32_t learner_id = t->learner_id;
	/* Learner table states are stored after all regular tables and selectors. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_rearm(ts->obj, l->mailbox, t->time);

	TRACE("[Thread %2u] learner %u rearm\n",
	      p->thread_id,
	      learner_id);

	stats->n_pkts_rearm += 1;
}
2328 
/* Execute the "rearm" instruction with an explicit timeout: restart the
 * expiration timer of the current learner table entry using a new timeout ID
 * read from the meta-data field designated by the instruction.
 */
static inline void
__instr_rearm_new_exec(struct rte_swx_pipeline *p,
		       struct thread *t,
		       const struct instruction *ip)
{
	uint32_t timeout_id = METADATA_READ(t, ip->learn.mf_timeout_id_offset,
		ip->learn.mf_timeout_id_n_bits);
	uint32_t learner_id = t->learner_id;
	/* Learner table states are stored after all regular tables and selectors. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_rearm_new(ts->obj, l->mailbox, t->time, timeout_id);

	TRACE("[Thread %2u] learner %u rearm with timeout ID %u\n",
	      p->thread_id,
	      learner_id,
	      timeout_id);

	stats->n_pkts_rearm += 1;
}
2352 
2353 /*
2354  * forget.
2355  */
/* Execute the "forget" instruction: delete the current entry from the learner
 * table most recently looked up by this thread.
 */
static inline void
__instr_forget_exec(struct rte_swx_pipeline *p,
		    struct thread *t,
		    const struct instruction *ip __rte_unused)
{
	uint32_t learner_id = t->learner_id;
	/* Learner table states are stored after all regular tables and selectors. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_delete(ts->obj, l->mailbox);

	TRACE("[Thread %2u] learner %u forget\n",
	      p->thread_id,
	      learner_id);

	stats->n_pkts_forget += 1;
}
2376 
2377 /*
2378  * extern.
2379  */
/* Execute an extern object member function call. Returns the function's
 * completion status; a zero return presumably means the call has not completed
 * and must be resumed (thread yield) — confirm against the
 * rte_swx_extern_type_member_func_t contract in rte_swx_extern.h.
 */
static inline uint32_t
__instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	uint32_t obj_id = ip->ext_obj.ext_obj_id;
	uint32_t func_id = ip->ext_obj.func_id;
	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
	uint32_t done;

	TRACE("[Thread %2u] extern obj %u member func %u\n",
	      p->thread_id,
	      obj_id,
	      func_id);

	done = func(obj->obj, obj->mailbox);

	return done;
}
2400 
/* Execute an extern (free-standing) function call. Returns the function's
 * completion status; see the note on __instr_extern_obj_exec() return
 * semantics.
 */
static inline uint32_t
__instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip)
{
	uint32_t ext_func_id = ip->ext_func.ext_func_id;
	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
	rte_swx_extern_func_t func = ext_func->func;
	uint32_t done;

	TRACE("[Thread %2u] extern func %u\n",
	      p->thread_id,
	      ext_func_id);

	done = func(ext_func->mailbox);

	return done;
}
2419 
2420 /*
2421  * hash.
2422  */
/* Execute the "hash" instruction: compute the configured hash function over
 * n_src_bytes bytes of the source struct (header or meta-data, starting at
 * src_offset) with seed 0 and write the 32-bit result into the destination
 * meta-data field.
 */
static inline void
__instr_hash_func_exec(struct rte_swx_pipeline *p,
		       struct thread *t,
		       const struct instruction *ip)
{
	uint32_t hash_func_id = ip->hash_func.hash_func_id;
	uint32_t dst_offset = ip->hash_func.dst.offset;
	uint32_t n_dst_bits = ip->hash_func.dst.n_bits;
	uint32_t src_struct_id = ip->hash_func.src.struct_id;
	uint32_t src_offset = ip->hash_func.src.offset;
	uint32_t n_src_bytes = ip->hash_func.src.n_bytes;

	struct hash_func_runtime *func = &p->hash_func_runtime[hash_func_id];
	uint8_t *src_ptr = t->structs[src_struct_id];
	uint32_t result;

	TRACE("[Thread %2u] hash %u\n",
	      p->thread_id,
	      hash_func_id);

	result = func->func(&src_ptr[src_offset], n_src_bytes, 0);
	METADATA_WRITE(t, dst_offset, n_dst_bits, result);
}
2446 
2447 /*
2448  * mov.
2449  */
/* Execute the "mov" instruction (generic operand variant): dispatch to the MOV
 * macro selected for this operand combination at instruction translation time.
 */
static inline void
__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
		 struct thread *t,
		 const struct instruction *ip)
{
	TRACE("[Thread %2u] mov\n", p->thread_id);

	MOV(t, ip);
}
2459 
/* Execute the "mov" instruction, mh operand variant (MOV_MH macro). */
static inline void
__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);

	MOV_MH(t, ip);
}
2469 
/* Execute the "mov" instruction, hm operand variant (MOV_HM macro). */
static inline void
__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);

	MOV_HM(t, ip);
}
2479 
/* Execute the "mov" instruction, hh operand variant (MOV_HH macro). */
static inline void
__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);

	MOV_HH(t, ip);
}
2489 
/* Execute the "mov" instruction for operands wider than 64 bits: a straight
 * byte copy of n = dst.n_bits / 8 bytes from source struct to destination
 * struct, performed as the largest possible power-of-two transfers (8, then 4,
 * then 2, then 1 bytes).
 *
 * NOTE(review): the casts to uint64_t/uint32_t/uint16_t pointers assume the
 * target CPU tolerates the resulting (possibly unaligned) accesses — TODO
 * confirm against the platforms DPDK supports for this library.
 */
static inline void
__instr_mov_dma_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	uint8_t *dst_struct = t->structs[ip->mov.dst.struct_id];
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->mov.dst.offset];
	uint32_t *dst32_ptr;
	uint16_t *dst16_ptr;
	uint8_t *dst8_ptr;

	uint8_t *src_struct = t->structs[ip->mov.src.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->mov.src.offset];
	uint32_t *src32_ptr;
	uint16_t *src16_ptr;
	uint8_t *src8_ptr;

	/* Total transfer size in bytes. */
	uint32_t n = ip->mov.dst.n_bits >> 3, i;

	TRACE("[Thread %2u] mov (dma) %u bytes\n", p->thread_id, n);

	/* 8-byte transfers. */
	for (i = 0; i < n >> 3; i++)
		*dst64_ptr++ = *src64_ptr++;

	/* 4-byte transfers. */
	n &= 7;
	dst32_ptr = (uint32_t *)dst64_ptr;
	src32_ptr = (uint32_t *)src64_ptr;

	for (i = 0; i < n >> 2; i++)
		*dst32_ptr++ = *src32_ptr++;

	/* 2-byte transfers. */
	n &= 3;
	dst16_ptr = (uint16_t *)dst32_ptr;
	src16_ptr = (uint16_t *)src32_ptr;

	for (i = 0; i < n >> 1; i++)
		*dst16_ptr++ = *src16_ptr++;

	/* 1-byte transfer. */
	n &= 1;
	dst8_ptr = (uint8_t *)dst16_ptr;
	src8_ptr = (uint8_t *)src16_ptr;
	if (n)
		*dst8_ptr = *src8_ptr;
}
2538 
/* Execute the "mov" instruction for exactly 128-bit operands: copy 16 bytes as
 * two 64-bit words. Same unaligned-access caveat as __instr_mov_dma_exec().
 */
static inline void
__instr_mov_128_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	uint8_t *dst_struct = t->structs[ip->mov.dst.struct_id];
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->mov.dst.offset];

	uint8_t *src_struct = t->structs[ip->mov.src.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->mov.src.offset];

	TRACE("[Thread %2u] mov (128)\n", p->thread_id);

	dst64_ptr[0] = src64_ptr[0];
	dst64_ptr[1] = src64_ptr[1];
}
2555 
/* Execute the "mov" instruction with an immediate source value (MOV_I macro). */
static inline void
__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
		   struct thread *t,
		   const struct instruction *ip)
{
	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);

	MOV_I(t, ip);
}
2565 
2566 /*
2567  * dma.
2568  */
/* Execute up to n_dma "dma h.s t.f" instructions: copy n_bytes of action data
 * (t->structs[0] is the action data struct) into each destination header and
 * mark the header valid. When the header is not currently valid, the copy goes
 * to its default storage (h->ptr0) and t->structs[] is repointed there;
 * otherwise the header's current storage is overwritten in place.
 */
static inline void
__instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip,
			 uint32_t n_dma)
{
	uint8_t *action_data = t->structs[0];
	uint64_t valid_headers = t->valid_headers;
	uint32_t i;

	for (i = 0; i < n_dma; i++) {
		uint32_t header_id = ip->dma.dst.header_id[i];
		uint32_t struct_id = ip->dma.dst.struct_id[i];
		uint32_t offset = ip->dma.src.offset[i];
		uint32_t n_bytes = ip->dma.n_bytes[i];

		struct header_runtime *h = &t->headers[header_id];
		uint8_t *h_ptr0 = h->ptr0;
		uint8_t *h_ptr = t->structs[struct_id];

		/* Valid header: overwrite in place; invalid: use default storage. */
		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
			h_ptr : h_ptr0;
		void *src = &action_data[offset];

		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);

		/* Headers. */
		memcpy(dst, src, n_bytes);
		t->structs[struct_id] = dst;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	}

	t->valid_headers = valid_headers;
}
2603 
/* Execute a single "dma h.s t.f" instruction. */
static inline void
__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	__instr_dma_ht_many_exec(p, t, ip, 1);
}
2609 
/* Fused handler for 2 consecutive "dma h.s t.f" instructions. */
static inline void
__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 2);
}
2617 
/* Fused handler for 3 consecutive "dma h.s t.f" instructions. */
static inline void
__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 3);
}
2625 
/* Fused handler for 4 consecutive "dma h.s t.f" instructions. */
static inline void
__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 4);
}
2633 
/* Fused handler for 5 consecutive "dma h.s t.f" instructions. */
static inline void
__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 5);
}
2641 
/* Fused handler for 6 consecutive "dma h.s t.f" instructions. */
static inline void
__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 6);
}
2649 
/* Fused handler for 7 consecutive "dma h.s t.f" instructions. */
static inline void
__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 7);
}
2657 
/* Fused handler for 8 consecutive "dma h.s t.f" instructions. */
static inline void
__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 8);
}
2665 
2666 /*
2667  * alu.
2668  */
/* "add" instruction, generic operand variant (ALU macro). */
static inline void
__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] add\n", p->thread_id);

	ALU(t, ip, +);
}
2678 
/* "add" instruction, mh operand variant (ALU_MH macro). */
static inline void
__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mh)\n", p->thread_id);

	ALU_MH(t, ip, +);
}
2688 
/* "add" instruction, hm operand variant (ALU_HM macro). */
static inline void
__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hm)\n", p->thread_id);

	ALU_HM(t, ip, +);
}
2698 
/* "add" instruction, hh operand variant (ALU_HH macro). */
static inline void
__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hh)\n", p->thread_id);

	ALU_HH(t, ip, +);
}
2708 
/* "add" instruction, mi operand variant (ALU_MI macro). */
static inline void
__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mi)\n", p->thread_id);

	ALU_MI(t, ip, +);
}
2718 
/* "add" instruction, hi operand variant (ALU_HI macro). */
static inline void
__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hi)\n", p->thread_id);

	ALU_HI(t, ip, +);
}
2728 
/* "sub" instruction, generic operand variant (ALU macro). */
static inline void
__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] sub\n", p->thread_id);

	ALU(t, ip, -);
}
2738 
/* "sub" instruction, mh operand variant (ALU_MH macro). */
static inline void
__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);

	ALU_MH(t, ip, -);
}
2748 
/* "sub" instruction, hm operand variant (ALU_HM macro). */
static inline void
__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);

	ALU_HM(t, ip, -);
}
2758 
/* "sub" instruction, hh operand variant (ALU_HH macro). */
static inline void
__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);

	ALU_HH(t, ip, -);
}
2768 
/* "sub" instruction, mi operand variant (ALU_MI macro). */
static inline void
__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);

	ALU_MI(t, ip, -);
}
2778 
/* "sub" instruction, hi operand variant (ALU_HI macro). */
static inline void
__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);

	ALU_HI(t, ip, -);
}
2788 
/* "shl" instruction, generic operand variant (ALU macro). */
static inline void
__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shl\n", p->thread_id);

	ALU(t, ip, <<);
}
2798 
/* "shl" instruction, mh operand variant (ALU_MH macro). */
static inline void
__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);

	ALU_MH(t, ip, <<);
}
2808 
/* "shl" instruction, hm operand variant (ALU_HM macro). */
static inline void
__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);

	ALU_HM(t, ip, <<);
}
2818 
/* "shl" instruction, hh operand variant (ALU_HH macro). */
static inline void
__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);

	ALU_HH(t, ip, <<);
}
2828 
/* "shl" instruction, mi operand variant (ALU_MI macro). */
static inline void
__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);

	ALU_MI(t, ip, <<);
}
2838 
/* "shl" instruction, hi operand variant (ALU_HI macro). */
static inline void
__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);

	ALU_HI(t, ip, <<);
}
2848 
/* "shr" instruction, generic operand variant (ALU macro). */
static inline void
__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shr\n", p->thread_id);

	ALU(t, ip, >>);
}
2858 
/* "shr" instruction, mh operand variant (ALU_MH macro). */
static inline void
__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);

	ALU_MH(t, ip, >>);
}
2868 
/* "shr" instruction, hm operand variant (ALU_HM macro). */
static inline void
__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);

	ALU_HM(t, ip, >>);
}
2878 
/* "shr" instruction, hh operand variant (ALU_HH macro). */
static inline void
__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);

	ALU_HH(t, ip, >>);
}
2888 
/* "shr" instruction, mi operand variant (ALU_MI macro). */
static inline void
__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);

	/* Structs. */
	ALU_MI(t, ip, >>);
}
2899 
/* "shr" instruction, hi operand variant (ALU_HI macro). */
static inline void
__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);

	ALU_HI(t, ip, >>);
}
2909 
/* "and" instruction, generic operand variant (ALU macro). */
static inline void
__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] and\n", p->thread_id);

	ALU(t, ip, &);
}
2919 
/* "and" instruction, mh operand variant (ALU_MH macro). */
static inline void
__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (mh)\n", p->thread_id);

	ALU_MH(t, ip, &);
}
2929 
/* "and" instruction, hm operand variant. Bitwise ops can use the FAST macro
 * form, which the arithmetic ops above cannot.
 */
static inline void
__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, &);
}
2939 
/* "and" instruction, hh operand variant (ALU_HH_FAST macro). */
static inline void
__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, &);
}
2949 
/* "and" instruction with an immediate operand (ALU_I macro). */
static inline void
__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] and (i)\n", p->thread_id);

	ALU_I(t, ip, &);
}
2959 
/* "or" instruction, generic operand variant (ALU macro). */
static inline void
__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] or\n", p->thread_id);

	ALU(t, ip, |);
}
2969 
/* "or" instruction, mh operand variant (ALU_MH macro). */
static inline void
__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (mh)\n", p->thread_id);

	ALU_MH(t, ip, |);
}
2979 
/* "or" instruction, hm operand variant (ALU_HM_FAST macro). */
static inline void
__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, |);
}
2989 
/* "or" instruction, hh operand variant (ALU_HH_FAST macro). */
static inline void
__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, |);
}
2999 
/* "or" instruction with an immediate operand (ALU_I macro). */
static inline void
__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u] or (i)\n", p->thread_id);

	ALU_I(t, ip, |);
}
3009 
/* "xor" instruction, generic operand variant (ALU macro). */
static inline void
__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] xor\n", p->thread_id);

	ALU(t, ip, ^);
}
3019 
/* "xor" instruction, mh operand variant (ALU_MH macro). */
static inline void
__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);

	ALU_MH(t, ip, ^);
}
3029 
/* "xor" instruction, hm operand variant (ALU_HM_FAST macro). */
static inline void
__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, ^);
}
3039 
/* "xor" instruction, hh operand variant (ALU_HH_FAST macro). */
static inline void
__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, ^);
}
3049 
/* "xor" instruction with an immediate operand (ALU_I macro). */
static inline void
__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (i)\n", p->thread_id);

	ALU_I(t, ip, ^);
}
3059 
/* Execute the "ckadd" instruction with a header field source: incrementally
 * update the 16-bit ones'-complement checksum at dst (e.g. an IPv4/L4 checksum
 * field) by adding the value of the src field, folded as two 32-bit halves
 * (RFC 1071-style folding; the incremental-update idea matches RFC 1624).
 *
 * NOTE(review): the mask computation shifts by (64 - n_bits), which is UB for
 * n_bits == 0 or > 64 — presumably ruled out at instruction translation time;
 * confirm.
 */
static inline void
__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* The first input (r) is a 16-bit number. The second and the third
	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
	 * three numbers (output r) is a 34-bit number.
	 */
	r += (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is an 18-bit
	 * number. In the worst case scenario, the sum of the two numbers is a
	 * 19-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
	 * therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3117 
/*
 * cksub (field): subtract the value of a source field (up to 64 bits wide)
 * from the 16-bit ones' complement checksum held in the destination field.
 * This is the incremental checksum update operation (cf. RFC 1624): the
 * subtraction is mapped onto 2's complement arithmetic by pre-biasing the
 * minuend with a multiple of the 0xFFFF modulus, as explained inline.
 */
static inline void
__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);

	/* Structs: dst is the 16-bit checksum field, src is the field to subtract. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
	 * the following sequence of operations in 2's complement arithmetic:
	 *    a '- b = (a - b) % 0xFFFF.
	 *
	 * In order to prevent an underflow for the below subtraction, in which
	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
	 * minuend), we first add a multiple of the 0xFFFF modulus to the
	 * minuend. The number we add to the minuend needs to be a 34-bit number
	 * or higher, so for readability reasons we picked the 36-bit multiple.
	 * We are effectively turning the 16-bit minuend into a 36-bit number:
	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
	 */
	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */

	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
	 * result (the output r) is a 36-bit number.
	 */
	r -= (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is a 20-bit
	 * number. Their sum is a 21-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	/* Canonicalize the zero representation to 0xFFFF (see ckadd). */
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3187 
/*
 * ckadd (struct, 20-byte fast path): compute the ones' complement checksum
 * over a 20-byte header -- the size of an IPv4 header without options --
 * summed as five unrolled 32-bit words instead of the generic loop.
 *
 * Initializing the sum with the destination's 1's complement cancels out the
 * current checksum field's own contribution to the word sum (in ones'
 * complement arithmetic, x + ~x == 0xFFFF == 0), so the checksum field does
 * not need to be zeroed in the header before this computation.
 */
static inline void
__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
				struct thread *t,
				const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint32_t *src32_ptr;
	uint64_t r0, r1;

	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Initialize the result with destination 1's complement. */
	r0 = dst;
	r0 = ~r0 & 0xFFFF;

	/* Two independent accumulators (r0, r1) to expose instruction-level
	 * parallelism; merged below. The bit-width notes bound the carries.
	 */
	r0 += src32_ptr[0]; /* The output r0 is a 33-bit number. */
	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
	r0 += src32_ptr[2]; /* The output r0 is a 34-bit number. */
	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */

	/* The first input is a 16-bit number. The second input is a 19-bit
	 * number. Their sum is a 20-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* Apply 1's complement to the result. */
	r0 = ~r0 & 0xFFFF;
	/* Canonicalize the zero representation to 0xFFFF (see ckadd field). */
	r0 = r0 ? r0 : 0xFFFF;

	*dst16_ptr = (uint16_t)r0;
}
3241 
/*
 * ckadd (struct): compute the ones' complement checksum over a whole header
 * and store it into the 16-bit destination field. The 20-byte header case is
 * dispatched to the unrolled fast path above.
 *
 * Only complete 32-bit words are summed (n_src_header_bytes / 4 iterations);
 * presumably header sizes here are always multiples of 4 bytes -- any
 * trailing 1-3 bytes would be silently ignored.
 */
static inline void
__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip)
{
	uint32_t src_header_id = ip->alu.src.n_bits; /* The src header ID is stored here. */
	uint32_t n_src_header_bytes = t->headers[src_header_id].n_bytes;
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint32_t *src32_ptr;
	uint64_t r;
	uint32_t i;

	if (n_src_header_bytes == 20) {
		__instr_alu_ckadd_struct20_exec(p, t, ip);
		return;
	}

	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Initialize the result with destination 1's complement. This cancels
	 * the checksum field's own contribution to the sum (x + ~x == 0 in
	 * ones' complement), so the field need not be zeroed beforehand.
	 */
	r = dst;
	r = ~r & 0xFFFF;

	/* The max number of 32-bit words in a 32K-byte header is 2^13.
	 * Therefore, in the worst case scenario, a 45-bit number is added to a
	 * 16-bit number (the input r), so the output r is 46-bit number.
	 */
	for (i = 0; i < n_src_header_bytes / 4; i++, src32_ptr++)
		r += *src32_ptr;

	/* The first input is a 16-bit number. The second input is a 30-bit
	 * number. Their sum is a 31-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 15-bit number (0 .. 0x7FFF). The sum is a 17-bit number (0 .. 0x17FFE).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x17FFE), the output r is (0 .. 0x7FFF). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. */
	r = ~r & 0xFFFF;
	/* Canonicalize the zero representation to 0xFFFF (see ckadd field). */
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3304 
3305 /*
3306  * Register array.
3307  */
3308 static inline uint64_t *
3309 instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
3310 {
3311 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3312 	return r->regarray;
3313 }
3314 
3315 static inline uint64_t
3316 instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3317 {
3318 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3319 
3320 	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
3321 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
3322 	uint64_t idx64 = *idx64_ptr;
3323 	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
3324 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
3325 
3326 	return idx;
3327 }
3328 
3329 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3330 
3331 static inline uint64_t
3332 instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3333 {
3334 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3335 
3336 	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
3337 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
3338 	uint64_t idx64 = *idx64_ptr;
3339 	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;
3340 
3341 	return idx;
3342 }
3343 
3344 #else
3345 
3346 #define instr_regarray_idx_nbo instr_regarray_idx_hbo
3347 
3348 #endif
3349 
3350 static inline uint64_t
3351 instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3352 {
3353 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3354 
3355 	uint64_t idx = ip->regarray.idx_val & r->size_mask;
3356 
3357 	return idx;
3358 }
3359 
3360 static inline uint64_t
3361 instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
3362 {
3363 	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
3364 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
3365 	uint64_t src64 = *src64_ptr;
3366 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3367 	uint64_t src = src64 & src64_mask;
3368 
3369 	return src;
3370 }
3371 
3372 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3373 
3374 static inline uint64_t
3375 instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
3376 {
3377 	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
3378 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
3379 	uint64_t src64 = *src64_ptr;
3380 	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);
3381 
3382 	return src;
3383 }
3384 
3385 #else
3386 
3387 #define instr_regarray_src_nbo instr_regarray_src_hbo
3388 
3389 #endif
3390 
3391 static inline void
3392 instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
3393 {
3394 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
3395 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
3396 	uint64_t dst64 = *dst64_ptr;
3397 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3398 
3399 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3400 
3401 }
3402 
3403 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3404 
3405 static inline void
3406 instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
3407 {
3408 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
3409 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
3410 	uint64_t dst64 = *dst64_ptr;
3411 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3412 
3413 	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
3414 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3415 }
3416 
3417 #else
3418 
3419 #define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
3420 
3421 #endif
3422 
3423 static inline void
3424 __instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
3425 			    struct thread *t,
3426 			    const struct instruction *ip)
3427 {
3428 	uint64_t *regarray, idx;
3429 
3430 	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
3431 
3432 	regarray = instr_regarray_regarray(p, ip);
3433 	idx = instr_regarray_idx_nbo(p, t, ip);
3434 	rte_prefetch0(&regarray[idx]);
3435 }
3436 
3437 static inline void
3438 __instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
3439 			    struct thread *t,
3440 			    const struct instruction *ip)
3441 {
3442 	uint64_t *regarray, idx;
3443 
3444 	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
3445 
3446 	regarray = instr_regarray_regarray(p, ip);
3447 	idx = instr_regarray_idx_hbo(p, t, ip);
3448 	rte_prefetch0(&regarray[idx]);
3449 }
3450 
3451 static inline void
3452 __instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
3453 			    struct thread *t __rte_unused,
3454 			    const struct instruction *ip)
3455 {
3456 	uint64_t *regarray, idx;
3457 
3458 	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
3459 
3460 	regarray = instr_regarray_regarray(p, ip);
3461 	idx = instr_regarray_idx_imm(p, ip);
3462 	rte_prefetch0(&regarray[idx]);
3463 }
3464 
3465 static inline void
3466 __instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
3467 		       struct thread *t,
3468 		       const struct instruction *ip)
3469 {
3470 	uint64_t *regarray, idx;
3471 
3472 	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
3473 
3474 	regarray = instr_regarray_regarray(p, ip);
3475 	idx = instr_regarray_idx_nbo(p, t, ip);
3476 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3477 }
3478 
3479 static inline void
3480 __instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
3481 		       struct thread *t,
3482 		       const struct instruction *ip)
3483 {
3484 	uint64_t *regarray, idx;
3485 
3486 	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
3487 
3488 	/* Structs. */
3489 	regarray = instr_regarray_regarray(p, ip);
3490 	idx = instr_regarray_idx_hbo(p, t, ip);
3491 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3492 }
3493 
3494 static inline void
3495 __instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3496 {
3497 	uint64_t *regarray, idx;
3498 
3499 	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
3500 
3501 	regarray = instr_regarray_regarray(p, ip);
3502 	idx = instr_regarray_idx_nbo(p, t, ip);
3503 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3504 }
3505 
3506 static inline void
3507 __instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3508 {
3509 	uint64_t *regarray, idx;
3510 
3511 	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
3512 
3513 	regarray = instr_regarray_regarray(p, ip);
3514 	idx = instr_regarray_idx_hbo(p, t, ip);
3515 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3516 }
3517 
3518 static inline void
3519 __instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3520 {
3521 	uint64_t *regarray, idx;
3522 
3523 	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
3524 
3525 	regarray = instr_regarray_regarray(p, ip);
3526 	idx = instr_regarray_idx_imm(p, ip);
3527 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3528 }
3529 
3530 static inline void
3531 __instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3532 {
3533 	uint64_t *regarray, idx;
3534 
3535 	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
3536 
3537 	regarray = instr_regarray_regarray(p, ip);
3538 	idx = instr_regarray_idx_imm(p, ip);
3539 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3540 }
3541 
3542 static inline void
3543 __instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3544 {
3545 	uint64_t *regarray, idx, src;
3546 
3547 	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
3548 
3549 	regarray = instr_regarray_regarray(p, ip);
3550 	idx = instr_regarray_idx_nbo(p, t, ip);
3551 	src = instr_regarray_src_nbo(t, ip);
3552 	regarray[idx] = src;
3553 }
3554 
3555 static inline void
3556 __instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3557 {
3558 	uint64_t *regarray, idx, src;
3559 
3560 	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
3561 
3562 	regarray = instr_regarray_regarray(p, ip);
3563 	idx = instr_regarray_idx_nbo(p, t, ip);
3564 	src = instr_regarray_src_hbo(t, ip);
3565 	regarray[idx] = src;
3566 }
3567 
3568 static inline void
3569 __instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3570 {
3571 	uint64_t *regarray, idx, src;
3572 
3573 	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
3574 
3575 	regarray = instr_regarray_regarray(p, ip);
3576 	idx = instr_regarray_idx_hbo(p, t, ip);
3577 	src = instr_regarray_src_nbo(t, ip);
3578 	regarray[idx] = src;
3579 }
3580 
3581 static inline void
3582 __instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3583 {
3584 	uint64_t *regarray, idx, src;
3585 
3586 	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
3587 
3588 	regarray = instr_regarray_regarray(p, ip);
3589 	idx = instr_regarray_idx_hbo(p, t, ip);
3590 	src = instr_regarray_src_hbo(t, ip);
3591 	regarray[idx] = src;
3592 }
3593 
3594 static inline void
3595 __instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3596 {
3597 	uint64_t *regarray, idx, src;
3598 
3599 	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
3600 
3601 	regarray = instr_regarray_regarray(p, ip);
3602 	idx = instr_regarray_idx_nbo(p, t, ip);
3603 	src = ip->regarray.dstsrc_val;
3604 	regarray[idx] = src;
3605 }
3606 
3607 static inline void
3608 __instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3609 {
3610 	uint64_t *regarray, idx, src;
3611 
3612 	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
3613 
3614 	regarray = instr_regarray_regarray(p, ip);
3615 	idx = instr_regarray_idx_hbo(p, t, ip);
3616 	src = ip->regarray.dstsrc_val;
3617 	regarray[idx] = src;
3618 }
3619 
3620 static inline void
3621 __instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3622 {
3623 	uint64_t *regarray, idx, src;
3624 
3625 	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
3626 
3627 	regarray = instr_regarray_regarray(p, ip);
3628 	idx = instr_regarray_idx_imm(p, ip);
3629 	src = instr_regarray_src_nbo(t, ip);
3630 	regarray[idx] = src;
3631 }
3632 
3633 static inline void
3634 __instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3635 {
3636 	uint64_t *regarray, idx, src;
3637 
3638 	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
3639 
3640 	regarray = instr_regarray_regarray(p, ip);
3641 	idx = instr_regarray_idx_imm(p, ip);
3642 	src = instr_regarray_src_hbo(t, ip);
3643 	regarray[idx] = src;
3644 }
3645 
3646 static inline void
3647 __instr_regwr_rii_exec(struct rte_swx_pipeline *p,
3648 		       struct thread *t __rte_unused,
3649 		       const struct instruction *ip)
3650 {
3651 	uint64_t *regarray, idx, src;
3652 
3653 	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
3654 
3655 	regarray = instr_regarray_regarray(p, ip);
3656 	idx = instr_regarray_idx_imm(p, ip);
3657 	src = ip->regarray.dstsrc_val;
3658 	regarray[idx] = src;
3659 }
3660 
3661 static inline void
3662 __instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3663 {
3664 	uint64_t *regarray, idx, src;
3665 
3666 	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
3667 
3668 	regarray = instr_regarray_regarray(p, ip);
3669 	idx = instr_regarray_idx_nbo(p, t, ip);
3670 	src = instr_regarray_src_nbo(t, ip);
3671 	regarray[idx] += src;
3672 }
3673 
3674 static inline void
3675 __instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3676 {
3677 	uint64_t *regarray, idx, src;
3678 
3679 	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
3680 
3681 	regarray = instr_regarray_regarray(p, ip);
3682 	idx = instr_regarray_idx_nbo(p, t, ip);
3683 	src = instr_regarray_src_hbo(t, ip);
3684 	regarray[idx] += src;
3685 }
3686 
3687 static inline void
3688 __instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3689 {
3690 	uint64_t *regarray, idx, src;
3691 
3692 	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
3693 
3694 	regarray = instr_regarray_regarray(p, ip);
3695 	idx = instr_regarray_idx_hbo(p, t, ip);
3696 	src = instr_regarray_src_nbo(t, ip);
3697 	regarray[idx] += src;
3698 }
3699 
3700 static inline void
3701 __instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3702 {
3703 	uint64_t *regarray, idx, src;
3704 
3705 	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
3706 
3707 	regarray = instr_regarray_regarray(p, ip);
3708 	idx = instr_regarray_idx_hbo(p, t, ip);
3709 	src = instr_regarray_src_hbo(t, ip);
3710 	regarray[idx] += src;
3711 }
3712 
3713 static inline void
3714 __instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3715 {
3716 	uint64_t *regarray, idx, src;
3717 
3718 	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
3719 
3720 	regarray = instr_regarray_regarray(p, ip);
3721 	idx = instr_regarray_idx_nbo(p, t, ip);
3722 	src = ip->regarray.dstsrc_val;
3723 	regarray[idx] += src;
3724 }
3725 
3726 static inline void
3727 __instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3728 {
3729 	uint64_t *regarray, idx, src;
3730 
3731 	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
3732 
3733 	regarray = instr_regarray_regarray(p, ip);
3734 	idx = instr_regarray_idx_hbo(p, t, ip);
3735 	src = ip->regarray.dstsrc_val;
3736 	regarray[idx] += src;
3737 }
3738 
3739 static inline void
3740 __instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3741 {
3742 	uint64_t *regarray, idx, src;
3743 
3744 	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
3745 
3746 	regarray = instr_regarray_regarray(p, ip);
3747 	idx = instr_regarray_idx_imm(p, ip);
3748 	src = instr_regarray_src_nbo(t, ip);
3749 	regarray[idx] += src;
3750 }
3751 
3752 static inline void
3753 __instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3754 {
3755 	uint64_t *regarray, idx, src;
3756 
3757 	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
3758 
3759 	regarray = instr_regarray_regarray(p, ip);
3760 	idx = instr_regarray_idx_imm(p, ip);
3761 	src = instr_regarray_src_hbo(t, ip);
3762 	regarray[idx] += src;
3763 }
3764 
3765 static inline void
3766 __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
3767 			struct thread *t __rte_unused,
3768 			const struct instruction *ip)
3769 {
3770 	uint64_t *regarray, idx, src;
3771 
3772 	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
3773 
3774 	regarray = instr_regarray_regarray(p, ip);
3775 	idx = instr_regarray_idx_imm(p, ip);
3776 	src = ip->regarray.dstsrc_val;
3777 	regarray[idx] += src;
3778 }
3779 
3780 /*
3781  * metarray.
3782  */
3783 static inline struct meter *
3784 instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3785 {
3786 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3787 
3788 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3789 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3790 	uint64_t idx64 = *idx64_ptr;
3791 	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
3792 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
3793 
3794 	return &r->metarray[idx];
3795 }
3796 
3797 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3798 
3799 static inline struct meter *
3800 instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3801 {
3802 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3803 
3804 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3805 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3806 	uint64_t idx64 = *idx64_ptr;
3807 	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
3808 
3809 	return &r->metarray[idx];
3810 }
3811 
3812 #else
3813 
3814 #define instr_meter_idx_nbo instr_meter_idx_hbo
3815 
3816 #endif
3817 
3818 static inline struct meter *
3819 instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3820 {
3821 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3822 
3823 	uint64_t idx =  ip->meter.idx_val & r->size_mask;
3824 
3825 	return &r->metarray[idx];
3826 }
3827 
3828 static inline uint32_t
3829 instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
3830 {
3831 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3832 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3833 	uint64_t src64 = *src64_ptr;
3834 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
3835 	uint64_t src = src64 & src64_mask;
3836 
3837 	return (uint32_t)src;
3838 }
3839 
3840 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3841 
3842 static inline uint32_t
3843 instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
3844 {
3845 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3846 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3847 	uint64_t src64 = *src64_ptr;
3848 	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
3849 
3850 	return (uint32_t)src;
3851 }
3852 
3853 #else
3854 
3855 #define instr_meter_length_nbo instr_meter_length_hbo
3856 
3857 #endif
3858 
3859 static inline enum rte_color
3860 instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
3861 {
3862 	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
3863 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
3864 	uint64_t src64 = *src64_ptr;
3865 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
3866 	uint64_t src = src64 & src64_mask;
3867 
3868 	return (enum rte_color)src;
3869 }
3870 
3871 static inline void
3872 instr_meter_color_out_hbo_set(struct thread *t,
3873 			      const struct instruction *ip,
3874 			      enum rte_color color_out)
3875 {
3876 	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
3877 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
3878 	uint64_t dst64 = *dst64_ptr;
3879 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
3880 
3881 	uint64_t src = (uint64_t)color_out;
3882 
3883 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3884 }
3885 
3886 static inline void
3887 __instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
3888 			   struct thread *t,
3889 			   const struct instruction *ip)
3890 {
3891 	struct meter *m;
3892 
3893 	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
3894 
3895 	m = instr_meter_idx_nbo(p, t, ip);
3896 	rte_prefetch0(m);
3897 }
3898 
3899 static inline void
3900 __instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
3901 			   struct thread *t,
3902 			   const struct instruction *ip)
3903 {
3904 	struct meter *m;
3905 
3906 	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
3907 
3908 	m = instr_meter_idx_hbo(p, t, ip);
3909 	rte_prefetch0(m);
3910 }
3911 
3912 static inline void
3913 __instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
3914 			   struct thread *t __rte_unused,
3915 			   const struct instruction *ip)
3916 {
3917 	struct meter *m;
3918 
3919 	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
3920 
3921 	m = instr_meter_idx_imm(p, ip);
3922 	rte_prefetch0(m);
3923 }
3924 
/*
 * meter (hhm): meter index from a header field (NBO), packet length from a
 * header field (NBO), input color from a struct field (HBO). Runs the trTCM
 * color-aware check, stores the output color and updates per-color stats.
 */
static inline void
__instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	rte_prefetch0(m->n_pkts); /* Start fetching the stats counters early. */
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* NOTE(review): color_mask presumably forces color-blind operation
	 * when the meter output color is to be ignored -- confirm.
	 */
	color_out &= m->color_mask;

	/* Counters are read before the color write-back and updated after --
	 * presumably scheduled this way to hide memory latency.
	 */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3957 
/*
 * meter (hhi): meter index from a header field (NBO), packet length from a
 * header field (NBO), input color from the instruction immediate. Runs the
 * trTCM color-aware check, stores the output color and updates stats.
 */
static inline void
__instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	rte_prefetch0(m->n_pkts); /* Start fetching the stats counters early. */
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* NOTE(review): color_mask presumably forces color-blind operation
	 * when the meter output color is to be ignored -- confirm.
	 */
	color_out &= m->color_mask;

	/* Read counters before the color write-back, update after (latency hiding). */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3990 
/*
 * meter (hmm): meter index from a header field (NBO), packet length from a
 * struct field (HBO), input color from a struct field (HBO). Runs the trTCM
 * color-aware check, stores the output color and updates stats.
 */
static inline void
__instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	rte_prefetch0(m->n_pkts); /* Start fetching the stats counters early. */
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* NOTE(review): color_mask presumably forces color-blind operation
	 * when the meter output color is to be ignored -- confirm.
	 */
	color_out &= m->color_mask;

	/* Read counters before the color write-back, update after (latency hiding). */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
4023 
/*
 * meter (hmi): meter index from a header field (NBO), packet length from a
 * struct field (HBO), input color from the instruction immediate. Runs the
 * trTCM color-aware check, stores the output color and updates stats.
 */
static inline void
__instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	rte_prefetch0(m->n_pkts); /* Start fetching the stats counters early. */
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* NOTE(review): color_mask presumably forces color-blind operation
	 * when the meter output color is to be ignored -- confirm.
	 */
	color_out &= m->color_mask;

	/* Read counters before the color write-back, update after (latency hiding). */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
4056 
4057 static inline void
4058 __instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4059 {
4060 	struct meter *m;
4061 	uint64_t time, n_pkts, n_bytes;
4062 	uint32_t length;
4063 	enum rte_color color_in, color_out;
4064 
4065 	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
4066 
4067 	m = instr_meter_idx_hbo(p, t, ip);
4068 	rte_prefetch0(m->n_pkts);
4069 	time = rte_get_tsc_cycles();
4070 	length = instr_meter_length_nbo(t, ip);
4071 	color_in = instr_meter_color_in_hbo(t, ip);
4072 
4073 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4074 		&m->profile->profile,
4075 		time,
4076 		length,
4077 		color_in);
4078 
4079 	color_out &= m->color_mask;
4080 
4081 	n_pkts = m->n_pkts[color_out];
4082 	n_bytes = m->n_bytes[color_out];
4083 
4084 	instr_meter_color_out_hbo_set(t, ip, color_out);
4085 
4086 	m->n_pkts[color_out] = n_pkts + 1;
4087 	m->n_bytes[color_out] = n_bytes + length;
4088 }
4089 
4090 static inline void
4091 __instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4092 {
4093 	struct meter *m;
4094 	uint64_t time, n_pkts, n_bytes;
4095 	uint32_t length;
4096 	enum rte_color color_in, color_out;
4097 
4098 	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
4099 
4100 	m = instr_meter_idx_hbo(p, t, ip);
4101 	rte_prefetch0(m->n_pkts);
4102 	time = rte_get_tsc_cycles();
4103 	length = instr_meter_length_nbo(t, ip);
4104 	color_in = (enum rte_color)ip->meter.color_in_val;
4105 
4106 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4107 		&m->profile->profile,
4108 		time,
4109 		length,
4110 		color_in);
4111 
4112 	color_out &= m->color_mask;
4113 
4114 	n_pkts = m->n_pkts[color_out];
4115 	n_bytes = m->n_bytes[color_out];
4116 
4117 	instr_meter_color_out_hbo_set(t, ip, color_out);
4118 
4119 	m->n_pkts[color_out] = n_pkts + 1;
4120 	m->n_bytes[color_out] = n_bytes + length;
4121 }
4122 
4123 static inline void
4124 __instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4125 {
4126 	struct meter *m;
4127 	uint64_t time, n_pkts, n_bytes;
4128 	uint32_t length;
4129 	enum rte_color color_in, color_out;
4130 
4131 	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
4132 
4133 	m = instr_meter_idx_hbo(p, t, ip);
4134 	rte_prefetch0(m->n_pkts);
4135 	time = rte_get_tsc_cycles();
4136 	length = instr_meter_length_hbo(t, ip);
4137 	color_in = instr_meter_color_in_hbo(t, ip);
4138 
4139 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4140 		&m->profile->profile,
4141 		time,
4142 		length,
4143 		color_in);
4144 
4145 	color_out &= m->color_mask;
4146 
4147 	n_pkts = m->n_pkts[color_out];
4148 	n_bytes = m->n_bytes[color_out];
4149 
4150 	instr_meter_color_out_hbo_set(t, ip, color_out);
4151 
4152 	m->n_pkts[color_out] = n_pkts + 1;
4153 	m->n_bytes[color_out] = n_bytes + length;
4154 }
4155 
4156 static inline void
4157 __instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4158 {
4159 	struct meter *m;
4160 	uint64_t time, n_pkts, n_bytes;
4161 	uint32_t length;
4162 	enum rte_color color_in, color_out;
4163 
4164 	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
4165 
4166 	m = instr_meter_idx_hbo(p, t, ip);
4167 	rte_prefetch0(m->n_pkts);
4168 	time = rte_get_tsc_cycles();
4169 	length = instr_meter_length_hbo(t, ip);
4170 	color_in = (enum rte_color)ip->meter.color_in_val;
4171 
4172 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4173 		&m->profile->profile,
4174 		time,
4175 		length,
4176 		color_in);
4177 
4178 	color_out &= m->color_mask;
4179 
4180 	n_pkts = m->n_pkts[color_out];
4181 	n_bytes = m->n_bytes[color_out];
4182 
4183 	instr_meter_color_out_hbo_set(t, ip, color_out);
4184 
4185 	m->n_pkts[color_out] = n_pkts + 1;
4186 	m->n_bytes[color_out] = n_bytes + length;
4187 }
4188 
4189 static inline void
4190 __instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4191 {
4192 	struct meter *m;
4193 	uint64_t time, n_pkts, n_bytes;
4194 	uint32_t length;
4195 	enum rte_color color_in, color_out;
4196 
4197 	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
4198 
4199 	m = instr_meter_idx_imm(p, ip);
4200 	rte_prefetch0(m->n_pkts);
4201 	time = rte_get_tsc_cycles();
4202 	length = instr_meter_length_nbo(t, ip);
4203 	color_in = instr_meter_color_in_hbo(t, ip);
4204 
4205 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4206 		&m->profile->profile,
4207 		time,
4208 		length,
4209 		color_in);
4210 
4211 	color_out &= m->color_mask;
4212 
4213 	n_pkts = m->n_pkts[color_out];
4214 	n_bytes = m->n_bytes[color_out];
4215 
4216 	instr_meter_color_out_hbo_set(t, ip, color_out);
4217 
4218 	m->n_pkts[color_out] = n_pkts + 1;
4219 	m->n_bytes[color_out] = n_bytes + length;
4220 }
4221 
4222 static inline void
4223 __instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4224 {
4225 	struct meter *m;
4226 	uint64_t time, n_pkts, n_bytes;
4227 	uint32_t length;
4228 	enum rte_color color_in, color_out;
4229 
4230 	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
4231 
4232 	m = instr_meter_idx_imm(p, ip);
4233 	rte_prefetch0(m->n_pkts);
4234 	time = rte_get_tsc_cycles();
4235 	length = instr_meter_length_nbo(t, ip);
4236 	color_in = (enum rte_color)ip->meter.color_in_val;
4237 
4238 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4239 		&m->profile->profile,
4240 		time,
4241 		length,
4242 		color_in);
4243 
4244 	color_out &= m->color_mask;
4245 
4246 	n_pkts = m->n_pkts[color_out];
4247 	n_bytes = m->n_bytes[color_out];
4248 
4249 	instr_meter_color_out_hbo_set(t, ip, color_out);
4250 
4251 	m->n_pkts[color_out] = n_pkts + 1;
4252 	m->n_bytes[color_out] = n_bytes + length;
4253 }
4254 
4255 static inline void
4256 __instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4257 {
4258 	struct meter *m;
4259 	uint64_t time, n_pkts, n_bytes;
4260 	uint32_t length;
4261 	enum rte_color color_in, color_out;
4262 
4263 	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
4264 
4265 	m = instr_meter_idx_imm(p, ip);
4266 	rte_prefetch0(m->n_pkts);
4267 	time = rte_get_tsc_cycles();
4268 	length = instr_meter_length_hbo(t, ip);
4269 	color_in = instr_meter_color_in_hbo(t, ip);
4270 
4271 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4272 		&m->profile->profile,
4273 		time,
4274 		length,
4275 		color_in);
4276 
4277 	color_out &= m->color_mask;
4278 
4279 	n_pkts = m->n_pkts[color_out];
4280 	n_bytes = m->n_bytes[color_out];
4281 
4282 	instr_meter_color_out_hbo_set(t, ip, color_out);
4283 
4284 	m->n_pkts[color_out] = n_pkts + 1;
4285 	m->n_bytes[color_out] = n_bytes + length;
4286 }
4287 
4288 static inline void
4289 __instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4290 {
4291 	struct meter *m;
4292 	uint64_t time, n_pkts, n_bytes;
4293 	uint32_t length;
4294 	enum rte_color color_in, color_out;
4295 
4296 	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
4297 
4298 	m = instr_meter_idx_imm(p, ip);
4299 	rte_prefetch0(m->n_pkts);
4300 	time = rte_get_tsc_cycles();
4301 	length = instr_meter_length_hbo(t, ip);
4302 	color_in = (enum rte_color)ip->meter.color_in_val;
4303 
4304 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4305 		&m->profile->profile,
4306 		time,
4307 		length,
4308 		color_in);
4309 
4310 	color_out &= m->color_mask;
4311 
4312 	n_pkts = m->n_pkts[color_out];
4313 	n_bytes = m->n_bytes[color_out];
4314 
4315 	instr_meter_color_out_hbo_set(t, ip, color_out);
4316 
4317 	m->n_pkts[color_out] = n_pkts + 1;
4318 	m->n_bytes[color_out] = n_bytes + length;
4319 }
4320 
4321 #endif
4322