xref: /dpdk/lib/pipeline/rte_swx_pipeline_internal.h (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
5 #define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
6 
7 #include <inttypes.h>
8 #include <string.h>
9 #include <sys/queue.h>
10 
11 #include <rte_byteorder.h>
12 #include <rte_common.h>
13 #include <rte_cycles.h>
14 #include <rte_prefetch.h>
15 #include <rte_meter.h>
16 
17 #include <rte_swx_table_selector.h>
18 #include <rte_swx_table_learner.h>
19 #include <rte_swx_pipeline.h>
20 #include <rte_swx_ctl.h>
21 
/* Compile-time trace verbosity: 0 (default) compiles all TRACE() calls out. */
#ifndef TRACE_LEVEL
#define TRACE_LEVEL 0
#endif

#if TRACE_LEVEL
/* NOTE(review): printf() requires <stdio.h>, which is not included directly by
 * this header — presumably pulled in indirectly; confirm before enabling
 * TRACE_LEVEL.
 */
#define TRACE(...) printf(__VA_ARGS__)
#else
#define TRACE(...)
#endif

/*
 * Environment.
 */
/* 64-bit network <-> host byte order conversion shorthands. */
#define ntoh64(x) rte_be_to_cpu_64(x)
#define hton64(x) rte_cpu_to_be_64(x)
37 
38 /*
39  * Struct.
40  */
/* Single field of a struct type. */
struct field {
	char name[RTE_SWX_NAME_SIZE];
	uint32_t n_bits; /* Field size in bits. */
	uint32_t offset; /* Field offset within the parent struct (presumably in bits, matching n_bits; confirm). */
	int var_size; /* Non-zero when this field has a variable size. */
};

/* User-defined struct type: an ordered array of fields. Used for headers,
 * meta-data and extern mailboxes (see struct header, struct extern_type).
 */
struct struct_type {
	TAILQ_ENTRY(struct_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct field *fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	uint32_t n_bits; /* Total size in bits. */
	uint32_t n_bits_min; /* Presumably the minimum total size when var_size is set — confirm. */
	int var_size; /* Non-zero when any field has a variable size. */
};

TAILQ_HEAD(struct_type_tailq, struct_type);
59 
60 /*
61  * Input port.
62  */
/* Input port type: named set of input port operations (driver). */
struct port_in_type {
	TAILQ_ENTRY(port_in_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_in_ops ops;
};

TAILQ_HEAD(port_in_type_tailq, port_in_type);

/* Input port instance. */
struct port_in {
	TAILQ_ENTRY(port_in) node;
	struct port_in_type *type;
	void *obj; /* Opaque port object, paired with type->ops. */
	uint32_t id;
};

TAILQ_HEAD(port_in_tailq, port_in);

/* Flattened input port state for the fast path: RX callback plus its object. */
struct port_in_runtime {
	rte_swx_port_in_pkt_rx_t pkt_rx;
	void *obj;
};

/*
 * Output port.
 */
/* Output port type: named set of output port operations (driver). */
struct port_out_type {
	TAILQ_ENTRY(port_out_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_out_ops ops;
};

TAILQ_HEAD(port_out_type_tailq, port_out_type);

/* Output port instance. */
struct port_out {
	TAILQ_ENTRY(port_out) node;
	struct port_out_type *type;
	void *obj; /* Opaque port object, paired with type->ops. */
	uint32_t id;
};

TAILQ_HEAD(port_out_tailq, port_out);

/* Flattened output port state for the fast path: TX and flush callbacks. */
struct port_out_runtime {
	rte_swx_port_out_pkt_tx_t pkt_tx;
	rte_swx_port_out_flush_t flush;
	void *obj;
};
110 
111 /*
112  * Extern object.
113  */
/* Member function of an extern object type. */
struct extern_type_member_func {
	TAILQ_ENTRY(extern_type_member_func) node;
	char name[RTE_SWX_NAME_SIZE];
	rte_swx_extern_type_member_func_t func;
	uint32_t id; /* Index into extern_obj_runtime::funcs[]. */
};

TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);

/* Extern object type: constructor/destructor, mailbox format and the list of
 * member functions.
 */
struct extern_type {
	TAILQ_ENTRY(extern_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type; /* Format of the per-thread mailbox. */
	rte_swx_extern_type_constructor_t constructor;
	rte_swx_extern_type_destructor_t destructor;
	struct extern_type_member_func_tailq funcs;
	uint32_t n_funcs;
};

TAILQ_HEAD(extern_type_tailq, extern_type);

/* Extern object instance. */
struct extern_obj {
	TAILQ_ENTRY(extern_obj) node;
	char name[RTE_SWX_NAME_SIZE];
	struct extern_type *type;
	void *obj; /* Object created by type->constructor. */
	uint32_t struct_id; /* Index of the mailbox in the per-thread structs[] array. */
	uint32_t id;
};

TAILQ_HEAD(extern_obj_tailq, extern_obj);

#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
#endif

/* Flattened extern object state for the fast path: object, mailbox and
 * member function table indexed by function id.
 */
struct extern_obj_runtime {
	void *obj;
	uint8_t *mailbox;
	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
};

/*
 * Extern function.
 */
/* Free-standing extern function with its own mailbox format. */
struct extern_func {
	TAILQ_ENTRY(extern_func) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type;
	rte_swx_extern_func_t func;
	uint32_t struct_id; /* Index of the mailbox in the per-thread structs[] array. */
	uint32_t id;
};

TAILQ_HEAD(extern_func_tailq, extern_func);

/* Flattened extern function state for the fast path. */
struct extern_func_runtime {
	uint8_t *mailbox;
	rte_swx_extern_func_t func;
};
174 
175 /*
176  * Header.
177  */
/* Packet header declared by the pipeline program. */
struct header {
	TAILQ_ENTRY(header) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st; /* Header format. */
	uint32_t struct_id; /* Index of this header in the per-thread structs[] array. */
	uint32_t id; /* Bit position in thread::valid_headers. */
};

TAILQ_HEAD(header_tailq, header);

/* Per-thread state of an extracted or generated header. */
struct header_runtime {
	uint8_t *ptr0; /* Start of the header storage. */
	uint32_t n_bytes;
};

/* Per-thread state of an emitted header. */
struct header_out_runtime {
	uint8_t *ptr0; /* Start of the emit storage. */
	uint8_t *ptr;  /* Current write position — presumably; confirm against emit code. */
	uint32_t n_bytes;
};
198 
199 /*
200  * Instruction.
201  */
202 
203 /* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
204  * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
205  * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
206  * when transferred to packet meta-data and in NBO when transferred to packet
207  * headers.
208  */
209 
210 /* Notation conventions:
211  *    -Header field: H = h.header.field (dst/src)
212  *    -Meta-data field: M = m.field (dst/src)
213  *    -Extern object mailbox field: E = e.field (dst/src)
214  *    -Extern function mailbox field: F = f.field (dst/src)
215  *    -Table action data field: T = t.field (src only)
216  *    -Immediate value: I = 32-bit unsigned value (src only)
217  */
218 
/* Instruction opcodes. Suffix letters encode the operand kinds resolved at
 * translation time (see the notation conventions above); the specialized
 * variants avoid per-packet operand-kind dispatch.
 */
enum instruction_type {
	/* rx m.port_in */
	INSTR_RX,

	/* tx port_out
	 * port_out = MI
	 */
	INSTR_TX,   /* port_out = M */
	INSTR_TX_I, /* port_out = I */

	/* extract h.header
	 * The numbered variants extract that many headers back-to-back.
	 */
	INSTR_HDR_EXTRACT,
	INSTR_HDR_EXTRACT2,
	INSTR_HDR_EXTRACT3,
	INSTR_HDR_EXTRACT4,
	INSTR_HDR_EXTRACT5,
	INSTR_HDR_EXTRACT6,
	INSTR_HDR_EXTRACT7,
	INSTR_HDR_EXTRACT8,

	/* extract h.header m.last_field_size */
	INSTR_HDR_EXTRACT_M,

	/* lookahead h.header */
	INSTR_HDR_LOOKAHEAD,

	/* emit h.header
	 * The numbered _TX variants emit that many headers, then transmit.
	 */
	INSTR_HDR_EMIT,
	INSTR_HDR_EMIT_TX,
	INSTR_HDR_EMIT2_TX,
	INSTR_HDR_EMIT3_TX,
	INSTR_HDR_EMIT4_TX,
	INSTR_HDR_EMIT5_TX,
	INSTR_HDR_EMIT6_TX,
	INSTR_HDR_EMIT7_TX,
	INSTR_HDR_EMIT8_TX,

	/* validate h.header */
	INSTR_HDR_VALIDATE,

	/* invalidate h.header */
	INSTR_HDR_INVALIDATE,

	/* mov dst src
	 * dst = src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_MOV,    /* dst = MEF, src = MEFT */
	INSTR_MOV_MH, /* dst = MEF, src = H */
	INSTR_MOV_HM, /* dst = H, src = MEFT */
	INSTR_MOV_HH, /* dst = H, src = H */
	INSTR_MOV_I,  /* dst = HMEF, src = I */

	/* dma h.header t.field
	 * memcpy(h.header, t.field, sizeof(h.header))
	 * The numbered variants perform that many transfers back-to-back.
	 */
	INSTR_DMA_HT,
	INSTR_DMA_HT2,
	INSTR_DMA_HT3,
	INSTR_DMA_HT4,
	INSTR_DMA_HT5,
	INSTR_DMA_HT6,
	INSTR_DMA_HT7,
	INSTR_DMA_HT8,

	/* add dst src
	 * dst += src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
	INSTR_ALU_ADD_HH, /* dst = H, src = H */
	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
	INSTR_ALU_ADD_HI, /* dst = H, src = I */

	/* sub dst src
	 * dst -= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
	INSTR_ALU_SUB_HH, /* dst = H, src = H */
	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
	INSTR_ALU_SUB_HI, /* dst = H, src = I */

	/* ckadd dst src
	 * dst = dst '+ src[0:1] '+ src[2:3] + ...
	 * dst = H, src = {H, h.header}
	 */
	INSTR_ALU_CKADD_FIELD,    /* src = H */
	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with any sizeof(header) */

	/* cksub dst src
	 * dst = dst '- src
	 * dst = H, src = H
	 */
	INSTR_ALU_CKSUB_FIELD,

	/* and dst src
	 * dst &= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
	INSTR_ALU_AND_HH, /* dst = H, src = H */
	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */

	/* or dst src
	 * dst |= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_OR_HH, /* dst = H, src = H */
	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */

	/* xor dst src
	 * dst ^= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_XOR_HH, /* dst = H, src = H */
	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */

	/* shl dst src
	 * dst <<= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHL_HH, /* dst = H, src = H */
	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHL_HI, /* dst = H, src = I */

	/* shr dst src
	 * dst >>= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHR_HH, /* dst = H, src = H */
	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHR_HI, /* dst = H, src = I */

	/* regprefetch REGARRAY index
	 * prefetch REGARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_REGPREFETCH_RH, /* index = H */
	INSTR_REGPREFETCH_RM, /* index = MEFT */
	INSTR_REGPREFETCH_RI, /* index = I */

	/* regrd dst REGARRAY index
	 * dst = REGARRAY[index]
	 * dst = HMEF, index = HMEFTI
	 */
	INSTR_REGRD_HRH, /* dst = H, index = H */
	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
	INSTR_REGRD_HRI, /* dst = H, index = I */
	INSTR_REGRD_MRH, /* dst = MEF, index = H */
	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
	INSTR_REGRD_MRI, /* dst = MEF, index = I */

	/* regwr REGARRAY index src
	 * REGARRAY[index] = src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGWR_RHH, /* index = H, src = H */
	INSTR_REGWR_RHM, /* index = H, src = MEFT */
	INSTR_REGWR_RHI, /* index = H, src = I */
	INSTR_REGWR_RMH, /* index = MEFT, src = H */
	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGWR_RMI, /* index = MEFT, src = I */
	INSTR_REGWR_RIH, /* index = I, src = H */
	INSTR_REGWR_RIM, /* index = I, src = MEFT */
	INSTR_REGWR_RII, /* index = I, src = I */

	/* regadd REGARRAY index src
	 * REGARRAY[index] += src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGADD_RHH, /* index = H, src = H */
	INSTR_REGADD_RHM, /* index = H, src = MEFT */
	INSTR_REGADD_RHI, /* index = H, src = I */
	INSTR_REGADD_RMH, /* index = MEFT, src = H */
	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGADD_RMI, /* index = MEFT, src = I */
	INSTR_REGADD_RIH, /* index = I, src = H */
	INSTR_REGADD_RIM, /* index = I, src = MEFT */
	INSTR_REGADD_RII, /* index = I, src = I */

	/* metprefetch METARRAY index
	 * prefetch METARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_METPREFETCH_H, /* index = H */
	INSTR_METPREFETCH_M, /* index = MEFT */
	INSTR_METPREFETCH_I, /* index = I */

	/* meter METARRAY index length color_in color_out
	 * color_out = meter(METARRAY[index], length, color_in)
	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
	 */
	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */

	/* table TABLE */
	INSTR_TABLE,
	INSTR_TABLE_AF,
	INSTR_SELECTOR,
	INSTR_LEARNER,
	INSTR_LEARNER_AF,

	/* learn LEARNER ACTION_NAME [ m.action_first_arg ] */
	INSTR_LEARNER_LEARN,

	/* forget */
	INSTR_LEARNER_FORGET,

	/* extern e.obj.func */
	INSTR_EXTERN_OBJ,

	/* extern f.func */
	INSTR_EXTERN_FUNC,

	/* jmp LABEL
	 * Unconditional jump
	 */
	INSTR_JMP,

	/* jmpv LABEL h.header
	 * Jump if header is valid
	 */
	INSTR_JMP_VALID,

	/* jmpnv LABEL h.header
	 * Jump if header is invalid
	 */
	INSTR_JMP_INVALID,

	/* jmph LABEL
	 * Jump if table lookup hit
	 */
	INSTR_JMP_HIT,

	/* jmpnh LABEL
	 * Jump if table lookup miss
	 */
	INSTR_JMP_MISS,

	/* jmpa LABEL ACTION
	 * Jump if action run
	 */
	INSTR_JMP_ACTION_HIT,

	/* jmpna LABEL ACTION
	 * Jump if action not run
	 */
	INSTR_JMP_ACTION_MISS,

	/* jmpeq LABEL a b
	 * Jump if a is equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_EQ_HH, /* a = H, b = H */
	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmpneq LABEL a b
	 * Jump if a is not equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_NEQ_HH, /* a = H, b = H */
	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmplt LABEL a b
	 * Jump if a is less than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
	INSTR_JMP_LT_HH, /* a = H, b = H */
	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
	INSTR_JMP_LT_HI, /* a = H, b = I */

	/* jmpgt LABEL a b
	 * Jump if a is greater than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
	INSTR_JMP_GT_HH, /* a = H, b = H */
	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
	INSTR_JMP_GT_HI, /* a = H, b = I */

	/* return
	 * Return from action
	 */
	INSTR_RETURN,

	/* Start of custom instructions. */
	INSTR_CUSTOM_0,
};
548 
/* Generic pre-resolved instruction operand: which per-thread struct it lives
 * in, its byte offset within that struct, and its size in bits.
 */
struct instr_operand {
	uint8_t struct_id; /* Index into the per-thread structs[] array. */
	uint8_t n_bits;    /* Operand size in bits. */
	uint8_t offset;    /* Byte offset within the struct. */
	uint8_t pad;
};

/* Operands of the I/O instructions (rx, tx, extract, emit). */
struct instr_io {
	struct {
		union {
			struct {
				uint8_t offset;
				uint8_t n_bits;
				uint8_t pad[2];
			};

			uint32_t val; /* Overlay for reading/writing all of the above at once. */
		};
	} io;

	/* Up to 8 headers handled by one instruction (see INSTR_HDR_EXTRACT2..8). */
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
		uint8_t n_bytes[8];
	} hdr;
};

/* Operand of the validate/invalidate instructions. */
struct instr_hdr_validity {
	uint8_t header_id;
};

/* Operand of the table/selector/learner instructions. */
struct instr_table {
	uint8_t table_id;
};

/* Operands of the learn instruction. */
struct instr_learn {
	uint8_t action_id;
	uint8_t mf_offset; /* Meta-data offset of the first action argument — presumably; confirm. */
};

/* Operands of the extern object member function call. */
struct instr_extern_obj {
	uint8_t ext_obj_id;
	uint8_t func_id;
};

/* Operand of the extern function call. */
struct instr_extern_func {
	uint8_t ext_func_id;
};

/* Operands of the two-operand instructions (mov, ALU). The src union holds
 * either a field operand or an immediate value, depending on the opcode.
 */
struct instr_dst_src {
	struct instr_operand dst;
	union {
		struct instr_operand src;
		uint64_t src_val; /* Immediate (INSTR_*_I opcodes). */
	};
};
605 
/* Operands of the register array instructions (regprefetch/regrd/regwr/regadd). */
struct instr_regarray {
	uint8_t regarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val; /* Immediate index (INSTR_*_RI* opcodes). */
	};

	union {
		struct instr_operand dstsrc;
		uint64_t dstsrc_val; /* Immediate src (INSTR_REGWR/REGADD *I opcodes). */
	};
};

/* Operands of the meter instruction. */
struct instr_meter {
	uint8_t metarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val; /* Immediate index (INSTR_METER_I** opcodes). */
	};

	struct instr_operand length;

	union {
		struct instr_operand color_in;
		uint32_t color_in_val; /* Immediate color_in (INSTR_METER_**I opcodes). */
	};

	struct instr_operand color_out;
};

/* Operands of the dma instruction: up to 8 table-data-to-header transfers. */
struct instr_dma {
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
	} dst;

	struct {
		uint8_t offset[8]; /* Offset within the table action data. */
	} src;

	uint16_t n_bytes[8];
};

/* Operands of the jump instructions. */
struct instr_jmp {
	struct instruction *ip; /* Jump target. */

	union {
		struct instr_operand a;
		uint8_t header_id; /* jmpv/jmpnv. */
		uint8_t action_id; /* jmpa/jmpna. */
	};

	union {
		struct instr_operand b;
		uint64_t b_val; /* Immediate b (INSTR_JMP_*_I opcodes). */
	};
};
667 
/* One translated instruction: opcode plus the operand layout for its family. */
struct instruction {
	enum instruction_type type;
	union {
		struct instr_io io;
		struct instr_hdr_validity valid;
		struct instr_dst_src mov;
		struct instr_regarray regarray;
		struct instr_meter meter;
		struct instr_dma dma;
		struct instr_dst_src alu;
		struct instr_table table;
		struct instr_learn learn;
		struct instr_extern_obj ext_obj;
		struct instr_extern_func ext_func;
		struct instr_jmp jmp;
	};
};

/* Per-instruction translation-time bookkeeping (labels, jump resolution). */
struct instruction_data {
	char label[RTE_SWX_NAME_SIZE];     /* Label defined on this instruction, if any. */
	char jmp_label[RTE_SWX_NAME_SIZE]; /* Label this instruction jumps to, if any. */
	uint32_t n_users; /* user = jmp instruction to this instruction. */
	int invalid; /* Non-zero when the instruction has been removed/disabled. */
};

/* Per-opcode execution handler. */
typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
694 
695 /*
696  * Action.
697  */
/* Action execution handler. */
typedef void
(*action_func_t)(struct rte_swx_pipeline *p);

/* Table action: argument format plus its translated instruction sequence. */
struct action {
	TAILQ_ENTRY(action) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st; /* Format of the action arguments; NULL-able — confirm. */
	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	uint32_t n_instructions;
	uint32_t id;
};

TAILQ_HEAD(action_tailq, action);
713 
714 /*
715  * Table.
716  */
/* Table type: named table implementation (ops) for a given match type. */
struct table_type {
	TAILQ_ENTRY(table_type) node;
	char name[RTE_SWX_NAME_SIZE];
	enum rte_swx_table_match_type match_type;
	struct rte_swx_table_ops ops;
};

TAILQ_HEAD(table_type_tailq, table_type);

/* Single match field of a table key. */
struct match_field {
	enum rte_swx_table_match_type match_type;
	struct field *field;
};

/* Regular match-action table. */
struct table {
	TAILQ_ENTRY(table) node;
	char name[RTE_SWX_NAME_SIZE];
	char args[RTE_SWX_NAME_SIZE]; /* Implementation-specific argument string. */
	struct table_type *type; /* NULL when n_fields == 0. */

	/* Match. */
	struct match_field *fields;
	uint32_t n_fields;
	struct header *header; /* Only valid when n_fields > 0. */

	/* Action. */
	struct action **actions;
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const;
	uint32_t action_data_size_max; /* Max argument size over all actions. */

	uint32_t size; /* Table size (number of entries). */
	uint32_t id;
};

TAILQ_HEAD(table_tailq, table);

/* Per-thread table lookup state. */
struct table_runtime {
	rte_swx_table_lookup_t func;
	void *mailbox;
	uint8_t **key;
};

/* Per-table statistics counters. */
struct table_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t *n_pkts_action; /* Per-action packet counters. */
};
766 
767 /*
768  * Selector.
769  */
/* Selector table: maps a group ID plus a hash of the selector fields to a
 * member ID.
 */
struct selector {
	TAILQ_ENTRY(selector) node;
	char name[RTE_SWX_NAME_SIZE];

	struct field *group_id_field;    /* Input: group ID. */
	struct field **selector_fields;  /* Input: fields hashed to pick a member. */
	uint32_t n_selector_fields;
	struct header *selector_header;  /* Header owning the selector fields, if any. */
	struct field *member_id_field;   /* Output: selected member ID. */

	uint32_t n_groups_max;
	uint32_t n_members_per_group_max;

	uint32_t id;
};

TAILQ_HEAD(selector_tailq, selector);

/* Per-thread selector lookup state. */
struct selector_runtime {
	void *mailbox;
	uint8_t **group_id_buffer;
	uint8_t **selector_buffer;
	uint8_t **member_id_buffer;
};

/* Per-selector statistics counters. */
struct selector_statistics {
	uint64_t n_pkts;
};
798 
799 /*
800  * Learner table.
801  */
/* Learner table: match-action table whose entries can be added (learn) and
 * removed (forget) from the data path, with entry timeout.
 */
struct learner {
	TAILQ_ENTRY(learner) node;
	char name[RTE_SWX_NAME_SIZE];

	/* Match. */
	struct field **fields;
	uint32_t n_fields;
	struct header *header;

	/* Action. */
	struct action **actions;
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const;
	uint32_t action_data_size_max; /* Max argument size over all actions. */

	uint32_t size; /* Table size (number of entries). */
	uint32_t timeout; /* Entry timeout — presumably in seconds; confirm against learner table API. */
	uint32_t id;
};

TAILQ_HEAD(learner_tailq, learner);

/* Per-thread learner lookup state. */
struct learner_runtime {
	void *mailbox;
	uint8_t **key;
};

/* Per-learner statistics counters. */
struct learner_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
	uint64_t n_pkts_forget;
	uint64_t *n_pkts_action; /* Per-action packet counters. */
};
837 
838 /*
839  * Register array.
840  */
/* Register array declaration. */
struct regarray {
	TAILQ_ENTRY(regarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint64_t init_val; /* Initial value of every register. */
	uint32_t size; /* Number of registers. */
	uint32_t id;
};

TAILQ_HEAD(regarray_tailq, regarray);

/* Register array runtime state. */
struct regarray_runtime {
	uint64_t *regarray;
	uint32_t size_mask; /* size - 1; used to wrap indices, so size must be a power of 2 — confirm. */
};

/*
 * Meter array.
 */
/* Named trTCM meter profile shared by meters. */
struct meter_profile {
	TAILQ_ENTRY(meter_profile) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_meter_trtcm_params params;
	struct rte_meter_trtcm_profile profile;
	uint32_t n_users; /* Number of meters currently using this profile. */
};

TAILQ_HEAD(meter_profile_tailq, meter_profile);

/* Meter array declaration. */
struct metarray {
	TAILQ_ENTRY(metarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint32_t size; /* Number of meters. */
	uint32_t id;
};

TAILQ_HEAD(metarray_tailq, metarray);

/* Single meter instance plus its per-color statistics. */
struct meter {
	struct rte_meter_trtcm m;
	struct meter_profile *profile;
	enum rte_color color_mask;
	uint8_t pad[20]; /* NOTE(review): presumably pads the struct for alignment — confirm intended size. */

	uint64_t n_pkts[RTE_COLORS];
	uint64_t n_bytes[RTE_COLORS];
};

/* Meter array runtime state. */
struct metarray_runtime {
	struct meter *metarray;
	uint32_t size_mask; /* size - 1; used to wrap indices, so size must be a power of 2 — confirm. */
};
892 
893 /*
894  * Pipeline.
895  */
/* Per-thread packet processing context: everything an instruction touches. */
struct thread {
	/* Packet. */
	struct rte_swx_pkt pkt;
	uint8_t *ptr; /* Current position within the packet — presumably; confirm against rx/extract code. */

	/* Structures: operand base pointers indexed by struct_id (headers,
	 * meta-data, mailboxes, action data).
	 */
	uint8_t **structs;

	/* Packet headers. */
	struct header_runtime *headers; /* Extracted or generated headers. */
	struct header_out_runtime *headers_out; /* Emitted headers. */
	uint8_t *header_storage;
	uint8_t *header_out_storage;
	uint64_t valid_headers; /* Bit mask of valid headers, bit position = header id. */
	uint32_t n_headers_out;

	/* Packet meta-data. */
	uint8_t *metadata;

	/* Tables. */
	struct table_runtime *tables;
	struct selector_runtime *selectors;
	struct learner_runtime *learners;
	struct rte_swx_table_state *table_state;
	uint64_t action_id; /* Action selected by the latest table lookup. */
	int hit; /* 0 = Miss, 1 = Hit. */
	uint32_t learner_id; /* Learner used by the latest lookup (for learn/forget). */
	uint64_t time;

	/* Extern objects and functions. */
	struct extern_obj_runtime *extern_objs;
	struct extern_func_runtime *extern_funcs;

	/* Instructions. */
	struct instruction *ip;  /* Current instruction pointer. */
	struct instruction *ret; /* Return address for the return instruction. */
};

/* 64-bit mask single-bit helpers; pos must be < 64. */
#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))

/* Non-zero when the given header is currently valid for this thread. */
#define HEADER_VALID(thread, header_id) \
	MASK64_BIT_GET((thread)->valid_headers, header_id)
940 
941 static inline uint64_t
942 instr_operand_hbo(struct thread *t, const struct instr_operand *x)
943 {
944 	uint8_t *x_struct = t->structs[x->struct_id];
945 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
946 	uint64_t x64 = *x64_ptr;
947 	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
948 
949 	return x64 & x64_mask;
950 }
951 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read an instruction operand stored in Network Byte Order (header field) and
 * return it as a host-order value. The operand occupies the most-significant
 * n_bits of the big-endian 64-bit word at byte offset x->offset; the
 * right-shift after byte-swapping moves it into the low bits.
 * Assumes 1 <= n_bits <= 64 (shift by 64 would be UB).
 */
static inline uint64_t
instr_operand_nbo(struct thread *t, const struct instr_operand *x)
{
	uint8_t *x_struct = t->structs[x->struct_id];
	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
	uint64_t x64 = *x64_ptr;

	return ntoh64(x64) >> (64 - x->n_bits);
}

#else

/* On big-endian hosts NBO and HBO coincide, so reuse the HBO accessor. */
#define instr_operand_nbo instr_operand_hbo

#endif
969 
/* Generic ALU operation, dst <operator>= src, with both operands in Host Byte
 * Order. Each operand's 64-bit carrier word is loaded, the operand is isolated
 * with an n_bits mask, the operation is applied, and the result is merged back
 * into the dst word under the dst mask (bits outside the operand are kept).
 * Assumes 1 <= n_bits <= 64 for both operands (shift by 64 would be UB).
 */
#define ALU(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
988 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* ALU with HBO dst and NBO src: the src (header field) is byte-swapped and
 * right-aligned before the operation; the result is written back in HBO.
 */
#define ALU_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}

/* ALU with NBO dst and HBO src: the dst (header field) is converted to host
 * order for the operation, then the result is converted back to NBO and
 * merged under the dst mask.
 */
#define ALU_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}

/* Faster ALU_HM variant that skips the dst byte-swap by operating on the
 * swapped src instead. NOTE(review): presumably only selected by the
 * translator for operators where this transformation is valid (e.g. bitwise
 * ops) — confirm against the instruction translation code.
 */
#define ALU_HM_FAST(thread, ip, operator)  \
{                                                                                 \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
	uint64_t dst64 = *dst64_ptr;                                              \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
	uint64_t dst = dst64 & dst64_mask;                                        \
										  \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
	uint64_t src64 = *src64_ptr;                                              \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
										  \
	uint64_t result = dst operator src;                                       \
										  \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
}

/* ALU with both operands in NBO: both are converted to host order, the
 * operation is applied, and the result is converted back to NBO.
 */
#define ALU_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}

/* Faster ALU_HH variant that avoids byte-swapping entirely by realigning the
 * raw src bits to the dst position. NOTE(review): presumably only valid for
 * specific operator/operand-size combinations selected by the translator —
 * confirm.
 */
#define ALU_HH_FAST(thread, ip, operator)  \
{                                                                                             \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
	uint64_t dst64 = *dst64_ptr;                                                          \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
	uint64_t dst = dst64 & dst64_mask;                                                    \
											      \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
	uint64_t src64 = *src64_ptr;                                                          \
	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
											      \
	uint64_t result = dst operator src;                                                   \
											      \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
}

#else

/* On big-endian hosts NBO == HBO, so all variants collapse to the generic ALU. */
#define ALU_MH ALU
#define ALU_HM ALU
#define ALU_HM_FAST ALU
#define ALU_HH ALU
#define ALU_HH_FAST ALU

#endif
1094 
/*
 * ALU_I: ALU operation with the destination being a meta-data field (host
 * byte order) and the source being a constant embedded in the instruction
 * (ip->alu.src_val).  Only the low dst.n_bits of the destination 64-bit word
 * are modified.  Assumes 1 <= dst.n_bits <= 64 (a zero bit width would make
 * the mask shift undefined behavior).
 */
#define ALU_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}

/* Meta-data destination with immediate source: identical to ALU_I. */
#define ALU_MI ALU_I
1111 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * ALU_HI: destination is a header field (network byte order), source is an
 * instruction-embedded constant.  The destination is converted to host order
 * with ntoh64() before the operation and converted back with hton64() after.
 */
#define ALU_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}

#else

/* Big-endian CPU: no byte order conversion needed, fall back to ALU_I. */
#define ALU_HI ALU_I

#endif
1135 
/*
 * MOV: copy a source field into a destination field, both in host byte order
 * (meta-data / action data).  Only the low dst.n_bits of the destination
 * 64-bit word are overwritten.
 * NOTE(review): the uint64_t accesses through cast uint8_t pointers assume
 * the target CPU supports unaligned loads/stores -- confirm for all
 * supported architectures.
 */
#define MOV(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1151 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* MOV_MH: meta-data destination (host order), header source (network order).
 * The source is converted to host order before being masked in.
 */
#define MOV_MH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}

/* MOV_HM: header destination (network order), meta-data source (host order).
 * The masked source is converted to network order and shifted into the
 * destination field position.
 */
#define MOV_HM(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}

/* MOV_HH: both fields in network byte order; the source is re-aligned to the
 * destination field position with two shifts, no byte swapping needed.
 */
#define MOV_HH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
									       \
	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}

#else

/* Big-endian CPU: all fields share the same byte order, use generic MOV. */
#define MOV_MH MOV
#define MOV_HM MOV
#define MOV_HH MOV

#endif
1209 
/*
 * MOV_I: load an instruction-embedded constant (ip->mov.src_val) into the
 * destination field, leaving the bits above dst.n_bits untouched.
 */
#define MOV_I(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint64_t src = (ip)->mov.src_val;                                      \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1221 
/*
 * JMP_CMP: conditional jump comparing two fields in host byte order.  When
 * "a operator b" holds, the thread instruction pointer is set to the jump
 * target (ip->jmp.ip); otherwise it advances to the next instruction.
 */
#define JMP_CMP(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1238 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* JMP_CMP_MH: operand a in host byte order, operand b a header field in
 * network byte order (converted before the comparison).
 */
#define JMP_CMP_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* JMP_CMP_HM: operand a a header field (network byte order), operand b in
 * host byte order.
 */
#define JMP_CMP_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* JMP_CMP_HH: both operands are header fields; both are converted to host
 * byte order before the comparison.
 */
#define JMP_CMP_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* JMP_CMP_HH_FAST: both operands are header fields; they are left-aligned in
 * the 64-bit word instead of byte-swapped, which preserves the comparison
 * outcome while avoiding ntoh64().
 * NOTE(review): this alignment preserves equality for any field sizes, but
 * relational operators compare big-endian bit patterns -- confirm the
 * translation layer only selects this variant where that is correct.
 */
#define JMP_CMP_HH_FAST(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#else

/* Big-endian CPU: all fields share the same byte order, use generic JMP_CMP. */
#define JMP_CMP_MH JMP_CMP
#define JMP_CMP_HM JMP_CMP
#define JMP_CMP_HH JMP_CMP
#define JMP_CMP_HH_FAST JMP_CMP

#endif
1311 
/*
 * JMP_CMP_I: conditional jump comparing a host-byte-order field against an
 * instruction-embedded constant (ip->jmp.b_val).
 */
#define JMP_CMP_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* Meta-data operand with immediate: identical to JMP_CMP_I. */
#define JMP_CMP_MI JMP_CMP_I
1326 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* JMP_CMP_HI: operand a is a header field (network byte order, converted to
 * host order first), operand b is an instruction-embedded constant.
 */
#define JMP_CMP_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#else

/* Big-endian CPU: no conversion needed, fall back to JMP_CMP_I. */
#define JMP_CMP_HI JMP_CMP_I

#endif
1346 
/*
 * METADATA_READ: read the low n_bits of the 64-bit word at the given byte
 * offset into the thread meta-data.  Implemented as a GCC statement
 * expression so it can be used as a value.  Assumes 1 <= n_bits <= 64
 * (n_bits == 0 would make the mask shift undefined behavior).
 */
#define METADATA_READ(thread, offset, n_bits)                                  \
({                                                                             \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
	(m64 & m64_mask);                                                      \
})
1354 
/*
 * METADATA_WRITE: write the low n_bits of value into the 64-bit word at the
 * given byte offset into the thread meta-data, preserving the remaining bits
 * of the word.  Same n_bits constraint as METADATA_READ.
 */
#define METADATA_WRITE(thread, offset, n_bits, value)                          \
{                                                                              \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
									       \
	uint64_t m_new = value;                                                \
									       \
	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
}
1365 
#ifndef RTE_SWX_PIPELINE_THREADS_MAX
/* Number of packet processing threads per pipeline.  Must be a power of two:
 * thread_yield() wraps the thread ID with a bit-wise AND of (this value - 1).
 */
#define RTE_SWX_PIPELINE_THREADS_MAX 16
#endif

#ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
/* Maximum number of entries in the pipeline instruction dispatch table. */
#define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 256
#endif
1373 
/* Pipeline instance: object registries filled in during specification,
 * run-time structures instantiated at build time, and bookkeeping counters.
 */
struct rte_swx_pipeline {
	/* Object registries (specification phase). */
	struct struct_type_tailq struct_types;
	struct port_in_type_tailq port_in_types;
	struct port_in_tailq ports_in;
	struct port_out_type_tailq port_out_types;
	struct port_out_tailq ports_out;
	struct extern_type_tailq extern_types;
	struct extern_obj_tailq extern_objs;
	struct extern_func_tailq extern_funcs;
	struct header_tailq headers;
	struct struct_type *metadata_st;
	uint32_t metadata_struct_id;
	struct action_tailq actions;
	struct table_type_tailq table_types;
	struct table_tailq tables;
	struct selector_tailq selectors;
	struct learner_tailq learners;
	struct regarray_tailq regarrays;
	struct meter_profile_tailq meter_profiles;
	struct metarray_tailq metarrays;

	/* Run-time structures (build phase). */
	struct port_in_runtime *in;
	struct port_out_runtime *out;
	struct instruction **action_instructions;
	action_func_t *action_funcs;
	struct rte_swx_table_state *table_state;
	struct table_statistics *table_stats;
	struct selector_statistics *selector_stats;
	struct learner_statistics *learner_stats;
	struct regarray_runtime *regarray_runtime;
	struct metarray_runtime *metarray_runtime;
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	instr_exec_t *instruction_table;
	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
	void *lib;

	/* Bookkeeping: object counts and run-time cursors. */
	uint32_t n_structs;
	uint32_t n_ports_in;
	uint32_t n_ports_out;
	uint32_t n_extern_objs;
	uint32_t n_extern_funcs;
	uint32_t n_actions;
	uint32_t n_tables;
	uint32_t n_selectors;
	uint32_t n_learners;
	uint32_t n_regarrays;
	uint32_t n_metarrays;
	uint32_t n_headers;
	uint32_t thread_id;	/* Currently scheduled thread. */
	uint32_t port_id;	/* Next input port to poll (round-robin). */
	uint32_t n_instructions;
	int build_done;
	int numa_node;
};
1429 
1430 /*
1431  * Instruction.
1432  */
/* Advance to the next input port in round-robin order.
 * NOTE(review): the wrap-around uses a bit-wise AND mask, which is only
 * correct when n_ports_in is a power of two -- confirm this is enforced at
 * pipeline build time.
 */
static inline void
pipeline_port_inc(struct rte_swx_pipeline *p)
{
	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
}
1438 
/* Point the thread instruction pointer at the first pipeline instruction. */
static inline void
thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
{
	t->ip = p->instructions;
}

/* Point the thread instruction pointer at an arbitrary instruction. */
static inline void
thread_ip_set(struct thread *t, struct instruction *ip)
{
	t->ip = ip;
}
1450 
/* Call into an action: save the return address (the instruction after the
 * current one) and jump to the first instruction of the given action.
 */
static inline void
thread_ip_action_call(struct rte_swx_pipeline *p,
		      struct thread *t,
		      uint32_t action_id)
{
	t->ret = t->ip + 1;
	t->ip = p->action_instructions[action_id];
}
1459 
1460 static inline void
1461 thread_ip_inc(struct rte_swx_pipeline *p);
1462 
1463 static inline void
1464 thread_ip_inc(struct rte_swx_pipeline *p)
1465 {
1466 	struct thread *t = &p->threads[p->thread_id];
1467 
1468 	t->ip++;
1469 }
1470 
/* Branchless conditional increment: advance the instruction pointer only when
 * cond is non-zero (cond is expected to be 0 or 1, as it is added directly).
 */
static inline void
thread_ip_inc_cond(struct thread *t, int cond)
{
	t->ip += cond;
}

/* Hand over the CPU to the next thread in round-robin order.  The AND mask
 * implements the wrap-around (RTE_SWX_PIPELINE_THREADS_MAX is a power of two).
 */
static inline void
thread_yield(struct rte_swx_pipeline *p)
{
	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}

/* Branchless conditional yield: switch threads only when cond is non-zero
 * (cond is expected to be 0 or 1, as it is added directly).
 */
static inline void
thread_yield_cond(struct rte_swx_pipeline *p, int cond)
{
	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}
1488 
1489 /*
1490  * rx.
1491  */
/* Receive a single packet from the current input port: reset the per-packet
 * header state, record the input port ID into the meta-data field designated
 * by the instruction, snapshot the table state and advance to the next input
 * port.  Returns the port's pkt_rx() status (non-zero when a packet was
 * received).
 * NOTE(review): t->ptr is set and prefetched even when no packet was
 * received -- presumably harmless because the caller skips processing in
 * that case; confirm.
 */
static inline int
__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct port_in_runtime *port = &p->in[p->port_id];
	struct rte_swx_pkt *pkt = &t->pkt;
	int pkt_received;

	/* Packet. */
	pkt_received = port->pkt_rx(port->obj, pkt);
	t->ptr = &pkt->pkt[pkt->offset];
	rte_prefetch0(t->ptr);

	TRACE("[Thread %2u] rx %s from port %u\n",
	      p->thread_id,
	      pkt_received ? "1 pkt" : "0 pkts",
	      p->port_id);

	/* Headers. */
	t->valid_headers = 0;
	t->n_headers_out = 0;

	/* Meta-data. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);

	/* Tables. */
	t->table_state = p->table_state;

	/* Thread. */
	pipeline_port_inc(p);

	return pkt_received;
}
1524 
1525 static inline void
1526 instr_rx_exec(struct rte_swx_pipeline *p)
1527 {
1528 	struct thread *t = &p->threads[p->thread_id];
1529 	struct instruction *ip = t->ip;
1530 	int pkt_received;
1531 
1532 	/* Packet. */
1533 	pkt_received = __instr_rx_exec(p, t, ip);
1534 
1535 	/* Thread. */
1536 	thread_ip_inc_cond(t, pkt_received);
1537 	thread_yield(p);
1538 }
1539 
1540 /*
1541  * tx.
1542  */
1543 static inline void
1544 emit_handler(struct thread *t)
1545 {
1546 	struct header_out_runtime *h0 = &t->headers_out[0];
1547 	struct header_out_runtime *h1 = &t->headers_out[1];
1548 	uint32_t offset = 0, i;
1549 
1550 	/* No header change or header decapsulation. */
1551 	if ((t->n_headers_out == 1) &&
1552 	    (h0->ptr + h0->n_bytes == t->ptr)) {
1553 		TRACE("Emit handler: no header change or header decap.\n");
1554 
1555 		t->pkt.offset -= h0->n_bytes;
1556 		t->pkt.length += h0->n_bytes;
1557 
1558 		return;
1559 	}
1560 
1561 	/* Header encapsulation (optionally, with prior header decasulation). */
1562 	if ((t->n_headers_out == 2) &&
1563 	    (h1->ptr + h1->n_bytes == t->ptr) &&
1564 	    (h0->ptr == h0->ptr0)) {
1565 		uint32_t offset;
1566 
1567 		TRACE("Emit handler: header encapsulation.\n");
1568 
1569 		offset = h0->n_bytes + h1->n_bytes;
1570 		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
1571 		t->pkt.offset -= offset;
1572 		t->pkt.length += offset;
1573 
1574 		return;
1575 	}
1576 
1577 	/* For any other case. */
1578 	TRACE("Emit handler: complex case.\n");
1579 
1580 	for (i = 0; i < t->n_headers_out; i++) {
1581 		struct header_out_runtime *h = &t->headers_out[i];
1582 
1583 		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
1584 		offset += h->n_bytes;
1585 	}
1586 
1587 	if (offset) {
1588 		memcpy(t->ptr - offset, t->header_out_storage, offset);
1589 		t->pkt.offset -= offset;
1590 		t->pkt.length += offset;
1591 	}
1592 }
1593 
/* Transmit the current packet: read the output port ID from the meta-data
 * field designated by the instruction, flush the queued headers in front of
 * the payload via emit_handler(), then hand the packet to the port.
 */
static inline void
__instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	port->pkt_tx(port->obj, pkt);
}

/* Same as __instr_tx_exec(), but the output port ID is an instruction-embedded
 * constant (ip->io.io.val) instead of a meta-data field.
 */
static inline void
__instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = ip->io.io.val;
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	port->pkt_tx(port->obj, pkt);
}
1629 
1630 /*
1631  * extract.
1632  */
/* Extract n_extract consecutive fixed-size headers from the packet: map each
 * header structure onto the current packet pointer, mark the header as valid
 * and consume its bytes (offset/length/pointer advance by n_bytes).
 * NOTE(review): length underflow (n_bytes greater than the remaining packet
 * length) is not checked here -- presumably guaranteed by the caller or the
 * parser logic; confirm.
 */
static inline void
__instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip,
			      uint32_t n_extract)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;
	uint32_t i;

	for (i = 0; i < n_extract; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];
		uint32_t n_bytes = ip->io.hdr.n_bytes[i];

		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
		      p->thread_id,
		      header_id,
		      n_bytes);

		/* Headers. */
		t->structs[struct_id] = ptr;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);

		/* Packet. */
		offset += n_bytes;
		length -= n_bytes;
		ptr += n_bytes;
	}

	/* Headers. */
	t->valid_headers = valid_headers;

	/* Packet. */
	t->pkt.offset = offset;
	t->pkt.length = length;
	t->ptr = ptr;
}
1673 
/* Thin wrappers: extract 1 to 8 headers with a single (fused) instruction.
 * The translation layer fuses consecutive extract instructions; each wrapper
 * below corresponds to one fusion depth.
 */
static inline void
__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	__instr_hdr_extract_many_exec(p, t, ip, 1);
}

static inline void
__instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 2);
}

static inline void
__instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 3);
}

static inline void
__instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 4);
}

static inline void
__instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 5);
}

static inline void
__instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 6);
}

static inline void
__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 7);
}

static inline void
__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 8);
}
1751 
/* Extract a variable-size header: the total size is the fixed part
 * (ip->io.hdr.n_bytes[0]) plus a variable part read from the meta-data field
 * designated by the instruction.  The resulting size is also recorded in the
 * header run-time state (h->n_bytes) for later emission.
 */
static inline void
__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;

	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];
	uint32_t n_bytes = ip->io.hdr.n_bytes[0];

	struct header_runtime *h = &t->headers[header_id];

	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
	      p->thread_id,
	      header_id,
	      n_bytes,
	      n_bytes_last);

	n_bytes += n_bytes_last;

	/* Headers. */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	h->n_bytes = n_bytes;

	/* Packet. */
	t->pkt.offset = offset + n_bytes;
	t->pkt.length = length - n_bytes;
	t->ptr = ptr + n_bytes;
}
1787 
/* Map a header structure onto the current packet pointer and mark the header
 * as valid WITHOUT consuming any packet bytes: unlike extract, the packet
 * offset, length and pointer are left unchanged.
 */
static inline void
__instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;

	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];

	TRACE("[Thread %2u]: lookahead header %u\n",
	      p->thread_id,
	      header_id);

	/* Headers. */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
}
1807 
1808 /*
1809  * emit.
1810  */
1811 static inline void
1812 __instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
1813 			   struct thread *t,
1814 			   const struct instruction *ip,
1815 			   uint32_t n_emit)
1816 {
1817 	uint64_t valid_headers = t->valid_headers;
1818 	uint32_t n_headers_out = t->n_headers_out;
1819 	struct header_out_runtime *ho = &t->headers_out[n_headers_out - 1];
1820 	uint8_t *ho_ptr = NULL;
1821 	uint32_t ho_nbytes = 0, first = 1, i;
1822 
1823 	for (i = 0; i < n_emit; i++) {
1824 		uint32_t header_id = ip->io.hdr.header_id[i];
1825 		uint32_t struct_id = ip->io.hdr.struct_id[i];
1826 
1827 		struct header_runtime *hi = &t->headers[header_id];
1828 		uint8_t *hi_ptr0 = hi->ptr0;
1829 		uint32_t n_bytes = hi->n_bytes;
1830 
1831 		uint8_t *hi_ptr = t->structs[struct_id];
1832 
1833 		if (!MASK64_BIT_GET(valid_headers, header_id))
1834 			continue;
1835 
1836 		TRACE("[Thread %2u]: emit header %u\n",
1837 		      p->thread_id,
1838 		      header_id);
1839 
1840 		/* Headers. */
1841 		if (first) {
1842 			first = 0;
1843 
1844 			if (!t->n_headers_out) {
1845 				ho = &t->headers_out[0];
1846 
1847 				ho->ptr0 = hi_ptr0;
1848 				ho->ptr = hi_ptr;
1849 
1850 				ho_ptr = hi_ptr;
1851 				ho_nbytes = n_bytes;
1852 
1853 				n_headers_out = 1;
1854 
1855 				continue;
1856 			} else {
1857 				ho_ptr = ho->ptr;
1858 				ho_nbytes = ho->n_bytes;
1859 			}
1860 		}
1861 
1862 		if (ho_ptr + ho_nbytes == hi_ptr) {
1863 			ho_nbytes += n_bytes;
1864 		} else {
1865 			ho->n_bytes = ho_nbytes;
1866 
1867 			ho++;
1868 			ho->ptr0 = hi_ptr0;
1869 			ho->ptr = hi_ptr;
1870 
1871 			ho_ptr = hi_ptr;
1872 			ho_nbytes = n_bytes;
1873 
1874 			n_headers_out++;
1875 		}
1876 	}
1877 
1878 	ho->n_bytes = ho_nbytes;
1879 	t->n_headers_out = n_headers_out;
1880 }
1881 
/* Thin wrappers: emit 1 header, or emit 1 to 8 headers fused with the tx
 * instruction.  Each wrapper corresponds to one fusion depth chosen by the
 * instruction translation layer.
 */
static inline void
__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
		      struct thread *t,
		      const struct instruction *ip)
{
	__instr_hdr_emit_many_exec(p, t, ip, 1);
}

static inline void
__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 1);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 2);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 3);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 4);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 5);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 6);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 7);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 8);
	__instr_tx_exec(p, t, ip);
}
1977 
1978 /*
1979  * validate.
1980  */
static inline void
__instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
			  struct thread *t,
			  const struct instruction *ip)
{
	uint32_t header_id = ip->valid.header_id;

	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);

	/* Headers: set this header's bit in the thread's valid-header mask. */
	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
}
1993 
1994 /*
1995  * invalidate.
1996  */
static inline void
__instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint32_t header_id = ip->valid.header_id;

	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);

	/* Headers: clear this header's bit in the thread's valid-header mask. */
	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
}
2009 
2010 /*
2011  * learn.
2012  */
static inline void
__instr_learn_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip)
{
	uint64_t action_id = ip->learn.action_id;
	uint32_t mf_offset = ip->learn.mf_offset;
	uint32_t learner_id = t->learner_id;
	/* Learner table states live after the regular and selector table
	 * states within the table_state array.
	 */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];
	uint32_t status;

	/* Table: add an entry for the given action, taking its data from the
	 * thread meta-data starting at mf_offset.
	 */
	status = rte_swx_table_learner_add(ts->obj,
					   l->mailbox,
					   t->time,
					   action_id,
					   &t->metadata[mf_offset]);

	/* NOTE(review): the trace treats non-zero status as success, while
	 * the same status value indexes n_pkts_learn[] below — confirm both
	 * against the rte_swx_table_learner_add() return convention.
	 */
	TRACE("[Thread %2u] learner %u learn %s\n",
	      p->thread_id,
	      learner_id,
	      status ? "ok" : "error");

	stats->n_pkts_learn[status] += 1;
}
2041 
2042 /*
2043  * forget.
2044  */
static inline void
__instr_forget_exec(struct rte_swx_pipeline *p,
		    struct thread *t,
		    const struct instruction *ip __rte_unused)
{
	uint32_t learner_id = t->learner_id;
	/* Learner table states live after the regular and selector table
	 * states within the table_state array.
	 */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table: delete through the learner mailbox state. */
	rte_swx_table_learner_delete(ts->obj, l->mailbox);

	TRACE("[Thread %2u] learner %u forget\n",
	      p->thread_id,
	      learner_id);

	stats->n_pkts_forget += 1;
}
2065 
2066 /*
2067  * extern.
2068  */
2069 static inline uint32_t
2070 __instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
2071 			struct thread *t,
2072 			const struct instruction *ip)
2073 {
2074 	uint32_t obj_id = ip->ext_obj.ext_obj_id;
2075 	uint32_t func_id = ip->ext_obj.func_id;
2076 	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
2077 	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
2078 	uint32_t done;
2079 
2080 	TRACE("[Thread %2u] extern obj %u member func %u\n",
2081 	      p->thread_id,
2082 	      obj_id,
2083 	      func_id);
2084 
2085 	done = func(obj->obj, obj->mailbox);
2086 
2087 	return done;
2088 }
2089 
2090 static inline uint32_t
2091 __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
2092 			 struct thread *t,
2093 			 const struct instruction *ip)
2094 {
2095 	uint32_t ext_func_id = ip->ext_func.ext_func_id;
2096 	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
2097 	rte_swx_extern_func_t func = ext_func->func;
2098 	uint32_t done;
2099 
2100 	TRACE("[Thread %2u] extern func %u\n",
2101 	      p->thread_id,
2102 	      ext_func_id);
2103 
2104 	done = func(ext_func->mailbox);
2105 
2106 	return done;
2107 }
2108 
2109 /*
2110  * mov.
2111  */
static inline void
__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
		 struct thread *t,
		 const struct instruction *ip)
{
	TRACE("[Thread %2u] mov\n", p->thread_id);

	/* dst = src (MOV operand-access variant). */
	MOV(t, ip);
}
2121 
static inline void
__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);

	/* dst = src (MOV_MH operand-access variant). */
	MOV_MH(t, ip);
}
2131 
static inline void
__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);

	/* dst = src (MOV_HM operand-access variant). */
	MOV_HM(t, ip);
}
2141 
static inline void
__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);

	/* dst = src (MOV_HH operand-access variant). */
	MOV_HH(t, ip);
}
2151 
static inline void
__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
		   struct thread *t,
		   const struct instruction *ip)
{
	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);

	/* dst = immediate value ip->mov.src_val (MOV_I variant). */
	MOV_I(t, ip);
}
2161 
2162 /*
2163  * dma.
2164  */
2165 static inline void
2166 __instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
2167 			 struct thread *t,
2168 			 const struct instruction *ip,
2169 			 uint32_t n_dma)
2170 {
2171 	uint8_t *action_data = t->structs[0];
2172 	uint64_t valid_headers = t->valid_headers;
2173 	uint32_t i;
2174 
2175 	for (i = 0; i < n_dma; i++) {
2176 		uint32_t header_id = ip->dma.dst.header_id[i];
2177 		uint32_t struct_id = ip->dma.dst.struct_id[i];
2178 		uint32_t offset = ip->dma.src.offset[i];
2179 		uint32_t n_bytes = ip->dma.n_bytes[i];
2180 
2181 		struct header_runtime *h = &t->headers[header_id];
2182 		uint8_t *h_ptr0 = h->ptr0;
2183 		uint8_t *h_ptr = t->structs[struct_id];
2184 
2185 		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
2186 			h_ptr : h_ptr0;
2187 		void *src = &action_data[offset];
2188 
2189 		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);
2190 
2191 		/* Headers. */
2192 		memcpy(dst, src, n_bytes);
2193 		t->structs[struct_id] = dst;
2194 		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
2195 	}
2196 
2197 	t->valid_headers = valid_headers;
2198 }
2199 
static inline void
__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	/* Single action-data to header transfer. */
	__instr_dma_ht_many_exec(p, t, ip, 1);
}
2205 
static inline void
__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	/* Fused: 2 action-data to header transfers. */
	__instr_dma_ht_many_exec(p, t, ip, 2);
}
2213 
static inline void
__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	/* Fused: 3 action-data to header transfers. */
	__instr_dma_ht_many_exec(p, t, ip, 3);
}
2221 
static inline void
__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	/* Fused: 4 action-data to header transfers. */
	__instr_dma_ht_many_exec(p, t, ip, 4);
}
2229 
static inline void
__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	/* Fused: 5 action-data to header transfers. */
	__instr_dma_ht_many_exec(p, t, ip, 5);
}
2237 
static inline void
__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	/* Fused: 6 action-data to header transfers. */
	__instr_dma_ht_many_exec(p, t, ip, 6);
}
2245 
static inline void
__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	/* Fused: 7 action-data to header transfers. */
	__instr_dma_ht_many_exec(p, t, ip, 7);
}
2253 
static inline void
__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	/* Fused: 8 action-data to header transfers. */
	__instr_dma_ht_many_exec(p, t, ip, 8);
}
2261 
2262 /*
2263  * alu.
2264  */
static inline void
__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] add\n", p->thread_id);

	/* dst = dst + src. */
	ALU(t, ip, +);
}
2274 
static inline void
__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mh)\n", p->thread_id);

	/* dst = dst + src (ALU_MH operand-access variant). */
	ALU_MH(t, ip, +);
}
2284 
static inline void
__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hm)\n", p->thread_id);

	/* dst = dst + src (ALU_HM operand-access variant). */
	ALU_HM(t, ip, +);
}
2294 
static inline void
__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hh)\n", p->thread_id);

	/* dst = dst + src (ALU_HH operand-access variant). */
	ALU_HH(t, ip, +);
}
2304 
static inline void
__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mi)\n", p->thread_id);

	/* dst = dst + src (ALU_MI variant: immediate source). */
	ALU_MI(t, ip, +);
}
2314 
static inline void
__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hi)\n", p->thread_id);

	/* dst = dst + src (ALU_HI variant: immediate source). */
	ALU_HI(t, ip, +);
}
2324 
static inline void
__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] sub\n", p->thread_id);

	/* dst = dst - src. */
	ALU(t, ip, -);
}
2334 
static inline void
__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);

	/* dst = dst - src (ALU_MH operand-access variant). */
	ALU_MH(t, ip, -);
}
2344 
static inline void
__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);

	/* dst = dst - src (ALU_HM operand-access variant). */
	ALU_HM(t, ip, -);
}
2354 
static inline void
__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);

	/* dst = dst - src (ALU_HH operand-access variant). */
	ALU_HH(t, ip, -);
}
2364 
static inline void
__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);

	/* dst = dst - src (ALU_MI variant: immediate source). */
	ALU_MI(t, ip, -);
}
2374 
static inline void
__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);

	/* dst = dst - src (ALU_HI variant: immediate source). */
	ALU_HI(t, ip, -);
}
2384 
static inline void
__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shl\n", p->thread_id);

	/* dst = dst << src. */
	ALU(t, ip, <<);
}
2394 
static inline void
__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);

	/* dst = dst << src (ALU_MH operand-access variant). */
	ALU_MH(t, ip, <<);
}
2404 
static inline void
__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);

	/* dst = dst << src (ALU_HM operand-access variant). */
	ALU_HM(t, ip, <<);
}
2414 
static inline void
__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);

	/* dst = dst << src (ALU_HH operand-access variant). */
	ALU_HH(t, ip, <<);
}
2424 
static inline void
__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);

	/* dst = dst << src (ALU_MI variant: immediate shift amount). */
	ALU_MI(t, ip, <<);
}
2434 
static inline void
__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);

	/* dst = dst << src (ALU_HI variant: immediate shift amount). */
	ALU_HI(t, ip, <<);
}
2444 
static inline void
__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shr\n", p->thread_id);

	/* dst = dst >> src. */
	ALU(t, ip, >>);
}
2454 
static inline void
__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);

	/* dst = dst >> src (ALU_MH operand-access variant). */
	ALU_MH(t, ip, >>);
}
2464 
static inline void
__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);

	/* dst = dst >> src (ALU_HM operand-access variant). */
	ALU_HM(t, ip, >>);
}
2474 
static inline void
__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);

	/* dst = dst >> src (ALU_HH operand-access variant). */
	ALU_HH(t, ip, >>);
}
2484 
static inline void
__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);

	/* dst = dst >> src (ALU_MI variant: immediate shift amount). */
	ALU_MI(t, ip, >>);
}
2495 
static inline void
__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);

	/* dst = dst >> src (ALU_HI variant: immediate shift amount). */
	ALU_HI(t, ip, >>);
}
2505 
static inline void
__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] and\n", p->thread_id);

	/* dst = dst & src. */
	ALU(t, ip, &);
}
2515 
static inline void
__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (mh)\n", p->thread_id);

	/* dst = dst & src (ALU_MH operand-access variant). */
	ALU_MH(t, ip, &);
}
2525 
static inline void
__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hm)\n", p->thread_id);

	/* dst = dst & src; the FAST variant is usable for bitwise ops. */
	ALU_HM_FAST(t, ip, &);
}
2535 
static inline void
__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hh)\n", p->thread_id);

	/* dst = dst & src; the FAST variant is usable for bitwise ops. */
	ALU_HH_FAST(t, ip, &);
}
2545 
static inline void
__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] and (i)\n", p->thread_id);

	/* dst = dst & src (ALU_I variant: immediate source). */
	ALU_I(t, ip, &);
}
2555 
static inline void
__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] or\n", p->thread_id);

	/* dst = dst | src. */
	ALU(t, ip, |);
}
2565 
static inline void
__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (mh)\n", p->thread_id);

	/* dst = dst | src (ALU_MH operand-access variant). */
	ALU_MH(t, ip, |);
}
2575 
static inline void
__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hm)\n", p->thread_id);

	/* dst = dst | src; the FAST variant is usable for bitwise ops. */
	ALU_HM_FAST(t, ip, |);
}
2585 
static inline void
__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hh)\n", p->thread_id);

	/* dst = dst | src; the FAST variant is usable for bitwise ops. */
	ALU_HH_FAST(t, ip, |);
}
2595 
static inline void
__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u] or (i)\n", p->thread_id);

	/* dst = dst | src (ALU_I variant: immediate source). */
	ALU_I(t, ip, |);
}
2605 
static inline void
__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] xor\n", p->thread_id);

	/* dst = dst ^ src. */
	ALU(t, ip, ^);
}
2615 
static inline void
__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);

	/* dst = dst ^ src (ALU_MH operand-access variant). */
	ALU_MH(t, ip, ^);
}
2625 
static inline void
__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);

	/* dst = dst ^ src; the FAST variant is usable for bitwise ops. */
	ALU_HM_FAST(t, ip, ^);
}
2635 
static inline void
__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);

	/* dst = dst ^ src; the FAST variant is usable for bitwise ops. */
	ALU_HH_FAST(t, ip, ^);
}
2645 
static inline void
__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (i)\n", p->thread_id);

	/* dst = dst ^ src (ALU_I variant: immediate source). */
	ALU_I(t, ip, ^);
}
2655 
/*
 * Incrementally update the 16-bit ones' complement checksum at dst to account
 * for the addition of the src field (up to 64 bits, folded as two 32-bit
 * halves). See RFC 1071/1624 on incremental checksum updates.
 * NOTE(review): the 16/64-bit loads through casted pointers assume suitably
 * aligned header/meta-data storage — confirm against the buffer allocators.
 */
static inline void
__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	/* Read the source field and keep only its n_bits low bits. */
	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Start from the stored checksum's ones' complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* The first input (r) is a 16-bit number. The second and the third
	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
	 * three numbers (output r) is a 34-bit number.
	 */
	r += (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is an 18-bit
	 * number. In the worst case scenario, the sum of the two numbers is a
	 * 19-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
	 * therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Complement back; map 0 to 0xFFFF (negative zero convention). */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2711 
/*
 * Incrementally update the 16-bit ones' complement checksum at dst to account
 * for the removal of the src field (up to 64 bits, folded as two 32-bit
 * halves). See RFC 1071/1624 on incremental checksum updates.
 */
static inline void
__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	/* Read the source field and keep only its n_bits low bits. */
	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Start from the stored checksum's ones' complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
	 * the following sequence of operations in 2's complement arithmetic:
	 *    a '- b = (a - b) % 0xFFFF.
	 *
	 * In order to prevent an underflow for the below subtraction, in which
	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
	 * minuend), we first add a multiple of the 0xFFFF modulus to the
	 * minuend. The number we add to the minuend needs to be a 34-bit number
	 * or higher, so for readability reasons we picked the 36-bit multiple.
	 * We are effectively turning the 16-bit minuend into a 36-bit number:
	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
	 */
	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */

	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
	 * result (the output r) is a 36-bit number.
	 */
	r -= (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is a 20-bit
	 * number. Their sum is a 21-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Complement back; map 0 to 0xFFFF (negative zero convention). */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2779 
/*
 * Compute from scratch the 16-bit ones' complement checksum over a 20-byte
 * struct (five 32-bit words, e.g. an IPv4 header without options) and write
 * it into the 16-bit destination field.
 * NOTE(review): the source is read from offset 0 of the source struct;
 * ip->alu.src.offset is not applied here.
 */
static inline void
__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
				struct thread *t,
				const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr;
	uint32_t *src32_ptr;
	uint64_t r0, r1;

	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Sum the five words into two accumulators to expose parallelism. */
	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */

	/* The first input is a 16-bit number. The second input is a 19-bit
	 * number. Their sum is a 20-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* Complement; map 0 to 0xFFFF (negative zero convention). */
	r0 = ~r0 & 0xFFFF;
	r0 = r0 ? r0 : 0xFFFF;

	*dst16_ptr = (uint16_t)r0;
}
2827 
/*
 * Compute from scratch the 16-bit ones' complement checksum over a struct of
 * n_bits / 32 words of 32 bits each, and write it into the 16-bit destination
 * field.
 * NOTE(review): the source is read from offset 0 of the source struct;
 * ip->alu.src.offset is not applied here.
 */
static inline void
__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr;
	uint32_t *src32_ptr;
	uint64_t r = 0;
	uint32_t i;

	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* For a header of up to 256 bytes (TODO confirm the size limit), there
	 * are up to 64 = 2^6 words of 32 bits each, so the accumulated sum is
	 * at most a 38-bit number. NOTE(review): the previous comment here
	 * assumed at most 8 words; the folding below is correct for up to 64
	 * words as well.
	 */
	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
		r += *src32_ptr;

	/* The first input is a 16-bit number. The second input is a 22-bit
	 * number. Their sum is a 23-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 7-bit number (0 .. 0x40). The sum is a 17-bit number
	 * (0 .. 0x1003F).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1003F), the output r is (0 .. 0x40). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Complement; map 0 to 0xFFFF (negative zero convention). */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2877 
2878 /*
2879  * Register array.
2880  */
2881 static inline uint64_t *
2882 instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
2883 {
2884 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
2885 	return r->regarray;
2886 }
2887 
static inline uint64_t
instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];

	/* Read the host-byte-order index operand, keep only its n_bits low
	 * bits, then clamp it into the array bounds via size_mask.
	 */
	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
	uint64_t idx = idx64 & idx64_mask & r->size_mask;

	return idx;
}
2901 
2902 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
2903 
static inline uint64_t
instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];

	/* Read the network-byte-order index operand: byte-swap, right-align
	 * its n_bits, then clamp it into the array bounds via size_mask.
	 */
	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;

	return idx;
}
2916 
2917 #else
2918 
2919 #define instr_regarray_idx_nbo instr_regarray_idx_hbo
2920 
2921 #endif
2922 
2923 static inline uint64_t
2924 instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
2925 {
2926 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
2927 
2928 	uint64_t idx = ip->regarray.idx_val & r->size_mask;
2929 
2930 	return idx;
2931 }
2932 
static inline uint64_t
instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
{
	/* Read the host-byte-order source operand and keep only its n_bits
	 * low bits.
	 */
	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
	uint64_t src = src64 & src64_mask;

	return src;
}
2944 
2945 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
2946 
static inline uint64_t
instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
{
	/* Read the network-byte-order source operand: byte-swap, then
	 * right-align its n_bits.
	 */
	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);

	return src;
}
2957 
2958 #else
2959 
2960 #define instr_regarray_src_nbo instr_regarray_src_hbo
2961 
2962 #endif
2963 
2964 static inline void
2965 instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
2966 {
2967 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
2968 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
2969 	uint64_t dst64 = *dst64_ptr;
2970 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
2971 
2972 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
2973 
2974 }
2975 
2976 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
2977 
static inline void
instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
{
	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
	uint64_t dst64 = *dst64_ptr;
	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);

	/* Convert the host-byte-order source to network byte order aligned to
	 * the field's n_bits, then read-modify-write only those bits.
	 */
	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
}
2989 
2990 #else
2991 
2992 #define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
2993 
2994 #endif
2995 
static inline void
__instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);

	/* Prefetch the register array entry; index from an NBO field. */
	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	rte_prefetch0(&regarray[idx]);
}
3009 
static inline void
__instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);

	/* Prefetch the register array entry; index from an HBO field. */
	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	rte_prefetch0(&regarray[idx]);
}
3023 
static inline void
__instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
			    struct thread *t __rte_unused,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);

	/* Prefetch the register array entry; index is an immediate. */
	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	rte_prefetch0(&regarray[idx]);
}
3037 
3038 static inline void
3039 __instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
3040 		       struct thread *t,
3041 		       const struct instruction *ip)
3042 {
3043 	uint64_t *regarray, idx;
3044 
3045 	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
3046 
3047 	regarray = instr_regarray_regarray(p, ip);
3048 	idx = instr_regarray_idx_nbo(p, t, ip);
3049 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3050 }
3051 
3052 static inline void
3053 __instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
3054 		       struct thread *t,
3055 		       const struct instruction *ip)
3056 {
3057 	uint64_t *regarray, idx;
3058 
3059 	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
3060 
3061 	/* Structs. */
3062 	regarray = instr_regarray_regarray(p, ip);
3063 	idx = instr_regarray_idx_hbo(p, t, ip);
3064 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3065 }
3066 
3067 static inline void
3068 __instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3069 {
3070 	uint64_t *regarray, idx;
3071 
3072 	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
3073 
3074 	regarray = instr_regarray_regarray(p, ip);
3075 	idx = instr_regarray_idx_nbo(p, t, ip);
3076 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3077 }
3078 
3079 static inline void
3080 __instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3081 {
3082 	uint64_t *regarray, idx;
3083 
3084 	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
3085 
3086 	regarray = instr_regarray_regarray(p, ip);
3087 	idx = instr_regarray_idx_hbo(p, t, ip);
3088 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3089 }
3090 
3091 static inline void
3092 __instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3093 {
3094 	uint64_t *regarray, idx;
3095 
3096 	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
3097 
3098 	regarray = instr_regarray_regarray(p, ip);
3099 	idx = instr_regarray_idx_imm(p, ip);
3100 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3101 }
3102 
3103 static inline void
3104 __instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3105 {
3106 	uint64_t *regarray, idx;
3107 
3108 	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
3109 
3110 	regarray = instr_regarray_regarray(p, ip);
3111 	idx = instr_regarray_idx_imm(p, ip);
3112 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3113 }
3114 
3115 static inline void
3116 __instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3117 {
3118 	uint64_t *regarray, idx, src;
3119 
3120 	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
3121 
3122 	regarray = instr_regarray_regarray(p, ip);
3123 	idx = instr_regarray_idx_nbo(p, t, ip);
3124 	src = instr_regarray_src_nbo(t, ip);
3125 	regarray[idx] = src;
3126 }
3127 
3128 static inline void
3129 __instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3130 {
3131 	uint64_t *regarray, idx, src;
3132 
3133 	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
3134 
3135 	regarray = instr_regarray_regarray(p, ip);
3136 	idx = instr_regarray_idx_nbo(p, t, ip);
3137 	src = instr_regarray_src_hbo(t, ip);
3138 	regarray[idx] = src;
3139 }
3140 
3141 static inline void
3142 __instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3143 {
3144 	uint64_t *regarray, idx, src;
3145 
3146 	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
3147 
3148 	regarray = instr_regarray_regarray(p, ip);
3149 	idx = instr_regarray_idx_hbo(p, t, ip);
3150 	src = instr_regarray_src_nbo(t, ip);
3151 	regarray[idx] = src;
3152 }
3153 
3154 static inline void
3155 __instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3156 {
3157 	uint64_t *regarray, idx, src;
3158 
3159 	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
3160 
3161 	regarray = instr_regarray_regarray(p, ip);
3162 	idx = instr_regarray_idx_hbo(p, t, ip);
3163 	src = instr_regarray_src_hbo(t, ip);
3164 	regarray[idx] = src;
3165 }
3166 
3167 static inline void
3168 __instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3169 {
3170 	uint64_t *regarray, idx, src;
3171 
3172 	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
3173 
3174 	regarray = instr_regarray_regarray(p, ip);
3175 	idx = instr_regarray_idx_nbo(p, t, ip);
3176 	src = ip->regarray.dstsrc_val;
3177 	regarray[idx] = src;
3178 }
3179 
3180 static inline void
3181 __instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3182 {
3183 	uint64_t *regarray, idx, src;
3184 
3185 	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
3186 
3187 	regarray = instr_regarray_regarray(p, ip);
3188 	idx = instr_regarray_idx_hbo(p, t, ip);
3189 	src = ip->regarray.dstsrc_val;
3190 	regarray[idx] = src;
3191 }
3192 
3193 static inline void
3194 __instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3195 {
3196 	uint64_t *regarray, idx, src;
3197 
3198 	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
3199 
3200 	regarray = instr_regarray_regarray(p, ip);
3201 	idx = instr_regarray_idx_imm(p, ip);
3202 	src = instr_regarray_src_nbo(t, ip);
3203 	regarray[idx] = src;
3204 }
3205 
3206 static inline void
3207 __instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3208 {
3209 	uint64_t *regarray, idx, src;
3210 
3211 	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
3212 
3213 	regarray = instr_regarray_regarray(p, ip);
3214 	idx = instr_regarray_idx_imm(p, ip);
3215 	src = instr_regarray_src_hbo(t, ip);
3216 	regarray[idx] = src;
3217 }
3218 
3219 static inline void
3220 __instr_regwr_rii_exec(struct rte_swx_pipeline *p,
3221 		       struct thread *t __rte_unused,
3222 		       const struct instruction *ip)
3223 {
3224 	uint64_t *regarray, idx, src;
3225 
3226 	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
3227 
3228 	regarray = instr_regarray_regarray(p, ip);
3229 	idx = instr_regarray_idx_imm(p, ip);
3230 	src = ip->regarray.dstsrc_val;
3231 	regarray[idx] = src;
3232 }
3233 
3234 static inline void
3235 __instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3236 {
3237 	uint64_t *regarray, idx, src;
3238 
3239 	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
3240 
3241 	regarray = instr_regarray_regarray(p, ip);
3242 	idx = instr_regarray_idx_nbo(p, t, ip);
3243 	src = instr_regarray_src_nbo(t, ip);
3244 	regarray[idx] += src;
3245 }
3246 
3247 static inline void
3248 __instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3249 {
3250 	uint64_t *regarray, idx, src;
3251 
3252 	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
3253 
3254 	regarray = instr_regarray_regarray(p, ip);
3255 	idx = instr_regarray_idx_nbo(p, t, ip);
3256 	src = instr_regarray_src_hbo(t, ip);
3257 	regarray[idx] += src;
3258 }
3259 
3260 static inline void
3261 __instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3262 {
3263 	uint64_t *regarray, idx, src;
3264 
3265 	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
3266 
3267 	regarray = instr_regarray_regarray(p, ip);
3268 	idx = instr_regarray_idx_hbo(p, t, ip);
3269 	src = instr_regarray_src_nbo(t, ip);
3270 	regarray[idx] += src;
3271 }
3272 
3273 static inline void
3274 __instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3275 {
3276 	uint64_t *regarray, idx, src;
3277 
3278 	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
3279 
3280 	regarray = instr_regarray_regarray(p, ip);
3281 	idx = instr_regarray_idx_hbo(p, t, ip);
3282 	src = instr_regarray_src_hbo(t, ip);
3283 	regarray[idx] += src;
3284 }
3285 
3286 static inline void
3287 __instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3288 {
3289 	uint64_t *regarray, idx, src;
3290 
3291 	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
3292 
3293 	regarray = instr_regarray_regarray(p, ip);
3294 	idx = instr_regarray_idx_nbo(p, t, ip);
3295 	src = ip->regarray.dstsrc_val;
3296 	regarray[idx] += src;
3297 }
3298 
3299 static inline void
3300 __instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3301 {
3302 	uint64_t *regarray, idx, src;
3303 
3304 	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
3305 
3306 	regarray = instr_regarray_regarray(p, ip);
3307 	idx = instr_regarray_idx_hbo(p, t, ip);
3308 	src = ip->regarray.dstsrc_val;
3309 	regarray[idx] += src;
3310 }
3311 
3312 static inline void
3313 __instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3314 {
3315 	uint64_t *regarray, idx, src;
3316 
3317 	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
3318 
3319 	regarray = instr_regarray_regarray(p, ip);
3320 	idx = instr_regarray_idx_imm(p, ip);
3321 	src = instr_regarray_src_nbo(t, ip);
3322 	regarray[idx] += src;
3323 }
3324 
3325 static inline void
3326 __instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3327 {
3328 	uint64_t *regarray, idx, src;
3329 
3330 	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
3331 
3332 	regarray = instr_regarray_regarray(p, ip);
3333 	idx = instr_regarray_idx_imm(p, ip);
3334 	src = instr_regarray_src_hbo(t, ip);
3335 	regarray[idx] += src;
3336 }
3337 
3338 static inline void
3339 __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
3340 			struct thread *t __rte_unused,
3341 			const struct instruction *ip)
3342 {
3343 	uint64_t *regarray, idx, src;
3344 
3345 	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
3346 
3347 	regarray = instr_regarray_regarray(p, ip);
3348 	idx = instr_regarray_idx_imm(p, ip);
3349 	src = ip->regarray.dstsrc_val;
3350 	regarray[idx] += src;
3351 }
3352 
3353 /*
3354  * metarray.
3355  */
/*
 * Return a pointer to the meter selected by an index read in host byte order
 * from the thread structure identified by the instruction.
 */
static inline struct meter *
instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];

	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	/* Mask of the low n_bits of the index word; assumes 1 <= n_bits <= 64. */
	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
	/* size_mask bounds the index to the meter array (presumably a
	 * power-of-2 array size; TODO confirm against metarray setup).
	 */
	uint64_t idx = idx64 & idx64_mask & r->size_mask;

	return &r->metarray[idx];
}
3369 
3370 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3371 
/*
 * Return a pointer to the meter selected by an index read in network byte
 * order (little-endian CPUs only; on big-endian CPUs the HBO variant is
 * aliased instead).
 */
static inline struct meter *
instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];

	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	/* Byte-swap, right-align the n_bits-wide field, then bound it to the
	 * meter array with size_mask. Assumes 1 <= n_bits <= 64.
	 */
	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;

	return &r->metarray[idx];
}
3384 
3385 #else
3386 
3387 #define instr_meter_idx_nbo instr_meter_idx_hbo
3388 
3389 #endif
3390 
3391 static inline struct meter *
3392 instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3393 {
3394 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3395 
3396 	uint64_t idx =  ip->meter.idx_val & r->size_mask;
3397 
3398 	return &r->metarray[idx];
3399 }
3400 
/*
 * Read the packet length operand in host byte order from the thread
 * structure identified by the instruction.
 */
static inline uint32_t
instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
	uint64_t src64 = *src64_ptr;
	/* Mask of the low n_bits of the source word; assumes 1 <= n_bits <= 64. */
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
	uint64_t src = src64 & src64_mask;

	return (uint32_t)src;
}
3412 
3413 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3414 
/*
 * Read the packet length operand in network byte order (little-endian CPUs
 * only; on big-endian CPUs the HBO variant is aliased instead).
 */
static inline uint32_t
instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
	uint64_t src64 = *src64_ptr;
	/* Byte-swap, then right-align the n_bits-wide field. */
	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);

	return (uint32_t)src;
}
3425 
3426 #else
3427 
3428 #define instr_meter_length_nbo instr_meter_length_hbo
3429 
3430 #endif
3431 
/*
 * Read the input color operand in host byte order from the thread structure
 * identified by the instruction.
 */
static inline enum rte_color
instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
	uint64_t src64 = *src64_ptr;
	/* Mask of the low n_bits of the source word; assumes 1 <= n_bits <= 64. */
	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
	uint64_t src = src64 & src64_mask;

	return (enum rte_color)src;
}
3443 
/*
 * Write the output color in host byte order into the destination field
 * identified by the instruction, preserving the destination word bits
 * outside the field.
 */
static inline void
instr_meter_color_out_hbo_set(struct thread *t,
			      const struct instruction *ip,
			      enum rte_color color_out)
{
	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
	uint64_t dst64 = *dst64_ptr;
	/* Mask of the low n_bits of the destination word; assumes 1 <= n_bits <= 64. */
	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);

	uint64_t src = (uint64_t)color_out;

	/* Read-modify-write: replace only the field bits. */
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
}
3458 
3459 static inline void
3460 __instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
3461 			   struct thread *t,
3462 			   const struct instruction *ip)
3463 {
3464 	struct meter *m;
3465 
3466 	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
3467 
3468 	m = instr_meter_idx_nbo(p, t, ip);
3469 	rte_prefetch0(m);
3470 }
3471 
3472 static inline void
3473 __instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
3474 			   struct thread *t,
3475 			   const struct instruction *ip)
3476 {
3477 	struct meter *m;
3478 
3479 	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
3480 
3481 	m = instr_meter_idx_hbo(p, t, ip);
3482 	rte_prefetch0(m);
3483 }
3484 
3485 static inline void
3486 __instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
3487 			   struct thread *t __rte_unused,
3488 			   const struct instruction *ip)
3489 {
3490 	struct meter *m;
3491 
3492 	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
3493 
3494 	m = instr_meter_idx_imm(p, ip);
3495 	rte_prefetch0(m);
3496 }
3497 
/*
 * meter (hhm): index NBO, length NBO, input color HBO. Runs the trTCM
 * color-aware check, writes the output color back and updates the
 * per-color packet/byte counters.
 */
static inline void
__instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot the counters of the resulting color, write the output
	 * color back into the thread structure, then store the updated
	 * counter values.
	 */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3530 
/*
 * meter (hhi): index NBO, length NBO, input color from instruction
 * immediate. Runs the trTCM color-aware check, writes the output color back
 * and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3563 
/*
 * meter (hmm): index NBO, length HBO, input color HBO. Runs the trTCM
 * color-aware check, writes the output color back and updates the
 * per-color packet/byte counters.
 */
static inline void
__instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3596 
/*
 * meter (hmi): index NBO, length HBO, input color from instruction
 * immediate. Runs the trTCM color-aware check, writes the output color back
 * and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3629 
/*
 * meter (mhm): index HBO, length NBO, input color HBO. Runs the trTCM
 * color-aware check, writes the output color back and updates the
 * per-color packet/byte counters.
 */
static inline void
__instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3662 
/*
 * meter (mhi): index HBO, length NBO, input color from instruction
 * immediate. Runs the trTCM color-aware check, writes the output color back
 * and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3695 
/*
 * meter (mmm): index HBO, length HBO, input color HBO. Runs the trTCM
 * color-aware check, writes the output color back and updates the
 * per-color packet/byte counters.
 */
static inline void
__instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3728 
/*
 * meter (mmi): index HBO, length HBO, input color from instruction
 * immediate. Runs the trTCM color-aware check, writes the output color back
 * and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3761 
/*
 * meter (ihm): index from instruction immediate, length NBO, input color
 * HBO. Runs the trTCM color-aware check, writes the output color back and
 * updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3794 
/*
 * meter (ihi): index from instruction immediate, length NBO, input color
 * from instruction immediate. Runs the trTCM color-aware check, writes the
 * output color back and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_nbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3827 
/*
 * meter (imm): index from instruction immediate, length HBO, input color
 * HBO. Runs the trTCM color-aware check, writes the output color back and
 * updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = instr_meter_color_in_hbo(t, ip);

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3860 
/*
 * meter (imi): index from instruction immediate, length HBO, input color
 * from instruction immediate. Runs the trTCM color-aware check, writes the
 * output color back and updates the per-color packet/byte counters.
 */
static inline void
__instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct meter *m;
	uint64_t time, n_pkts, n_bytes;
	uint32_t length;
	enum rte_color color_in, color_out;

	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	/* Prefetch the stats counters while the meter inputs are gathered. */
	rte_prefetch0(m->n_pkts);
	time = rte_get_tsc_cycles();
	length = instr_meter_length_hbo(t, ip);
	color_in = (enum rte_color)ip->meter.color_in_val;

	color_out = rte_meter_trtcm_color_aware_check(&m->m,
		&m->profile->profile,
		time,
		length,
		color_in);

	/* Apply the meter's configured color mask to the result. */
	color_out &= m->color_mask;

	/* Snapshot counters, write the output color back, then update. */
	n_pkts = m->n_pkts[color_out];
	n_bytes = m->n_bytes[color_out];

	instr_meter_color_out_hbo_set(t, ip, color_out);

	m->n_pkts[color_out] = n_pkts + 1;
	m->n_bytes[color_out] = n_bytes + length;
}
3893 
3894 #endif
3895