xref: /dpdk/lib/pipeline/rte_swx_pipeline_internal.h (revision 65c2bbf41f2258fea8e1639a86598f48d8251756)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
5 #define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
6 
7 #include <inttypes.h>
8 #include <string.h>
9 #include <sys/queue.h>
10 
11 #include <rte_byteorder.h>
12 #include <rte_common.h>
13 #include <rte_cycles.h>
14 #include <rte_prefetch.h>
15 #include <rte_meter.h>
16 
17 #include <rte_swx_table_selector.h>
18 #include <rte_swx_table_learner.h>
19 #include <rte_swx_pipeline.h>
20 #include <rte_swx_ctl.h>
21 
/* Compile-time trace switch: when TRACE_LEVEL is 0 (the default), TRACE()
 * expands to nothing so tracing has zero run-time cost.
 */
#ifndef TRACE_LEVEL
#define TRACE_LEVEL 0
#endif

#if TRACE_LEVEL
#define TRACE(...) printf(__VA_ARGS__)
#else
#define TRACE(...)
#endif

/*
 * Environment.
 */
/* 64-bit network <-> host byte order conversion shorthands. */
#define ntoh64(x) rte_be_to_cpu_64(x)
#define hton64(x) rte_cpu_to_be_64(x)
37 
38 /*
39  * Struct.
40  */
/* One field of a struct_type. */
struct field {
	char name[RTE_SWX_NAME_SIZE];
	uint32_t n_bits; /* Field size in bits. */
	uint32_t offset; /* Field offset within its struct, in bits. */
	int var_size;    /* Non-zero when the field has a variable size. */
};

/* User-registered structure type (header, meta-data or mailbox layout). */
struct struct_type {
	TAILQ_ENTRY(struct_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct field *fields; /* Array of n_fields elements. */
	uint32_t n_fields;
	uint32_t n_bits;      /* Total size in bits (maximum, for var_size). */
	uint32_t n_bits_min;  /* Minimum total size in bits. */
	int var_size;         /* Non-zero when any field is variable-size. */
};

TAILQ_HEAD(struct_type_tailq, struct_type);
59 
60 /*
61  * Input port.
62  */
/* Registered input port type (name + operations vtable). */
struct port_in_type {
	TAILQ_ENTRY(port_in_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_in_ops ops;
};

TAILQ_HEAD(port_in_type_tailq, port_in_type);

/* Input port instance. */
struct port_in {
	TAILQ_ENTRY(port_in) node;
	struct port_in_type *type;
	void *obj;   /* Port object created by type->ops. */
	uint32_t id;
};

TAILQ_HEAD(port_in_tailq, port_in);

/* Flattened per-port data used on the receive fast path. */
struct port_in_runtime {
	rte_swx_port_in_pkt_rx_t pkt_rx;
	void *obj;
};
84 
85 /*
86  * Output port.
87  */
/* Registered output port type (name + operations vtable). */
struct port_out_type {
	TAILQ_ENTRY(port_out_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_swx_port_out_ops ops;
};

TAILQ_HEAD(port_out_type_tailq, port_out_type);

/* Output port instance. */
struct port_out {
	TAILQ_ENTRY(port_out) node;
	struct port_out_type *type;
	void *obj;   /* Port object created by type->ops. */
	uint32_t id;
};

TAILQ_HEAD(port_out_tailq, port_out);

/* Flattened per-port data used on the transmit fast path. */
struct port_out_runtime {
	rte_swx_port_out_pkt_tx_t pkt_tx;
	rte_swx_port_out_flush_t flush;
	void *obj;
};
110 
111 /*
112  * Extern object.
113  */
/* Member function of an extern object type. */
struct extern_type_member_func {
	TAILQ_ENTRY(extern_type_member_func) node;
	char name[RTE_SWX_NAME_SIZE];
	rte_swx_extern_type_member_func_t func;
	uint32_t id;
};

TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);

/* Extern object type: constructor/destructor plus member functions that
 * communicate with the pipeline through a mailbox struct.
 */
struct extern_type {
	TAILQ_ENTRY(extern_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type;
	rte_swx_extern_type_constructor_t constructor;
	rte_swx_extern_type_destructor_t destructor;
	struct extern_type_member_func_tailq funcs;
	uint32_t n_funcs;
};

TAILQ_HEAD(extern_type_tailq, extern_type);

/* Extern object instance. */
struct extern_obj {
	TAILQ_ENTRY(extern_obj) node;
	char name[RTE_SWX_NAME_SIZE];
	struct extern_type *type;
	void *obj;          /* Object created by type->constructor. */
	uint32_t struct_id; /* ID of the mailbox struct in thread->structs. */
	uint32_t id;
};

TAILQ_HEAD(extern_obj_tailq, extern_obj);

#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
#endif

/* Per-thread fast-path view of an extern object. */
struct extern_obj_runtime {
	void *obj;
	uint8_t *mailbox;
	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
};
155 
156 /*
157  * Extern function.
158  */
/* Free-standing extern function (no object), with its mailbox struct. */
struct extern_func {
	TAILQ_ENTRY(extern_func) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *mailbox_struct_type;
	rte_swx_extern_func_t func;
	uint32_t struct_id; /* ID of the mailbox struct in thread->structs. */
	uint32_t id;
};

TAILQ_HEAD(extern_func_tailq, extern_func);

/* Per-thread fast-path view of an extern function. */
struct extern_func_runtime {
	uint8_t *mailbox;
	rte_swx_extern_func_t func;
};
174 
175 /*
176  * Header.
177  */
/* Packet header declared in the pipeline program. */
struct header {
	TAILQ_ENTRY(header) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st;
	uint32_t struct_id; /* ID of this header in thread->structs. */
	uint32_t id;
};

TAILQ_HEAD(header_tailq, header);

/* Per-thread storage for an extracted/generated header. */
struct header_runtime {
	uint8_t *ptr0;    /* Base of the per-header storage area. */
	uint32_t n_bytes;
};

/* Per-thread storage for an emitted header. */
struct header_out_runtime {
	uint8_t *ptr0; /* Base of the per-header storage area. */
	uint8_t *ptr;  /* Current position within the storage area. */
	uint32_t n_bytes;
};
198 
199 /*
200  * Instruction.
201  */
202 
203 /* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
204  * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
205  * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
206  * when transferred to packet meta-data and in NBO when transferred to packet
207  * headers.
208  */
209 
210 /* Notation conventions:
211  *    -Header field: H = h.header.field (dst/src)
212  *    -Meta-data field: M = m.field (dst/src)
213  *    -Extern object mailbox field: E = e.field (dst/src)
214  *    -Extern function mailbox field: F = f.field (dst/src)
215  *    -Table action data field: T = t.field (src only)
216  *    -Immediate value: I = 32-bit unsigned value (src only)
217  */
218 
/* Opcode set of the SWX pipeline instruction set. Suffix letters encode the
 * operand categories (see the notation conventions above): H = header field,
 * M = meta-data field, E/F = extern mailbox field, T = table action data
 * field, I = immediate.
 */
enum instruction_type {
	/* rx m.port_in */
	INSTR_RX,

	/* tx port_out
	 * port_out = MI
	 */
	INSTR_TX,   /* port_out = M */
	INSTR_TX_I, /* port_out = I */

	/* extract h.header */
	INSTR_HDR_EXTRACT,
	INSTR_HDR_EXTRACT2,
	INSTR_HDR_EXTRACT3,
	INSTR_HDR_EXTRACT4,
	INSTR_HDR_EXTRACT5,
	INSTR_HDR_EXTRACT6,
	INSTR_HDR_EXTRACT7,
	INSTR_HDR_EXTRACT8,

	/* extract h.header m.last_field_size */
	INSTR_HDR_EXTRACT_M,

	/* lookahead h.header */
	INSTR_HDR_LOOKAHEAD,

	/* emit h.header */
	INSTR_HDR_EMIT,
	INSTR_HDR_EMIT_TX,
	INSTR_HDR_EMIT2_TX,
	INSTR_HDR_EMIT3_TX,
	INSTR_HDR_EMIT4_TX,
	INSTR_HDR_EMIT5_TX,
	INSTR_HDR_EMIT6_TX,
	INSTR_HDR_EMIT7_TX,
	INSTR_HDR_EMIT8_TX,

	/* validate h.header */
	INSTR_HDR_VALIDATE,

	/* invalidate h.header */
	INSTR_HDR_INVALIDATE,

	/* mov dst src
	 * dst = src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_MOV,    /* dst = MEF, src = MEFT */
	INSTR_MOV_MH, /* dst = MEF, src = H */
	INSTR_MOV_HM, /* dst = H, src = MEFT */
	INSTR_MOV_HH, /* dst = H, src = H */
	INSTR_MOV_I,  /* dst = HMEF, src = I */

	/* dma h.header t.field
	 * memcpy(h.header, t.field, sizeof(h.header))
	 */
	INSTR_DMA_HT,
	INSTR_DMA_HT2,
	INSTR_DMA_HT3,
	INSTR_DMA_HT4,
	INSTR_DMA_HT5,
	INSTR_DMA_HT6,
	INSTR_DMA_HT7,
	INSTR_DMA_HT8,

	/* add dst src
	 * dst += src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
	INSTR_ALU_ADD_HH, /* dst = H, src = H */
	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
	INSTR_ALU_ADD_HI, /* dst = H, src = I */

	/* sub dst src
	 * dst -= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
	INSTR_ALU_SUB_HH, /* dst = H, src = H */
	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
	INSTR_ALU_SUB_HI, /* dst = H, src = I */

	/* ckadd dst src
	 * dst = dst '+ src[0:1] '+ src[2:3] + ...
	 * dst = H, src = {H, h.header}
	 */
	INSTR_ALU_CKADD_FIELD,    /* src = H */
	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with any sizeof(header) */

	/* cksub dst src
	 * dst = dst '- src
	 * dst = H, src = H
	 */
	INSTR_ALU_CKSUB_FIELD,

	/* and dst src
	 * dst &= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
	INSTR_ALU_AND_HH, /* dst = H, src = H */
	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */

	/* or dst src
	 * dst |= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_OR_HH, /* dst = H, src = H */
	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */

	/* xor dst src
	 * dst ^= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_XOR_HH, /* dst = H, src = H */
	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */

	/* shl dst src
	 * dst <<= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHL_HH, /* dst = H, src = H */
	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHL_HI, /* dst = H, src = I */

	/* shr dst src
	 * dst >>= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHR_HH, /* dst = H, src = H */
	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHR_HI, /* dst = H, src = I */

	/* regprefetch REGARRAY index
	 * prefetch REGARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_REGPREFETCH_RH, /* index = H */
	INSTR_REGPREFETCH_RM, /* index = MEFT */
	INSTR_REGPREFETCH_RI, /* index = I */

	/* regrd dst REGARRAY index
	 * dst = REGARRAY[index]
	 * dst = HMEF, index = HMEFTI
	 */
	INSTR_REGRD_HRH, /* dst = H, index = H */
	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
	INSTR_REGRD_HRI, /* dst = H, index = I */
	INSTR_REGRD_MRH, /* dst = MEF, index = H */
	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
	INSTR_REGRD_MRI, /* dst = MEF, index = I */

	/* regwr REGARRAY index src
	 * REGARRAY[index] = src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGWR_RHH, /* index = H, src = H */
	INSTR_REGWR_RHM, /* index = H, src = MEFT */
	INSTR_REGWR_RHI, /* index = H, src = I */
	INSTR_REGWR_RMH, /* index = MEFT, src = H */
	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGWR_RMI, /* index = MEFT, src = I */
	INSTR_REGWR_RIH, /* index = I, src = H */
	INSTR_REGWR_RIM, /* index = I, src = MEFT */
	INSTR_REGWR_RII, /* index = I, src = I */

	/* regadd REGARRAY index src
	 * REGARRAY[index] += src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGADD_RHH, /* index = H, src = H */
	INSTR_REGADD_RHM, /* index = H, src = MEFT */
	INSTR_REGADD_RHI, /* index = H, src = I */
	INSTR_REGADD_RMH, /* index = MEFT, src = H */
	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGADD_RMI, /* index = MEFT, src = I */
	INSTR_REGADD_RIH, /* index = I, src = H */
	INSTR_REGADD_RIM, /* index = I, src = MEFT */
	INSTR_REGADD_RII, /* index = I, src = I */

	/* metprefetch METARRAY index
	 * prefetch METARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_METPREFETCH_H, /* index = H */
	INSTR_METPREFETCH_M, /* index = MEFT */
	INSTR_METPREFETCH_I, /* index = I */

	/* meter METARRAY index length color_in color_out
	 * color_out = meter(METARRAY[index], length, color_in)
	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
	 */
	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */

	/* table TABLE */
	INSTR_TABLE,
	INSTR_TABLE_AF,
	INSTR_SELECTOR,
	INSTR_LEARNER,
	INSTR_LEARNER_AF,

	/* learn LEARNER ACTION_NAME [ m.action_first_arg ] */
	INSTR_LEARNER_LEARN,

	/* forget */
	INSTR_LEARNER_FORGET,

	/* extern e.obj.func */
	INSTR_EXTERN_OBJ,

	/* extern f.func */
	INSTR_EXTERN_FUNC,

	/* jmp LABEL
	 * Unconditional jump
	 */
	INSTR_JMP,

	/* jmpv LABEL h.header
	 * Jump if header is valid
	 */
	INSTR_JMP_VALID,

	/* jmpnv LABEL h.header
	 * Jump if header is invalid
	 */
	INSTR_JMP_INVALID,

	/* jmph LABEL
	 * Jump if table lookup hit
	 */
	INSTR_JMP_HIT,

	/* jmpnh LABEL
	 * Jump if table lookup miss
	 */
	INSTR_JMP_MISS,

	/* jmpa LABEL ACTION
	 * Jump if action run
	 */
	INSTR_JMP_ACTION_HIT,

	/* jmpna LABEL ACTION
	 * Jump if action not run
	 */
	INSTR_JMP_ACTION_MISS,

	/* jmpeq LABEL a b
	 * Jump if a is equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_EQ_HH, /* a = H, b = H */
	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmpneq LABEL a b
	 * Jump if a is not equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_NEQ_HH, /* a = H, b = H */
	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmplt LABEL a b
	 * Jump if a is less than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
	INSTR_JMP_LT_HH, /* a = H, b = H */
	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
	INSTR_JMP_LT_HI, /* a = H, b = I */

	/* jmpgt LABEL a b
	 * Jump if a is greater than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
	INSTR_JMP_GT_HH, /* a = H, b = H */
	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
	INSTR_JMP_GT_HI, /* a = H, b = I */

	/* return
	 * Return from action
	 */
	INSTR_RETURN,

	/* Start of custom instructions. */
	INSTR_CUSTOM_0,
};
548 
/* Generic operand descriptor: locates a bit field within one of the
 * thread->structs[] areas (header, meta-data, mailbox or action data).
 */
struct instr_operand {
	uint8_t struct_id; /* Index into thread->structs[]. */
	uint8_t n_bits;
	uint8_t offset;    /* Byte offset within the struct. */
	uint8_t pad;
};

/* Encoding for the rx/tx/extract/emit instructions. The hdr arrays hold up
 * to 8 entries, matching the INSTR_HDR_EXTRACT2..8 / INSTR_HDR_EMIT2..8_TX
 * multi-header opcode variants.
 */
struct instr_io {
	struct {
		union {
			struct {
				uint8_t offset;
				uint8_t n_bits;
				uint8_t pad[2];
			};

			uint32_t val;
		};
	} io;

	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
		uint8_t n_bytes[8];
	} hdr;
};

/* Encoding for validate/invalidate h.header. */
struct instr_hdr_validity {
	uint8_t header_id;
};

/* Encoding for the table/selector/learner instructions. */
struct instr_table {
	uint8_t table_id;
};

/* Encoding for the learn instruction. */
struct instr_learn {
	uint8_t action_id;
	uint8_t mf_offset; /* Offset of the action first argument in meta-data. */
};

/* Encoding for extern e.obj.func. */
struct instr_extern_obj {
	uint8_t ext_obj_id;
	uint8_t func_id;
};

/* Encoding for extern f.func. */
struct instr_extern_func {
	uint8_t ext_func_id;
};

/* Encoding for the two-operand mov/ALU instructions; src_val is used by the
 * immediate (_I) opcode variants.
 */
struct instr_dst_src {
	struct instr_operand dst;
	union {
		struct instr_operand src;
		uint64_t src_val;
	};
};

/* Encoding for the regprefetch/regrd/regwr/regadd instructions; the _val
 * union members are used by the immediate opcode variants.
 */
struct instr_regarray {
	uint8_t regarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val;
	};

	union {
		struct instr_operand dstsrc;
		uint64_t dstsrc_val;
	};
};

/* Encoding for the metprefetch/meter instructions. */
struct instr_meter {
	uint8_t metarray_id;
	uint8_t pad[3];

	union {
		struct instr_operand idx;
		uint32_t idx_val;
	};

	struct instr_operand length;

	union {
		struct instr_operand color_in;
		uint32_t color_in_val;
	};

	struct instr_operand color_out;
};

/* Encoding for the dma instruction; arrays of 8 match the
 * INSTR_DMA_HT2..8 multi-transfer opcode variants.
 */
struct instr_dma {
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
	} dst;

	struct {
		uint8_t offset[8];
	} src;

	uint16_t n_bytes[8];
};

/* Encoding for the jump instructions; ip is the resolved target. */
struct instr_jmp {
	struct instruction *ip;

	union {
		struct instr_operand a;
		uint8_t header_id; /* jmpv/jmpnv. */
		uint8_t action_id; /* jmpa/jmpna. */
	};

	union {
		struct instr_operand b;
		uint64_t b_val; /* Immediate (_I) comparison variants. */
	};
};

/* One pipeline instruction: opcode plus the per-opcode operand encoding. */
struct instruction {
	enum instruction_type type;
	union {
		struct instr_io io;
		struct instr_hdr_validity valid;
		struct instr_dst_src mov;
		struct instr_regarray regarray;
		struct instr_meter meter;
		struct instr_dma dma;
		struct instr_dst_src alu;
		struct instr_table table;
		struct instr_learn learn;
		struct instr_extern_obj ext_obj;
		struct instr_extern_func ext_func;
		struct instr_jmp jmp;
	};
};

/* Translation-time companion data for one instruction. */
struct instruction_data {
	char label[RTE_SWX_NAME_SIZE];
	char jmp_label[RTE_SWX_NAME_SIZE];
	uint32_t n_users; /* user = jmp instruction to this instruction. */
	int invalid;
};

/* Executor callback for one instruction. */
typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
694 
695 /*
696  * Action.
697  */
/* Compiled action entry point. */
typedef void
(*action_func_t)(struct rte_swx_pipeline *p);

/* Table action: argument struct layout plus translated instruction stream. */
struct action {
	TAILQ_ENTRY(action) node;
	char name[RTE_SWX_NAME_SIZE];
	struct struct_type *st; /* Action argument struct type. */
	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	uint32_t n_instructions;
	uint32_t id;
};

TAILQ_HEAD(action_tailq, action);
713 
714 /*
715  * Table.
716  */
/* Registered table type (match type + operations vtable). */
struct table_type {
	TAILQ_ENTRY(table_type) node;
	char name[RTE_SWX_NAME_SIZE];
	enum rte_swx_table_match_type match_type;
	struct rte_swx_table_ops ops;
};

TAILQ_HEAD(table_type_tailq, table_type);

/* One match field of a table key. */
struct match_field {
	enum rte_swx_table_match_type match_type;
	struct field *field;
};

/* Regular match-action table. */
struct table {
	TAILQ_ENTRY(table) node;
	char name[RTE_SWX_NAME_SIZE];
	char args[RTE_SWX_NAME_SIZE];
	struct table_type *type; /* NULL when n_fields == 0. */

	/* Match. */
	struct match_field *fields;
	uint32_t n_fields;
	struct header *header; /* Only valid when n_fields > 0. */

	/* Action. */
	struct action **actions;
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const;
	uint32_t action_data_size_max;
	/* Per-action flags: which actions may be used for table entries and
	 * which for the default entry.
	 */
	int *action_is_for_table_entries;
	int *action_is_for_default_entry;

	uint32_t size; /* Maximum number of entries. */
	uint32_t id;
};

TAILQ_HEAD(table_tailq, table);

/* Per-thread fast-path state of one table. */
struct table_runtime {
	rte_swx_table_lookup_t func;
	void *mailbox;
	uint8_t **key;
};

/* Per-table counters. */
struct table_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t *n_pkts_action; /* Per-action packet counters. */
};
768 
769 /*
770  * Selector.
771  */
/* Selector table: maps a group ID to one member ID of that group. */
struct selector {
	TAILQ_ENTRY(selector) node;
	char name[RTE_SWX_NAME_SIZE];

	struct field *group_id_field;    /* Input: group ID. */
	struct field **selector_fields;  /* Input: fields hashed for selection. */
	uint32_t n_selector_fields;
	struct header *selector_header;
	struct field *member_id_field;   /* Output: selected member ID. */

	uint32_t n_groups_max;
	uint32_t n_members_per_group_max;

	uint32_t id;
};

TAILQ_HEAD(selector_tailq, selector);

/* Per-thread fast-path state of one selector. */
struct selector_runtime {
	void *mailbox;
	uint8_t **group_id_buffer;
	uint8_t **selector_buffer;
	uint8_t **member_id_buffer;
};

/* Per-selector counters. */
struct selector_statistics {
	uint64_t n_pkts;
};
800 
801 /*
802  * Learner table.
803  */
/* Learner table: entries are added/deleted from the data path via the
 * learn/forget instructions and expire after the configured timeout.
 */
struct learner {
	TAILQ_ENTRY(learner) node;
	char name[RTE_SWX_NAME_SIZE];

	/* Match. */
	struct field **fields;
	uint32_t n_fields;
	struct header *header;

	/* Action. */
	struct action **actions;
	struct action *default_action;
	uint8_t *default_action_data;
	uint32_t n_actions;
	int default_action_is_const;
	uint32_t action_data_size_max;
	/* Per-action flags: which actions may be used for table entries and
	 * which for the default entry.
	 */
	int *action_is_for_table_entries;
	int *action_is_for_default_entry;

	uint32_t size;    /* Maximum number of entries. */
	uint32_t timeout; /* Entry expiration timeout. */
	uint32_t id;
};

TAILQ_HEAD(learner_tailq, learner);

/* Per-thread fast-path state of one learner table. */
struct learner_runtime {
	void *mailbox;
	uint8_t **key;
};

/* Per-learner counters. */
struct learner_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
	uint64_t n_pkts_forget;
	uint64_t *n_pkts_action; /* Per-action packet counters. */
};
841 
842 /*
843  * Register array.
844  */
/* Register array declaration. */
struct regarray {
	TAILQ_ENTRY(regarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint64_t init_val; /* Initial value of every register. */
	uint32_t size;     /* Number of registers. */
	uint32_t id;
};

TAILQ_HEAD(regarray_tailq, regarray);

/* Run-time register array storage. size_mask suggests a power-of-two size
 * used for index wrapping — confirm against the allocation code.
 */
struct regarray_runtime {
	uint64_t *regarray;
	uint32_t size_mask;
};
859 
860 /*
861  * Meter array.
862  */
/* Shared trTCM meter profile; n_users counts the meters referencing it. */
struct meter_profile {
	TAILQ_ENTRY(meter_profile) node;
	char name[RTE_SWX_NAME_SIZE];
	struct rte_meter_trtcm_params params;
	struct rte_meter_trtcm_profile profile;
	uint32_t n_users;
};

TAILQ_HEAD(meter_profile_tailq, meter_profile);

/* Meter array declaration. */
struct metarray {
	TAILQ_ENTRY(metarray) node;
	char name[RTE_SWX_NAME_SIZE];
	uint32_t size; /* Number of meters. */
	uint32_t id;
};

TAILQ_HEAD(metarray_tailq, metarray);

/* One run-time meter instance with per-color statistics.
 * NOTE(review): pad[20] looks like explicit size/alignment padding —
 * confirm the intended total struct size against the allocation code.
 */
struct meter {
	struct rte_meter_trtcm m;
	struct meter_profile *profile;
	enum rte_color color_mask;
	uint8_t pad[20];

	uint64_t n_pkts[RTE_COLORS];
	uint64_t n_bytes[RTE_COLORS];
};

/* Run-time meter array storage. size_mask suggests a power-of-two size
 * used for index wrapping — confirm against the allocation code.
 */
struct metarray_runtime {
	struct meter *metarray;
	uint32_t size_mask;
};
896 
897 /*
898  * Pipeline.
899  */
/* Per-thread run-time state: the complete context needed to execute the
 * pipeline program for the current packet.
 */
struct thread {
	/* Packet. */
	struct rte_swx_pkt pkt;
	uint8_t *ptr;

	/* Structures: pointer per struct_id, indexed by instr_operand.struct_id. */
	uint8_t **structs;

	/* Packet headers. */
	struct header_runtime *headers; /* Extracted or generated headers. */
	struct header_out_runtime *headers_out; /* Emitted headers. */
	uint8_t *header_storage;
	uint8_t *header_out_storage;
	uint64_t valid_headers; /* Bit per header_id; see HEADER_VALID(). */
	uint32_t n_headers_out;

	/* Packet meta-data. */
	uint8_t *metadata;

	/* Tables. */
	struct table_runtime *tables;
	struct selector_runtime *selectors;
	struct learner_runtime *learners;
	struct rte_swx_table_state *table_state;
	uint64_t action_id; /* Action selected by the latest table lookup. */
	int hit; /* 0 = Miss, 1 = Hit. */
	uint32_t learner_id;
	uint64_t time;

	/* Extern objects and functions. */
	struct extern_obj_runtime *extern_objs;
	struct extern_func_runtime *extern_funcs;

	/* Instructions. */
	struct instruction *ip;  /* Current instruction pointer. */
	struct instruction *ret; /* Return address for action return. */
};

/* 64-bit mask helpers: test, set and clear a single bit position. */
#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))

/* Non-zero when header_id is currently valid for this thread. */
#define HEADER_VALID(thread, header_id) \
	MASK64_BIT_GET((thread)->valid_headers, header_id)
944 
945 static inline uint64_t
946 instr_operand_hbo(struct thread *t, const struct instr_operand *x)
947 {
948 	uint8_t *x_struct = t->structs[x->struct_id];
949 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
950 	uint64_t x64 = *x64_ptr;
951 	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
952 
953 	return x64 & x64_mask;
954 }
955 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read an operand value stored in Network Byte Order: byte-swap the 64-bit
 * load, then right-align the top n_bits bits. On big endian hosts this is
 * the same operation as the HBO read.
 */
static inline uint64_t
instr_operand_nbo(struct thread *t, const struct instr_operand *x)
{
	uint8_t *x_struct = t->structs[x->struct_id];
	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
	uint64_t x64 = *x64_ptr;

	return ntoh64(x64) >> (64 - x->n_bits);
}

#else

#define instr_operand_nbo instr_operand_hbo

#endif
973 
974 #define ALU(thread, ip, operator)  \
975 {                                                                              \
976 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
977 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
978 	uint64_t dst64 = *dst64_ptr;                                           \
979 	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
980 	uint64_t dst = dst64 & dst64_mask;                                     \
981 									       \
982 	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
983 	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
984 	uint64_t src64 = *src64_ptr;                                           \
985 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
986 	uint64_t src = src64 & src64_mask;                                     \
987 									       \
988 	uint64_t result = dst operator src;                                    \
989 									       \
990 	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
991 }
992 
/* Byte-order-aware ALU variants. Suffix letters name the dst/src operand
 * locations: M = meta-data (HBO), H = header (NBO). On big endian hosts all
 * variants reduce to the plain ALU macro since HBO == NBO there.
 */
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* dst in HBO, src in NBO: byte-swap + right-align src before operating. */
#define ALU_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}

/* dst in NBO, src in HBO: convert dst to HBO, operate, convert result back. */
#define ALU_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}

/* Fast ALU_HM: operates on src converted directly into dst's NBO bit
 * position, skipping the dst round-trip conversion.
 */
#define ALU_HM_FAST(thread, ip, operator)  \
{                                                                                 \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
	uint64_t dst64 = *dst64_ptr;                                              \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
	uint64_t dst = dst64 & dst64_mask;                                        \
										  \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
	uint64_t src64 = *src64_ptr;                                              \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
										  \
	uint64_t result = dst operator src;                                       \
										  \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
}

/* dst in NBO, src in NBO: convert both to HBO, operate, convert back. */
#define ALU_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}

/* Fast ALU_HH: re-aligns src into dst's bit position with shifts only,
 * avoiding the byte swaps of ALU_HH.
 */
#define ALU_HH_FAST(thread, ip, operator)  \
{                                                                                             \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
	uint64_t dst64 = *dst64_ptr;                                                          \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
	uint64_t dst = dst64 & dst64_mask;                                                    \
											      \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
	uint64_t src64 = *src64_ptr;                                                          \
	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
											      \
	uint64_t result = dst operator src;                                                   \
											      \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
}

#else

#define ALU_MH ALU
#define ALU_HM ALU
#define ALU_HM_FAST ALU
#define ALU_HH ALU
#define ALU_HH_FAST ALU

#endif
1098 
/*
 * ALU_I: dst = dst <operator> src, with dst a meta-data field (host byte
 * order) and src the immediate value pre-computed into the instruction
 * ((ip)->alu.src_val). The result is masked to the dst field width; bits of
 * the 64-bit word outside the dst field are preserved.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= dst.n_bits <= 64.
 */
#define ALU_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
1113 
1114 #define ALU_MI ALU_I
1115 
1116 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1117 
/*
 * ALU_HI (little-endian CPU variant): dst = dst <operator> src, with dst a
 * header field (network byte order) and src an immediate. dst is converted
 * to host order, combined with src, then converted back to network order and
 * merged into the packet word with the surrounding bits preserved.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= dst.n_bits <= 64.
 */
#define ALU_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}
1133 
1134 #else
1135 
1136 #define ALU_HI ALU_I
1137 
1138 #endif
1139 
/*
 * MOV: dst = src, both fields taken in host byte order. src is masked to its
 * own width, then masked again into the dst field width; bits outside the
 * dst field are preserved.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both fields.
 */
#define MOV(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1155 
1156 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1157 
/*
 * MOV_MH (little-endian CPU variant): dst = src, with dst a meta-data field
 * (host byte order) and src a header field (network byte order). src is byte
 * swapped to host order and right-aligned before being masked into dst.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both fields.
 */
#define MOV_MH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1172 
/*
 * MOV_HM (little-endian CPU variant): dst = src, with dst a header field
 * (network byte order) and src a meta-data field (host byte order). src is
 * masked to its own width, byte swapped to network order, and aligned to the
 * dst field position before being merged into the packet word.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both fields.
 */
#define MOV_HM(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}
1189 
/*
 * MOV_HH (little-endian CPU variant): dst = src, with both fields being
 * header fields (network byte order). No byte swap is needed: src is
 * left-shifted to drop the bits above its own width, then right-shifted to
 * line up with the dst field position.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both fields.
 */
#define MOV_HH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
									       \
	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}
1205 
1206 #else
1207 
1208 #define MOV_MH MOV
1209 #define MOV_HM MOV
1210 #define MOV_HH MOV
1211 
1212 #endif
1213 
/*
 * MOV_I: dst = src, with src the immediate value pre-computed into the
 * instruction ((ip)->mov.src_val). The immediate is masked into the dst
 * field width; surrounding bits are preserved.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= dst.n_bits <= 64.
 */
#define MOV_I(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint64_t src = (ip)->mov.src_val;                                      \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1225 
/*
 * JMP_CMP: conditional jump. Read operands a and b (both in host byte
 * order), and set the thread instruction pointer to the jump target when
 * (a operator b) holds, otherwise to the next instruction.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both operands.
 */
#define JMP_CMP(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1242 
1243 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1244 
/*
 * JMP_CMP_MH (little-endian CPU variant): conditional jump with operand a
 * being a meta-data field (host byte order) and operand b a header field
 * (network byte order, converted to host order before the comparison).
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both operands.
 */
#define JMP_CMP_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1260 
/*
 * JMP_CMP_HM (little-endian CPU variant): conditional jump with operand a
 * being a header field (network byte order, converted to host order) and
 * operand b a meta-data field (host byte order).
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both operands.
 */
#define JMP_CMP_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1276 
/*
 * JMP_CMP_HH (little-endian CPU variant): conditional jump with both
 * operands being header fields; both are converted to host byte order before
 * the comparison.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both operands.
 */
#define JMP_CMP_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1291 
/*
 * JMP_CMP_HH_FAST (little-endian CPU variant): conditional jump with both
 * operands being header fields, compared without any byte swap (each operand
 * is only left-aligned to drop the bits above its field width). Only valid
 * for the operators the instruction translator selects this fast path for --
 * NOTE(review): confirm selection criteria against the pipeline .c file.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= n_bits <= 64 for both operands.
 */
#define JMP_CMP_HH_FAST(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1306 
1307 #else
1308 
1309 #define JMP_CMP_MH JMP_CMP
1310 #define JMP_CMP_HM JMP_CMP
1311 #define JMP_CMP_HH JMP_CMP
1312 #define JMP_CMP_HH_FAST JMP_CMP
1313 
1314 #endif
1315 
/*
 * JMP_CMP_I: conditional jump with operand a a field read in host byte order
 * and operand b the immediate value pre-computed into the instruction
 * ((ip)->jmp.b_val).
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= a.n_bits <= 64.
 */
#define JMP_CMP_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1328 
1329 #define JMP_CMP_MI JMP_CMP_I
1330 
1331 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1332 
/*
 * JMP_CMP_HI (little-endian CPU variant): conditional jump with operand a a
 * header field (converted to host byte order) and operand b an immediate.
 * NOTE: macro arguments are evaluated multiple times; assumes
 * 1 <= a.n_bits <= 64.
 */
#define JMP_CMP_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}
1344 
1345 #else
1346 
1347 #define JMP_CMP_HI JMP_CMP_I
1348 
1349 #endif
1350 
/*
 * METADATA_READ: read the (n_bits)-wide field located at byte offset
 * (offset) within the thread meta-data area, in host byte order. Implemented
 * as a GCC statement expression; performs a potentially unaligned 64-bit
 * load. Assumes 1 <= n_bits <= 64 (shift by 64 would be undefined behavior).
 */
#define METADATA_READ(thread, offset, n_bits)                                  \
({                                                                             \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
	(m64 & m64_mask);                                                      \
})
1358 
/*
 * METADATA_WRITE: write (value), masked to (n_bits), into the meta-data
 * field at byte offset (offset). Read-modify-write: the bits of the 64-bit
 * word outside the field are preserved. Assumes 1 <= n_bits <= 64.
 */
#define METADATA_WRITE(thread, offset, n_bits, value)                          \
{                                                                              \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
									       \
	uint64_t m_new = value;                                                \
									       \
	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
}
1369 
1370 #ifndef RTE_SWX_PIPELINE_THREADS_MAX
1371 #define RTE_SWX_PIPELINE_THREADS_MAX 16
1372 #endif
1373 
1374 #ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
1375 #define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 256
1376 #endif
1377 
/* The pipeline object: build-time object lists, run-time per-object data and
 * the per-thread execution contexts.
 */
struct rte_swx_pipeline {
	/* Object lists populated through the build-time API. */
	struct struct_type_tailq struct_types;
	struct port_in_type_tailq port_in_types;
	struct port_in_tailq ports_in;
	struct port_out_type_tailq port_out_types;
	struct port_out_tailq ports_out;
	struct extern_type_tailq extern_types;
	struct extern_obj_tailq extern_objs;
	struct extern_func_tailq extern_funcs;
	struct header_tailq headers;
	struct struct_type *metadata_st;
	uint32_t metadata_struct_id;
	struct action_tailq actions;
	struct table_type_tailq table_types;
	struct table_tailq tables;
	struct selector_tailq selectors;
	struct learner_tailq learners;
	struct regarray_tailq regarrays;
	struct meter_profile_tailq meter_profiles;
	struct metarray_tailq metarrays;

	/* Run-time data structures (arrays indexed by object ID). */
	struct port_in_runtime *in;
	struct port_out_runtime *out;
	struct instruction **action_instructions;
	action_func_t *action_funcs;
	struct rte_swx_table_state *table_state;
	struct table_statistics *table_stats;
	struct selector_statistics *selector_stats;
	struct learner_statistics *learner_stats;
	struct regarray_runtime *regarray_runtime;
	struct metarray_runtime *metarray_runtime;
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	instr_exec_t *instruction_table;
	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
	void *lib; /* Handle for dynamically loaded code, presumably via dlopen() -- confirm in the .c file. */

	/* Object counts and pipeline state. */
	uint32_t n_structs;
	uint32_t n_ports_in;
	uint32_t n_ports_out;
	uint32_t n_extern_objs;
	uint32_t n_extern_funcs;
	uint32_t n_actions;
	uint32_t n_tables;
	uint32_t n_selectors;
	uint32_t n_learners;
	uint32_t n_regarrays;
	uint32_t n_metarrays;
	uint32_t n_headers;
	uint32_t thread_id; /* Currently executing thread. */
	uint32_t port_id; /* Next input port for the round-robin rx. */
	uint32_t n_instructions;
	int build_done;
	int numa_node;
};
1433 
1434 /*
1435  * Instruction.
1436  */
/* Advance the round-robin input port index, wrapping via bit mask -- assumes
 * n_ports_in is a power of two (presumably enforced at build time; confirm).
 */
static inline void
pipeline_port_inc(struct rte_swx_pipeline *p)
{
	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
}
1442 
/* Reset the thread's instruction pointer to the pipeline's first instruction. */
static inline void
thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
{
	t->ip = p->instructions;
}
1448 
/* Set the thread's instruction pointer to an arbitrary instruction. */
static inline void
thread_ip_set(struct thread *t, struct instruction *ip)
{
	t->ip = ip;
}
1454 
1455 static inline void
1456 thread_ip_action_call(struct rte_swx_pipeline *p,
1457 		      struct thread *t,
1458 		      uint32_t action_id)
1459 {
1460 	t->ret = t->ip + 1;
1461 	t->ip = p->action_instructions[action_id];
1462 }
1463 
1464 static inline void
1465 thread_ip_inc(struct rte_swx_pipeline *p);
1466 
1467 static inline void
1468 thread_ip_inc(struct rte_swx_pipeline *p)
1469 {
1470 	struct thread *t = &p->threads[p->thread_id];
1471 
1472 	t->ip++;
1473 }
1474 
/* Branchless conditional advance: move to the next instruction only when
 * cond is non-zero (cond is expected to be 0 or 1, as it is added directly).
 */
static inline void
thread_ip_inc_cond(struct thread *t, int cond)
{
	t->ip += cond;
}
1480 
/* Switch execution to the next pipeline thread (round-robin; the mask works
 * because RTE_SWX_PIPELINE_THREADS_MAX is a power of two).
 */
static inline void
thread_yield(struct rte_swx_pipeline *p)
{
	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}
1486 
/* Branchless conditional yield: switch to the next thread only when cond is
 * non-zero (cond is expected to be 0 or 1, as it is added directly).
 */
static inline void
thread_yield_cond(struct rte_swx_pipeline *p, int cond)
{
	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}
1492 
1493 /*
1494  * rx.
1495  */
/*
 * rx instruction core: attempt to receive one packet from the current input
 * port into the current thread. Resets the thread's header state, writes the
 * input port ID into the meta-data field designated by the instruction,
 * snapshots the pipeline table state, and advances the round-robin input
 * port. Returns the pkt_rx() status (non-zero when a packet was received).
 * Note that the parse pointer is set and prefetched even when no packet was
 * received.
 */
static inline int
__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct port_in_runtime *port = &p->in[p->port_id];
	struct rte_swx_pkt *pkt = &t->pkt;
	int pkt_received;

	/* Packet. */
	pkt_received = port->pkt_rx(port->obj, pkt);
	t->ptr = &pkt->pkt[pkt->offset];
	rte_prefetch0(t->ptr);

	TRACE("[Thread %2u] rx %s from port %u\n",
	      p->thread_id,
	      pkt_received ? "1 pkt" : "0 pkts",
	      p->port_id);

	/* Headers. */
	t->valid_headers = 0;
	t->n_headers_out = 0;

	/* Meta-data. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);

	/* Tables. */
	t->table_state = p->table_state;

	/* Thread. */
	pipeline_port_inc(p);

	return pkt_received;
}
1528 
1529 static inline void
1530 instr_rx_exec(struct rte_swx_pipeline *p)
1531 {
1532 	struct thread *t = &p->threads[p->thread_id];
1533 	struct instruction *ip = t->ip;
1534 	int pkt_received;
1535 
1536 	/* Packet. */
1537 	pkt_received = __instr_rx_exec(p, t, ip);
1538 
1539 	/* Thread. */
1540 	thread_ip_inc_cond(t, pkt_received);
1541 	thread_yield(p);
1542 }
1543 
1544 /*
1545  * tx.
1546  */
1547 static inline void
1548 emit_handler(struct thread *t)
1549 {
1550 	struct header_out_runtime *h0 = &t->headers_out[0];
1551 	struct header_out_runtime *h1 = &t->headers_out[1];
1552 	uint32_t offset = 0, i;
1553 
1554 	/* No header change or header decapsulation. */
1555 	if ((t->n_headers_out == 1) &&
1556 	    (h0->ptr + h0->n_bytes == t->ptr)) {
1557 		TRACE("Emit handler: no header change or header decap.\n");
1558 
1559 		t->pkt.offset -= h0->n_bytes;
1560 		t->pkt.length += h0->n_bytes;
1561 
1562 		return;
1563 	}
1564 
1565 	/* Header encapsulation (optionally, with prior header decasulation). */
1566 	if ((t->n_headers_out == 2) &&
1567 	    (h1->ptr + h1->n_bytes == t->ptr) &&
1568 	    (h0->ptr == h0->ptr0)) {
1569 		uint32_t offset;
1570 
1571 		TRACE("Emit handler: header encapsulation.\n");
1572 
1573 		offset = h0->n_bytes + h1->n_bytes;
1574 		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
1575 		t->pkt.offset -= offset;
1576 		t->pkt.length += offset;
1577 
1578 		return;
1579 	}
1580 
1581 	/* For any other case. */
1582 	TRACE("Emit handler: complex case.\n");
1583 
1584 	for (i = 0; i < t->n_headers_out; i++) {
1585 		struct header_out_runtime *h = &t->headers_out[i];
1586 
1587 		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
1588 		offset += h->n_bytes;
1589 	}
1590 
1591 	if (offset) {
1592 		memcpy(t->ptr - offset, t->header_out_storage, offset);
1593 		t->pkt.offset -= offset;
1594 		t->pkt.length += offset;
1595 	}
1596 }
1597 
/*
 * tx instruction core: transmit the current packet to the output port whose
 * ID is read from the meta-data field designated by the instruction. All
 * emitted headers are first folded back into the packet buffer.
 */
static inline void
__instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	port->pkt_tx(port->obj, pkt);
}
1615 
/*
 * tx (immediate) instruction core: same as __instr_tx_exec(), except the
 * output port ID is the immediate value stored in the instruction rather
 * than a meta-data field.
 */
static inline void
__instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = ip->io.io.val;
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	port->pkt_tx(port->obj, pkt);
}
1633 
1634 /*
1635  * extract.
1636  */
/*
 * Extract n_extract consecutive fixed-size headers at the current parse
 * position: each header's struct pointer is aimed directly at its bytes
 * inside the packet buffer (zero-copy) and its valid bit is set; the packet
 * offset, remaining length and parse pointer advance past the extracted
 * bytes.
 * NOTE(review): no underflow check on length here -- presumably guaranteed
 * valid by the caller/translator; confirm.
 */
static inline void
__instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip,
			      uint32_t n_extract)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;
	uint32_t i;

	for (i = 0; i < n_extract; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];
		uint32_t n_bytes = ip->io.hdr.n_bytes[i];

		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
		      p->thread_id,
		      header_id,
		      n_bytes);

		/* Headers. */
		t->structs[struct_id] = ptr;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);

		/* Packet. */
		offset += n_bytes;
		length -= n_bytes;
		ptr += n_bytes;
	}

	/* Headers. */
	t->valid_headers = valid_headers;

	/* Packet. */
	t->pkt.offset = offset;
	t->pkt.length = length;
	t->ptr = ptr;
}
1677 
/* extract: single fixed-size header. */
static inline void
__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	__instr_hdr_extract_many_exec(p, t, ip, 1);
}
1685 
/* Fused translation of 2 consecutive extract instructions. */
static inline void
__instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 2);
}
1695 
/* Fused translation of 3 consecutive extract instructions. */
static inline void
__instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 3);
}
1705 
/* Fused translation of 4 consecutive extract instructions. */
static inline void
__instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 4);
}
1715 
/* Fused translation of 5 consecutive extract instructions. */
static inline void
__instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 5);
}
1725 
/* Fused translation of 6 consecutive extract instructions. */
static inline void
__instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 6);
}
1735 
/* Fused translation of 7 consecutive extract instructions. */
static inline void
__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 7);
}
1745 
/* Fused translation of 8 consecutive extract instructions. */
static inline void
__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 8);
}
1755 
/*
 * Extract a variable-size header: the fixed part size comes from the
 * instruction (n_bytes[0]) and the size of the variable tail is read at
 * run-time from the meta-data field designated by the instruction. The
 * header's run-time size (h->n_bytes) is updated to the total size.
 */
static inline void
__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;

	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];
	uint32_t n_bytes = ip->io.hdr.n_bytes[0];

	struct header_runtime *h = &t->headers[header_id];

	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
	      p->thread_id,
	      header_id,
	      n_bytes,
	      n_bytes_last);

	/* Total size = fixed part + variable tail. */
	n_bytes += n_bytes_last;

	/* Headers. */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	h->n_bytes = n_bytes;

	/* Packet. */
	t->pkt.offset = offset + n_bytes;
	t->pkt.length = length - n_bytes;
	t->ptr = ptr + n_bytes;
}
1791 
/*
 * lookahead: map the header struct onto the current parse position and mark
 * it valid WITHOUT advancing the packet offset, length or parse pointer (in
 * contrast to extract).
 */
static inline void
__instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;

	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];

	TRACE("[Thread %2u]: lookahead header %u\n",
	      p->thread_id,
	      header_id);

	/* Headers. */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
}
1811 
1812 /*
1813  * emit.
1814  */
/*
 * Emit n_emit consecutive headers: append each valid header to the list of
 * output header fragments (t->headers_out), coalescing a header into the
 * previous fragment whenever its bytes are physically contiguous with it.
 * Invalid headers are silently skipped. The fragments are materialized into
 * the packet buffer later by emit_handler().
 * NOTE(review): when t->n_headers_out is 0, ho initially points one element
 * BEFORE the headers_out array; if no header in this call is valid, the
 * final "ho->n_bytes = ho_nbytes" writes through that pointer -- presumably
 * the translator never generates such a call; confirm.
 */
static inline void
__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip,
			   uint32_t n_emit)
{
	uint64_t valid_headers = t->valid_headers;
	uint32_t n_headers_out = t->n_headers_out;
	struct header_out_runtime *ho = &t->headers_out[n_headers_out - 1];
	uint8_t *ho_ptr = NULL;
	uint32_t ho_nbytes = 0, first = 1, i;

	for (i = 0; i < n_emit; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];

		struct header_runtime *hi = &t->headers[header_id];
		uint8_t *hi_ptr0 = hi->ptr0;
		uint32_t n_bytes = hi->n_bytes;

		uint8_t *hi_ptr = t->structs[struct_id];

		/* Skip headers that are not valid. */
		if (!MASK64_BIT_GET(valid_headers, header_id))
			continue;

		TRACE("[Thread %2u]: emit header %u\n",
		      p->thread_id,
		      header_id);

		/* Headers. */
		if (first) {
			first = 0;

			if (!t->n_headers_out) {
				/* Start the very first output fragment. */
				ho = &t->headers_out[0];

				ho->ptr0 = hi_ptr0;
				ho->ptr = hi_ptr;

				ho_ptr = hi_ptr;
				ho_nbytes = n_bytes;

				n_headers_out = 1;

				continue;
			} else {
				/* Resume from the last existing fragment. */
				ho_ptr = ho->ptr;
				ho_nbytes = ho->n_bytes;
			}
		}

		if (ho_ptr + ho_nbytes == hi_ptr) {
			/* Contiguous with the current fragment: coalesce. */
			ho_nbytes += n_bytes;
		} else {
			/* Not contiguous: close the current fragment and
			 * start a new one.
			 */
			ho->n_bytes = ho_nbytes;

			ho++;
			ho->ptr0 = hi_ptr0;
			ho->ptr = hi_ptr;

			ho_ptr = hi_ptr;
			ho_nbytes = n_bytes;

			n_headers_out++;
		}
	}

	ho->n_bytes = ho_nbytes;
	t->n_headers_out = n_headers_out;
}
1885 
/* emit: single header. */
static inline void
__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
		      struct thread *t,
		      const struct instruction *ip)
{
	__instr_hdr_emit_many_exec(p, t, ip, 1);
}
1893 
/* Fused translation of one emit instruction followed by tx. */
static inline void
__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 1);
	__instr_tx_exec(p, t, ip);
}
1904 
/* Fused translation of 2 emit instructions followed by tx. */
static inline void
__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 2);
	__instr_tx_exec(p, t, ip);
}
1915 
/* Fused translation of 3 emit instructions followed by tx. */
static inline void
__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 3);
	__instr_tx_exec(p, t, ip);
}
1926 
/* Fused translation of 4 emit instructions followed by tx. */
static inline void
__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 4);
	__instr_tx_exec(p, t, ip);
}
1937 
/* Fused translation of 5 emit instructions followed by tx. */
static inline void
__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 5);
	__instr_tx_exec(p, t, ip);
}
1948 
/* Fused translation of 6 emit instructions followed by tx. */
static inline void
__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 6);
	__instr_tx_exec(p, t, ip);
}
1959 
/* Fused translation of 7 emit instructions followed by tx. */
static inline void
__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 7);
	__instr_tx_exec(p, t, ip);
}
1970 
/* Fused translation of 8 emit instructions followed by tx. */
static inline void
__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 8);
	__instr_tx_exec(p, t, ip);
}
1981 
1982 /*
1983  * validate.
1984  */
1985 static inline void
1986 __instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
1987 			  struct thread *t,
1988 			  const struct instruction *ip)
1989 {
1990 	uint32_t header_id = ip->valid.header_id;
1991 
1992 	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
1993 
1994 	/* Headers. */
1995 	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
1996 }
1997 
1998 /*
1999  * invalidate.
2000  */
2001 static inline void
2002 __instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
2003 			    struct thread *t,
2004 			    const struct instruction *ip)
2005 {
2006 	uint32_t header_id = ip->valid.header_id;
2007 
2008 	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
2009 
2010 	/* Headers. */
2011 	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
2012 }
2013 
2014 /*
2015  * learn.
2016  */
static inline void
__instr_learn_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip)
{
	uint64_t action_id = ip->learn.action_id;
	uint32_t mf_offset = ip->learn.mf_offset;
	uint32_t learner_id = t->learner_id;
	/* Learner table states are stored after the regular tables and the
	 * selectors in the table_state array.
	 */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];
	uint32_t status;

	/* Table: add an entry keyed by the current lookup (via the mailbox),
	 * with the action data taken from the metadata at mf_offset.
	 */
	status = rte_swx_table_learner_add(ts->obj,
					   l->mailbox,
					   t->time,
					   action_id,
					   &t->metadata[mf_offset]);

	/* NOTE(review): the trace label treats non-zero status as "ok" —
	 * confirm against the rte_swx_table_learner_add() return convention.
	 */
	TRACE("[Thread %2u] learner %u learn %s\n",
	      p->thread_id,
	      learner_id,
	      status ? "ok" : "error");

	/* Per-status learn counters (indexed by the add status). */
	stats->n_pkts_learn[status] += 1;
}
2045 
2046 /*
2047  * forget.
2048  */
2049 static inline void
2050 __instr_forget_exec(struct rte_swx_pipeline *p,
2051 		    struct thread *t,
2052 		    const struct instruction *ip __rte_unused)
2053 {
2054 	uint32_t learner_id = t->learner_id;
2055 	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
2056 		p->n_selectors + learner_id];
2057 	struct learner_runtime *l = &t->learners[learner_id];
2058 	struct learner_statistics *stats = &p->learner_stats[learner_id];
2059 
2060 	/* Table. */
2061 	rte_swx_table_learner_delete(ts->obj, l->mailbox);
2062 
2063 	TRACE("[Thread %2u] learner %u forget\n",
2064 	      p->thread_id,
2065 	      learner_id);
2066 
2067 	stats->n_pkts_forget += 1;
2068 }
2069 
2070 /*
2071  * extern.
2072  */
2073 static inline uint32_t
2074 __instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
2075 			struct thread *t,
2076 			const struct instruction *ip)
2077 {
2078 	uint32_t obj_id = ip->ext_obj.ext_obj_id;
2079 	uint32_t func_id = ip->ext_obj.func_id;
2080 	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
2081 	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
2082 	uint32_t done;
2083 
2084 	TRACE("[Thread %2u] extern obj %u member func %u\n",
2085 	      p->thread_id,
2086 	      obj_id,
2087 	      func_id);
2088 
2089 	done = func(obj->obj, obj->mailbox);
2090 
2091 	return done;
2092 }
2093 
2094 static inline uint32_t
2095 __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
2096 			 struct thread *t,
2097 			 const struct instruction *ip)
2098 {
2099 	uint32_t ext_func_id = ip->ext_func.ext_func_id;
2100 	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
2101 	rte_swx_extern_func_t func = ext_func->func;
2102 	uint32_t done;
2103 
2104 	TRACE("[Thread %2u] extern func %u\n",
2105 	      p->thread_id,
2106 	      ext_func_id);
2107 
2108 	done = func(ext_func->mailbox);
2109 
2110 	return done;
2111 }
2112 
2113 /*
2114  * mov.
2115  */
/* mov instruction handlers. The operand-location variant is encoded in the
 * MOV* macro suffix, matching the TRACE label: mh, hm, hh (see the MOV*
 * macro definitions for the exact operand handling); the "i" variant copies
 * the immediate value ip->mov.src_val.
 */
static inline void
__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
		 struct thread *t,
		 const struct instruction *ip)
{
	TRACE("[Thread %2u] mov\n", p->thread_id);

	MOV(t, ip);
}

static inline void
__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);

	MOV_MH(t, ip);
}

static inline void
__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);

	MOV_HM(t, ip);
}

static inline void
__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);

	MOV_HH(t, ip);
}

static inline void
__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
		   struct thread *t,
		   const struct instruction *ip)
{
	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);

	MOV_I(t, ip);
}
2165 
2166 /*
2167  * dma.
2168  */
/* Copy n_dma byte ranges from the action data into packet headers, marking
 * each destination header as valid.
 */
static inline void
__instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip,
			 uint32_t n_dma)
{
	uint8_t *action_data = t->structs[0]; /* Struct 0: action data. */
	uint64_t valid_headers = t->valid_headers;
	uint32_t i;

	for (i = 0; i < n_dma; i++) {
		uint32_t header_id = ip->dma.dst.header_id[i];
		uint32_t struct_id = ip->dma.dst.struct_id[i];
		uint32_t offset = ip->dma.src.offset[i];
		uint32_t n_bytes = ip->dma.n_bytes[i];

		struct header_runtime *h = &t->headers[header_id];
		uint8_t *h_ptr0 = h->ptr0;
		uint8_t *h_ptr = t->structs[struct_id];

		/* Write in place when the header is already valid, otherwise
		 * to its default location (ptr0).
		 */
		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
			h_ptr : h_ptr0;
		void *src = &action_data[offset];

		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);

		/* Headers: copy, record the header's pointer, mark it valid. */
		memcpy(dst, src, n_bytes);
		t->structs[struct_id] = dst;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	}

	t->valid_headers = valid_headers;
}
2203 
/* dma instruction handlers: each fused variant below performs N back-to-back
 * header DMA operations (N "dma" instructions fused into one), delegating to
 * __instr_dma_ht_many_exec().
 */
static inline void
__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	__instr_dma_ht_many_exec(p, t, ip, 1);
}

static inline void
__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 2);
}

static inline void
__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 3);
}

static inline void
__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 4);
}

static inline void
__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 5);
}

static inline void
__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 6);
}

static inline void
__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 7);
}

static inline void
__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 8);
}
2265 
2266 /*
2267  * alu.
2268  */
/* add instruction handlers: dst = dst + src. The operand-location variant is
 * encoded in the ALU* macro suffix, matching the TRACE label (mh, hm, hh,
 * mi, hi — see the ALU* macro definitions for the operand handling).
 */
static inline void
__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] add\n", p->thread_id);

	ALU(t, ip, +);
}

static inline void
__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mh)\n", p->thread_id);

	ALU_MH(t, ip, +);
}

static inline void
__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hm)\n", p->thread_id);

	ALU_HM(t, ip, +);
}

static inline void
__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hh)\n", p->thread_id);

	ALU_HH(t, ip, +);
}

static inline void
__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mi)\n", p->thread_id);

	ALU_MI(t, ip, +);
}

static inline void
__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hi)\n", p->thread_id);

	ALU_HI(t, ip, +);
}
2328 
/* sub instruction handlers: dst = dst - src, with the same operand-location
 * variants as the add handlers above (ALU* macro suffix matches the TRACE
 * label).
 */
static inline void
__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] sub\n", p->thread_id);

	ALU(t, ip, -);
}

static inline void
__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);

	ALU_MH(t, ip, -);
}

static inline void
__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);

	ALU_HM(t, ip, -);
}

static inline void
__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);

	ALU_HH(t, ip, -);
}

static inline void
__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);

	ALU_MI(t, ip, -);
}

static inline void
__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);

	ALU_HI(t, ip, -);
}
2388 
/* shl instruction handlers: dst = dst << src, with the same operand-location
 * variants as the other ALU handlers (macro suffix matches the TRACE label).
 */
static inline void
__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shl\n", p->thread_id);

	ALU(t, ip, <<);
}

static inline void
__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);

	ALU_MH(t, ip, <<);
}

static inline void
__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);

	ALU_HM(t, ip, <<);
}

static inline void
__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);

	ALU_HH(t, ip, <<);
}

static inline void
__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);

	ALU_MI(t, ip, <<);
}

static inline void
__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);

	ALU_HI(t, ip, <<);
}
2448 
/* shr instruction handlers: dst = dst >> src, with the same operand-location
 * variants as the other ALU handlers (macro suffix matches the TRACE label).
 */
static inline void
__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shr\n", p->thread_id);

	ALU(t, ip, >>);
}

static inline void
__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);

	ALU_MH(t, ip, >>);
}

static inline void
__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);

	ALU_HM(t, ip, >>);
}

static inline void
__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);

	ALU_HH(t, ip, >>);
}

static inline void
__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);

	ALU_MI(t, ip, >>);
}

static inline void
__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);

	ALU_HI(t, ip, >>);
}
2509 
/* and instruction handlers: dst = dst & src. Note the hm/hh variants use the
 * *_FAST macro forms, presumably because bitwise AND commutes with the byte
 * swap (bswap(a) & bswap(b) == bswap(a & b)) — see the ALU_*_FAST macro
 * definitions.
 */
static inline void
__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] and\n", p->thread_id);

	ALU(t, ip, &);
}

static inline void
__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (mh)\n", p->thread_id);

	ALU_MH(t, ip, &);
}

static inline void
__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, &);
}

static inline void
__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] and (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, &);
}

static inline void
__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] and (i)\n", p->thread_id);

	ALU_I(t, ip, &);
}
2559 
/* or instruction handlers: dst = dst | src. Like the and handlers, the hm/hh
 * variants use the *_FAST macro forms (bitwise OR commutes with byte swap).
 */
static inline void
__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] or\n", p->thread_id);

	ALU(t, ip, |);
}

static inline void
__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (mh)\n", p->thread_id);

	ALU_MH(t, ip, |);
}

static inline void
__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, |);
}

static inline void
__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, |);
}

static inline void
__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u] or (i)\n", p->thread_id);

	ALU_I(t, ip, |);
}
2609 
/* xor instruction handlers: dst = dst ^ src. Like the and/or handlers, the
 * hm/hh variants use the *_FAST macro forms (bitwise XOR commutes with byte
 * swap).
 */
static inline void
__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] xor\n", p->thread_id);

	ALU(t, ip, ^);
}

static inline void
__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);

	ALU_MH(t, ip, ^);
}

static inline void
__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, ^);
}

static inline void
__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, ^);
}

static inline void
__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (i)\n", p->thread_id);

	ALU_I(t, ip, ^);
}
2659 
/* ckadd (field): fold a source field of up to 64 bits into a 16-bit ones'
 * complement checksum held in dst (incremental checksum update). The source
 * is split into its two 32-bit halves before folding; the comments below
 * track the worst-case bit width at each step.
 */
static inline void
__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);

	/* Structs: dst is the 16-bit checksum field; src is masked down to
	 * its declared n_bits width.
	 */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Work on the complement of the stored checksum (the raw sum). */
	r = dst;
	r = ~r & 0xFFFF;

	/* The first input (r) is a 16-bit number. The second and the third
	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
	 * three numbers (output r) is a 34-bit number.
	 */
	r += (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is an 18-bit
	 * number. In the worst case scenario, the sum of the two numbers is a
	 * 19-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
	 * therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Complement back; zero is encoded as 0xFFFF (ones' complement). */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2715 
/* cksub (field): subtract a source field of up to 64 bits from a 16-bit
 * ones' complement checksum held in dst (incremental checksum update). The
 * subtraction is done in 2's complement with a modulus bias to avoid
 * underflow, as explained inline.
 */
static inline void
__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);

	/* Structs: dst is the 16-bit checksum field; src is masked down to
	 * its declared n_bits width.
	 */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Work on the complement of the stored checksum (the raw sum). */
	r = dst;
	r = ~r & 0xFFFF;

	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
	 * the following sequence of operations in 2's complement arithmetic:
	 *    a '- b = (a - b) % 0xFFFF.
	 *
	 * In order to prevent an underflow for the below subtraction, in which
	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
	 * minuend), we first add a multiple of the 0xFFFF modulus to the
	 * minuend. The number we add to the minuend needs to be a 34-bit number
	 * or higher, so for readability reasons we picked the 36-bit multiple.
	 * We are effectively turning the 16-bit minuend into a 36-bit number:
	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
	 */
	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */

	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
	 * result (the output r) is a 36-bit number.
	 */
	r -= (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is a 20-bit
	 * number. Their sum is a 21-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Complement back; zero is encoded as 0xFFFF (ones' complement). */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2783 
/* ckadd (20-byte struct): compute the 16-bit ones' complement checksum over
 * a fixed 5-word (20-byte) source structure — presumably an IPv4 header
 * without options — and store it into the 16-bit dst field. The five 32-bit
 * words are summed with an unrolled two-accumulator schedule.
 */
static inline void
__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
				struct thread *t,
				const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr;
	uint32_t *src32_ptr;
	uint64_t r0, r1;

	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */

	/* The first input is a 16-bit number. The second input is a 19-bit
	 * number. Their sum is a 20-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* Complement; zero is encoded as 0xFFFF (ones' complement). */
	r0 = ~r0 & 0xFFFF;
	r0 = r0 ? r0 : 0xFFFF;

	*dst16_ptr = (uint16_t)r0;
}
2831 
/* ckadd (generic struct): compute the 16-bit ones' complement checksum over
 * the 32-bit words of the source structure and store it into the 16-bit dst
 * field.
 */
static inline void
__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr;
	uint32_t *src32_ptr;
	uint64_t r = 0;
	uint32_t i;

	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Sum all 32-bit words of the source. The folding math below assumes
	 * at most 8 = 2^3 words, i.e. a 32-byte (256-bit) header, giving a
	 * worst-case 35-bit sum that, added to the 16-bit input r, yields a
	 * 36-bit output r. NOTE(review): the original comment said "256-byte
	 * header", which contradicts the 8-word bound (256 bytes = 64 words);
	 * confirm the header size limit enforced by the translator. Also note
	 * that src.n_bits is assumed to be a multiple of 32 — any trailing
	 * bits are dropped by the integer division.
	 */
	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
		r += *src32_ptr;

	/* The first input is a 16-bit number. The second input is a 20-bit
	 * number. Their sum is a 21-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Complement; zero is encoded as 0xFFFF (ones' complement). */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
2881 
2882 /*
2883  * Register array.
2884  */
2885 static inline uint64_t *
2886 instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
2887 {
2888 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
2889 	return r->regarray;
2890 }
2891 
2892 static inline uint64_t
2893 instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2894 {
2895 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
2896 
2897 	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
2898 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
2899 	uint64_t idx64 = *idx64_ptr;
2900 	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
2901 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
2902 
2903 	return idx;
2904 }
2905 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read the register array index from a network byte order (NBO) field:
 * byte-swap, right-align the n_bits-wide value, then bound it by the
 * register array's size mask.
 */
static inline uint64_t
instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];

	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;

	return idx;
}

#else

/* On big endian CPUs, network byte order equals host byte order. */
#define instr_regarray_idx_nbo instr_regarray_idx_hbo

#endif
2926 
2927 static inline uint64_t
2928 instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
2929 {
2930 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
2931 
2932 	uint64_t idx = ip->regarray.idx_val & r->size_mask;
2933 
2934 	return idx;
2935 }
2936 
2937 static inline uint64_t
2938 instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
2939 {
2940 	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
2941 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
2942 	uint64_t src64 = *src64_ptr;
2943 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
2944 	uint64_t src = src64 & src64_mask;
2945 
2946 	return src;
2947 }
2948 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Read the register array source operand from a network byte order (NBO)
 * field: byte-swap and right-align the n_bits-wide value.
 */
static inline uint64_t
instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);

	return src;
}

#else

/* On big endian CPUs, network byte order equals host byte order. */
#define instr_regarray_src_nbo instr_regarray_src_hbo

#endif
2967 
2968 static inline void
2969 instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
2970 {
2971 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
2972 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
2973 	uint64_t dst64 = *dst64_ptr;
2974 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
2975 
2976 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
2977 
2978 }
2979 
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Write a host byte order value into a network byte order (NBO) destination
 * field: byte-swap and right-align the value, then read-modify-write only
 * the n_bits-wide field.
 */
static inline void
instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
{
	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
	uint64_t dst64 = *dst64_ptr;
	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);

	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
}

#else

/* On big endian CPUs, network byte order equals host byte order. */
#define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set

#endif
2999 
/* regprefetch instruction handlers: prefetch the register array entry whose
 * index comes from a header field in network byte order (rh), a metadata
 * field in host byte order (rm), or an immediate (ri).
 */
static inline void
__instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	rte_prefetch0(&regarray[idx]);
}

static inline void
__instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	rte_prefetch0(&regarray[idx]);
}

static inline void
__instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
			    struct thread *t __rte_unused,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	rte_prefetch0(&regarray[idx]);
}
3041 
3042 static inline void
3043 __instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
3044 		       struct thread *t,
3045 		       const struct instruction *ip)
3046 {
3047 	uint64_t *regarray, idx;
3048 
3049 	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
3050 
3051 	regarray = instr_regarray_regarray(p, ip);
3052 	idx = instr_regarray_idx_nbo(p, t, ip);
3053 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3054 }
3055 
3056 static inline void
3057 __instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
3058 		       struct thread *t,
3059 		       const struct instruction *ip)
3060 {
3061 	uint64_t *regarray, idx;
3062 
3063 	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
3064 
3065 	/* Structs. */
3066 	regarray = instr_regarray_regarray(p, ip);
3067 	idx = instr_regarray_idx_hbo(p, t, ip);
3068 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3069 }
3070 
3071 static inline void
3072 __instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3073 {
3074 	uint64_t *regarray, idx;
3075 
3076 	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
3077 
3078 	regarray = instr_regarray_regarray(p, ip);
3079 	idx = instr_regarray_idx_nbo(p, t, ip);
3080 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3081 }
3082 
3083 static inline void
3084 __instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3085 {
3086 	uint64_t *regarray, idx;
3087 
3088 	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
3089 
3090 	regarray = instr_regarray_regarray(p, ip);
3091 	idx = instr_regarray_idx_hbo(p, t, ip);
3092 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3093 }
3094 
3095 static inline void
3096 __instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3097 {
3098 	uint64_t *regarray, idx;
3099 
3100 	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
3101 
3102 	regarray = instr_regarray_regarray(p, ip);
3103 	idx = instr_regarray_idx_imm(p, ip);
3104 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3105 }
3106 
3107 static inline void
3108 __instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3109 {
3110 	uint64_t *regarray, idx;
3111 
3112 	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
3113 
3114 	regarray = instr_regarray_regarray(p, ip);
3115 	idx = instr_regarray_idx_imm(p, ip);
3116 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3117 }
3118 
3119 static inline void
3120 __instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3121 {
3122 	uint64_t *regarray, idx, src;
3123 
3124 	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
3125 
3126 	regarray = instr_regarray_regarray(p, ip);
3127 	idx = instr_regarray_idx_nbo(p, t, ip);
3128 	src = instr_regarray_src_nbo(t, ip);
3129 	regarray[idx] = src;
3130 }
3131 
3132 static inline void
3133 __instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3134 {
3135 	uint64_t *regarray, idx, src;
3136 
3137 	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
3138 
3139 	regarray = instr_regarray_regarray(p, ip);
3140 	idx = instr_regarray_idx_nbo(p, t, ip);
3141 	src = instr_regarray_src_hbo(t, ip);
3142 	regarray[idx] = src;
3143 }
3144 
3145 static inline void
3146 __instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3147 {
3148 	uint64_t *regarray, idx, src;
3149 
3150 	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
3151 
3152 	regarray = instr_regarray_regarray(p, ip);
3153 	idx = instr_regarray_idx_hbo(p, t, ip);
3154 	src = instr_regarray_src_nbo(t, ip);
3155 	regarray[idx] = src;
3156 }
3157 
3158 static inline void
3159 __instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3160 {
3161 	uint64_t *regarray, idx, src;
3162 
3163 	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
3164 
3165 	regarray = instr_regarray_regarray(p, ip);
3166 	idx = instr_regarray_idx_hbo(p, t, ip);
3167 	src = instr_regarray_src_hbo(t, ip);
3168 	regarray[idx] = src;
3169 }
3170 
3171 static inline void
3172 __instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3173 {
3174 	uint64_t *regarray, idx, src;
3175 
3176 	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
3177 
3178 	regarray = instr_regarray_regarray(p, ip);
3179 	idx = instr_regarray_idx_nbo(p, t, ip);
3180 	src = ip->regarray.dstsrc_val;
3181 	regarray[idx] = src;
3182 }
3183 
3184 static inline void
3185 __instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3186 {
3187 	uint64_t *regarray, idx, src;
3188 
3189 	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
3190 
3191 	regarray = instr_regarray_regarray(p, ip);
3192 	idx = instr_regarray_idx_hbo(p, t, ip);
3193 	src = ip->regarray.dstsrc_val;
3194 	regarray[idx] = src;
3195 }
3196 
3197 static inline void
3198 __instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3199 {
3200 	uint64_t *regarray, idx, src;
3201 
3202 	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
3203 
3204 	regarray = instr_regarray_regarray(p, ip);
3205 	idx = instr_regarray_idx_imm(p, ip);
3206 	src = instr_regarray_src_nbo(t, ip);
3207 	regarray[idx] = src;
3208 }
3209 
3210 static inline void
3211 __instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3212 {
3213 	uint64_t *regarray, idx, src;
3214 
3215 	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
3216 
3217 	regarray = instr_regarray_regarray(p, ip);
3218 	idx = instr_regarray_idx_imm(p, ip);
3219 	src = instr_regarray_src_hbo(t, ip);
3220 	regarray[idx] = src;
3221 }
3222 
3223 static inline void
3224 __instr_regwr_rii_exec(struct rte_swx_pipeline *p,
3225 		       struct thread *t __rte_unused,
3226 		       const struct instruction *ip)
3227 {
3228 	uint64_t *regarray, idx, src;
3229 
3230 	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
3231 
3232 	regarray = instr_regarray_regarray(p, ip);
3233 	idx = instr_regarray_idx_imm(p, ip);
3234 	src = ip->regarray.dstsrc_val;
3235 	regarray[idx] = src;
3236 }
3237 
3238 static inline void
3239 __instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3240 {
3241 	uint64_t *regarray, idx, src;
3242 
3243 	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
3244 
3245 	regarray = instr_regarray_regarray(p, ip);
3246 	idx = instr_regarray_idx_nbo(p, t, ip);
3247 	src = instr_regarray_src_nbo(t, ip);
3248 	regarray[idx] += src;
3249 }
3250 
3251 static inline void
3252 __instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3253 {
3254 	uint64_t *regarray, idx, src;
3255 
3256 	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
3257 
3258 	regarray = instr_regarray_regarray(p, ip);
3259 	idx = instr_regarray_idx_nbo(p, t, ip);
3260 	src = instr_regarray_src_hbo(t, ip);
3261 	regarray[idx] += src;
3262 }
3263 
3264 static inline void
3265 __instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3266 {
3267 	uint64_t *regarray, idx, src;
3268 
3269 	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
3270 
3271 	regarray = instr_regarray_regarray(p, ip);
3272 	idx = instr_regarray_idx_hbo(p, t, ip);
3273 	src = instr_regarray_src_nbo(t, ip);
3274 	regarray[idx] += src;
3275 }
3276 
3277 static inline void
3278 __instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3279 {
3280 	uint64_t *regarray, idx, src;
3281 
3282 	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
3283 
3284 	regarray = instr_regarray_regarray(p, ip);
3285 	idx = instr_regarray_idx_hbo(p, t, ip);
3286 	src = instr_regarray_src_hbo(t, ip);
3287 	regarray[idx] += src;
3288 }
3289 
3290 static inline void
3291 __instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3292 {
3293 	uint64_t *regarray, idx, src;
3294 
3295 	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
3296 
3297 	regarray = instr_regarray_regarray(p, ip);
3298 	idx = instr_regarray_idx_nbo(p, t, ip);
3299 	src = ip->regarray.dstsrc_val;
3300 	regarray[idx] += src;
3301 }
3302 
3303 static inline void
3304 __instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3305 {
3306 	uint64_t *regarray, idx, src;
3307 
3308 	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
3309 
3310 	regarray = instr_regarray_regarray(p, ip);
3311 	idx = instr_regarray_idx_hbo(p, t, ip);
3312 	src = ip->regarray.dstsrc_val;
3313 	regarray[idx] += src;
3314 }
3315 
3316 static inline void
3317 __instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3318 {
3319 	uint64_t *regarray, idx, src;
3320 
3321 	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
3322 
3323 	regarray = instr_regarray_regarray(p, ip);
3324 	idx = instr_regarray_idx_imm(p, ip);
3325 	src = instr_regarray_src_nbo(t, ip);
3326 	regarray[idx] += src;
3327 }
3328 
3329 static inline void
3330 __instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3331 {
3332 	uint64_t *regarray, idx, src;
3333 
3334 	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
3335 
3336 	regarray = instr_regarray_regarray(p, ip);
3337 	idx = instr_regarray_idx_imm(p, ip);
3338 	src = instr_regarray_src_hbo(t, ip);
3339 	regarray[idx] += src;
3340 }
3341 
3342 static inline void
3343 __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
3344 			struct thread *t __rte_unused,
3345 			const struct instruction *ip)
3346 {
3347 	uint64_t *regarray, idx, src;
3348 
3349 	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
3350 
3351 	regarray = instr_regarray_regarray(p, ip);
3352 	idx = instr_regarray_idx_imm(p, ip);
3353 	src = ip->regarray.dstsrc_val;
3354 	regarray[idx] += src;
3355 }
3356 
3357 /*
3358  * metarray.
3359  */
3360 static inline struct meter *
3361 instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3362 {
3363 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3364 
3365 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3366 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3367 	uint64_t idx64 = *idx64_ptr;
3368 	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
3369 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
3370 
3371 	return &r->metarray[idx];
3372 }
3373 
3374 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3375 
3376 static inline struct meter *
3377 instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3378 {
3379 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3380 
3381 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3382 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3383 	uint64_t idx64 = *idx64_ptr;
3384 	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
3385 
3386 	return &r->metarray[idx];
3387 }
3388 
3389 #else
3390 
3391 #define instr_meter_idx_nbo instr_meter_idx_hbo
3392 
3393 #endif
3394 
3395 static inline struct meter *
3396 instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3397 {
3398 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3399 
3400 	uint64_t idx =  ip->meter.idx_val & r->size_mask;
3401 
3402 	return &r->metarray[idx];
3403 }
3404 
3405 static inline uint32_t
3406 instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
3407 {
3408 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3409 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3410 	uint64_t src64 = *src64_ptr;
3411 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
3412 	uint64_t src = src64 & src64_mask;
3413 
3414 	return (uint32_t)src;
3415 }
3416 
3417 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3418 
3419 static inline uint32_t
3420 instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
3421 {
3422 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3423 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3424 	uint64_t src64 = *src64_ptr;
3425 	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
3426 
3427 	return (uint32_t)src;
3428 }
3429 
3430 #else
3431 
3432 #define instr_meter_length_nbo instr_meter_length_hbo
3433 
3434 #endif
3435 
3436 static inline enum rte_color
3437 instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
3438 {
3439 	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
3440 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
3441 	uint64_t src64 = *src64_ptr;
3442 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
3443 	uint64_t src = src64 & src64_mask;
3444 
3445 	return (enum rte_color)src;
3446 }
3447 
3448 static inline void
3449 instr_meter_color_out_hbo_set(struct thread *t,
3450 			      const struct instruction *ip,
3451 			      enum rte_color color_out)
3452 {
3453 	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
3454 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
3455 	uint64_t dst64 = *dst64_ptr;
3456 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
3457 
3458 	uint64_t src = (uint64_t)color_out;
3459 
3460 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3461 }
3462 
3463 static inline void
3464 __instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
3465 			   struct thread *t,
3466 			   const struct instruction *ip)
3467 {
3468 	struct meter *m;
3469 
3470 	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
3471 
3472 	m = instr_meter_idx_nbo(p, t, ip);
3473 	rte_prefetch0(m);
3474 }
3475 
3476 static inline void
3477 __instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
3478 			   struct thread *t,
3479 			   const struct instruction *ip)
3480 {
3481 	struct meter *m;
3482 
3483 	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
3484 
3485 	m = instr_meter_idx_hbo(p, t, ip);
3486 	rte_prefetch0(m);
3487 }
3488 
3489 static inline void
3490 __instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
3491 			   struct thread *t __rte_unused,
3492 			   const struct instruction *ip)
3493 {
3494 	struct meter *m;
3495 
3496 	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
3497 
3498 	m = instr_meter_idx_imm(p, ip);
3499 	rte_prefetch0(m);
3500 }
3501 
3502 static inline void
3503 __instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3504 {
3505 	struct meter *m;
3506 	uint64_t time, n_pkts, n_bytes;
3507 	uint32_t length;
3508 	enum rte_color color_in, color_out;
3509 
3510 	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
3511 
3512 	m = instr_meter_idx_nbo(p, t, ip);
3513 	rte_prefetch0(m->n_pkts);
3514 	time = rte_get_tsc_cycles();
3515 	length = instr_meter_length_nbo(t, ip);
3516 	color_in = instr_meter_color_in_hbo(t, ip);
3517 
3518 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3519 		&m->profile->profile,
3520 		time,
3521 		length,
3522 		color_in);
3523 
3524 	color_out &= m->color_mask;
3525 
3526 	n_pkts = m->n_pkts[color_out];
3527 	n_bytes = m->n_bytes[color_out];
3528 
3529 	instr_meter_color_out_hbo_set(t, ip, color_out);
3530 
3531 	m->n_pkts[color_out] = n_pkts + 1;
3532 	m->n_bytes[color_out] = n_bytes + length;
3533 }
3534 
3535 static inline void
3536 __instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3537 {
3538 	struct meter *m;
3539 	uint64_t time, n_pkts, n_bytes;
3540 	uint32_t length;
3541 	enum rte_color color_in, color_out;
3542 
3543 	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
3544 
3545 	m = instr_meter_idx_nbo(p, t, ip);
3546 	rte_prefetch0(m->n_pkts);
3547 	time = rte_get_tsc_cycles();
3548 	length = instr_meter_length_nbo(t, ip);
3549 	color_in = (enum rte_color)ip->meter.color_in_val;
3550 
3551 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3552 		&m->profile->profile,
3553 		time,
3554 		length,
3555 		color_in);
3556 
3557 	color_out &= m->color_mask;
3558 
3559 	n_pkts = m->n_pkts[color_out];
3560 	n_bytes = m->n_bytes[color_out];
3561 
3562 	instr_meter_color_out_hbo_set(t, ip, color_out);
3563 
3564 	m->n_pkts[color_out] = n_pkts + 1;
3565 	m->n_bytes[color_out] = n_bytes + length;
3566 }
3567 
3568 static inline void
3569 __instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3570 {
3571 	struct meter *m;
3572 	uint64_t time, n_pkts, n_bytes;
3573 	uint32_t length;
3574 	enum rte_color color_in, color_out;
3575 
3576 	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
3577 
3578 	m = instr_meter_idx_nbo(p, t, ip);
3579 	rte_prefetch0(m->n_pkts);
3580 	time = rte_get_tsc_cycles();
3581 	length = instr_meter_length_hbo(t, ip);
3582 	color_in = instr_meter_color_in_hbo(t, ip);
3583 
3584 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3585 		&m->profile->profile,
3586 		time,
3587 		length,
3588 		color_in);
3589 
3590 	color_out &= m->color_mask;
3591 
3592 	n_pkts = m->n_pkts[color_out];
3593 	n_bytes = m->n_bytes[color_out];
3594 
3595 	instr_meter_color_out_hbo_set(t, ip, color_out);
3596 
3597 	m->n_pkts[color_out] = n_pkts + 1;
3598 	m->n_bytes[color_out] = n_bytes + length;
3599 }
3600 
3601 static inline void
3602 __instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3603 {
3604 	struct meter *m;
3605 	uint64_t time, n_pkts, n_bytes;
3606 	uint32_t length;
3607 	enum rte_color color_in, color_out;
3608 
3609 	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
3610 
3611 	m = instr_meter_idx_nbo(p, t, ip);
3612 	rte_prefetch0(m->n_pkts);
3613 	time = rte_get_tsc_cycles();
3614 	length = instr_meter_length_hbo(t, ip);
3615 	color_in = (enum rte_color)ip->meter.color_in_val;
3616 
3617 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3618 		&m->profile->profile,
3619 		time,
3620 		length,
3621 		color_in);
3622 
3623 	color_out &= m->color_mask;
3624 
3625 	n_pkts = m->n_pkts[color_out];
3626 	n_bytes = m->n_bytes[color_out];
3627 
3628 	instr_meter_color_out_hbo_set(t, ip, color_out);
3629 
3630 	m->n_pkts[color_out] = n_pkts + 1;
3631 	m->n_bytes[color_out] = n_bytes + length;
3632 }
3633 
3634 static inline void
3635 __instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3636 {
3637 	struct meter *m;
3638 	uint64_t time, n_pkts, n_bytes;
3639 	uint32_t length;
3640 	enum rte_color color_in, color_out;
3641 
3642 	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
3643 
3644 	m = instr_meter_idx_hbo(p, t, ip);
3645 	rte_prefetch0(m->n_pkts);
3646 	time = rte_get_tsc_cycles();
3647 	length = instr_meter_length_nbo(t, ip);
3648 	color_in = instr_meter_color_in_hbo(t, ip);
3649 
3650 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3651 		&m->profile->profile,
3652 		time,
3653 		length,
3654 		color_in);
3655 
3656 	color_out &= m->color_mask;
3657 
3658 	n_pkts = m->n_pkts[color_out];
3659 	n_bytes = m->n_bytes[color_out];
3660 
3661 	instr_meter_color_out_hbo_set(t, ip, color_out);
3662 
3663 	m->n_pkts[color_out] = n_pkts + 1;
3664 	m->n_bytes[color_out] = n_bytes + length;
3665 }
3666 
3667 static inline void
3668 __instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3669 {
3670 	struct meter *m;
3671 	uint64_t time, n_pkts, n_bytes;
3672 	uint32_t length;
3673 	enum rte_color color_in, color_out;
3674 
3675 	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
3676 
3677 	m = instr_meter_idx_hbo(p, t, ip);
3678 	rte_prefetch0(m->n_pkts);
3679 	time = rte_get_tsc_cycles();
3680 	length = instr_meter_length_nbo(t, ip);
3681 	color_in = (enum rte_color)ip->meter.color_in_val;
3682 
3683 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3684 		&m->profile->profile,
3685 		time,
3686 		length,
3687 		color_in);
3688 
3689 	color_out &= m->color_mask;
3690 
3691 	n_pkts = m->n_pkts[color_out];
3692 	n_bytes = m->n_bytes[color_out];
3693 
3694 	instr_meter_color_out_hbo_set(t, ip, color_out);
3695 
3696 	m->n_pkts[color_out] = n_pkts + 1;
3697 	m->n_bytes[color_out] = n_bytes + length;
3698 }
3699 
3700 static inline void
3701 __instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3702 {
3703 	struct meter *m;
3704 	uint64_t time, n_pkts, n_bytes;
3705 	uint32_t length;
3706 	enum rte_color color_in, color_out;
3707 
3708 	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
3709 
3710 	m = instr_meter_idx_hbo(p, t, ip);
3711 	rte_prefetch0(m->n_pkts);
3712 	time = rte_get_tsc_cycles();
3713 	length = instr_meter_length_hbo(t, ip);
3714 	color_in = instr_meter_color_in_hbo(t, ip);
3715 
3716 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3717 		&m->profile->profile,
3718 		time,
3719 		length,
3720 		color_in);
3721 
3722 	color_out &= m->color_mask;
3723 
3724 	n_pkts = m->n_pkts[color_out];
3725 	n_bytes = m->n_bytes[color_out];
3726 
3727 	instr_meter_color_out_hbo_set(t, ip, color_out);
3728 
3729 	m->n_pkts[color_out] = n_pkts + 1;
3730 	m->n_bytes[color_out] = n_bytes + length;
3731 }
3732 
3733 static inline void
3734 __instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3735 {
3736 	struct meter *m;
3737 	uint64_t time, n_pkts, n_bytes;
3738 	uint32_t length;
3739 	enum rte_color color_in, color_out;
3740 
3741 	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
3742 
3743 	m = instr_meter_idx_hbo(p, t, ip);
3744 	rte_prefetch0(m->n_pkts);
3745 	time = rte_get_tsc_cycles();
3746 	length = instr_meter_length_hbo(t, ip);
3747 	color_in = (enum rte_color)ip->meter.color_in_val;
3748 
3749 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3750 		&m->profile->profile,
3751 		time,
3752 		length,
3753 		color_in);
3754 
3755 	color_out &= m->color_mask;
3756 
3757 	n_pkts = m->n_pkts[color_out];
3758 	n_bytes = m->n_bytes[color_out];
3759 
3760 	instr_meter_color_out_hbo_set(t, ip, color_out);
3761 
3762 	m->n_pkts[color_out] = n_pkts + 1;
3763 	m->n_bytes[color_out] = n_bytes + length;
3764 }
3765 
3766 static inline void
3767 __instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3768 {
3769 	struct meter *m;
3770 	uint64_t time, n_pkts, n_bytes;
3771 	uint32_t length;
3772 	enum rte_color color_in, color_out;
3773 
3774 	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
3775 
3776 	m = instr_meter_idx_imm(p, ip);
3777 	rte_prefetch0(m->n_pkts);
3778 	time = rte_get_tsc_cycles();
3779 	length = instr_meter_length_nbo(t, ip);
3780 	color_in = instr_meter_color_in_hbo(t, ip);
3781 
3782 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3783 		&m->profile->profile,
3784 		time,
3785 		length,
3786 		color_in);
3787 
3788 	color_out &= m->color_mask;
3789 
3790 	n_pkts = m->n_pkts[color_out];
3791 	n_bytes = m->n_bytes[color_out];
3792 
3793 	instr_meter_color_out_hbo_set(t, ip, color_out);
3794 
3795 	m->n_pkts[color_out] = n_pkts + 1;
3796 	m->n_bytes[color_out] = n_bytes + length;
3797 }
3798 
3799 static inline void
3800 __instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3801 {
3802 	struct meter *m;
3803 	uint64_t time, n_pkts, n_bytes;
3804 	uint32_t length;
3805 	enum rte_color color_in, color_out;
3806 
3807 	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
3808 
3809 	m = instr_meter_idx_imm(p, ip);
3810 	rte_prefetch0(m->n_pkts);
3811 	time = rte_get_tsc_cycles();
3812 	length = instr_meter_length_nbo(t, ip);
3813 	color_in = (enum rte_color)ip->meter.color_in_val;
3814 
3815 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3816 		&m->profile->profile,
3817 		time,
3818 		length,
3819 		color_in);
3820 
3821 	color_out &= m->color_mask;
3822 
3823 	n_pkts = m->n_pkts[color_out];
3824 	n_bytes = m->n_bytes[color_out];
3825 
3826 	instr_meter_color_out_hbo_set(t, ip, color_out);
3827 
3828 	m->n_pkts[color_out] = n_pkts + 1;
3829 	m->n_bytes[color_out] = n_bytes + length;
3830 }
3831 
3832 static inline void
3833 __instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3834 {
3835 	struct meter *m;
3836 	uint64_t time, n_pkts, n_bytes;
3837 	uint32_t length;
3838 	enum rte_color color_in, color_out;
3839 
3840 	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
3841 
3842 	m = instr_meter_idx_imm(p, ip);
3843 	rte_prefetch0(m->n_pkts);
3844 	time = rte_get_tsc_cycles();
3845 	length = instr_meter_length_hbo(t, ip);
3846 	color_in = instr_meter_color_in_hbo(t, ip);
3847 
3848 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3849 		&m->profile->profile,
3850 		time,
3851 		length,
3852 		color_in);
3853 
3854 	color_out &= m->color_mask;
3855 
3856 	n_pkts = m->n_pkts[color_out];
3857 	n_bytes = m->n_bytes[color_out];
3858 
3859 	instr_meter_color_out_hbo_set(t, ip, color_out);
3860 
3861 	m->n_pkts[color_out] = n_pkts + 1;
3862 	m->n_bytes[color_out] = n_bytes + length;
3863 }
3864 
3865 static inline void
3866 __instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3867 {
3868 	struct meter *m;
3869 	uint64_t time, n_pkts, n_bytes;
3870 	uint32_t length;
3871 	enum rte_color color_in, color_out;
3872 
3873 	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
3874 
3875 	m = instr_meter_idx_imm(p, ip);
3876 	rte_prefetch0(m->n_pkts);
3877 	time = rte_get_tsc_cycles();
3878 	length = instr_meter_length_hbo(t, ip);
3879 	color_in = (enum rte_color)ip->meter.color_in_val;
3880 
3881 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
3882 		&m->profile->profile,
3883 		time,
3884 		length,
3885 		color_in);
3886 
3887 	color_out &= m->color_mask;
3888 
3889 	n_pkts = m->n_pkts[color_out];
3890 	n_bytes = m->n_bytes[color_out];
3891 
3892 	instr_meter_color_out_hbo_set(t, ip, color_out);
3893 
3894 	m->n_pkts[color_out] = n_pkts + 1;
3895 	m->n_bytes[color_out] = n_bytes + length;
3896 }
3897 
3898 #endif
3899