xref: /dpdk/lib/pipeline/rte_swx_pipeline_internal.h (revision 5d52418fa4b9a7f28eaedc1d88ec5cf330381c0e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
5 #define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
6 
7 #include <inttypes.h>
8 #include <string.h>
9 #include <sys/queue.h>
10 
11 #include <rte_bitops.h>
12 #include <rte_byteorder.h>
13 #include <rte_common.h>
14 #include <rte_cycles.h>
15 #include <rte_prefetch.h>
16 #include <rte_meter.h>
17 
18 #include <rte_swx_table_selector.h>
19 #include <rte_swx_table_learner.h>
20 #include <rte_swx_pipeline.h>
21 #include <rte_swx_ctl.h>
22 
/*
 * Debug tracing.
 *
 * TRACE_LEVEL may be pre-defined by the build; it defaults to 0, in which case
 * TRACE() expands to nothing. When TRACE_LEVEL is non-zero, TRACE() forwards
 * its arguments to printf(), so <stdio.h> is included here to keep this header
 * self-contained instead of relying on the includer to provide printf().
 */
#ifndef TRACE_LEVEL
#define TRACE_LEVEL 0
#endif

#if TRACE_LEVEL
#include <stdio.h>
#define TRACE(...) printf(__VA_ARGS__)
#else
#define TRACE(...)
#endif
32 
33 /*
34  * Environment.
35  */
/* 64-bit byte order conversion: big endian <-> CPU byte order. */
#define ntoh64(x)	rte_be_to_cpu_64(x)
#define hton64(x)	rte_cpu_to_be_64(x)
38 
39 /*
40  * Struct.
41  */
42 struct field {
43 	char name[RTE_SWX_NAME_SIZE];
44 	uint32_t n_bits;
45 	uint32_t offset;
46 	int var_size;
47 };
48 
49 struct struct_type {
50 	TAILQ_ENTRY(struct_type) node;
51 	char name[RTE_SWX_NAME_SIZE];
52 	struct field *fields;
53 	uint32_t n_fields;
54 	uint32_t n_bits;
55 	uint32_t n_bits_min;
56 	int var_size;
57 };
58 
59 TAILQ_HEAD(struct_type_tailq, struct_type);
60 
61 /*
62  * Input port.
63  */
64 struct port_in_type {
65 	TAILQ_ENTRY(port_in_type) node;
66 	char name[RTE_SWX_NAME_SIZE];
67 	struct rte_swx_port_in_ops ops;
68 };
69 
70 TAILQ_HEAD(port_in_type_tailq, port_in_type);
71 
/* Input port instance: its type, an opaque object pointer and its ID. */
struct port_in {
	TAILQ_ENTRY(port_in) node;
	struct port_in_type *type;
	void *obj;
	uint32_t id;
};

TAILQ_HEAD(port_in_tailq, port_in);
80 
81 struct port_in_runtime {
82 	rte_swx_port_in_pkt_rx_t pkt_rx;
83 	void *obj;
84 };
85 
86 /*
87  * Output port.
88  */
89 struct port_out_type {
90 	TAILQ_ENTRY(port_out_type) node;
91 	char name[RTE_SWX_NAME_SIZE];
92 	struct rte_swx_port_out_ops ops;
93 };
94 
95 TAILQ_HEAD(port_out_type_tailq, port_out_type);
96 
/* Output port instance: its type, an opaque object pointer and its ID. */
struct port_out {
	TAILQ_ENTRY(port_out) node;
	struct port_out_type *type;
	void *obj;
	uint32_t id;
};

TAILQ_HEAD(port_out_tailq, port_out);
105 
106 struct port_out_runtime {
107 	rte_swx_port_out_pkt_tx_t pkt_tx;
108 	rte_swx_port_out_pkt_fast_clone_tx_t pkt_fast_clone_tx;
109 	rte_swx_port_out_pkt_clone_tx_t pkt_clone_tx;
110 	rte_swx_port_out_flush_t flush;
111 	void *obj;
112 };
113 
114 /*
115  * Packet mirroring.
116  */
/* Packet mirroring session parameters. */
struct mirroring_session {
	uint32_t port_id;
	int fast_clone;
	uint32_t truncation_length;
};
122 
123 /*
124  * Extern object.
125  */
126 struct extern_type_member_func {
127 	TAILQ_ENTRY(extern_type_member_func) node;
128 	char name[RTE_SWX_NAME_SIZE];
129 	rte_swx_extern_type_member_func_t func;
130 	uint32_t id;
131 };
132 
133 TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);
134 
135 struct extern_type {
136 	TAILQ_ENTRY(extern_type) node;
137 	char name[RTE_SWX_NAME_SIZE];
138 	struct struct_type *mailbox_struct_type;
139 	rte_swx_extern_type_constructor_t constructor;
140 	rte_swx_extern_type_destructor_t destructor;
141 	struct extern_type_member_func_tailq funcs;
142 	uint32_t n_funcs;
143 };
144 
145 TAILQ_HEAD(extern_type_tailq, extern_type);
146 
147 struct extern_obj {
148 	TAILQ_ENTRY(extern_obj) node;
149 	char name[RTE_SWX_NAME_SIZE];
150 	struct extern_type *type;
151 	void *obj;
152 	uint32_t struct_id;
153 	uint32_t id;
154 };
155 
156 TAILQ_HEAD(extern_obj_tailq, extern_obj);
157 
158 #ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
159 #define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
160 #endif
161 
162 struct extern_obj_runtime {
163 	void *obj;
164 	uint8_t *mailbox;
165 	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
166 };
167 
168 /*
169  * Extern function.
170  */
171 struct extern_func {
172 	TAILQ_ENTRY(extern_func) node;
173 	char name[RTE_SWX_NAME_SIZE];
174 	struct struct_type *mailbox_struct_type;
175 	rte_swx_extern_func_t func;
176 	uint32_t struct_id;
177 	uint32_t id;
178 };
179 
180 TAILQ_HEAD(extern_func_tailq, extern_func);
181 
182 struct extern_func_runtime {
183 	uint8_t *mailbox;
184 	rte_swx_extern_func_t func;
185 };
186 
187 /*
188  * Hash function.
189  */
190 struct hash_func {
191 	TAILQ_ENTRY(hash_func) node;
192 	char name[RTE_SWX_NAME_SIZE];
193 	rte_swx_hash_func_t func;
194 	uint32_t id;
195 };
196 
197 TAILQ_HEAD(hash_func_tailq, hash_func);
198 
199 struct hash_func_runtime {
200 	rte_swx_hash_func_t func;
201 };
202 
203 /*
204  * RSS.
205  */
206 struct rss {
207 	TAILQ_ENTRY(rss) node;
208 	char name[RTE_SWX_NAME_SIZE];
209 	uint32_t id;
210 };
211 
212 TAILQ_HEAD(rss_tailq, rss);
213 
/* RSS run-time data: key size followed by the key bytes.
 *
 * The key is a C99 flexible array member (instead of the non-standard
 * zero-length array), so an instance holding an N byte key is allocated as
 * sizeof(struct rss_runtime) + N bytes.
 */
struct rss_runtime {
	uint32_t key_size; /* key size in bytes. */
	uint8_t key[]; /* key. */
};
218 
219 /*
220  * Header.
221  */
222 struct header {
223 	TAILQ_ENTRY(header) node;
224 	char name[RTE_SWX_NAME_SIZE];
225 	struct struct_type *st;
226 	uint32_t struct_id;
227 	uint32_t id;
228 };
229 
230 TAILQ_HEAD(header_tailq, header);
231 
/* Header run-time data: a byte pointer and a byte count. */
struct header_runtime {
	uint8_t *ptr0;
	uint32_t n_bytes;
};
236 
/* Emitted header run-time data: two byte pointers and a byte count. */
struct header_out_runtime {
	uint8_t *ptr0;
	uint8_t *ptr;
	uint32_t n_bytes;
};
242 
243 /*
244  * Instruction.
245  */
246 
247 /* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
248  * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
249  * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
250  * when transferred to packet meta-data and in NBO when transferred to packet
251  * headers.
252  */
253 
254 /* Notation conventions:
255  *    -Header field: H = h.header.field (dst/src)
256  *    -Meta-data field: M = m.field (dst/src)
257  *    -Extern object mailbox field: E = e.field (dst/src)
258  *    -Extern function mailbox field: F = f.field (dst/src)
259  *    -Table action data field: T = t.field (src only)
260  *    -Immediate value: I = 32-bit unsigned value (src only)
261  */
262 
/* Instruction opcodes.
 *
 * Operand kinds in the per-opcode comments use the notation conventions of
 * this file: H = header field, M = meta-data field, E = extern object mailbox
 * field, F = extern function mailbox field, T = table action data field,
 * I = immediate value. The enumerator order defines the opcode values and
 * must not be changed.
 */
enum instruction_type {
	/* rx m.port_in */
	INSTR_RX,

	/* tx port_out, with port_out = MI */
	INSTR_TX,   /* port_out = M */
	INSTR_TX_I, /* port_out = I */
	INSTR_DROP,

	/* mirror slot_id session_id
	 * slot_id = MEFT, session_id = MEFT
	 */
	INSTR_MIRROR,

	/* recirculate */
	INSTR_RECIRCULATE,

	/* recircid m.recirc_pass_id
	 * Read the internal recirculation pass ID into the specified meta-data
	 * field.
	 */
	INSTR_RECIRCID,

	/* extract h.header */
	INSTR_HDR_EXTRACT,
	INSTR_HDR_EXTRACT2,
	INSTR_HDR_EXTRACT3,
	INSTR_HDR_EXTRACT4,
	INSTR_HDR_EXTRACT5,
	INSTR_HDR_EXTRACT6,
	INSTR_HDR_EXTRACT7,
	INSTR_HDR_EXTRACT8,

	/* extract h.header m.last_field_size */
	INSTR_HDR_EXTRACT_M,

	/* lookahead h.header */
	INSTR_HDR_LOOKAHEAD,

	/* emit h.header */
	INSTR_HDR_EMIT,
	INSTR_HDR_EMIT_TX,
	INSTR_HDR_EMIT2_TX,
	INSTR_HDR_EMIT3_TX,
	INSTR_HDR_EMIT4_TX,
	INSTR_HDR_EMIT5_TX,
	INSTR_HDR_EMIT6_TX,
	INSTR_HDR_EMIT7_TX,
	INSTR_HDR_EMIT8_TX,

	/* validate h.header */
	INSTR_HDR_VALIDATE,

	/* invalidate h.header */
	INSTR_HDR_INVALIDATE,

	/* mov dst src: dst = src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_MOV,        /* dst = MEF, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
	INSTR_MOV_MH,     /* dst = MEF, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
	INSTR_MOV_HM,     /* dst = H, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
	INSTR_MOV_HH,     /* dst = H, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
	INSTR_MOV_DMA,    /* dst and src in NBO format. */
	INSTR_MOV_128,    /* dst and src in NBO format, size(dst) = size(src) = 128 bits. */
	INSTR_MOV_128_32, /* dst and src in NBO format, size(dst) = 128 bits, size(src) = 32 bits. */
	INSTR_MOV_I,      /* dst = HMEF, src = I; size(dst) <= 64 bits. */

	/* dma h.header t.field
	 * memcpy(h.header, t.field, sizeof(h.header))
	 */
	INSTR_DMA_HT,
	INSTR_DMA_HT2,
	INSTR_DMA_HT3,
	INSTR_DMA_HT4,
	INSTR_DMA_HT5,
	INSTR_DMA_HT6,
	INSTR_DMA_HT7,
	INSTR_DMA_HT8,

	/* add dst src: dst += src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
	INSTR_ALU_ADD_HH, /* dst = H, src = H */
	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
	INSTR_ALU_ADD_HI, /* dst = H, src = I */

	/* sub dst src: dst -= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
	INSTR_ALU_SUB_HH, /* dst = H, src = H */
	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
	INSTR_ALU_SUB_HI, /* dst = H, src = I */

	/* ckadd dst src
	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
	 * dst = H, src = {H, h.header}, '+ = 1's complement addition operator
	 */
	INSTR_ALU_CKADD_FIELD,    /* src = H */
	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 bytes. */
	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with sizeof(header) any 4-byte multiple. */

	/* cksub dst src
	 * dst = dst '- src
	 * dst = H, src = H, '- = 1's complement subtraction operator
	 */
	INSTR_ALU_CKSUB_FIELD,

	/* and dst src: dst &= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
	INSTR_ALU_AND_HH, /* dst = H, src = H */
	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */

	/* or dst src: dst |= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_OR_HH, /* dst = H, src = H */
	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */

	/* xor dst src: dst ^= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
	INSTR_ALU_XOR_HH, /* dst = H, src = H */
	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */

	/* shl dst src: dst <<= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHL_HH, /* dst = H, src = H */
	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHL_HI, /* dst = H, src = I */

	/* shr dst src: dst >>= src
	 * dst = HMEF, src = HMEFTI
	 */
	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
	INSTR_ALU_SHR_HH, /* dst = H, src = H */
	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
	INSTR_ALU_SHR_HI, /* dst = H, src = I */

	/* regprefetch REGARRAY index: prefetch REGARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_REGPREFETCH_RH, /* index = H */
	INSTR_REGPREFETCH_RM, /* index = MEFT */
	INSTR_REGPREFETCH_RI, /* index = I */

	/* regrd dst REGARRAY index: dst = REGARRAY[index]
	 * dst = HMEF, index = HMEFTI
	 */
	INSTR_REGRD_HRH, /* dst = H, index = H */
	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
	INSTR_REGRD_HRI, /* dst = H, index = I */
	INSTR_REGRD_MRH, /* dst = MEF, index = H */
	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
	INSTR_REGRD_MRI, /* dst = MEF, index = I */

	/* regwr REGARRAY index src: REGARRAY[index] = src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGWR_RHH, /* index = H, src = H */
	INSTR_REGWR_RHM, /* index = H, src = MEFT */
	INSTR_REGWR_RHI, /* index = H, src = I */
	INSTR_REGWR_RMH, /* index = MEFT, src = H */
	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGWR_RMI, /* index = MEFT, src = I */
	INSTR_REGWR_RIH, /* index = I, src = H */
	INSTR_REGWR_RIM, /* index = I, src = MEFT */
	INSTR_REGWR_RII, /* index = I, src = I */

	/* regadd REGARRAY index src: REGARRAY[index] += src
	 * index = HMEFTI, src = HMEFTI
	 */
	INSTR_REGADD_RHH, /* index = H, src = H */
	INSTR_REGADD_RHM, /* index = H, src = MEFT */
	INSTR_REGADD_RHI, /* index = H, src = I */
	INSTR_REGADD_RMH, /* index = MEFT, src = H */
	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
	INSTR_REGADD_RMI, /* index = MEFT, src = I */
	INSTR_REGADD_RIH, /* index = I, src = H */
	INSTR_REGADD_RIM, /* index = I, src = MEFT */
	INSTR_REGADD_RII, /* index = I, src = I */

	/* metprefetch METARRAY index: prefetch METARRAY[index]
	 * index = HMEFTI
	 */
	INSTR_METPREFETCH_H, /* index = H */
	INSTR_METPREFETCH_M, /* index = MEFT */
	INSTR_METPREFETCH_I, /* index = I */

	/* meter METARRAY index length color_in color_out
	 * color_out = meter(METARRAY[index], length, color_in)
	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
	 */
	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */

	/* table TABLE */
	INSTR_TABLE,
	INSTR_TABLE_AF,
	INSTR_SELECTOR,
	INSTR_LEARNER,
	INSTR_LEARNER_AF,

	/* learn ACTION_NAME [ m.action_first_arg ] m.timeout_id */
	INSTR_LEARNER_LEARN,

	/* rearm [ m.timeout_id ] */
	INSTR_LEARNER_REARM,
	INSTR_LEARNER_REARM_NEW,

	/* forget */
	INSTR_LEARNER_FORGET,

	/* entryid m.table_entry_id
	 * Read the internal table entry ID into the specified meta-data field.
	 */
	INSTR_ENTRYID,

	/* extern e.obj.func */
	INSTR_EXTERN_OBJ,

	/* extern f.func */
	INSTR_EXTERN_FUNC,

	/* hash HASH_FUNC_NAME dst src_first src_last
	 * Compute hash value over range of struct fields.
	 * dst = M, src_first = HMEFT, src_last = HMEFT
	 * src_first and src_last must be fields within the same struct
	 */
	INSTR_HASH_FUNC,

	/* rss RSS_OBJ_NAME dst src_first src_last
	 * Compute the RSS hash value over range of struct fields.
	 * dst = M, src_first = HMEFT, src_last = HMEFT
	 * src_first and src_last must be fields within the same struct
	 */
	INSTR_RSS,

	/* jmp LABEL: unconditional jump */
	INSTR_JMP,

	/* jmpv LABEL h.header: jump if header is valid */
	INSTR_JMP_VALID,

	/* jmpnv LABEL h.header: jump if header is invalid */
	INSTR_JMP_INVALID,

	/* jmph LABEL: jump if table lookup hit */
	INSTR_JMP_HIT,

	/* jmpnh LABEL: jump if table lookup miss */
	INSTR_JMP_MISS,

	/* jmpa LABEL ACTION: jump if action run */
	INSTR_JMP_ACTION_HIT,

	/* jmpna LABEL ACTION: jump if action not run */
	INSTR_JMP_ACTION_MISS,

	/* jmpeq LABEL a b: jump if a is equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_EQ_HH, /* a = H, b = H */
	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmpneq LABEL a b: jump if a is not equal to b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
	INSTR_JMP_NEQ_HH, /* a = H, b = H */
	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */

	/* jmplt LABEL a b: jump if a is less than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
	INSTR_JMP_LT_HH, /* a = H, b = H */
	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
	INSTR_JMP_LT_HI, /* a = H, b = I */

	/* jmpgt LABEL a b: jump if a is greater than b
	 * a = HMEFT, b = HMEFTI
	 */
	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
	INSTR_JMP_GT_HH, /* a = H, b = H */
	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
	INSTR_JMP_GT_HI, /* a = H, b = I */

	/* return: return from action */
	INSTR_RETURN,

	/* Start of custom instructions. */
	INSTR_CUSTOM_0,
};
639 
/* Instruction operand: selects a field by struct ID, offset and size in bits. */
struct instr_operand {
	uint8_t struct_id;
	uint8_t n_bits;
	uint8_t offset;
	uint8_t pad;
};
646 
/* Operands of the I/O and header instructions. */
struct instr_io {
	/* Port operand: either an (offset, n_bits) pair or a 32-bit value
	 * sharing the same storage.
	 */
	struct {
		union {
			struct {
				uint8_t offset;
				uint8_t n_bits;
				uint8_t pad[2];
			};

			uint32_t val;
		};
	} io;

	/* Up to 8 headers per instruction. */
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
		uint8_t n_bytes[8];
	} hdr;
};
666 
/* Operands of the header validate/invalidate instructions. */
struct instr_hdr_validity {
	uint8_t header_id;
	uint8_t struct_id;
};
671 
/* Operand of the table instructions: the table ID. */
struct instr_table {
	uint8_t table_id;
};
675 
/* Operands of the learner learn/rearm instructions. */
struct instr_learn {
	uint8_t action_id;
	uint8_t mf_first_arg_offset;
	uint8_t mf_timeout_id_offset;
	uint8_t mf_timeout_id_n_bits;
};
682 
/* Operands of the extern object instruction: object ID and function ID. */
struct instr_extern_obj {
	uint8_t ext_obj_id;
	uint8_t func_id;
};
687 
/* Operand of the extern function instruction: the function ID. */
struct instr_extern_func {
	uint8_t ext_func_id;
};
691 
/* Operands of the hash instruction: hash function ID, destination field and
 * source byte range within a struct.
 */
struct instr_hash_func {
	uint8_t hash_func_id;

	/* Destination. */
	struct {
		uint8_t offset;
		uint8_t n_bits;
	} dst;

	/* Source. */
	struct {
		uint8_t struct_id;
		uint16_t offset;
		uint16_t n_bytes;
	} src;
};
706 
/* Operands of the rss instruction: RSS object ID, destination field and
 * source byte range within a struct.
 */
struct instr_rss {
	uint8_t rss_obj_id;

	/* Destination. */
	struct {
		uint8_t offset;
		uint8_t n_bits;
	} dst;

	/* Source. */
	struct {
		uint8_t struct_id;
		uint16_t offset;
		uint16_t n_bytes;
	} src;
};
721 
722 struct instr_dst_src {
723 	struct instr_operand dst;
724 	union {
725 		struct instr_operand src;
726 		uint64_t src_val;
727 	};
728 };
729 
730 struct instr_regarray {
731 	uint8_t regarray_id;
732 	uint8_t pad[3];
733 
734 	union {
735 		struct instr_operand idx;
736 		uint32_t idx_val;
737 	};
738 
739 	union {
740 		struct instr_operand dstsrc;
741 		uint64_t dstsrc_val;
742 	};
743 };
744 
745 struct instr_meter {
746 	uint8_t metarray_id;
747 	uint8_t pad[3];
748 
749 	union {
750 		struct instr_operand idx;
751 		uint32_t idx_val;
752 	};
753 
754 	struct instr_operand length;
755 
756 	union {
757 		struct instr_operand color_in;
758 		uint32_t color_in_val;
759 	};
760 
761 	struct instr_operand color_out;
762 };
763 
/* Operands of the dma instructions; every array has 8 slots. */
struct instr_dma {
	/* Destination headers. */
	struct {
		uint8_t header_id[8];
		uint8_t struct_id[8];
	} dst;

	/* Source offsets. */
	struct {
		uint8_t offset[8];
	} src;

	uint16_t n_bytes[8];
};
776 
777 struct instr_jmp {
778 	struct instruction *ip;
779 
780 	union {
781 		struct instr_operand a;
782 		uint8_t header_id;
783 		uint8_t action_id;
784 	};
785 
786 	union {
787 		struct instr_operand b;
788 		uint64_t b_val;
789 	};
790 };
791 
792 struct instruction {
793 	enum instruction_type type;
794 	union {
795 		struct instr_io io;
796 		struct instr_dst_src mirror;
797 		struct instr_hdr_validity valid;
798 		struct instr_dst_src mov;
799 		struct instr_regarray regarray;
800 		struct instr_meter meter;
801 		struct instr_dma dma;
802 		struct instr_dst_src alu;
803 		struct instr_table table;
804 		struct instr_learn learn;
805 		struct instr_extern_obj ext_obj;
806 		struct instr_extern_func ext_func;
807 		struct instr_hash_func hash_func;
808 		struct instr_rss rss;
809 		struct instr_jmp jmp;
810 	};
811 };
812 
813 struct instruction_data {
814 	char label[RTE_SWX_NAME_SIZE];
815 	char jmp_label[RTE_SWX_NAME_SIZE];
816 	uint32_t n_users; /* user = jmp instruction to this instruction. */
817 	int invalid;
818 };
819 
/* Instruction execution function: takes the pipeline, returns nothing. */
typedef void
(*instr_exec_t)(struct rte_swx_pipeline *);
821 
822 /*
823  * Action.
824  */
/* Action function: takes the pipeline, returns nothing. */
typedef void (*action_func_t)(struct rte_swx_pipeline *p);
827 
828 struct action {
829 	TAILQ_ENTRY(action) node;
830 	char name[RTE_SWX_NAME_SIZE];
831 	struct struct_type *st;
832 	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
833 	struct instruction *instructions;
834 	struct instruction_data *instruction_data;
835 	uint32_t n_instructions;
836 	uint32_t id;
837 };
838 
839 TAILQ_HEAD(action_tailq, action);
840 
841 /*
842  * Table.
843  */
844 struct table_type {
845 	TAILQ_ENTRY(table_type) node;
846 	char name[RTE_SWX_NAME_SIZE];
847 	enum rte_swx_table_match_type match_type;
848 	struct rte_swx_table_ops ops;
849 };
850 
851 TAILQ_HEAD(table_type_tailq, table_type);
852 
853 struct match_field {
854 	enum rte_swx_table_match_type match_type;
855 	struct field *field;
856 };
857 
858 struct table {
859 	TAILQ_ENTRY(table) node;
860 	char name[RTE_SWX_NAME_SIZE];
861 	char args[RTE_SWX_NAME_SIZE];
862 	struct table_type *type; /* NULL when n_fields == 0. */
863 
864 	/* Match. */
865 	struct match_field *fields;
866 	uint32_t n_fields;
867 	struct header *header; /* Only valid when n_fields > 0. */
868 
869 	/* Action. */
870 	struct action **actions;
871 	struct action *default_action;
872 	uint8_t *default_action_data;
873 	uint32_t n_actions;
874 	int default_action_is_const;
875 	uint32_t action_data_size_max;
876 	int *action_is_for_table_entries;
877 	int *action_is_for_default_entry;
878 
879 	struct hash_func *hf;
880 	uint32_t size;
881 	uint32_t id;
882 };
883 
884 TAILQ_HEAD(table_tailq, table);
885 
886 struct table_runtime {
887 	rte_swx_table_lookup_t func;
888 	void *mailbox;
889 	uint8_t **key;
890 };
891 
/* Table packet counters. */
struct table_statistics {
	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
	uint64_t *n_pkts_action; /* Counter array; presumably one per action. */
};
896 
897 /*
898  * Selector.
899  */
900 struct selector {
901 	TAILQ_ENTRY(selector) node;
902 	char name[RTE_SWX_NAME_SIZE];
903 
904 	struct field *group_id_field;
905 	struct field **selector_fields;
906 	uint32_t n_selector_fields;
907 	struct header *selector_header;
908 	struct field *member_id_field;
909 
910 	uint32_t n_groups_max;
911 	uint32_t n_members_per_group_max;
912 
913 	uint32_t id;
914 };
915 
916 TAILQ_HEAD(selector_tailq, selector);
917 
/* Selector run-time data: mailbox plus the group ID, selector and member ID
 * buffer pointers.
 */
struct selector_runtime {
	void *mailbox;
	uint8_t **group_id_buffer;
	uint8_t **selector_buffer;
	uint8_t **member_id_buffer;
};
924 
/* Selector packet counter. */
struct selector_statistics {
	uint64_t n_pkts;
};
928 
929 /*
930  * Learner table.
931  */
932 struct learner {
933 	TAILQ_ENTRY(learner) node;
934 	char name[RTE_SWX_NAME_SIZE];
935 
936 	/* Match. */
937 	struct field **fields;
938 	uint32_t n_fields;
939 	struct header *header;
940 
941 	/* Action. */
942 	struct action **actions;
943 	struct action *default_action;
944 	uint8_t *default_action_data;
945 	uint32_t n_actions;
946 	int default_action_is_const;
947 	uint32_t action_data_size_max;
948 	int *action_is_for_table_entries;
949 	int *action_is_for_default_entry;
950 
951 	struct hash_func *hf;
952 	uint32_t size;
953 	uint32_t timeout[RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX];
954 	uint32_t n_timeouts;
955 	uint32_t id;
956 };
957 
958 TAILQ_HEAD(learner_tailq, learner);
959 
/* Learner table run-time data: mailbox and key pointer. */
struct learner_runtime {
	void *mailbox;
	uint8_t **key;
};
964 
/* Learner table packet counters. */
struct learner_statistics {
	uint64_t n_pkts_hit[2];   /* 0 = Miss, 1 = Hit. */
	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
	uint64_t n_pkts_rearm;
	uint64_t n_pkts_forget;
	uint64_t *n_pkts_action;  /* Counter array; presumably one per action. */
};
972 
973 /*
974  * Register array.
975  */
976 struct regarray {
977 	TAILQ_ENTRY(regarray) node;
978 	char name[RTE_SWX_NAME_SIZE];
979 	uint64_t init_val;
980 	uint32_t size;
981 	uint32_t id;
982 };
983 
984 TAILQ_HEAD(regarray_tailq, regarray);
985 
/* Register array run-time data: the 64-bit register storage and a size mask. */
struct regarray_runtime {
	uint64_t *regarray;
	uint32_t size_mask;
};
990 
991 /*
992  * Meter array.
993  */
994 struct meter_profile {
995 	TAILQ_ENTRY(meter_profile) node;
996 	char name[RTE_SWX_NAME_SIZE];
997 	struct rte_meter_trtcm_params params;
998 	struct rte_meter_trtcm_profile profile;
999 	uint32_t n_users;
1000 };
1001 
1002 TAILQ_HEAD(meter_profile_tailq, meter_profile);
1003 
1004 struct metarray {
1005 	TAILQ_ENTRY(metarray) node;
1006 	char name[RTE_SWX_NAME_SIZE];
1007 	uint32_t size;
1008 	uint32_t id;
1009 };
1010 
1011 TAILQ_HEAD(metarray_tailq, metarray);
1012 
1013 struct meter {
1014 	struct rte_meter_trtcm m;
1015 	struct meter_profile *profile;
1016 	enum rte_color color_mask;
1017 	uint8_t pad[20];
1018 
1019 	uint64_t n_pkts[RTE_COLORS];
1020 	uint64_t n_bytes[RTE_COLORS];
1021 };
1022 
/* Meter array run-time data: the meter storage and a size mask. */
struct metarray_runtime {
	struct meter *metarray;
	uint32_t size_mask;
};
1027 
1028 /*
1029  * Pipeline.
1030  */
1031 struct thread {
1032 	/* Packet. */
1033 	struct rte_swx_pkt pkt;
1034 	uint8_t *ptr;
1035 	uint32_t *mirroring_slots;
1036 	uint64_t mirroring_slots_mask;
1037 	int recirculate;
1038 	uint32_t recirc_pass_id;
1039 
1040 	/* Structures. */
1041 	uint8_t **structs;
1042 
1043 	/* Packet headers. */
1044 	struct header_runtime *headers; /* Extracted or generated headers. */
1045 	struct header_out_runtime *headers_out; /* Emitted headers. */
1046 	uint8_t *header_storage;
1047 	uint8_t *header_out_storage;
1048 	uint64_t valid_headers;
1049 	uint32_t n_headers_out;
1050 
1051 	/* Packet meta-data. */
1052 	uint8_t *metadata;
1053 
1054 	/* Tables. */
1055 	struct table_runtime *tables;
1056 	struct selector_runtime *selectors;
1057 	struct learner_runtime *learners;
1058 	struct rte_swx_table_state *table_state;
1059 	uint64_t action_id;
1060 	size_t entry_id;
1061 	int hit; /* 0 = Miss, 1 = Hit. */
1062 	uint32_t learner_id;
1063 	uint64_t time;
1064 
1065 	/* Extern objects and functions. */
1066 	struct extern_obj_runtime *extern_objs;
1067 	struct extern_func_runtime *extern_funcs;
1068 
1069 	/* Instructions. */
1070 	struct instruction *ip;
1071 	struct instruction *ret;
1072 };
1073 
/* Single-bit operations on a 64-bit mask; pos must be in the range 0 .. 63.
 * GET yields the isolated bit (non-zero when set), SET and CLR yield the
 * updated mask without modifying their argument.
 */
#define MASK64_BIT_GET(mask, pos)	((mask) & (1ULL << (pos)))
#define MASK64_BIT_SET(mask, pos)	((mask) | (1ULL << (pos)))
#define MASK64_BIT_CLR(mask, pos)	((mask) & ~(1ULL << (pos)))
1077 
/* Non-zero when the bit for header_id is set in the thread's valid_headers
 * mask.
 */
#define HEADER_VALID(thread, header_id)					\
	MASK64_BIT_GET((thread)->valid_headers, header_id)
1080 
1081 static inline uint64_t
1082 instr_operand_hbo(struct thread *t, const struct instr_operand *x)
1083 {
1084 	uint8_t *x_struct = t->structs[x->struct_id];
1085 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
1086 	uint64_t x64 = *x64_ptr;
1087 	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
1088 
1089 	return x64 & x64_mask;
1090 }
1091 
1092 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1093 
1094 static inline uint64_t
1095 instr_operand_nbo(struct thread *t, const struct instr_operand *x)
1096 {
1097 	uint8_t *x_struct = t->structs[x->struct_id];
1098 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
1099 	uint64_t x64 = *x64_ptr;
1100 
1101 	return ntoh64(x64) >> (64 - x->n_bits);
1102 }
1103 
1104 #else
1105 
1106 #define instr_operand_nbo instr_operand_hbo
1107 
1108 #endif
1109 
/* ALU operation with both operands in host byte order:
 *    dst = dst operator src
 * Each operand is the low n_bits bits of the 64-bit word read at its offset;
 * only the n_bits bits of dst are modified, the other bits of the word are
 * written back unchanged.
 */
#define ALU(thread, ip, operator)  \
{                                                                              \
	/* Destination. */                                                     \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	/* Source. */                                                          \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = *src64_ptr & src64_mask;                                \
									       \
	/* Operation and write-back. */                                        \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (result & dst64_mask) | (dst64 & ~dst64_mask);            \
}
1128 
1129 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1130 
/* ALU operation with dst in host byte order and src in network byte order
 * (little endian CPU variant):
 *    dst = dst operator src
 * The source word is byte-swapped and the operand is taken from its n_bits
 * most significant bits. Only the n_bits bits of dst are modified.
 */
#define ALU_MH(thread, ip, operator)  \
{                                                                              \
	/* Destination (HBO). */                                               \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	/* Source (NBO). */                                                    \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src = ntoh64(*src64_ptr) >> (64 - (ip)->alu.src.n_bits);      \
									       \
	/* Operation and write-back. */                                        \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (result & dst64_mask) | (dst64 & ~dst64_mask);            \
}
1148 
/* ALU operation with dst in network byte order and src in host byte order
 * (little endian CPU variant):
 *    dst = dst operator src
 * The destination is converted to host byte order for the operation, and the
 * result is converted back to network byte order before being merged into the
 * destination word.
 */
#define ALU_HM(thread, ip, operator)  \
{                                                                              \
	/* Destination (NBO). */                                               \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	/* Source (HBO). */                                                    \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = *src64_ptr & src64_mask;                                \
									       \
	/* Operation, conversion back to NBO and write-back. */                \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = result | (dst64 & ~dst64_mask);                           \
}
1168 
/*
 * Variant of ALU_HM that byte-swaps the source instead of the destination:
 * the destination operand is the low dst.n_bits of the loaded word as is, and
 * the source operand is hton64() of the low src.n_bits of the source word,
 * shifted down by (64 - dst.n_bits).
 *
 * NOTE(review): the result is OR-ed into the destination without masking, so
 * this presumably is only used with operators whose result cannot exceed the
 * destination width - confirm against the callers.
 */
#define ALU_HM_FAST(thread, ip, operator)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->alu.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->alu.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits); \
	uint64_t lhs_val = dst_word & dst_lsb_mask; \
	\
	uint8_t *src_base = (thread)->structs[(ip)->alu.src.struct_id]; \
	uint64_t *src_slot = (uint64_t *)&src_base[(ip)->alu.src.offset]; \
	uint64_t src_word = *src_slot; \
	uint64_t src_lsb_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits); \
	uint64_t rhs_val = \
		hton64(src_word & src_lsb_mask) >> (64 - (ip)->alu.dst.n_bits); \
	\
	uint64_t alu_out = lhs_val operator rhs_val; \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | alu_out; \
}
1187 
/*
 * ALU operation where both operands are byte-swapped before use (presumably
 * header destination and header source - TODO confirm against the callers).
 *
 * Each operand is ntoh64() of its 64-bit word shifted down to its own n_bits.
 * The result is shifted up by (64 - dst.n_bits), converted with hton64() and
 * OR-ed into the destination word after its low dst.n_bits have been cleared.
 */
#define ALU_HH(thread, ip, operator)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->alu.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->alu.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits); \
	uint64_t lhs_val = ntoh64(dst_word) >> (64 - (ip)->alu.dst.n_bits); \
	\
	uint8_t *src_base = (thread)->structs[(ip)->alu.src.struct_id]; \
	uint64_t *src_slot = (uint64_t *)&src_base[(ip)->alu.src.offset]; \
	uint64_t src_word = *src_slot; \
	uint64_t rhs_val = ntoh64(src_word) >> (64 - (ip)->alu.src.n_bits); \
	\
	uint64_t alu_out = lhs_val operator rhs_val; \
	alu_out = hton64(alu_out << (64 - (ip)->alu.dst.n_bits)); \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | alu_out; \
}
1206 
/*
 * Variant of ALU_HH that performs no byte swapping at all: the destination
 * operand is the low dst.n_bits of the loaded word, and the source operand is
 * the loaded source word shifted left by (64 - src.n_bits) and then right by
 * (64 - dst.n_bits).
 *
 * NOTE(review): the result is OR-ed into the destination without masking, so
 * this presumably is only used with operators whose result cannot exceed the
 * destination width - confirm against the callers.
 */
#define ALU_HH_FAST(thread, ip, operator)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->alu.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->alu.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits); \
	uint64_t lhs_val = dst_word & dst_lsb_mask; \
	\
	uint8_t *src_base = (thread)->structs[(ip)->alu.src.struct_id]; \
	uint64_t *src_slot = (uint64_t *)&src_base[(ip)->alu.src.offset]; \
	uint64_t src_word = *src_slot; \
	uint64_t rhs_val = src_word << (64 - (ip)->alu.src.n_bits); \
	rhs_val = rhs_val >> (64 - (ip)->alu.dst.n_bits); \
	\
	uint64_t alu_out = lhs_val operator rhs_val; \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | alu_out; \
}
1224 
1225 #else
1226 
/*
 * Alternate branch of the preprocessor conditional: no dedicated variants are
 * provided here, every ALU flavor simply maps to the plain ALU macro.
 */
#define ALU_MH       ALU
#define ALU_HM       ALU
#define ALU_HM_FAST  ALU
#define ALU_HH       ALU
#define ALU_HH_FAST  ALU
1232 
1233 #endif
1234 
/*
 * ALU operation with an immediate source: the destination operand is the low
 * dst.n_bits of the 64-bit word at the destination offset and the source
 * operand is (ip)->alu.src_val. The low dst.n_bits of the result replace the
 * low dst.n_bits of the destination word; its remaining bits are preserved.
 */
#define ALU_I(thread, ip, operator)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->alu.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->alu.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits); \
	uint64_t lhs_val = dst_word & dst_lsb_mask; \
	\
	uint64_t rhs_val = (ip)->alu.src_val; \
	\
	uint64_t alu_out = lhs_val operator rhs_val; \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | (alu_out & dst_lsb_mask); \
}

/* Same implementation under a second name. */
#define ALU_MI ALU_I
1251 
1252 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1253 
/*
 * Little-endian CPU flavor of the immediate-source ALU operation with a
 * byte-swapped destination: the destination operand is ntoh64() of the loaded
 * word shifted down to dst.n_bits, the source is (ip)->alu.src_val. The result
 * is shifted back up, converted with hton64() and OR-ed into the destination
 * word after its low dst.n_bits have been cleared.
 */
#define ALU_HI(thread, ip, operator)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->alu.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->alu.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits); \
	uint64_t lhs_val = ntoh64(dst_word) >> (64 - (ip)->alu.dst.n_bits); \
	\
	uint64_t rhs_val = (ip)->alu.src_val; \
	\
	uint64_t alu_out = lhs_val operator rhs_val; \
	alu_out = hton64(alu_out << (64 - (ip)->alu.dst.n_bits)); \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | alu_out; \
}
1269 
1270 #else
1271 
/* Not a little-endian CPU: no byte swapping needed, reuse ALU_I as is. */
#define ALU_HI ALU_I
1273 
1274 #endif
1275 
/*
 * Move without byte swapping: take the low src.n_bits of the 64-bit word at
 * the source offset and store their low dst.n_bits into the low dst.n_bits of
 * the 64-bit word at the destination offset, leaving the destination's other
 * bits untouched.
 */
#define MOV(thread, ip)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->mov.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->mov.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits); \
	\
	uint8_t *src_base = (thread)->structs[(ip)->mov.src.struct_id]; \
	uint64_t *src_slot = (uint64_t *)&src_base[(ip)->mov.src.offset]; \
	uint64_t src_word = *src_slot; \
	uint64_t src_lsb_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits); \
	uint64_t mov_val = src_word & src_lsb_mask; \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | (mov_val & dst_lsb_mask); \
}
1291 
1292 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1293 
/*
 * Little-endian CPU move with a byte-swapped source (presumably meta-data
 * destination, header source - TODO confirm): the source value is ntoh64() of
 * the loaded source word shifted down to src.n_bits; its low dst.n_bits
 * replace the low dst.n_bits of the destination word.
 */
#define MOV_MH(thread, ip)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->mov.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->mov.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits); \
	\
	uint8_t *src_base = (thread)->structs[(ip)->mov.src.struct_id]; \
	uint64_t *src_slot = (uint64_t *)&src_base[(ip)->mov.src.offset]; \
	uint64_t src_word = *src_slot; \
	uint64_t mov_val = ntoh64(src_word) >> (64 - (ip)->mov.src.n_bits); \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | (mov_val & dst_lsb_mask); \
}
1308 
/*
 * Little-endian CPU move with a byte-swapped result (presumably header
 * destination, meta-data source - TODO confirm): the low src.n_bits of the
 * loaded source word are converted with hton64() and shifted down by
 * (64 - dst.n_bits); the outcome is OR-ed into the destination word after its
 * low dst.n_bits have been cleared.
 */
#define MOV_HM(thread, ip)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->mov.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->mov.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits); \
	\
	uint8_t *src_base = (thread)->structs[(ip)->mov.src.struct_id]; \
	uint64_t *src_slot = (uint64_t *)&src_base[(ip)->mov.src.offset]; \
	uint64_t src_word = *src_slot; \
	uint64_t src_lsb_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits); \
	uint64_t mov_val = src_word & src_lsb_mask; \
	\
	mov_val = hton64(mov_val) >> (64 - (ip)->mov.dst.n_bits); \
	*dst_slot = (dst_word & ~dst_lsb_mask) | mov_val; \
}
1325 
/*
 * Little-endian CPU move that needs no byte swapping on either side: the
 * loaded source word is shifted left by (64 - src.n_bits), then right by
 * (64 - dst.n_bits), and the outcome is OR-ed into the destination word after
 * its low dst.n_bits have been cleared.
 */
#define MOV_HH(thread, ip)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->mov.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->mov.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits); \
	\
	uint8_t *src_base = (thread)->structs[(ip)->mov.src.struct_id]; \
	uint64_t *src_slot = (uint64_t *)&src_base[(ip)->mov.src.offset]; \
	uint64_t src_word = *src_slot; \
	\
	uint64_t mov_val = \
		(src_word << (64 - (ip)->mov.src.n_bits)) >> (64 - (ip)->mov.dst.n_bits); \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | mov_val; \
}
1341 
1342 #else
1343 
/* Not a little-endian CPU: all move flavors collapse onto the plain MOV. */
#define MOV_MH  MOV
#define MOV_HM  MOV
#define MOV_HH  MOV
1347 
1348 #endif
1349 
/*
 * Move of an immediate value: the low dst.n_bits of (ip)->mov.src_val replace
 * the low dst.n_bits of the 64-bit word at the destination offset; the other
 * bits of that word are preserved.
 */
#define MOV_I(thread, ip)  \
{ \
	uint8_t *dst_base = (thread)->structs[(ip)->mov.dst.struct_id]; \
	uint64_t *dst_slot = (uint64_t *)&dst_base[(ip)->mov.dst.offset]; \
	uint64_t dst_word = *dst_slot; \
	uint64_t dst_lsb_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits); \
	\
	uint64_t mov_val = (ip)->mov.src_val; \
	\
	*dst_slot = (dst_word & ~dst_lsb_mask) | (mov_val & dst_lsb_mask); \
}
1361 
1362 #define JMP_CMP(thread, ip, operator)  \
1363 {                                                                              \
1364 	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
1365 	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
1366 	uint64_t a64 = *a64_ptr;                                               \
1367 	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
1368 	uint64_t a = a64 & a64_mask;                                           \
1369 									       \
1370 	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
1371 	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
1372 	uint64_t b64 = *b64_ptr;                                               \
1373 	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
1374 	uint64_t b = b64 & b64_mask;                                           \
1375 									       \
1376 	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
1377 }
1378 
1379 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1380 
/*
 * Little-endian CPU conditional jump where only operand "b" is byte-swapped
 * (presumably meta-data "a", header "b" - TODO confirm): "a" is the low
 * a.n_bits of its loaded word, "b" is ntoh64() of its loaded word shifted down
 * to b.n_bits. Jumps to (ip)->jmp.ip when (a operator b) holds, otherwise
 * moves to the next instruction.
 */
#define JMP_CMP_MH(thread, ip, operator)  \
{ \
	uint8_t *a_base = (thread)->structs[(ip)->jmp.a.struct_id]; \
	uint64_t *a_slot = (uint64_t *)&a_base[(ip)->jmp.a.offset]; \
	uint64_t a_word = *a_slot; \
	uint64_t a_lsb_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits); \
	uint64_t lhs_val = a_word & a_lsb_mask; \
	\
	uint8_t *b_base = (thread)->structs[(ip)->jmp.b.struct_id]; \
	uint64_t *b_slot = (uint64_t *)&b_base[(ip)->jmp.b.offset]; \
	uint64_t b_word = *b_slot; \
	uint64_t rhs_val = ntoh64(b_word) >> (64 - (ip)->jmp.b.n_bits); \
	\
	(thread)->ip = (lhs_val operator rhs_val) ? (ip)->jmp.ip : ((thread)->ip + 1); \
}
1396 
/*
 * Little-endian CPU conditional jump where only operand "a" is byte-swapped
 * (presumably header "a", meta-data "b" - TODO confirm): "a" is ntoh64() of
 * its loaded word shifted down to a.n_bits, "b" is the low b.n_bits of its
 * loaded word. Jumps to (ip)->jmp.ip when (a operator b) holds, otherwise
 * moves to the next instruction.
 */
#define JMP_CMP_HM(thread, ip, operator)  \
{ \
	uint8_t *a_base = (thread)->structs[(ip)->jmp.a.struct_id]; \
	uint64_t *a_slot = (uint64_t *)&a_base[(ip)->jmp.a.offset]; \
	uint64_t a_word = *a_slot; \
	uint64_t lhs_val = ntoh64(a_word) >> (64 - (ip)->jmp.a.n_bits); \
	\
	uint8_t *b_base = (thread)->structs[(ip)->jmp.b.struct_id]; \
	uint64_t *b_slot = (uint64_t *)&b_base[(ip)->jmp.b.offset]; \
	uint64_t b_word = *b_slot; \
	uint64_t b_lsb_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits); \
	uint64_t rhs_val = b_word & b_lsb_mask; \
	\
	(thread)->ip = (lhs_val operator rhs_val) ? (ip)->jmp.ip : ((thread)->ip + 1); \
}
1412 
/*
 * Little-endian CPU conditional jump where both operands are byte-swapped:
 * each one is ntoh64() of its loaded 64-bit word shifted down to its own
 * n_bits. Jumps to (ip)->jmp.ip when (a operator b) holds, otherwise moves to
 * the next instruction.
 */
#define JMP_CMP_HH(thread, ip, operator)  \
{ \
	uint8_t *a_base = (thread)->structs[(ip)->jmp.a.struct_id]; \
	uint64_t *a_slot = (uint64_t *)&a_base[(ip)->jmp.a.offset]; \
	uint64_t a_word = *a_slot; \
	uint64_t lhs_val = ntoh64(a_word) >> (64 - (ip)->jmp.a.n_bits); \
	\
	uint8_t *b_base = (thread)->structs[(ip)->jmp.b.struct_id]; \
	uint64_t *b_slot = (uint64_t *)&b_base[(ip)->jmp.b.offset]; \
	uint64_t b_word = *b_slot; \
	uint64_t rhs_val = ntoh64(b_word) >> (64 - (ip)->jmp.b.n_bits); \
	\
	(thread)->ip = (lhs_val operator rhs_val) ? (ip)->jmp.ip : ((thread)->ip + 1); \
}
1427 
1428 #define JMP_CMP_HH_FAST(thread, ip, operator)  \
1429 {                                                                              \
1430 	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
1431 	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
1432 	uint64_t a64 = *a64_ptr;                                               \
1433 	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
1434 									       \
1435 	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
1436 	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
1437 	uint64_t b64 = *b64_ptr;                                               \
1438 	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
1439 									       \
1440 	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
1441 }
1442 
1443 #else
1444 
/* Not a little-endian CPU: every compare-and-jump flavor maps to JMP_CMP. */
#define JMP_CMP_MH       JMP_CMP
#define JMP_CMP_HM       JMP_CMP
#define JMP_CMP_HH       JMP_CMP
#define JMP_CMP_HH_FAST  JMP_CMP
1449 
1450 #endif
1451 
1452 #define JMP_CMP_I(thread, ip, operator)  \
1453 {                                                                              \
1454 	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
1455 	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
1456 	uint64_t a64 = *a64_ptr;                                               \
1457 	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
1458 	uint64_t a = a64 & a64_mask;                                           \
1459 									       \
1460 	uint64_t b = (ip)->jmp.b_val;                                          \
1461 									       \
1462 	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
1463 }
1464 
1465 #define JMP_CMP_MI JMP_CMP_I
1466 
1467 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1468 
/*
 * Little-endian CPU conditional jump against an immediate with a byte-swapped
 * operand "a": "a" is ntoh64() of its loaded word shifted down to a.n_bits,
 * "b" is (ip)->jmp.b_val. Jumps to (ip)->jmp.ip when (a operator b) holds,
 * otherwise moves to the next instruction.
 */
#define JMP_CMP_HI(thread, ip, operator)  \
{ \
	uint8_t *a_base = (thread)->structs[(ip)->jmp.a.struct_id]; \
	uint64_t *a_slot = (uint64_t *)&a_base[(ip)->jmp.a.offset]; \
	uint64_t a_word = *a_slot; \
	uint64_t lhs_val = ntoh64(a_word) >> (64 - (ip)->jmp.a.n_bits); \
	\
	uint64_t rhs_val = (ip)->jmp.b_val; \
	\
	(thread)->ip = (lhs_val operator rhs_val) ? (ip)->jmp.ip : ((thread)->ip + 1); \
}
1480 
1481 #else
1482 
/* Not a little-endian CPU: no byte swapping needed, reuse JMP_CMP_I as is. */
#define JMP_CMP_HI JMP_CMP_I
1484 
1485 #endif
1486 
/*
 * Read a field from the thread meta-data: evaluates to the low n_bits of the
 * 64-bit word located at byte position "offset" of (thread)->metadata.
 * Implemented as a statement expression so it can be used as a value.
 */
#define METADATA_READ(thread, offset, n_bits) \
({ \
	uint64_t *md_slot = (uint64_t *)&(thread)->metadata[offset]; \
	uint64_t md_word = *md_slot; \
	uint64_t md_lsb_mask = UINT64_MAX >> (64 - (n_bits)); \
	\
	(md_word & md_lsb_mask); \
})
1494 
/*
 * Write a field of the thread meta-data: the low n_bits of "value" replace the
 * low n_bits of the 64-bit word located at byte position "offset" of
 * (thread)->metadata; the remaining bits of that word are preserved.
 */
#define METADATA_WRITE(thread, offset, n_bits, value) \
{ \
	uint64_t *md_slot = (uint64_t *)&(thread)->metadata[offset]; \
	uint64_t md_word = *md_slot; \
	uint64_t md_lsb_mask = UINT64_MAX >> (64 - (n_bits)); \
	\
	uint64_t md_value = (value); \
	\
	*md_slot = (md_word & ~md_lsb_mask) | (md_value & md_lsb_mask); \
}
1505 
/*
 * Number of entries of the per-pipeline thread array, unless already defined.
 * thread_yield() wraps the thread index with (value - 1) as a bit mask, so
 * an overriding value has to be a power of two.
 */
#if !defined(RTE_SWX_PIPELINE_THREADS_MAX)
#define RTE_SWX_PIPELINE_THREADS_MAX 16
#endif
1509 
/* Default upper bound for the instruction table size, unless already defined. */
#if !defined(RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX)
#define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 1024
#endif
1513 
1514 struct rte_swx_pipeline {
1515 	char name[RTE_SWX_NAME_SIZE];
1516 
1517 	struct struct_type_tailq struct_types;
1518 	struct port_in_type_tailq port_in_types;
1519 	struct port_in_tailq ports_in;
1520 	struct port_out_type_tailq port_out_types;
1521 	struct port_out_tailq ports_out;
1522 	struct extern_type_tailq extern_types;
1523 	struct extern_obj_tailq extern_objs;
1524 	struct extern_func_tailq extern_funcs;
1525 	struct hash_func_tailq hash_funcs;
1526 	struct rss_tailq rss;
1527 	struct header_tailq headers;
1528 	struct struct_type *metadata_st;
1529 	uint32_t metadata_struct_id;
1530 	struct action_tailq actions;
1531 	struct table_type_tailq table_types;
1532 	struct table_tailq tables;
1533 	struct selector_tailq selectors;
1534 	struct learner_tailq learners;
1535 	struct regarray_tailq regarrays;
1536 	struct meter_profile_tailq meter_profiles;
1537 	struct metarray_tailq metarrays;
1538 
1539 	struct port_in_runtime *in;
1540 	struct port_out_runtime *out;
1541 	struct mirroring_session *mirroring_sessions;
1542 	struct instruction **action_instructions;
1543 	action_func_t *action_funcs;
1544 	struct rte_swx_table_state *table_state;
1545 	struct table_statistics *table_stats;
1546 	struct selector_statistics *selector_stats;
1547 	struct learner_statistics *learner_stats;
1548 	struct hash_func_runtime *hash_func_runtime;
1549 	struct rss_runtime **rss_runtime;
1550 	struct regarray_runtime *regarray_runtime;
1551 	struct metarray_runtime *metarray_runtime;
1552 	struct instruction *instructions;
1553 	struct instruction_data *instruction_data;
1554 	instr_exec_t *instruction_table;
1555 	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
1556 	void *lib;
1557 
1558 	uint32_t n_structs;
1559 	uint32_t n_ports_in;
1560 	uint32_t n_ports_out;
1561 	uint32_t n_mirroring_slots;
1562 	uint32_t n_mirroring_sessions;
1563 	uint32_t n_extern_objs;
1564 	uint32_t n_extern_funcs;
1565 	uint32_t n_hash_funcs;
1566 	uint32_t n_rss;
1567 	uint32_t n_actions;
1568 	uint32_t n_tables;
1569 	uint32_t n_selectors;
1570 	uint32_t n_learners;
1571 	uint32_t n_regarrays;
1572 	uint32_t n_metarrays;
1573 	uint32_t n_headers;
1574 	uint32_t thread_id;
1575 	uint32_t port_id;
1576 	uint32_t n_instructions;
1577 	int build_done;
1578 	int numa_node;
1579 };
1580 
1581 /*
1582  * Instruction.
1583  */
1584 static inline void
1585 pipeline_port_inc(struct rte_swx_pipeline *p)
1586 {
1587 	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
1588 }
1589 
1590 static inline void
1591 thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
1592 {
1593 	t->ip = p->instructions;
1594 }
1595 
1596 static inline void
1597 thread_ip_set(struct thread *t, struct instruction *ip)
1598 {
1599 	t->ip = ip;
1600 }
1601 
1602 static inline void
1603 thread_ip_action_call(struct rte_swx_pipeline *p,
1604 		      struct thread *t,
1605 		      uint32_t action_id)
1606 {
1607 	t->ret = t->ip + 1;
1608 	t->ip = p->action_instructions[action_id];
1609 }
1610 
1611 static inline void
1612 thread_ip_inc(struct rte_swx_pipeline *p);
1613 
1614 static inline void
1615 thread_ip_inc(struct rte_swx_pipeline *p)
1616 {
1617 	struct thread *t = &p->threads[p->thread_id];
1618 
1619 	t->ip++;
1620 }
1621 
1622 static inline void
1623 thread_ip_inc_cond(struct thread *t, int cond)
1624 {
1625 	t->ip += cond;
1626 }
1627 
1628 static inline void
1629 thread_yield(struct rte_swx_pipeline *p)
1630 {
1631 	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
1632 }
1633 
1634 static inline void
1635 thread_yield_cond(struct rte_swx_pipeline *p, int cond)
1636 {
1637 	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
1638 }
1639 
1640 /*
1641  * rx.
1642  */
1643 static inline int
1644 __instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
1645 {
1646 	struct port_in_runtime *port = &p->in[p->port_id];
1647 	struct rte_swx_pkt *pkt = &t->pkt;
1648 	int pkt_received;
1649 
1650 	/* Recirculation: keep the current packet. */
1651 	if (t->recirculate) {
1652 		TRACE("[Thread %2u] rx - recirculate (pass %u)\n",
1653 		      p->thread_id,
1654 		      t->recirc_pass_id + 1);
1655 
1656 		/* Packet. */
1657 		t->ptr = &pkt->pkt[pkt->offset];
1658 		t->mirroring_slots_mask = 0;
1659 		t->recirculate = 0;
1660 		t->recirc_pass_id++;
1661 
1662 		/* Headers. */
1663 		t->valid_headers = 0;
1664 		t->n_headers_out = 0;
1665 
1666 		/* Tables. */
1667 		t->table_state = p->table_state;
1668 
1669 		return 1;
1670 	}
1671 
1672 	/* Packet. */
1673 	pkt_received = port->pkt_rx(port->obj, pkt);
1674 	t->ptr = &pkt->pkt[pkt->offset];
1675 	rte_prefetch0(t->ptr);
1676 
1677 	TRACE("[Thread %2u] rx %s from port %u\n",
1678 	      p->thread_id,
1679 	      pkt_received ? "1 pkt" : "0 pkts",
1680 	      p->port_id);
1681 
1682 	t->mirroring_slots_mask = 0;
1683 	t->recirc_pass_id = 0;
1684 
1685 	/* Headers. */
1686 	t->valid_headers = 0;
1687 	t->n_headers_out = 0;
1688 
1689 	/* Meta-data. */
1690 	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);
1691 
1692 	/* Tables. */
1693 	t->table_state = p->table_state;
1694 
1695 	/* Thread. */
1696 	pipeline_port_inc(p);
1697 
1698 	return pkt_received;
1699 }
1700 
1701 static inline void
1702 instr_rx_exec(struct rte_swx_pipeline *p)
1703 {
1704 	struct thread *t = &p->threads[p->thread_id];
1705 	struct instruction *ip = t->ip;
1706 	int pkt_received;
1707 
1708 	/* Packet. */
1709 	pkt_received = __instr_rx_exec(p, t, ip);
1710 
1711 	/* Thread. */
1712 	thread_ip_inc_cond(t, pkt_received);
1713 	thread_yield(p);
1714 }
1715 
1716 /*
1717  * tx.
1718  */
1719 static inline void
1720 emit_handler(struct thread *t)
1721 {
1722 	struct header_out_runtime *h0 = &t->headers_out[0];
1723 	struct header_out_runtime *h1 = &t->headers_out[1];
1724 	uint32_t offset = 0, i;
1725 
1726 	/* No header change or header decapsulation. */
1727 	if ((t->n_headers_out == 1) &&
1728 	    (h0->ptr + h0->n_bytes == t->ptr)) {
1729 		TRACE("Emit handler: no header change or header decap.\n");
1730 
1731 		t->pkt.offset -= h0->n_bytes;
1732 		t->pkt.length += h0->n_bytes;
1733 
1734 		return;
1735 	}
1736 
1737 	/* Header encapsulation (optionally, with prior header decapsulation). */
1738 	if ((t->n_headers_out == 2) &&
1739 	    (h1->ptr + h1->n_bytes == t->ptr) &&
1740 	    (h0->ptr == h0->ptr0)) {
1741 		uint32_t offset;
1742 
1743 		TRACE("Emit handler: header encapsulation.\n");
1744 
1745 		offset = h0->n_bytes + h1->n_bytes;
1746 		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
1747 		t->pkt.offset -= offset;
1748 		t->pkt.length += offset;
1749 
1750 		return;
1751 	}
1752 
1753 	/* For any other case. */
1754 	TRACE("Emit handler: complex case.\n");
1755 
1756 	for (i = 0; i < t->n_headers_out; i++) {
1757 		struct header_out_runtime *h = &t->headers_out[i];
1758 
1759 		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
1760 		offset += h->n_bytes;
1761 	}
1762 
1763 	if (offset) {
1764 		memcpy(t->ptr - offset, t->header_out_storage, offset);
1765 		t->pkt.offset -= offset;
1766 		t->pkt.length += offset;
1767 	}
1768 }
1769 
1770 static inline void
1771 mirroring_handler(struct rte_swx_pipeline *p, struct thread *t, struct rte_swx_pkt *pkt)
1772 {
1773 	uint64_t slots_mask = t->mirroring_slots_mask, slot_mask;
1774 	uint32_t slot_id;
1775 
1776 	for (slot_id = 0, slot_mask = 1LLU ; slots_mask; slot_id++, slot_mask <<= 1)
1777 		if (slot_mask & slots_mask) {
1778 			struct port_out_runtime *port;
1779 			struct mirroring_session *session;
1780 			uint32_t port_id, session_id;
1781 
1782 			session_id = t->mirroring_slots[slot_id];
1783 			session = &p->mirroring_sessions[session_id];
1784 
1785 			port_id = session->port_id;
1786 			port = &p->out[port_id];
1787 
1788 			if (session->fast_clone)
1789 				port->pkt_fast_clone_tx(port->obj, pkt);
1790 			else
1791 				port->pkt_clone_tx(port->obj, pkt, session->truncation_length);
1792 
1793 			slots_mask &= ~slot_mask;
1794 		}
1795 }
1796 
1797 static inline void
1798 __instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
1799 {
1800 	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
1801 	struct port_out_runtime *port = &p->out[port_id];
1802 	struct rte_swx_pkt *pkt = &t->pkt;
1803 
1804 	/* Recirculation: keep the current packet. */
1805 	if (t->recirculate) {
1806 		TRACE("[Thread %2u]: tx 1 pkt - recirculate\n",
1807 		      p->thread_id);
1808 
1809 		/* Headers. */
1810 		emit_handler(t);
1811 
1812 		/* Packet. */
1813 		mirroring_handler(p, t, pkt);
1814 
1815 		return;
1816 	}
1817 
1818 	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
1819 	      p->thread_id,
1820 	      (uint32_t)port_id);
1821 
1822 	/* Headers. */
1823 	emit_handler(t);
1824 
1825 	/* Packet. */
1826 	mirroring_handler(p, t, pkt);
1827 	port->pkt_tx(port->obj, pkt);
1828 }
1829 
1830 static inline void
1831 __instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
1832 {
1833 	uint64_t port_id = ip->io.io.val;
1834 	struct port_out_runtime *port = &p->out[port_id];
1835 	struct rte_swx_pkt *pkt = &t->pkt;
1836 
1837 	/* Recirculation: keep the current packet. */
1838 	if (t->recirculate) {
1839 		TRACE("[Thread %2u]: tx (i) 1 pkt - recirculate\n",
1840 		      p->thread_id);
1841 
1842 		/* Headers. */
1843 		emit_handler(t);
1844 
1845 		/* Packet. */
1846 		mirroring_handler(p, t, pkt);
1847 
1848 		return;
1849 	}
1850 
1851 	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
1852 	      p->thread_id,
1853 	      (uint32_t)port_id);
1854 
1855 	/* Headers. */
1856 	emit_handler(t);
1857 
1858 	/* Packet. */
1859 	mirroring_handler(p, t, pkt);
1860 	port->pkt_tx(port->obj, pkt);
1861 }
1862 
1863 static inline void
1864 __instr_drop_exec(struct rte_swx_pipeline *p,
1865 		  struct thread *t,
1866 		  const struct instruction *ip __rte_unused)
1867 {
1868 	uint64_t port_id = p->n_ports_out - 1;
1869 	struct port_out_runtime *port = &p->out[port_id];
1870 	struct rte_swx_pkt *pkt = &t->pkt;
1871 
1872 	TRACE("[Thread %2u]: drop 1 pkt\n",
1873 	      p->thread_id);
1874 
1875 	/* Headers. */
1876 	emit_handler(t);
1877 
1878 	/* Packet. */
1879 	mirroring_handler(p, t, pkt);
1880 	port->pkt_tx(port->obj, pkt);
1881 }
1882 
1883 static inline void
1884 __instr_mirror_exec(struct rte_swx_pipeline *p,
1885 		    struct thread *t,
1886 		    const struct instruction *ip)
1887 {
1888 	uint64_t slot_id = instr_operand_hbo(t, &ip->mirror.dst);
1889 	uint64_t session_id = instr_operand_hbo(t, &ip->mirror.src);
1890 
1891 	slot_id &= p->n_mirroring_slots - 1;
1892 	session_id &= p->n_mirroring_sessions - 1;
1893 
1894 	TRACE("[Thread %2u]: mirror pkt (slot = %u, session = %u)\n",
1895 	      p->thread_id,
1896 	      (uint32_t)slot_id,
1897 	      (uint32_t)session_id);
1898 
1899 	t->mirroring_slots[slot_id] = session_id;
1900 	t->mirroring_slots_mask |= 1LLU << slot_id;
1901 }
1902 
1903 static inline void
1904 __instr_recirculate_exec(struct rte_swx_pipeline *p __rte_unused,
1905 			 struct thread *t,
1906 			 const struct instruction *ip __rte_unused)
1907 {
1908 	TRACE("[Thread %2u]: recirculate\n",
1909 	      p->thread_id);
1910 
1911 	t->recirculate = 1;
1912 }
1913 
1914 static inline void
1915 __instr_recircid_exec(struct rte_swx_pipeline *p __rte_unused,
1916 		      struct thread *t,
1917 		      const struct instruction *ip)
1918 {
1919 	TRACE("[Thread %2u]: recircid (pass %u)\n",
1920 	      p->thread_id,
1921 	      t->recirc_pass_id);
1922 
1923 	/* Meta-data. */
1924 	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, t->recirc_pass_id);
1925 }
1926 
1927 /*
1928  * extract.
1929  */
1930 static inline void
1931 __instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
1932 			      struct thread *t,
1933 			      const struct instruction *ip,
1934 			      uint32_t n_extract)
1935 {
1936 	uint64_t valid_headers = t->valid_headers;
1937 	uint8_t *ptr = t->ptr;
1938 	uint32_t offset = t->pkt.offset;
1939 	uint32_t length = t->pkt.length;
1940 	uint32_t i;
1941 
1942 	for (i = 0; i < n_extract; i++) {
1943 		uint32_t header_id = ip->io.hdr.header_id[i];
1944 		uint32_t struct_id = ip->io.hdr.struct_id[i];
1945 		uint32_t n_bytes = ip->io.hdr.n_bytes[i];
1946 
1947 		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
1948 		      p->thread_id,
1949 		      header_id,
1950 		      n_bytes);
1951 
1952 		/* Headers. */
1953 		t->structs[struct_id] = ptr;
1954 		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
1955 
1956 		/* Packet. */
1957 		offset += n_bytes;
1958 		length -= n_bytes;
1959 		ptr += n_bytes;
1960 	}
1961 
1962 	/* Headers. */
1963 	t->valid_headers = valid_headers;
1964 
1965 	/* Packet. */
1966 	t->pkt.offset = offset;
1967 	t->pkt.length = length;
1968 	t->ptr = ptr;
1969 }
1970 
/* Extract a single header: the generic extract routine with a count of 1. */
static inline void
__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	const uint32_t n_extract = 1;

	__instr_hdr_extract_many_exec(p, t, ip, n_extract);
}
1978 
1979 static inline void
1980 __instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
1981 			  struct thread *t,
1982 			  const struct instruction *ip)
1983 {
1984 	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
1985 
1986 	__instr_hdr_extract_many_exec(p, t, ip, 2);
1987 }
1988 
1989 static inline void
1990 __instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
1991 			  struct thread *t,
1992 			  const struct instruction *ip)
1993 {
1994 	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
1995 
1996 	__instr_hdr_extract_many_exec(p, t, ip, 3);
1997 }
1998 
1999 static inline void
2000 __instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
2001 			  struct thread *t,
2002 			  const struct instruction *ip)
2003 {
2004 	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
2005 
2006 	__instr_hdr_extract_many_exec(p, t, ip, 4);
2007 }
2008 
2009 static inline void
2010 __instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
2011 			  struct thread *t,
2012 			  const struct instruction *ip)
2013 {
2014 	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
2015 
2016 	__instr_hdr_extract_many_exec(p, t, ip, 5);
2017 }
2018 
2019 static inline void
2020 __instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
2021 			  struct thread *t,
2022 			  const struct instruction *ip)
2023 {
2024 	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
2025 
2026 	__instr_hdr_extract_many_exec(p, t, ip, 6);
2027 }
2028 
2029 static inline void
2030 __instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
2031 			  struct thread *t,
2032 			  const struct instruction *ip)
2033 {
2034 	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
2035 
2036 	__instr_hdr_extract_many_exec(p, t, ip, 7);
2037 }
2038 
2039 static inline void
2040 __instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
2041 			  struct thread *t,
2042 			  const struct instruction *ip)
2043 {
2044 	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
2045 
2046 	__instr_hdr_extract_many_exec(p, t, ip, 8);
2047 }
2048 
2049 static inline void
2050 __instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
2051 			   struct thread *t,
2052 			   const struct instruction *ip)
2053 {
2054 	uint64_t valid_headers = t->valid_headers;
2055 	uint8_t *ptr = t->ptr;
2056 	uint32_t offset = t->pkt.offset;
2057 	uint32_t length = t->pkt.length;
2058 
2059 	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
2060 	uint32_t header_id = ip->io.hdr.header_id[0];
2061 	uint32_t struct_id = ip->io.hdr.struct_id[0];
2062 	uint32_t n_bytes = ip->io.hdr.n_bytes[0];
2063 
2064 	struct header_runtime *h = &t->headers[header_id];
2065 
2066 	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
2067 	      p->thread_id,
2068 	      header_id,
2069 	      n_bytes,
2070 	      n_bytes_last);
2071 
2072 	n_bytes += n_bytes_last;
2073 
2074 	/* Headers. */
2075 	t->structs[struct_id] = ptr;
2076 	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
2077 	h->n_bytes = n_bytes;
2078 
2079 	/* Packet. */
2080 	t->pkt.offset = offset + n_bytes;
2081 	t->pkt.length = length - n_bytes;
2082 	t->ptr = ptr + n_bytes;
2083 }
2084 
2085 static inline void
2086 __instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
2087 			   struct thread *t,
2088 			   const struct instruction *ip)
2089 {
2090 	uint64_t valid_headers = t->valid_headers;
2091 	uint8_t *ptr = t->ptr;
2092 
2093 	uint32_t header_id = ip->io.hdr.header_id[0];
2094 	uint32_t struct_id = ip->io.hdr.struct_id[0];
2095 
2096 	TRACE("[Thread %2u]: lookahead header %u\n",
2097 	      p->thread_id,
2098 	      header_id);
2099 
2100 	/* Headers. */
2101 	t->structs[struct_id] = ptr;
2102 	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
2103 }
2104 
2105 /*
2106  * emit.
2107  */
/*
 * Emit up to n_emit headers into the thread's list of output header segments (t->headers_out).
 *
 * Headers that are not currently valid are skipped. A valid header whose data starts exactly where
 * the current last output segment ends is merged into that segment (the segment byte count grows);
 * otherwise a new output segment is opened for it. The byte count of the segment being built is
 * kept in a local (ho_nbytes) and only stored into the segment when the segment is closed or when
 * the function returns.
 */
static inline void
__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip,
			   uint32_t n_emit)
{
	uint64_t valid_headers = t->valid_headers;
	uint32_t n_headers_out = t->n_headers_out;
	/* ho stays NULL until the first valid header is met in this call. */
	struct header_out_runtime *ho = NULL;
	uint8_t *ho_ptr = NULL;
	uint32_t ho_nbytes = 0, i;

	for (i = 0; i < n_emit; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];

		struct header_runtime *hi = &t->headers[header_id];
		uint8_t *hi_ptr0 = hi->ptr0;
		uint32_t n_bytes = hi->n_bytes;

		uint8_t *hi_ptr = t->structs[struct_id];

		if (!MASK64_BIT_GET(valid_headers, header_id)) {
			TRACE("[Thread %2u]: emit header %u (invalid)\n",
			      p->thread_id,
			      header_id);

			continue;
		}

		TRACE("[Thread %2u]: emit header %u (valid)\n",
		      p->thread_id,
		      header_id);

		/* Headers. */
		if (!ho) {
			if (!n_headers_out) {
				/* No output segment exists yet: this header opens segment 0. */
				ho = &t->headers_out[0];

				ho->ptr0 = hi_ptr0;
				ho->ptr = hi_ptr;

				ho_ptr = hi_ptr;
				ho_nbytes = n_bytes;

				n_headers_out = 1;

				continue;
			} else {
				/* Resume from the last existing output segment, then fall through to
				 * the contiguity check below for the current header.
				 */
				ho = &t->headers_out[n_headers_out - 1];

				ho_ptr = ho->ptr;
				ho_nbytes = ho->n_bytes;
			}
		}

		if (ho_ptr + ho_nbytes == hi_ptr) {
			/* Header is contiguous with the current segment: extend the segment. */
			ho_nbytes += n_bytes;
		} else {
			/* Close the current segment and open a new one for this header. */
			ho->n_bytes = ho_nbytes;

			ho++;
			ho->ptr0 = hi_ptr0;
			ho->ptr = hi_ptr;

			ho_ptr = hi_ptr;
			ho_nbytes = n_bytes;

			n_headers_out++;
		}
	}

	/* Store the byte count of the segment still being built, if any header was emitted. */
	if (ho)
		ho->n_bytes = ho_nbytes;
	t->n_headers_out = n_headers_out;
}
2184 
/* Emit a single header: the one-header case of __instr_hdr_emit_many_exec(). */
static inline void
__instr_hdr_emit_exec(struct rte_swx_pipeline *p, struct thread *t,
		      const struct instruction *ip)
{
	__instr_hdr_emit_many_exec(p, t, ip, 1);
}
2192 
2193 static inline void
2194 __instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
2195 			 struct thread *t,
2196 			 const struct instruction *ip)
2197 {
2198 	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
2199 
2200 	__instr_hdr_emit_many_exec(p, t, ip, 1);
2201 	__instr_tx_exec(p, t, ip);
2202 }
2203 
2204 static inline void
2205 __instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
2206 			  struct thread *t,
2207 			  const struct instruction *ip)
2208 {
2209 	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
2210 
2211 	__instr_hdr_emit_many_exec(p, t, ip, 2);
2212 	__instr_tx_exec(p, t, ip);
2213 }
2214 
2215 static inline void
2216 __instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
2217 			  struct thread *t,
2218 			  const struct instruction *ip)
2219 {
2220 	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
2221 
2222 	__instr_hdr_emit_many_exec(p, t, ip, 3);
2223 	__instr_tx_exec(p, t, ip);
2224 }
2225 
2226 static inline void
2227 __instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
2228 			  struct thread *t,
2229 			  const struct instruction *ip)
2230 {
2231 	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
2232 
2233 	__instr_hdr_emit_many_exec(p, t, ip, 4);
2234 	__instr_tx_exec(p, t, ip);
2235 }
2236 
2237 static inline void
2238 __instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
2239 			  struct thread *t,
2240 			  const struct instruction *ip)
2241 {
2242 	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
2243 
2244 	__instr_hdr_emit_many_exec(p, t, ip, 5);
2245 	__instr_tx_exec(p, t, ip);
2246 }
2247 
2248 static inline void
2249 __instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
2250 			  struct thread *t,
2251 			  const struct instruction *ip)
2252 {
2253 	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
2254 
2255 	__instr_hdr_emit_many_exec(p, t, ip, 6);
2256 	__instr_tx_exec(p, t, ip);
2257 }
2258 
2259 static inline void
2260 __instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
2261 			  struct thread *t,
2262 			  const struct instruction *ip)
2263 {
2264 	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
2265 
2266 	__instr_hdr_emit_many_exec(p, t, ip, 7);
2267 	__instr_tx_exec(p, t, ip);
2268 }
2269 
2270 static inline void
2271 __instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
2272 			  struct thread *t,
2273 			  const struct instruction *ip)
2274 {
2275 	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);
2276 
2277 	__instr_hdr_emit_many_exec(p, t, ip, 8);
2278 	__instr_tx_exec(p, t, ip);
2279 }
2280 
2281 /*
2282  * validate.
2283  */
2284 static inline void
2285 __instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
2286 			  struct thread *t,
2287 			  const struct instruction *ip)
2288 {
2289 	uint32_t header_id = ip->valid.header_id;
2290 	uint32_t struct_id = ip->valid.struct_id;
2291 	uint64_t valid_headers = t->valid_headers;
2292 	struct header_runtime *h = &t->headers[header_id];
2293 
2294 	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
2295 
2296 	/* If this header is already valid, then its associated t->structs[] element is also valid
2297 	 * and therefore it should not be modified. It could point to the packet buffer (in case of
2298 	 * extracted header) and setting it to the default location (h->ptr0) would be incorrect.
2299 	 */
2300 	if (MASK64_BIT_GET(valid_headers, header_id))
2301 		return;
2302 
2303 	/* Headers. */
2304 	t->structs[struct_id] = h->ptr0;
2305 	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
2306 }
2307 
2308 /*
2309  * invalidate.
2310  */
2311 static inline void
2312 __instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
2313 			    struct thread *t,
2314 			    const struct instruction *ip)
2315 {
2316 	uint32_t header_id = ip->valid.header_id;
2317 
2318 	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
2319 
2320 	/* Headers. */
2321 	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
2322 }
2323 
2324 /*
2325  * learn.
2326  */
2327 static inline void
2328 __instr_learn_exec(struct rte_swx_pipeline *p,
2329 		   struct thread *t,
2330 		   const struct instruction *ip)
2331 {
2332 	uint64_t action_id = ip->learn.action_id;
2333 	uint32_t mf_first_arg_offset = ip->learn.mf_first_arg_offset;
2334 	uint32_t timeout_id = METADATA_READ(t, ip->learn.mf_timeout_id_offset,
2335 		ip->learn.mf_timeout_id_n_bits);
2336 	uint32_t learner_id = t->learner_id;
2337 	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
2338 		p->n_selectors + learner_id];
2339 	struct learner_runtime *l = &t->learners[learner_id];
2340 	struct learner_statistics *stats = &p->learner_stats[learner_id];
2341 	uint32_t status;
2342 
2343 	/* Table. */
2344 	status = rte_swx_table_learner_add(ts->obj,
2345 					   l->mailbox,
2346 					   t->time,
2347 					   action_id,
2348 					   &t->metadata[mf_first_arg_offset],
2349 					   timeout_id);
2350 
2351 	TRACE("[Thread %2u] learner %u learn %s\n",
2352 	      p->thread_id,
2353 	      learner_id,
2354 	      status ? "ok" : "error");
2355 
2356 	stats->n_pkts_learn[status] += 1;
2357 }
2358 
2359 /*
2360  * rearm.
2361  */
2362 static inline void
2363 __instr_rearm_exec(struct rte_swx_pipeline *p,
2364 		   struct thread *t,
2365 		   const struct instruction *ip __rte_unused)
2366 {
2367 	uint32_t learner_id = t->learner_id;
2368 	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
2369 		p->n_selectors + learner_id];
2370 	struct learner_runtime *l = &t->learners[learner_id];
2371 	struct learner_statistics *stats = &p->learner_stats[learner_id];
2372 
2373 	/* Table. */
2374 	rte_swx_table_learner_rearm(ts->obj, l->mailbox, t->time);
2375 
2376 	TRACE("[Thread %2u] learner %u rearm\n",
2377 	      p->thread_id,
2378 	      learner_id);
2379 
2380 	stats->n_pkts_rearm += 1;
2381 }
2382 
2383 static inline void
2384 __instr_rearm_new_exec(struct rte_swx_pipeline *p,
2385 		       struct thread *t,
2386 		       const struct instruction *ip)
2387 {
2388 	uint32_t timeout_id = METADATA_READ(t, ip->learn.mf_timeout_id_offset,
2389 		ip->learn.mf_timeout_id_n_bits);
2390 	uint32_t learner_id = t->learner_id;
2391 	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
2392 		p->n_selectors + learner_id];
2393 	struct learner_runtime *l = &t->learners[learner_id];
2394 	struct learner_statistics *stats = &p->learner_stats[learner_id];
2395 
2396 	/* Table. */
2397 	rte_swx_table_learner_rearm_new(ts->obj, l->mailbox, t->time, timeout_id);
2398 
2399 	TRACE("[Thread %2u] learner %u rearm with timeout ID %u\n",
2400 	      p->thread_id,
2401 	      learner_id,
2402 	      timeout_id);
2403 
2404 	stats->n_pkts_rearm += 1;
2405 }
2406 
2407 /*
2408  * forget.
2409  */
2410 static inline void
2411 __instr_forget_exec(struct rte_swx_pipeline *p,
2412 		    struct thread *t,
2413 		    const struct instruction *ip __rte_unused)
2414 {
2415 	uint32_t learner_id = t->learner_id;
2416 	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
2417 		p->n_selectors + learner_id];
2418 	struct learner_runtime *l = &t->learners[learner_id];
2419 	struct learner_statistics *stats = &p->learner_stats[learner_id];
2420 
2421 	/* Table. */
2422 	rte_swx_table_learner_delete(ts->obj, l->mailbox);
2423 
2424 	TRACE("[Thread %2u] learner %u forget\n",
2425 	      p->thread_id,
2426 	      learner_id);
2427 
2428 	stats->n_pkts_forget += 1;
2429 }
2430 
2431 /*
2432  * entryid.
2433  */
2434 static inline void
2435 __instr_entryid_exec(struct rte_swx_pipeline *p __rte_unused,
2436 		       struct thread *t,
2437 		       const struct instruction *ip)
2438 {
2439 	TRACE("[Thread %2u]: entryid\n",
2440 	      p->thread_id);
2441 
2442 	/* Meta-data. */
2443 	METADATA_WRITE(t, ip->mov.dst.offset, ip->mov.dst.n_bits, t->entry_id);
2444 }
2445 
2446 /*
2447  * extern.
2448  */
2449 static inline uint32_t
2450 __instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
2451 			struct thread *t,
2452 			const struct instruction *ip)
2453 {
2454 	uint32_t obj_id = ip->ext_obj.ext_obj_id;
2455 	uint32_t func_id = ip->ext_obj.func_id;
2456 	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
2457 	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
2458 	uint32_t done;
2459 
2460 	TRACE("[Thread %2u] extern obj %u member func %u\n",
2461 	      p->thread_id,
2462 	      obj_id,
2463 	      func_id);
2464 
2465 	done = func(obj->obj, obj->mailbox);
2466 
2467 	return done;
2468 }
2469 
2470 static inline uint32_t
2471 __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
2472 			 struct thread *t,
2473 			 const struct instruction *ip)
2474 {
2475 	uint32_t ext_func_id = ip->ext_func.ext_func_id;
2476 	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
2477 	rte_swx_extern_func_t func = ext_func->func;
2478 	uint32_t done;
2479 
2480 	TRACE("[Thread %2u] extern func %u\n",
2481 	      p->thread_id,
2482 	      ext_func_id);
2483 
2484 	done = func(ext_func->mailbox);
2485 
2486 	return done;
2487 }
2488 
2489 /*
2490  * hash.
2491  */
2492 static inline void
2493 __instr_hash_func_exec(struct rte_swx_pipeline *p,
2494 		       struct thread *t,
2495 		       const struct instruction *ip)
2496 {
2497 	uint32_t hash_func_id = ip->hash_func.hash_func_id;
2498 	uint32_t dst_offset = ip->hash_func.dst.offset;
2499 	uint32_t n_dst_bits = ip->hash_func.dst.n_bits;
2500 	uint32_t src_struct_id = ip->hash_func.src.struct_id;
2501 	uint32_t src_offset = ip->hash_func.src.offset;
2502 	uint32_t n_src_bytes = ip->hash_func.src.n_bytes;
2503 
2504 	struct hash_func_runtime *func = &p->hash_func_runtime[hash_func_id];
2505 	uint8_t *src_ptr = t->structs[src_struct_id];
2506 	uint32_t result;
2507 
2508 	TRACE("[Thread %2u] hash %u\n",
2509 	      p->thread_id,
2510 	      hash_func_id);
2511 
2512 	result = func->func(&src_ptr[src_offset], n_src_bytes, 0);
2513 	METADATA_WRITE(t, dst_offset, n_dst_bits, result);
2514 }
2515 
2516 /*
2517  * rss.
2518  */
/*
 * Toeplitz-style RSS hash of the input data with the given key.
 *
 * Both the key and the input data are processed as arrays of 32-bit words; trailing bytes that do
 * not make up a full word are ignored (sizes are divided by 4). For every bit that is set in input
 * word i, at bit position pos counted from the least significant bit, the hash is XOR-ed with the
 * 32-bit window of the key that starts (31 - pos) bits into key word i and continues into the
 * next key word. Key words are indexed modulo the key size, so the key wraps around.
 *
 * @param rss_key          Key, read as 32-bit words.
 * @param rss_key_size     Key size in bytes.
 * @param input_data       Input data, read as 32-bit words.
 * @param input_data_size  Input data size in bytes.
 * @return The 32-bit hash value; 0 when the key holds less than one 32-bit word.
 */
static inline uint32_t
rss_func(void *rss_key, uint32_t rss_key_size, void *input_data, uint32_t input_data_size)
{
	uint32_t *key = (uint32_t *)rss_key;
	uint32_t *data = (uint32_t *)input_data;
	uint32_t key_size = rss_key_size >> 2;
	uint32_t data_size = input_data_size >> 2;
	uint32_t hash_val = 0, i;

	/* A key with no full 32-bit word cannot be indexed: the modulo below would divide by 0. */
	if (!key_size)
		return 0;

	for (i = 0; i < data_size; i++) {
		/* The two consecutive key words that any window for this data word can touch. */
		uint64_t key_pair = ((uint64_t)key[i % key_size] << 32) |
			key[(i + 1) % key_size];
		uint32_t d;

		/* Visit the set bits of the data word, lowest first. */
		for (d = data[i]; d; d &= (d - 1)) {
			uint32_t pos = rte_bsf32(d);

			/* The shift count is 1 .. 32, which is always valid for a 64-bit operand.
			 * Doing the same with two 32-bit shifts is undefined for pos == 31, as it
			 * requires shifting a 32-bit value right by 32.
			 */
			hash_val ^= (uint32_t)(key_pair >> (pos + 1));
		}
	}

	return hash_val;
}
2543 
2544 static inline void
2545 __instr_rss_exec(struct rte_swx_pipeline *p,
2546 		 struct thread *t,
2547 		 const struct instruction *ip)
2548 {
2549 	uint32_t rss_obj_id = ip->rss.rss_obj_id;
2550 	uint32_t dst_offset = ip->rss.dst.offset;
2551 	uint32_t n_dst_bits = ip->rss.dst.n_bits;
2552 	uint32_t src_struct_id = ip->rss.src.struct_id;
2553 	uint32_t src_offset = ip->rss.src.offset;
2554 	uint32_t n_src_bytes = ip->rss.src.n_bytes;
2555 
2556 	struct rss_runtime *r = p->rss_runtime[rss_obj_id];
2557 	uint8_t *src_ptr = t->structs[src_struct_id];
2558 	uint32_t result;
2559 
2560 	TRACE("[Thread %2u] rss %u\n",
2561 	      p->thread_id,
2562 	      rss_obj_id);
2563 
2564 	result = rss_func(r->key, r->key_size, &src_ptr[src_offset], n_src_bytes);
2565 	METADATA_WRITE(t, dst_offset, n_dst_bits, result);
2566 }
2567 
2568 /*
2569  * mov.
2570  */
2571 static inline void
2572 __instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
2573 		 struct thread *t,
2574 		 const struct instruction *ip)
2575 {
2576 	TRACE("[Thread %2u] mov\n", p->thread_id);
2577 
2578 	MOV(t, ip);
2579 }
2580 
2581 static inline void
2582 __instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
2583 		    struct thread *t,
2584 		    const struct instruction *ip)
2585 {
2586 	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);
2587 
2588 	MOV_MH(t, ip);
2589 }
2590 
2591 static inline void
2592 __instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
2593 		    struct thread *t,
2594 		    const struct instruction *ip)
2595 {
2596 	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);
2597 
2598 	MOV_HM(t, ip);
2599 }
2600 
2601 static inline void
2602 __instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
2603 		    struct thread *t,
2604 		    const struct instruction *ip)
2605 {
2606 	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);
2607 
2608 	MOV_HH(t, ip);
2609 }
2610 
2611 static inline void
2612 __instr_mov_dma_exec(struct rte_swx_pipeline *p __rte_unused,
2613 		     struct thread *t,
2614 		     const struct instruction *ip)
2615 {
2616 	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
2617 	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
2618 
2619 	uint32_t n_dst = ip->mov.dst.n_bits >> 3;
2620 	uint32_t n_src = ip->mov.src.n_bits >> 3;
2621 
2622 	TRACE("[Thread %2u] mov (dma) %u bytes\n", p->thread_id, n);
2623 
2624 	/* Both dst and src are in NBO format. */
2625 	if (n_dst > n_src) {
2626 		uint32_t n_dst_zero = n_dst - n_src;
2627 
2628 		/* Zero padding the most significant bytes in dst. */
2629 		memset(dst, 0, n_dst_zero);
2630 		dst += n_dst_zero;
2631 
2632 		/* Copy src to dst. */
2633 		memcpy(dst, src, n_src);
2634 	} else {
2635 		uint32_t n_src_skipped = n_src - n_dst;
2636 
2637 		/* Copy src to dst. */
2638 		src += n_src_skipped;
2639 		memcpy(dst, src, n_dst);
2640 	}
2641 }
2642 
2643 static inline void
2644 __instr_mov_128_exec(struct rte_swx_pipeline *p __rte_unused,
2645 		     struct thread *t,
2646 		     const struct instruction *ip)
2647 {
2648 	uint8_t *dst_struct = t->structs[ip->mov.dst.struct_id];
2649 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->mov.dst.offset];
2650 
2651 	uint8_t *src_struct = t->structs[ip->mov.src.struct_id];
2652 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->mov.src.offset];
2653 
2654 	TRACE("[Thread %2u] mov (128)\n", p->thread_id);
2655 
2656 	dst64_ptr[0] = src64_ptr[0];
2657 	dst64_ptr[1] = src64_ptr[1];
2658 }
2659 
2660 static inline void
2661 __instr_mov_128_32_exec(struct rte_swx_pipeline *p __rte_unused,
2662 			struct thread *t,
2663 			const struct instruction *ip)
2664 {
2665 	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
2666 	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
2667 
2668 	uint32_t *dst32 = (uint32_t *)dst;
2669 	uint32_t *src32 = (uint32_t *)src;
2670 
2671 	TRACE("[Thread %2u] mov (128 <- 32)\n", p->thread_id);
2672 
2673 	dst32[0] = 0;
2674 	dst32[1] = 0;
2675 	dst32[2] = 0;
2676 	dst32[3] = src32[0];
2677 }
2678 
2679 static inline void
2680 __instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
2681 		   struct thread *t,
2682 		   const struct instruction *ip)
2683 {
2684 	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);
2685 
2686 	MOV_I(t, ip);
2687 }
2688 
2689 /*
2690  * dma.
2691  */
2692 static inline void
2693 __instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
2694 			 struct thread *t,
2695 			 const struct instruction *ip,
2696 			 uint32_t n_dma)
2697 {
2698 	uint8_t *action_data = t->structs[0];
2699 	uint64_t valid_headers = t->valid_headers;
2700 	uint32_t i;
2701 
2702 	for (i = 0; i < n_dma; i++) {
2703 		uint32_t header_id = ip->dma.dst.header_id[i];
2704 		uint32_t struct_id = ip->dma.dst.struct_id[i];
2705 		uint32_t offset = ip->dma.src.offset[i];
2706 		uint32_t n_bytes = ip->dma.n_bytes[i];
2707 
2708 		struct header_runtime *h = &t->headers[header_id];
2709 		uint8_t *h_ptr0 = h->ptr0;
2710 		uint8_t *h_ptr = t->structs[struct_id];
2711 
2712 		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
2713 			h_ptr : h_ptr0;
2714 		void *src = &action_data[offset];
2715 
2716 		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);
2717 
2718 		/* Headers. */
2719 		memcpy(dst, src, n_bytes);
2720 		t->structs[struct_id] = dst;
2721 		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
2722 	}
2723 
2724 	t->valid_headers = valid_headers;
2725 }
2726 
/* Single action data to header transfer: the one-transfer case of __instr_dma_ht_many_exec(). */
static inline void
__instr_dma_ht_exec(struct rte_swx_pipeline *p,
		    struct thread *t,
		    const struct instruction *ip)
{
	__instr_dma_ht_many_exec(p, t, ip, 1);
}
2732 
2733 static inline void
2734 __instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2735 {
2736 	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
2737 
2738 	__instr_dma_ht_many_exec(p, t, ip, 2);
2739 }
2740 
2741 static inline void
2742 __instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2743 {
2744 	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
2745 
2746 	__instr_dma_ht_many_exec(p, t, ip, 3);
2747 }
2748 
2749 static inline void
2750 __instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2751 {
2752 	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
2753 
2754 	__instr_dma_ht_many_exec(p, t, ip, 4);
2755 }
2756 
2757 static inline void
2758 __instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2759 {
2760 	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
2761 
2762 	__instr_dma_ht_many_exec(p, t, ip, 5);
2763 }
2764 
2765 static inline void
2766 __instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2767 {
2768 	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
2769 
2770 	__instr_dma_ht_many_exec(p, t, ip, 6);
2771 }
2772 
2773 static inline void
2774 __instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2775 {
2776 	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
2777 
2778 	__instr_dma_ht_many_exec(p, t, ip, 7);
2779 }
2780 
2781 static inline void
2782 __instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
2783 {
2784 	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
2785 
2786 	__instr_dma_ht_many_exec(p, t, ip, 8);
2787 }
2788 
2789 /*
2790  * alu.
2791  */
2792 static inline void
2793 __instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
2794 		     struct thread *t,
2795 		     const struct instruction *ip)
2796 {
2797 	TRACE("[Thread %2u] add\n", p->thread_id);
2798 
2799 	ALU(t, ip, +);
2800 }
2801 
2802 static inline void
2803 __instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
2804 			struct thread *t,
2805 			const struct instruction *ip)
2806 {
2807 	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
2808 
2809 	ALU_MH(t, ip, +);
2810 }
2811 
2812 static inline void
2813 __instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
2814 			struct thread *t,
2815 			const struct instruction *ip)
2816 {
2817 	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
2818 
2819 	ALU_HM(t, ip, +);
2820 }
2821 
2822 static inline void
2823 __instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
2824 			struct thread *t,
2825 			const struct instruction *ip)
2826 {
2827 	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
2828 
2829 	ALU_HH(t, ip, +);
2830 }
2831 
2832 static inline void
2833 __instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
2834 			struct thread *t,
2835 			const struct instruction *ip)
2836 {
2837 	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
2838 
2839 	ALU_MI(t, ip, +);
2840 }
2841 
2842 static inline void
2843 __instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
2844 			struct thread *t,
2845 			const struct instruction *ip)
2846 {
2847 	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
2848 
2849 	ALU_HI(t, ip, +);
2850 }
2851 
2852 static inline void
2853 __instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
2854 		     struct thread *t,
2855 		     const struct instruction *ip)
2856 {
2857 	TRACE("[Thread %2u] sub\n", p->thread_id);
2858 
2859 	ALU(t, ip, -);
2860 }
2861 
2862 static inline void
2863 __instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
2864 			struct thread *t,
2865 			const struct instruction *ip)
2866 {
2867 	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
2868 
2869 	ALU_MH(t, ip, -);
2870 }
2871 
2872 static inline void
2873 __instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
2874 			struct thread *t,
2875 			const struct instruction *ip)
2876 {
2877 	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
2878 
2879 	ALU_HM(t, ip, -);
2880 }
2881 
2882 static inline void
2883 __instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
2884 			struct thread *t,
2885 			const struct instruction *ip)
2886 {
2887 	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
2888 
2889 	ALU_HH(t, ip, -);
2890 }
2891 
2892 static inline void
2893 __instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
2894 			struct thread *t,
2895 			const struct instruction *ip)
2896 {
2897 	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
2898 
2899 	ALU_MI(t, ip, -);
2900 }
2901 
2902 static inline void
2903 __instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
2904 			struct thread *t,
2905 			const struct instruction *ip)
2906 {
2907 	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
2908 
2909 	ALU_HI(t, ip, -);
2910 }
2911 
2912 static inline void
2913 __instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
2914 		     struct thread *t,
2915 		     const struct instruction *ip)
2916 {
2917 	TRACE("[Thread %2u] shl\n", p->thread_id);
2918 
2919 	ALU(t, ip, <<);
2920 }
2921 
2922 static inline void
2923 __instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
2924 			struct thread *t,
2925 			const struct instruction *ip)
2926 {
2927 	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
2928 
2929 	ALU_MH(t, ip, <<);
2930 }
2931 
2932 static inline void
2933 __instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
2934 			struct thread *t,
2935 			const struct instruction *ip)
2936 {
2937 	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
2938 
2939 	ALU_HM(t, ip, <<);
2940 }
2941 
2942 static inline void
2943 __instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
2944 			struct thread *t,
2945 			const struct instruction *ip)
2946 {
2947 	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
2948 
2949 	ALU_HH(t, ip, <<);
2950 }
2951 
2952 static inline void
2953 __instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
2954 			struct thread *t,
2955 			const struct instruction *ip)
2956 {
2957 	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
2958 
2959 	ALU_MI(t, ip, <<);
2960 }
2961 
2962 static inline void
2963 __instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
2964 			struct thread *t,
2965 			const struct instruction *ip)
2966 {
2967 	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
2968 
2969 	ALU_HI(t, ip, <<);
2970 }
2971 
2972 static inline void
2973 __instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
2974 		     struct thread *t,
2975 		     const struct instruction *ip)
2976 {
2977 	TRACE("[Thread %2u] shr\n", p->thread_id);
2978 
2979 	ALU(t, ip, >>);
2980 }
2981 
2982 static inline void
2983 __instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
2984 			struct thread *t,
2985 			const struct instruction *ip)
2986 {
2987 	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
2988 
2989 	ALU_MH(t, ip, >>);
2990 }
2991 
2992 static inline void
2993 __instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
2994 			struct thread *t,
2995 			const struct instruction *ip)
2996 {
2997 	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
2998 
2999 	ALU_HM(t, ip, >>);
3000 }
3001 
3002 static inline void
3003 __instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
3004 			struct thread *t,
3005 			const struct instruction *ip)
3006 {
3007 	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
3008 
3009 	ALU_HH(t, ip, >>);
3010 }
3011 
3012 static inline void
3013 __instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
3014 			struct thread *t,
3015 			const struct instruction *ip)
3016 {
3017 	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
3018 
3019 	/* Structs. */
3020 	ALU_MI(t, ip, >>);
3021 }
3022 
3023 static inline void
3024 __instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
3025 			struct thread *t,
3026 			const struct instruction *ip)
3027 {
3028 	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
3029 
3030 	ALU_HI(t, ip, >>);
3031 }
3032 
3033 static inline void
3034 __instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
3035 		     struct thread *t,
3036 		     const struct instruction *ip)
3037 {
3038 	TRACE("[Thread %2u] and\n", p->thread_id);
3039 
3040 	ALU(t, ip, &);
3041 }
3042 
3043 static inline void
3044 __instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
3045 			struct thread *t,
3046 			const struct instruction *ip)
3047 {
3048 	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
3049 
3050 	ALU_MH(t, ip, &);
3051 }
3052 
3053 static inline void
3054 __instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
3055 			struct thread *t,
3056 			const struct instruction *ip)
3057 {
3058 	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
3059 
3060 	ALU_HM_FAST(t, ip, &);
3061 }
3062 
3063 static inline void
3064 __instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
3065 			struct thread *t,
3066 			const struct instruction *ip)
3067 {
3068 	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
3069 
3070 	ALU_HH_FAST(t, ip, &);
3071 }
3072 
3073 static inline void
3074 __instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
3075 		       struct thread *t,
3076 		       const struct instruction *ip)
3077 {
3078 	TRACE("[Thread %2u] and (i)\n", p->thread_id);
3079 
3080 	ALU_I(t, ip, &);
3081 }
3082 
3083 static inline void
3084 __instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
3085 		    struct thread *t,
3086 		    const struct instruction *ip)
3087 {
3088 	TRACE("[Thread %2u] or\n", p->thread_id);
3089 
3090 	ALU(t, ip, |);
3091 }
3092 
3093 static inline void
3094 __instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
3095 		       struct thread *t,
3096 		       const struct instruction *ip)
3097 {
3098 	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
3099 
3100 	ALU_MH(t, ip, |);
3101 }
3102 
3103 static inline void
3104 __instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
3105 		       struct thread *t,
3106 		       const struct instruction *ip)
3107 {
3108 	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
3109 
3110 	ALU_HM_FAST(t, ip, |);
3111 }
3112 
3113 static inline void
3114 __instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
3115 		       struct thread *t,
3116 		       const struct instruction *ip)
3117 {
3118 	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
3119 
3120 	ALU_HH_FAST(t, ip, |);
3121 }
3122 
3123 static inline void
3124 __instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
3125 		      struct thread *t,
3126 		      const struct instruction *ip)
3127 {
3128 	TRACE("[Thread %2u] or (i)\n", p->thread_id);
3129 
3130 	ALU_I(t, ip, |);
3131 }
3132 
3133 static inline void
3134 __instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
3135 		     struct thread *t,
3136 		     const struct instruction *ip)
3137 {
3138 	TRACE("[Thread %2u] xor\n", p->thread_id);
3139 
3140 	ALU(t, ip, ^);
3141 }
3142 
3143 static inline void
3144 __instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
3145 			struct thread *t,
3146 			const struct instruction *ip)
3147 {
3148 	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
3149 
3150 	ALU_MH(t, ip, ^);
3151 }
3152 
3153 static inline void
3154 __instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
3155 			struct thread *t,
3156 			const struct instruction *ip)
3157 {
3158 	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
3159 
3160 	ALU_HM_FAST(t, ip, ^);
3161 }
3162 
3163 static inline void
3164 __instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
3165 			struct thread *t,
3166 			const struct instruction *ip)
3167 {
3168 	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
3169 
3170 	ALU_HH_FAST(t, ip, ^);
3171 }
3172 
3173 static inline void
3174 __instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
3175 		       struct thread *t,
3176 		       const struct instruction *ip)
3177 {
3178 	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
3179 
3180 	ALU_I(t, ip, ^);
3181 }
3182 
3183 static inline void
3184 __instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
3185 			     struct thread *t,
3186 			     const struct instruction *ip)
3187 {
3188 	uint8_t *dst_struct, *src_struct;
3189 	uint16_t *dst16_ptr, dst;
3190 	uint64_t *src64_ptr, src64, src64_mask, src;
3191 	uint64_t r;
3192 
3193 	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
3194 
3195 	/* Structs. */
3196 	dst_struct = t->structs[ip->alu.dst.struct_id];
3197 	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
3198 	dst = *dst16_ptr;
3199 
3200 	src_struct = t->structs[ip->alu.src.struct_id];
3201 	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
3202 	src64 = *src64_ptr;
3203 	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
3204 	src = src64 & src64_mask;
3205 
3206 	/* Initialize the result with destination 1's complement. */
3207 	r = dst;
3208 	r = ~r & 0xFFFF;
3209 
3210 	/* The first input (r) is a 16-bit number. The second and the third
3211 	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
3212 	 * three numbers (output r) is a 34-bit number.
3213 	 */
3214 	r += (src >> 32) + (src & 0xFFFFFFFF);
3215 
3216 	/* The first input is a 16-bit number. The second input is an 18-bit
3217 	 * number. In the worst case scenario, the sum of the two numbers is a
3218 	 * 19-bit number.
3219 	 */
3220 	r = (r & 0xFFFF) + (r >> 16);
3221 
3222 	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
3223 	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
3224 	 */
3225 	r = (r & 0xFFFF) + (r >> 16);
3226 
3227 	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
3228 	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
3229 	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
3230 	 * therefore the output r is always a 16-bit number.
3231 	 */
3232 	r = (r & 0xFFFF) + (r >> 16);
3233 
3234 	/* Apply 1's complement to the result. */
3235 	r = ~r & 0xFFFF;
3236 	r = r ? r : 0xFFFF;
3237 
3238 	*dst16_ptr = (uint16_t)r;
3239 }
3240 
3241 static inline void
3242 __instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
3243 			     struct thread *t,
3244 			     const struct instruction *ip)
3245 {
3246 	uint8_t *dst_struct, *src_struct;
3247 	uint16_t *dst16_ptr, dst;
3248 	uint64_t *src64_ptr, src64, src64_mask, src;
3249 	uint64_t r;
3250 
3251 	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
3252 
3253 	/* Structs. */
3254 	dst_struct = t->structs[ip->alu.dst.struct_id];
3255 	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
3256 	dst = *dst16_ptr;
3257 
3258 	src_struct = t->structs[ip->alu.src.struct_id];
3259 	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
3260 	src64 = *src64_ptr;
3261 	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
3262 	src = src64 & src64_mask;
3263 
3264 	/* Initialize the result with destination 1's complement. */
3265 	r = dst;
3266 	r = ~r & 0xFFFF;
3267 
3268 	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
3269 	 * the following sequence of operations in 2's complement arithmetic:
3270 	 *    a '- b = (a - b) % 0xFFFF.
3271 	 *
3272 	 * In order to prevent an underflow for the below subtraction, in which
3273 	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
3274 	 * minuend), we first add a multiple of the 0xFFFF modulus to the
3275 	 * minuend. The number we add to the minuend needs to be a 34-bit number
3276 	 * or higher, so for readability reasons we picked the 36-bit multiple.
3277 	 * We are effectively turning the 16-bit minuend into a 36-bit number:
3278 	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
3279 	 */
3280 	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
3281 
3282 	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
3283 	 * result (the output r) is a 36-bit number.
3284 	 */
3285 	r -= (src >> 32) + (src & 0xFFFFFFFF);
3286 
3287 	/* The first input is a 16-bit number. The second input is a 20-bit
3288 	 * number. Their sum is a 21-bit number.
3289 	 */
3290 	r = (r & 0xFFFF) + (r >> 16);
3291 
3292 	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
3293 	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
3294 	 */
3295 	r = (r & 0xFFFF) + (r >> 16);
3296 
3297 	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
3298 	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
3299 	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
3300 	 * generated, therefore the output r is always a 16-bit number.
3301 	 */
3302 	r = (r & 0xFFFF) + (r >> 16);
3303 
3304 	/* Apply 1's complement to the result. */
3305 	r = ~r & 0xFFFF;
3306 	r = r ? r : 0xFFFF;
3307 
3308 	*dst16_ptr = (uint16_t)r;
3309 }
3310 
3311 static inline void
3312 __instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
3313 				struct thread *t,
3314 				const struct instruction *ip)
3315 {
3316 	uint8_t *dst_struct, *src_struct;
3317 	uint16_t *dst16_ptr, dst;
3318 	uint32_t *src32_ptr;
3319 	uint64_t r0, r1;
3320 
3321 	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
3322 
3323 	/* Structs. */
3324 	dst_struct = t->structs[ip->alu.dst.struct_id];
3325 	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
3326 	dst = *dst16_ptr;
3327 
3328 	src_struct = t->structs[ip->alu.src.struct_id];
3329 	src32_ptr = (uint32_t *)&src_struct[0];
3330 
3331 	/* Initialize the result with destination 1's complement. */
3332 	r0 = dst;
3333 	r0 = ~r0 & 0xFFFF;
3334 
3335 	r0 += src32_ptr[0]; /* The output r0 is a 33-bit number. */
3336 	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
3337 	r0 += src32_ptr[2]; /* The output r0 is a 34-bit number. */
3338 	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
3339 	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
3340 
3341 	/* The first input is a 16-bit number. The second input is a 19-bit
3342 	 * number. Their sum is a 20-bit number.
3343 	 */
3344 	r0 = (r0 & 0xFFFF) + (r0 >> 16);
3345 
3346 	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
3347 	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
3348 	 */
3349 	r0 = (r0 & 0xFFFF) + (r0 >> 16);
3350 
3351 	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
3352 	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
3353 	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
3354 	 * generated, therefore the output r is always a 16-bit number.
3355 	 */
3356 	r0 = (r0 & 0xFFFF) + (r0 >> 16);
3357 
3358 	/* Apply 1's complement to the result. */
3359 	r0 = ~r0 & 0xFFFF;
3360 	r0 = r0 ? r0 : 0xFFFF;
3361 
3362 	*dst16_ptr = (uint16_t)r0;
3363 }
3364 
3365 static inline void
3366 __instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
3367 			      struct thread *t,
3368 			      const struct instruction *ip)
3369 {
3370 	uint32_t src_header_id = ip->alu.src.n_bits; /* The src header ID is stored here. */
3371 	uint32_t n_src_header_bytes = t->headers[src_header_id].n_bytes;
3372 	uint8_t *dst_struct, *src_struct;
3373 	uint16_t *dst16_ptr, dst;
3374 	uint32_t *src32_ptr;
3375 	uint64_t r;
3376 	uint32_t i;
3377 
3378 	if (n_src_header_bytes == 20) {
3379 		__instr_alu_ckadd_struct20_exec(p, t, ip);
3380 		return;
3381 	}
3382 
3383 	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
3384 
3385 	/* Structs. */
3386 	dst_struct = t->structs[ip->alu.dst.struct_id];
3387 	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
3388 	dst = *dst16_ptr;
3389 
3390 	src_struct = t->structs[ip->alu.src.struct_id];
3391 	src32_ptr = (uint32_t *)&src_struct[0];
3392 
3393 	/* Initialize the result with destination 1's complement. */
3394 	r = dst;
3395 	r = ~r & 0xFFFF;
3396 
3397 	/* The max number of 32-bit words in a 32K-byte header is 2^13.
3398 	 * Therefore, in the worst case scenario, a 45-bit number is added to a
3399 	 * 16-bit number (the input r), so the output r is 46-bit number.
3400 	 */
3401 	for (i = 0; i < n_src_header_bytes / 4; i++, src32_ptr++)
3402 		r += *src32_ptr;
3403 
3404 	/* The first input is a 16-bit number. The second input is a 30-bit
3405 	 * number. Their sum is a 31-bit number.
3406 	 */
3407 	r = (r & 0xFFFF) + (r >> 16);
3408 
3409 	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
3410 	 * a 15-bit number (0 .. 0x7FFF). The sum is a 17-bit number (0 .. 0x17FFE).
3411 	 */
3412 	r = (r & 0xFFFF) + (r >> 16);
3413 
3414 	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
3415 	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
3416 	 * 0x17FFE), the output r is (0 .. 0x7FFF). So no carry bit can be
3417 	 * generated, therefore the output r is always a 16-bit number.
3418 	 */
3419 	r = (r & 0xFFFF) + (r >> 16);
3420 
3421 	/* Apply 1's complement to the result. */
3422 	r = ~r & 0xFFFF;
3423 	r = r ? r : 0xFFFF;
3424 
3425 	*dst16_ptr = (uint16_t)r;
3426 }
3427 
3428 /*
3429  * Register array.
3430  */
3431 static inline uint64_t *
3432 instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
3433 {
3434 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3435 	return r->regarray;
3436 }
3437 
3438 static inline uint64_t
3439 instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3440 {
3441 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3442 
3443 	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
3444 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
3445 	uint64_t idx64 = *idx64_ptr;
3446 	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
3447 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
3448 
3449 	return idx;
3450 }
3451 
3452 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3453 
3454 static inline uint64_t
3455 instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3456 {
3457 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3458 
3459 	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
3460 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
3461 	uint64_t idx64 = *idx64_ptr;
3462 	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;
3463 
3464 	return idx;
3465 }
3466 
3467 #else
3468 
3469 #define instr_regarray_idx_nbo instr_regarray_idx_hbo
3470 
3471 #endif
3472 
3473 static inline uint64_t
3474 instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3475 {
3476 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3477 
3478 	uint64_t idx = ip->regarray.idx_val & r->size_mask;
3479 
3480 	return idx;
3481 }
3482 
3483 static inline uint64_t
3484 instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
3485 {
3486 	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
3487 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
3488 	uint64_t src64 = *src64_ptr;
3489 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3490 	uint64_t src = src64 & src64_mask;
3491 
3492 	return src;
3493 }
3494 
3495 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3496 
3497 static inline uint64_t
3498 instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
3499 {
3500 	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
3501 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
3502 	uint64_t src64 = *src64_ptr;
3503 	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);
3504 
3505 	return src;
3506 }
3507 
3508 #else
3509 
3510 #define instr_regarray_src_nbo instr_regarray_src_hbo
3511 
3512 #endif
3513 
3514 static inline void
3515 instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
3516 {
3517 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
3518 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
3519 	uint64_t dst64 = *dst64_ptr;
3520 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3521 
3522 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3523 
3524 }
3525 
3526 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3527 
3528 static inline void
3529 instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
3530 {
3531 	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
3532 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
3533 	uint64_t dst64 = *dst64_ptr;
3534 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
3535 
3536 	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
3537 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
3538 }
3539 
3540 #else
3541 
3542 #define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
3543 
3544 #endif
3545 
3546 static inline void
3547 __instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
3548 			    struct thread *t,
3549 			    const struct instruction *ip)
3550 {
3551 	uint64_t *regarray, idx;
3552 
3553 	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
3554 
3555 	regarray = instr_regarray_regarray(p, ip);
3556 	idx = instr_regarray_idx_nbo(p, t, ip);
3557 	rte_prefetch0(&regarray[idx]);
3558 }
3559 
3560 static inline void
3561 __instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
3562 			    struct thread *t,
3563 			    const struct instruction *ip)
3564 {
3565 	uint64_t *regarray, idx;
3566 
3567 	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
3568 
3569 	regarray = instr_regarray_regarray(p, ip);
3570 	idx = instr_regarray_idx_hbo(p, t, ip);
3571 	rte_prefetch0(&regarray[idx]);
3572 }
3573 
3574 static inline void
3575 __instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
3576 			    struct thread *t __rte_unused,
3577 			    const struct instruction *ip)
3578 {
3579 	uint64_t *regarray, idx;
3580 
3581 	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
3582 
3583 	regarray = instr_regarray_regarray(p, ip);
3584 	idx = instr_regarray_idx_imm(p, ip);
3585 	rte_prefetch0(&regarray[idx]);
3586 }
3587 
3588 static inline void
3589 __instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
3590 		       struct thread *t,
3591 		       const struct instruction *ip)
3592 {
3593 	uint64_t *regarray, idx;
3594 
3595 	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
3596 
3597 	regarray = instr_regarray_regarray(p, ip);
3598 	idx = instr_regarray_idx_nbo(p, t, ip);
3599 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3600 }
3601 
3602 static inline void
3603 __instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
3604 		       struct thread *t,
3605 		       const struct instruction *ip)
3606 {
3607 	uint64_t *regarray, idx;
3608 
3609 	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
3610 
3611 	/* Structs. */
3612 	regarray = instr_regarray_regarray(p, ip);
3613 	idx = instr_regarray_idx_hbo(p, t, ip);
3614 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3615 }
3616 
3617 static inline void
3618 __instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3619 {
3620 	uint64_t *regarray, idx;
3621 
3622 	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
3623 
3624 	regarray = instr_regarray_regarray(p, ip);
3625 	idx = instr_regarray_idx_nbo(p, t, ip);
3626 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3627 }
3628 
3629 static inline void
3630 __instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3631 {
3632 	uint64_t *regarray, idx;
3633 
3634 	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
3635 
3636 	regarray = instr_regarray_regarray(p, ip);
3637 	idx = instr_regarray_idx_hbo(p, t, ip);
3638 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3639 }
3640 
3641 static inline void
3642 __instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3643 {
3644 	uint64_t *regarray, idx;
3645 
3646 	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
3647 
3648 	regarray = instr_regarray_regarray(p, ip);
3649 	idx = instr_regarray_idx_imm(p, ip);
3650 	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
3651 }
3652 
3653 static inline void
3654 __instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3655 {
3656 	uint64_t *regarray, idx;
3657 
3658 	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
3659 
3660 	regarray = instr_regarray_regarray(p, ip);
3661 	idx = instr_regarray_idx_imm(p, ip);
3662 	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
3663 }
3664 
3665 static inline void
3666 __instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3667 {
3668 	uint64_t *regarray, idx, src;
3669 
3670 	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
3671 
3672 	regarray = instr_regarray_regarray(p, ip);
3673 	idx = instr_regarray_idx_nbo(p, t, ip);
3674 	src = instr_regarray_src_nbo(t, ip);
3675 	regarray[idx] = src;
3676 }
3677 
3678 static inline void
3679 __instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3680 {
3681 	uint64_t *regarray, idx, src;
3682 
3683 	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
3684 
3685 	regarray = instr_regarray_regarray(p, ip);
3686 	idx = instr_regarray_idx_nbo(p, t, ip);
3687 	src = instr_regarray_src_hbo(t, ip);
3688 	regarray[idx] = src;
3689 }
3690 
3691 static inline void
3692 __instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3693 {
3694 	uint64_t *regarray, idx, src;
3695 
3696 	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
3697 
3698 	regarray = instr_regarray_regarray(p, ip);
3699 	idx = instr_regarray_idx_hbo(p, t, ip);
3700 	src = instr_regarray_src_nbo(t, ip);
3701 	regarray[idx] = src;
3702 }
3703 
3704 static inline void
3705 __instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3706 {
3707 	uint64_t *regarray, idx, src;
3708 
3709 	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
3710 
3711 	regarray = instr_regarray_regarray(p, ip);
3712 	idx = instr_regarray_idx_hbo(p, t, ip);
3713 	src = instr_regarray_src_hbo(t, ip);
3714 	regarray[idx] = src;
3715 }
3716 
3717 static inline void
3718 __instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3719 {
3720 	uint64_t *regarray, idx, src;
3721 
3722 	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
3723 
3724 	regarray = instr_regarray_regarray(p, ip);
3725 	idx = instr_regarray_idx_nbo(p, t, ip);
3726 	src = ip->regarray.dstsrc_val;
3727 	regarray[idx] = src;
3728 }
3729 
3730 static inline void
3731 __instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3732 {
3733 	uint64_t *regarray, idx, src;
3734 
3735 	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
3736 
3737 	regarray = instr_regarray_regarray(p, ip);
3738 	idx = instr_regarray_idx_hbo(p, t, ip);
3739 	src = ip->regarray.dstsrc_val;
3740 	regarray[idx] = src;
3741 }
3742 
3743 static inline void
3744 __instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3745 {
3746 	uint64_t *regarray, idx, src;
3747 
3748 	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
3749 
3750 	regarray = instr_regarray_regarray(p, ip);
3751 	idx = instr_regarray_idx_imm(p, ip);
3752 	src = instr_regarray_src_nbo(t, ip);
3753 	regarray[idx] = src;
3754 }
3755 
3756 static inline void
3757 __instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3758 {
3759 	uint64_t *regarray, idx, src;
3760 
3761 	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
3762 
3763 	regarray = instr_regarray_regarray(p, ip);
3764 	idx = instr_regarray_idx_imm(p, ip);
3765 	src = instr_regarray_src_hbo(t, ip);
3766 	regarray[idx] = src;
3767 }
3768 
3769 static inline void
3770 __instr_regwr_rii_exec(struct rte_swx_pipeline *p,
3771 		       struct thread *t __rte_unused,
3772 		       const struct instruction *ip)
3773 {
3774 	uint64_t *regarray, idx, src;
3775 
3776 	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
3777 
3778 	regarray = instr_regarray_regarray(p, ip);
3779 	idx = instr_regarray_idx_imm(p, ip);
3780 	src = ip->regarray.dstsrc_val;
3781 	regarray[idx] = src;
3782 }
3783 
3784 static inline void
3785 __instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3786 {
3787 	uint64_t *regarray, idx, src;
3788 
3789 	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
3790 
3791 	regarray = instr_regarray_regarray(p, ip);
3792 	idx = instr_regarray_idx_nbo(p, t, ip);
3793 	src = instr_regarray_src_nbo(t, ip);
3794 	regarray[idx] += src;
3795 }
3796 
3797 static inline void
3798 __instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3799 {
3800 	uint64_t *regarray, idx, src;
3801 
3802 	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
3803 
3804 	regarray = instr_regarray_regarray(p, ip);
3805 	idx = instr_regarray_idx_nbo(p, t, ip);
3806 	src = instr_regarray_src_hbo(t, ip);
3807 	regarray[idx] += src;
3808 }
3809 
3810 static inline void
3811 __instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3812 {
3813 	uint64_t *regarray, idx, src;
3814 
3815 	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
3816 
3817 	regarray = instr_regarray_regarray(p, ip);
3818 	idx = instr_regarray_idx_hbo(p, t, ip);
3819 	src = instr_regarray_src_nbo(t, ip);
3820 	regarray[idx] += src;
3821 }
3822 
3823 static inline void
3824 __instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3825 {
3826 	uint64_t *regarray, idx, src;
3827 
3828 	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
3829 
3830 	regarray = instr_regarray_regarray(p, ip);
3831 	idx = instr_regarray_idx_hbo(p, t, ip);
3832 	src = instr_regarray_src_hbo(t, ip);
3833 	regarray[idx] += src;
3834 }
3835 
3836 static inline void
3837 __instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3838 {
3839 	uint64_t *regarray, idx, src;
3840 
3841 	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
3842 
3843 	regarray = instr_regarray_regarray(p, ip);
3844 	idx = instr_regarray_idx_nbo(p, t, ip);
3845 	src = ip->regarray.dstsrc_val;
3846 	regarray[idx] += src;
3847 }
3848 
3849 static inline void
3850 __instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3851 {
3852 	uint64_t *regarray, idx, src;
3853 
3854 	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
3855 
3856 	regarray = instr_regarray_regarray(p, ip);
3857 	idx = instr_regarray_idx_hbo(p, t, ip);
3858 	src = ip->regarray.dstsrc_val;
3859 	regarray[idx] += src;
3860 }
3861 
3862 static inline void
3863 __instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3864 {
3865 	uint64_t *regarray, idx, src;
3866 
3867 	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
3868 
3869 	regarray = instr_regarray_regarray(p, ip);
3870 	idx = instr_regarray_idx_imm(p, ip);
3871 	src = instr_regarray_src_nbo(t, ip);
3872 	regarray[idx] += src;
3873 }
3874 
3875 static inline void
3876 __instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3877 {
3878 	uint64_t *regarray, idx, src;
3879 
3880 	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
3881 
3882 	regarray = instr_regarray_regarray(p, ip);
3883 	idx = instr_regarray_idx_imm(p, ip);
3884 	src = instr_regarray_src_hbo(t, ip);
3885 	regarray[idx] += src;
3886 }
3887 
3888 static inline void
3889 __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
3890 			struct thread *t __rte_unused,
3891 			const struct instruction *ip)
3892 {
3893 	uint64_t *regarray, idx, src;
3894 
3895 	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
3896 
3897 	regarray = instr_regarray_regarray(p, ip);
3898 	idx = instr_regarray_idx_imm(p, ip);
3899 	src = ip->regarray.dstsrc_val;
3900 	regarray[idx] += src;
3901 }
3902 
/*
 * Meter array (metarray).
 */
3906 static inline struct meter *
3907 instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3908 {
3909 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3910 
3911 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3912 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3913 	uint64_t idx64 = *idx64_ptr;
3914 	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
3915 	uint64_t idx = idx64 & idx64_mask & r->size_mask;
3916 
3917 	return &r->metarray[idx];
3918 }
3919 
3920 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3921 
3922 static inline struct meter *
3923 instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
3924 {
3925 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3926 
3927 	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
3928 	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
3929 	uint64_t idx64 = *idx64_ptr;
3930 	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
3931 
3932 	return &r->metarray[idx];
3933 }
3934 
3935 #else
3936 
3937 #define instr_meter_idx_nbo instr_meter_idx_hbo
3938 
3939 #endif
3940 
3941 static inline struct meter *
3942 instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3943 {
3944 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3945 
3946 	uint64_t idx =  ip->meter.idx_val & r->size_mask;
3947 
3948 	return &r->metarray[idx];
3949 }
3950 
3951 static inline uint32_t
3952 instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
3953 {
3954 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3955 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3956 	uint64_t src64 = *src64_ptr;
3957 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
3958 	uint64_t src = src64 & src64_mask;
3959 
3960 	return (uint32_t)src;
3961 }
3962 
3963 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
3964 
3965 static inline uint32_t
3966 instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
3967 {
3968 	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
3969 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
3970 	uint64_t src64 = *src64_ptr;
3971 	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
3972 
3973 	return (uint32_t)src;
3974 }
3975 
3976 #else
3977 
3978 #define instr_meter_length_nbo instr_meter_length_hbo
3979 
3980 #endif
3981 
3982 static inline enum rte_color
3983 instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
3984 {
3985 	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
3986 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
3987 	uint64_t src64 = *src64_ptr;
3988 	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
3989 	uint64_t src = src64 & src64_mask;
3990 
3991 	return (enum rte_color)src;
3992 }
3993 
3994 static inline void
3995 instr_meter_color_out_hbo_set(struct thread *t,
3996 			      const struct instruction *ip,
3997 			      enum rte_color color_out)
3998 {
3999 	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
4000 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
4001 	uint64_t dst64 = *dst64_ptr;
4002 	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
4003 
4004 	uint64_t src = (uint64_t)color_out;
4005 
4006 	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
4007 }
4008 
4009 static inline void
4010 __instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
4011 			   struct thread *t,
4012 			   const struct instruction *ip)
4013 {
4014 	struct meter *m;
4015 
4016 	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
4017 
4018 	m = instr_meter_idx_nbo(p, t, ip);
4019 	rte_prefetch0(m);
4020 }
4021 
4022 static inline void
4023 __instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
4024 			   struct thread *t,
4025 			   const struct instruction *ip)
4026 {
4027 	struct meter *m;
4028 
4029 	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
4030 
4031 	m = instr_meter_idx_hbo(p, t, ip);
4032 	rte_prefetch0(m);
4033 }
4034 
4035 static inline void
4036 __instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
4037 			   struct thread *t __rte_unused,
4038 			   const struct instruction *ip)
4039 {
4040 	struct meter *m;
4041 
4042 	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
4043 
4044 	m = instr_meter_idx_imm(p, ip);
4045 	rte_prefetch0(m);
4046 }
4047 
4048 static inline void
4049 __instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4050 {
4051 	struct meter *m;
4052 	uint64_t time, n_pkts, n_bytes;
4053 	uint32_t length;
4054 	enum rte_color color_in, color_out;
4055 
4056 	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
4057 
4058 	m = instr_meter_idx_nbo(p, t, ip);
4059 	rte_prefetch0(m->n_pkts);
4060 	time = rte_get_tsc_cycles();
4061 	length = instr_meter_length_nbo(t, ip);
4062 	color_in = instr_meter_color_in_hbo(t, ip);
4063 
4064 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4065 		&m->profile->profile,
4066 		time,
4067 		length,
4068 		color_in);
4069 
4070 	color_out &= m->color_mask;
4071 
4072 	n_pkts = m->n_pkts[color_out];
4073 	n_bytes = m->n_bytes[color_out];
4074 
4075 	instr_meter_color_out_hbo_set(t, ip, color_out);
4076 
4077 	m->n_pkts[color_out] = n_pkts + 1;
4078 	m->n_bytes[color_out] = n_bytes + length;
4079 }
4080 
4081 static inline void
4082 __instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4083 {
4084 	struct meter *m;
4085 	uint64_t time, n_pkts, n_bytes;
4086 	uint32_t length;
4087 	enum rte_color color_in, color_out;
4088 
4089 	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
4090 
4091 	m = instr_meter_idx_nbo(p, t, ip);
4092 	rte_prefetch0(m->n_pkts);
4093 	time = rte_get_tsc_cycles();
4094 	length = instr_meter_length_nbo(t, ip);
4095 	color_in = (enum rte_color)ip->meter.color_in_val;
4096 
4097 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4098 		&m->profile->profile,
4099 		time,
4100 		length,
4101 		color_in);
4102 
4103 	color_out &= m->color_mask;
4104 
4105 	n_pkts = m->n_pkts[color_out];
4106 	n_bytes = m->n_bytes[color_out];
4107 
4108 	instr_meter_color_out_hbo_set(t, ip, color_out);
4109 
4110 	m->n_pkts[color_out] = n_pkts + 1;
4111 	m->n_bytes[color_out] = n_bytes + length;
4112 }
4113 
4114 static inline void
4115 __instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4116 {
4117 	struct meter *m;
4118 	uint64_t time, n_pkts, n_bytes;
4119 	uint32_t length;
4120 	enum rte_color color_in, color_out;
4121 
4122 	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
4123 
4124 	m = instr_meter_idx_nbo(p, t, ip);
4125 	rte_prefetch0(m->n_pkts);
4126 	time = rte_get_tsc_cycles();
4127 	length = instr_meter_length_hbo(t, ip);
4128 	color_in = instr_meter_color_in_hbo(t, ip);
4129 
4130 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4131 		&m->profile->profile,
4132 		time,
4133 		length,
4134 		color_in);
4135 
4136 	color_out &= m->color_mask;
4137 
4138 	n_pkts = m->n_pkts[color_out];
4139 	n_bytes = m->n_bytes[color_out];
4140 
4141 	instr_meter_color_out_hbo_set(t, ip, color_out);
4142 
4143 	m->n_pkts[color_out] = n_pkts + 1;
4144 	m->n_bytes[color_out] = n_bytes + length;
4145 }
4146 
4147 static inline void
4148 __instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4149 {
4150 	struct meter *m;
4151 	uint64_t time, n_pkts, n_bytes;
4152 	uint32_t length;
4153 	enum rte_color color_in, color_out;
4154 
4155 	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
4156 
4157 	m = instr_meter_idx_nbo(p, t, ip);
4158 	rte_prefetch0(m->n_pkts);
4159 	time = rte_get_tsc_cycles();
4160 	length = instr_meter_length_hbo(t, ip);
4161 	color_in = (enum rte_color)ip->meter.color_in_val;
4162 
4163 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4164 		&m->profile->profile,
4165 		time,
4166 		length,
4167 		color_in);
4168 
4169 	color_out &= m->color_mask;
4170 
4171 	n_pkts = m->n_pkts[color_out];
4172 	n_bytes = m->n_bytes[color_out];
4173 
4174 	instr_meter_color_out_hbo_set(t, ip, color_out);
4175 
4176 	m->n_pkts[color_out] = n_pkts + 1;
4177 	m->n_bytes[color_out] = n_bytes + length;
4178 }
4179 
4180 static inline void
4181 __instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4182 {
4183 	struct meter *m;
4184 	uint64_t time, n_pkts, n_bytes;
4185 	uint32_t length;
4186 	enum rte_color color_in, color_out;
4187 
4188 	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
4189 
4190 	m = instr_meter_idx_hbo(p, t, ip);
4191 	rte_prefetch0(m->n_pkts);
4192 	time = rte_get_tsc_cycles();
4193 	length = instr_meter_length_nbo(t, ip);
4194 	color_in = instr_meter_color_in_hbo(t, ip);
4195 
4196 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4197 		&m->profile->profile,
4198 		time,
4199 		length,
4200 		color_in);
4201 
4202 	color_out &= m->color_mask;
4203 
4204 	n_pkts = m->n_pkts[color_out];
4205 	n_bytes = m->n_bytes[color_out];
4206 
4207 	instr_meter_color_out_hbo_set(t, ip, color_out);
4208 
4209 	m->n_pkts[color_out] = n_pkts + 1;
4210 	m->n_bytes[color_out] = n_bytes + length;
4211 }
4212 
4213 static inline void
4214 __instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4215 {
4216 	struct meter *m;
4217 	uint64_t time, n_pkts, n_bytes;
4218 	uint32_t length;
4219 	enum rte_color color_in, color_out;
4220 
4221 	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
4222 
4223 	m = instr_meter_idx_hbo(p, t, ip);
4224 	rte_prefetch0(m->n_pkts);
4225 	time = rte_get_tsc_cycles();
4226 	length = instr_meter_length_nbo(t, ip);
4227 	color_in = (enum rte_color)ip->meter.color_in_val;
4228 
4229 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4230 		&m->profile->profile,
4231 		time,
4232 		length,
4233 		color_in);
4234 
4235 	color_out &= m->color_mask;
4236 
4237 	n_pkts = m->n_pkts[color_out];
4238 	n_bytes = m->n_bytes[color_out];
4239 
4240 	instr_meter_color_out_hbo_set(t, ip, color_out);
4241 
4242 	m->n_pkts[color_out] = n_pkts + 1;
4243 	m->n_bytes[color_out] = n_bytes + length;
4244 }
4245 
4246 static inline void
4247 __instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4248 {
4249 	struct meter *m;
4250 	uint64_t time, n_pkts, n_bytes;
4251 	uint32_t length;
4252 	enum rte_color color_in, color_out;
4253 
4254 	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
4255 
4256 	m = instr_meter_idx_hbo(p, t, ip);
4257 	rte_prefetch0(m->n_pkts);
4258 	time = rte_get_tsc_cycles();
4259 	length = instr_meter_length_hbo(t, ip);
4260 	color_in = instr_meter_color_in_hbo(t, ip);
4261 
4262 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4263 		&m->profile->profile,
4264 		time,
4265 		length,
4266 		color_in);
4267 
4268 	color_out &= m->color_mask;
4269 
4270 	n_pkts = m->n_pkts[color_out];
4271 	n_bytes = m->n_bytes[color_out];
4272 
4273 	instr_meter_color_out_hbo_set(t, ip, color_out);
4274 
4275 	m->n_pkts[color_out] = n_pkts + 1;
4276 	m->n_bytes[color_out] = n_bytes + length;
4277 }
4278 
4279 static inline void
4280 __instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4281 {
4282 	struct meter *m;
4283 	uint64_t time, n_pkts, n_bytes;
4284 	uint32_t length;
4285 	enum rte_color color_in, color_out;
4286 
4287 	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
4288 
4289 	m = instr_meter_idx_hbo(p, t, ip);
4290 	rte_prefetch0(m->n_pkts);
4291 	time = rte_get_tsc_cycles();
4292 	length = instr_meter_length_hbo(t, ip);
4293 	color_in = (enum rte_color)ip->meter.color_in_val;
4294 
4295 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4296 		&m->profile->profile,
4297 		time,
4298 		length,
4299 		color_in);
4300 
4301 	color_out &= m->color_mask;
4302 
4303 	n_pkts = m->n_pkts[color_out];
4304 	n_bytes = m->n_bytes[color_out];
4305 
4306 	instr_meter_color_out_hbo_set(t, ip, color_out);
4307 
4308 	m->n_pkts[color_out] = n_pkts + 1;
4309 	m->n_bytes[color_out] = n_bytes + length;
4310 }
4311 
4312 static inline void
4313 __instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4314 {
4315 	struct meter *m;
4316 	uint64_t time, n_pkts, n_bytes;
4317 	uint32_t length;
4318 	enum rte_color color_in, color_out;
4319 
4320 	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
4321 
4322 	m = instr_meter_idx_imm(p, ip);
4323 	rte_prefetch0(m->n_pkts);
4324 	time = rte_get_tsc_cycles();
4325 	length = instr_meter_length_nbo(t, ip);
4326 	color_in = instr_meter_color_in_hbo(t, ip);
4327 
4328 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4329 		&m->profile->profile,
4330 		time,
4331 		length,
4332 		color_in);
4333 
4334 	color_out &= m->color_mask;
4335 
4336 	n_pkts = m->n_pkts[color_out];
4337 	n_bytes = m->n_bytes[color_out];
4338 
4339 	instr_meter_color_out_hbo_set(t, ip, color_out);
4340 
4341 	m->n_pkts[color_out] = n_pkts + 1;
4342 	m->n_bytes[color_out] = n_bytes + length;
4343 }
4344 
4345 static inline void
4346 __instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4347 {
4348 	struct meter *m;
4349 	uint64_t time, n_pkts, n_bytes;
4350 	uint32_t length;
4351 	enum rte_color color_in, color_out;
4352 
4353 	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
4354 
4355 	m = instr_meter_idx_imm(p, ip);
4356 	rte_prefetch0(m->n_pkts);
4357 	time = rte_get_tsc_cycles();
4358 	length = instr_meter_length_nbo(t, ip);
4359 	color_in = (enum rte_color)ip->meter.color_in_val;
4360 
4361 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4362 		&m->profile->profile,
4363 		time,
4364 		length,
4365 		color_in);
4366 
4367 	color_out &= m->color_mask;
4368 
4369 	n_pkts = m->n_pkts[color_out];
4370 	n_bytes = m->n_bytes[color_out];
4371 
4372 	instr_meter_color_out_hbo_set(t, ip, color_out);
4373 
4374 	m->n_pkts[color_out] = n_pkts + 1;
4375 	m->n_bytes[color_out] = n_bytes + length;
4376 }
4377 
4378 static inline void
4379 __instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4380 {
4381 	struct meter *m;
4382 	uint64_t time, n_pkts, n_bytes;
4383 	uint32_t length;
4384 	enum rte_color color_in, color_out;
4385 
4386 	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
4387 
4388 	m = instr_meter_idx_imm(p, ip);
4389 	rte_prefetch0(m->n_pkts);
4390 	time = rte_get_tsc_cycles();
4391 	length = instr_meter_length_hbo(t, ip);
4392 	color_in = instr_meter_color_in_hbo(t, ip);
4393 
4394 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4395 		&m->profile->profile,
4396 		time,
4397 		length,
4398 		color_in);
4399 
4400 	color_out &= m->color_mask;
4401 
4402 	n_pkts = m->n_pkts[color_out];
4403 	n_bytes = m->n_bytes[color_out];
4404 
4405 	instr_meter_color_out_hbo_set(t, ip, color_out);
4406 
4407 	m->n_pkts[color_out] = n_pkts + 1;
4408 	m->n_bytes[color_out] = n_bytes + length;
4409 }
4410 
4411 static inline void
4412 __instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4413 {
4414 	struct meter *m;
4415 	uint64_t time, n_pkts, n_bytes;
4416 	uint32_t length;
4417 	enum rte_color color_in, color_out;
4418 
4419 	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
4420 
4421 	m = instr_meter_idx_imm(p, ip);
4422 	rte_prefetch0(m->n_pkts);
4423 	time = rte_get_tsc_cycles();
4424 	length = instr_meter_length_hbo(t, ip);
4425 	color_in = (enum rte_color)ip->meter.color_in_val;
4426 
4427 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4428 		&m->profile->profile,
4429 		time,
4430 		length,
4431 		color_in);
4432 
4433 	color_out &= m->color_mask;
4434 
4435 	n_pkts = m->n_pkts[color_out];
4436 	n_bytes = m->n_bytes[color_out];
4437 
4438 	instr_meter_color_out_hbo_set(t, ip, color_out);
4439 
4440 	m->n_pkts[color_out] = n_pkts + 1;
4441 	m->n_bytes[color_out] = n_bytes + length;
4442 }
4443 
4444 #endif
4445