xref: /dpdk/lib/pipeline/rte_swx_pipeline_internal.h (revision 665b49c51639a10c553433bc2bcd85c7331c631e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
5 #define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
6 
7 #include <inttypes.h>
8 #include <string.h>
9 #include <sys/queue.h>
10 
11 #include <rte_byteorder.h>
12 #include <rte_common.h>
13 #include <rte_cycles.h>
14 #include <rte_prefetch.h>
15 #include <rte_meter.h>
16 
17 #include <rte_swx_table_selector.h>
18 #include <rte_swx_table_learner.h>
19 #include <rte_swx_pipeline.h>
20 #include <rte_swx_ctl.h>
21 
/* Debug tracing: compiled in only when TRACE_LEVEL is defined non-zero by the
 * build; off by default.
 */
#ifndef TRACE_LEVEL
#define TRACE_LEVEL 0
#endif

#if TRACE_LEVEL
#define TRACE(...) printf(__VA_ARGS__)
#else
/* Expand to a no-op statement (rather than nothing) so that TRACE(...);
 * behaves as a single valid statement in every syntactic context, matching
 * the enabled variant (CERT PRE10-C).
 */
#define TRACE(...) do { } while (0)
#endif
31 
32 /*
33  * Environment.
34  */
/* 64-bit network <-> host byte order conversion shorthands, delegating to the
 * DPDK byte order primitives (no-ops on big endian platforms).
 */
#define ntoh64(x) rte_be_to_cpu_64(x)
#define hton64(x) rte_cpu_to_be_64(x)
37 
38 /*
39  * Struct.
40  */
/* One field of a struct type (header, meta-data or mailbox layout). */
struct field {
	char name[RTE_SWX_NAME_SIZE]; /* Field name. */
	uint32_t n_bits;              /* Field width in bits. */
	uint32_t offset;              /* Offset within the parent struct (bit units presumed — confirm against usage). */
	int var_size;                 /* Non-zero when the field has variable size. */
};
47 
/* A named struct layout: ordered list of fields describing a header,
 * meta-data block or extern mailbox.
 */
struct struct_type {
	TAILQ_ENTRY(struct_type) node;
	char name[RTE_SWX_NAME_SIZE];
	struct field *fields;  /* Array of n_fields field descriptors. */
	uint32_t n_fields;
	uint32_t n_bits;       /* Total size in bits. */
	uint32_t n_bits_min;   /* Minimum size in bits (differs from n_bits when var_size — presumed; confirm). */
	int var_size;          /* Non-zero when any field is variable-size. */
};
57 
58 TAILQ_HEAD(struct_type_tailq, struct_type);
59 
60 /*
61  * Input port.
62  */
63 struct port_in_type {
64 	TAILQ_ENTRY(port_in_type) node;
65 	char name[RTE_SWX_NAME_SIZE];
66 	struct rte_swx_port_in_ops ops;
67 };
68 
69 TAILQ_HEAD(port_in_type_tailq, port_in_type);
70 
71 struct port_in {
72 	TAILQ_ENTRY(port_in) node;
73 	struct port_in_type *type;
74 	void *obj;
75 	uint32_t id;
76 };
77 
78 TAILQ_HEAD(port_in_tailq, port_in);
79 
/* Fast-path state for one input port: the RX callback resolved from the port
 * type ops plus its opaque port object.
 */
struct port_in_runtime {
	rte_swx_port_in_pkt_rx_t pkt_rx; /* Packet RX function. */
	void *obj;                       /* Opaque port object passed to pkt_rx. */
};
84 
85 /*
86  * Output port.
87  */
88 struct port_out_type {
89 	TAILQ_ENTRY(port_out_type) node;
90 	char name[RTE_SWX_NAME_SIZE];
91 	struct rte_swx_port_out_ops ops;
92 };
93 
94 TAILQ_HEAD(port_out_type_tailq, port_out_type);
95 
96 struct port_out {
97 	TAILQ_ENTRY(port_out) node;
98 	struct port_out_type *type;
99 	void *obj;
100 	uint32_t id;
101 };
102 
103 TAILQ_HEAD(port_out_tailq, port_out);
104 
/* Fast-path state for one output port: TX callbacks resolved from the port
 * type ops plus its opaque port object.
 */
struct port_out_runtime {
	rte_swx_port_out_pkt_tx_t pkt_tx;                       /* Regular packet TX. */
	rte_swx_port_out_pkt_fast_clone_tx_t pkt_fast_clone_tx; /* Fast-clone TX (presumably for mirroring). */
	rte_swx_port_out_pkt_clone_tx_t pkt_clone_tx;           /* Full-clone TX (presumably for mirroring). */
	rte_swx_port_out_flush_t flush;                         /* Flush buffered packets. */
	void *obj;                                              /* Opaque port object passed to the callbacks. */
};
112 
113 /*
114  * Packet mirroring.
115  */
/* Configuration of one packet mirroring session. */
struct mirroring_session {
	uint32_t port_id;           /* Output port the mirror copy is sent to. */
	int fast_clone;             /* Non-zero: use the fast-clone TX path. */
	uint32_t truncation_length; /* Mirror copy truncated to this many bytes — presumed; confirm against TX path. */
};
121 
122 /*
123  * Extern object.
124  */
125 struct extern_type_member_func {
126 	TAILQ_ENTRY(extern_type_member_func) node;
127 	char name[RTE_SWX_NAME_SIZE];
128 	rte_swx_extern_type_member_func_t func;
129 	uint32_t id;
130 };
131 
132 TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);
133 
134 struct extern_type {
135 	TAILQ_ENTRY(extern_type) node;
136 	char name[RTE_SWX_NAME_SIZE];
137 	struct struct_type *mailbox_struct_type;
138 	rte_swx_extern_type_constructor_t constructor;
139 	rte_swx_extern_type_destructor_t destructor;
140 	struct extern_type_member_func_tailq funcs;
141 	uint32_t n_funcs;
142 };
143 
144 TAILQ_HEAD(extern_type_tailq, extern_type);
145 
146 struct extern_obj {
147 	TAILQ_ENTRY(extern_obj) node;
148 	char name[RTE_SWX_NAME_SIZE];
149 	struct extern_type *type;
150 	void *obj;
151 	uint32_t struct_id;
152 	uint32_t id;
153 };
154 
155 TAILQ_HEAD(extern_obj_tailq, extern_obj);
156 
157 #ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
158 #define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
159 #endif
160 
/* Per-thread run-time state of one extern object instance. */
struct extern_obj_runtime {
	void *obj;        /* Object instance created by the type constructor. */
	uint8_t *mailbox; /* Mailbox struct shared between pipeline and object. */
	/* Member functions, presumably indexed by member function ID. */
	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
};
166 
167 /*
168  * Extern function.
169  */
170 struct extern_func {
171 	TAILQ_ENTRY(extern_func) node;
172 	char name[RTE_SWX_NAME_SIZE];
173 	struct struct_type *mailbox_struct_type;
174 	rte_swx_extern_func_t func;
175 	uint32_t struct_id;
176 	uint32_t id;
177 };
178 
179 TAILQ_HEAD(extern_func_tailq, extern_func);
180 
/* Per-thread run-time state of one extern function. */
struct extern_func_runtime {
	uint8_t *mailbox;          /* Mailbox struct used to pass arguments/results. */
	rte_swx_extern_func_t func; /* The extern function itself. */
};
185 
186 /*
187  * Hash function.
188  */
189 struct hash_func {
190 	TAILQ_ENTRY(hash_func) node;
191 	char name[RTE_SWX_NAME_SIZE];
192 	rte_swx_hash_func_t func;
193 	uint32_t id;
194 };
195 
196 TAILQ_HEAD(hash_func_tailq, hash_func);
197 
/* Fast-path handle for one registered hash function. */
struct hash_func_runtime {
	rte_swx_hash_func_t func; /* Hash computation function. */
};
201 
202 /*
203  * RSS.
204  */
205 struct rss {
206 	TAILQ_ENTRY(rss) node;
207 	char name[RTE_SWX_NAME_SIZE];
208 	uint32_t id;
209 };
210 
211 TAILQ_HEAD(rss_tailq, rss);
212 
213 struct rss_runtime {
214 	uint32_t key_size; /* key size in bytes. */
215 	uint8_t key[0]; /* key. */
216 };
217 
218 /*
219  * Header.
220  */
221 struct header {
222 	TAILQ_ENTRY(header) node;
223 	char name[RTE_SWX_NAME_SIZE];
224 	struct struct_type *st;
225 	uint32_t struct_id;
226 	uint32_t id;
227 };
228 
229 TAILQ_HEAD(header_tailq, header);
230 
/* Per-thread run-time state of one extracted/generated header. */
struct header_runtime {
	uint8_t *ptr0;    /* Start of the storage area for this header. */
	uint32_t n_bytes; /* Current header size in bytes. */
};
235 
/* Per-thread run-time state of one emitted header. */
struct header_out_runtime {
	uint8_t *ptr0;    /* Start of the output storage area for this header. */
	uint8_t *ptr;     /* Current write position — presumed; confirm against emit path. */
	uint32_t n_bytes; /* Header size in bytes. */
};
241 
242 /*
243  * Instruction.
244  */
245 
246 /* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
247  * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
248  * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
249  * when transferred to packet meta-data and in NBO when transferred to packet
250  * headers.
251  */
252 
253 /* Notation conventions:
254  *    -Header field: H = h.header.field (dst/src)
255  *    -Meta-data field: M = m.field (dst/src)
256  *    -Extern object mailbox field: E = e.field (dst/src)
257  *    -Extern function mailbox field: F = f.field (dst/src)
258  *    -Table action data field: T = t.field (src only)
259  *    -Immediate value: I = 32-bit unsigned value (src only)
260  */
261 
262 enum instruction_type {
263 	/* rx m.port_in */
264 	INSTR_RX,
265 
266 	/* tx port_out
267 	 * port_out = MI
268 	 */
269 	INSTR_TX,   /* port_out = M */
270 	INSTR_TX_I, /* port_out = I */
271 	INSTR_DROP,
272 
273 	/*
274 	 * mirror slot_id session_id
275 	 * slot_id = MEFT
276 	 * session_id = MEFT
277 	 */
278 	INSTR_MIRROR,
279 
280 	/* recirculate
281 	 */
282 	INSTR_RECIRCULATE,
283 
284 	/* recircid m.recirc_pass_id
285 	 * Read the internal recirculation pass ID into the specified meta-data field.
286 	 */
287 	INSTR_RECIRCID,
288 
289 	/* extract h.header */
290 	INSTR_HDR_EXTRACT,
291 	INSTR_HDR_EXTRACT2,
292 	INSTR_HDR_EXTRACT3,
293 	INSTR_HDR_EXTRACT4,
294 	INSTR_HDR_EXTRACT5,
295 	INSTR_HDR_EXTRACT6,
296 	INSTR_HDR_EXTRACT7,
297 	INSTR_HDR_EXTRACT8,
298 
299 	/* extract h.header m.last_field_size */
300 	INSTR_HDR_EXTRACT_M,
301 
302 	/* lookahead h.header */
303 	INSTR_HDR_LOOKAHEAD,
304 
305 	/* emit h.header */
306 	INSTR_HDR_EMIT,
307 	INSTR_HDR_EMIT_TX,
308 	INSTR_HDR_EMIT2_TX,
309 	INSTR_HDR_EMIT3_TX,
310 	INSTR_HDR_EMIT4_TX,
311 	INSTR_HDR_EMIT5_TX,
312 	INSTR_HDR_EMIT6_TX,
313 	INSTR_HDR_EMIT7_TX,
314 	INSTR_HDR_EMIT8_TX,
315 
316 	/* validate h.header */
317 	INSTR_HDR_VALIDATE,
318 
319 	/* invalidate h.header */
320 	INSTR_HDR_INVALIDATE,
321 
322 	/* mov dst src
323 	 * dst = src
324 	 * dst = HMEF, src = HMEFTI
325 	 */
326 	INSTR_MOV,     /* dst = MEF, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
327 	INSTR_MOV_MH,  /* dst = MEF, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
328 	INSTR_MOV_HM,  /* dst = H, src = MEFT; size(dst) <= 64 bits, size(src) <= 64 bits. */
329 	INSTR_MOV_HH,  /* dst = H, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
330 	INSTR_MOV_DMA, /* dst and src in NBO format. */
331 	INSTR_MOV_128, /* dst and src in NBO format, size(dst) = size(src) = 128 bits. */
332 	INSTR_MOV_128_32, /* dst and src in NBO format, size(dst) = 128 bits, size(src) = 32 b. */
333 	INSTR_MOV_I,   /* dst = HMEF, src = I; size(dst) <= 64 bits. */
334 
335 	/* dma h.header t.field
336 	 * memcpy(h.header, t.field, sizeof(h.header))
337 	 */
338 	INSTR_DMA_HT,
339 	INSTR_DMA_HT2,
340 	INSTR_DMA_HT3,
341 	INSTR_DMA_HT4,
342 	INSTR_DMA_HT5,
343 	INSTR_DMA_HT6,
344 	INSTR_DMA_HT7,
345 	INSTR_DMA_HT8,
346 
347 	/* add dst src
348 	 * dst += src
349 	 * dst = HMEF, src = HMEFTI
350 	 */
351 	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
352 	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
353 	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
354 	INSTR_ALU_ADD_HH, /* dst = H, src = H */
355 	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
356 	INSTR_ALU_ADD_HI, /* dst = H, src = I */
357 
358 	/* sub dst src
359 	 * dst -= src
360 	 * dst = HMEF, src = HMEFTI
361 	 */
362 	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
363 	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
364 	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
365 	INSTR_ALU_SUB_HH, /* dst = H, src = H */
366 	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
367 	INSTR_ALU_SUB_HI, /* dst = H, src = I */
368 
369 	/* ckadd dst src
370 	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
371 	 * dst = H, src = {H, h.header}, '+ = 1's complement addition operator
372 	 */
373 	INSTR_ALU_CKADD_FIELD,    /* src = H */
374 	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 bytes. */
375 	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with sizeof(header) any 4-byte multiple. */
376 
377 	/* cksub dst src
378 	 * dst = dst '- src
379 	 * dst = H, src = H, '- = 1's complement subtraction operator
380 	 */
381 	INSTR_ALU_CKSUB_FIELD,
382 
383 	/* and dst src
384 	 * dst &= src
385 	 * dst = HMEF, src = HMEFTI
386 	 */
387 	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
388 	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
389 	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
390 	INSTR_ALU_AND_HH, /* dst = H, src = H */
391 	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */
392 
393 	/* or dst src
394 	 * dst |= src
395 	 * dst = HMEF, src = HMEFTI
396 	 */
397 	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
398 	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
399 	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
400 	INSTR_ALU_OR_HH, /* dst = H, src = H */
401 	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */
402 
403 	/* xor dst src
404 	 * dst ^= src
405 	 * dst = HMEF, src = HMEFTI
406 	 */
407 	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
408 	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
409 	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
410 	INSTR_ALU_XOR_HH, /* dst = H, src = H */
411 	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */
412 
413 	/* shl dst src
414 	 * dst <<= src
415 	 * dst = HMEF, src = HMEFTI
416 	 */
417 	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
418 	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
419 	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
420 	INSTR_ALU_SHL_HH, /* dst = H, src = H */
421 	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
422 	INSTR_ALU_SHL_HI, /* dst = H, src = I */
423 
424 	/* shr dst src
425 	 * dst >>= src
426 	 * dst = HMEF, src = HMEFTI
427 	 */
428 	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
429 	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
430 	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
431 	INSTR_ALU_SHR_HH, /* dst = H, src = H */
432 	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
433 	INSTR_ALU_SHR_HI, /* dst = H, src = I */
434 
435 	/* regprefetch REGARRAY index
436 	 * prefetch REGARRAY[index]
437 	 * index = HMEFTI
438 	 */
439 	INSTR_REGPREFETCH_RH, /* index = H */
440 	INSTR_REGPREFETCH_RM, /* index = MEFT */
441 	INSTR_REGPREFETCH_RI, /* index = I */
442 
443 	/* regrd dst REGARRAY index
444 	 * dst = REGARRAY[index]
445 	 * dst = HMEF, index = HMEFTI
446 	 */
447 	INSTR_REGRD_HRH, /* dst = H, index = H */
448 	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
449 	INSTR_REGRD_HRI, /* dst = H, index = I */
450 	INSTR_REGRD_MRH, /* dst = MEF, index = H */
451 	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
452 	INSTR_REGRD_MRI, /* dst = MEF, index = I */
453 
454 	/* regwr REGARRAY index src
455 	 * REGARRAY[index] = src
456 	 * index = HMEFTI, src = HMEFTI
457 	 */
458 	INSTR_REGWR_RHH, /* index = H, src = H */
459 	INSTR_REGWR_RHM, /* index = H, src = MEFT */
460 	INSTR_REGWR_RHI, /* index = H, src = I */
461 	INSTR_REGWR_RMH, /* index = MEFT, src = H */
462 	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
463 	INSTR_REGWR_RMI, /* index = MEFT, src = I */
464 	INSTR_REGWR_RIH, /* index = I, src = H */
465 	INSTR_REGWR_RIM, /* index = I, src = MEFT */
466 	INSTR_REGWR_RII, /* index = I, src = I */
467 
468 	/* regadd REGARRAY index src
469 	 * REGARRAY[index] += src
470 	 * index = HMEFTI, src = HMEFTI
471 	 */
472 	INSTR_REGADD_RHH, /* index = H, src = H */
473 	INSTR_REGADD_RHM, /* index = H, src = MEFT */
474 	INSTR_REGADD_RHI, /* index = H, src = I */
475 	INSTR_REGADD_RMH, /* index = MEFT, src = H */
476 	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
477 	INSTR_REGADD_RMI, /* index = MEFT, src = I */
478 	INSTR_REGADD_RIH, /* index = I, src = H */
479 	INSTR_REGADD_RIM, /* index = I, src = MEFT */
480 	INSTR_REGADD_RII, /* index = I, src = I */
481 
482 	/* metprefetch METARRAY index
483 	 * prefetch METARRAY[index]
484 	 * index = HMEFTI
485 	 */
486 	INSTR_METPREFETCH_H, /* index = H */
487 	INSTR_METPREFETCH_M, /* index = MEFT */
488 	INSTR_METPREFETCH_I, /* index = I */
489 
490 	/* meter METARRAY index length color_in color_out
491 	 * color_out = meter(METARRAY[index], length, color_in)
492 	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
493 	 */
494 	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
495 	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
496 	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
497 	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
498 	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
499 	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
500 	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
501 	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
502 	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
503 	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
504 	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
505 	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */
506 
507 	/* table TABLE */
508 	INSTR_TABLE,
509 	INSTR_TABLE_AF,
510 	INSTR_SELECTOR,
511 	INSTR_LEARNER,
512 	INSTR_LEARNER_AF,
513 
514 	/* learn ACTION_NAME [ m.action_first_arg ] m.timeout_id */
515 	INSTR_LEARNER_LEARN,
516 
517 	/* rearm [ m.timeout_id ] */
518 	INSTR_LEARNER_REARM,
519 	INSTR_LEARNER_REARM_NEW,
520 
521 	/* forget */
522 	INSTR_LEARNER_FORGET,
523 
524 	/* entryid m.table_entry_id
525 	 * Read the internal table entry ID into the specified meta-data field.
526 	 */
527 	INSTR_ENTRYID,
528 
529 	/* extern e.obj.func */
530 	INSTR_EXTERN_OBJ,
531 
532 	/* extern f.func */
533 	INSTR_EXTERN_FUNC,
534 
535 	/* hash HASH_FUNC_NAME dst src_first src_last
536 	 * Compute hash value over range of struct fields.
537 	 * dst = M
538 	 * src_first = HMEFT
539 	 * src_last = HMEFT
540 	 * src_first and src_last must be fields within the same struct
541 	 */
542 	INSTR_HASH_FUNC,
543 
544 	/* rss RSS_OBJ_NAME dst src_first src_last
545 	 * Compute the RSS hash value over range of struct fields.
546 	 * dst = M
547 	 * src_first = HMEFT
548 	 * src_last = HMEFT
549 	 * src_first and src_last must be fields within the same struct
550 	 */
551 	INSTR_RSS,
552 
553 	/* jmp LABEL
554 	 * Unconditional jump
555 	 */
556 	INSTR_JMP,
557 
558 	/* jmpv LABEL h.header
559 	 * Jump if header is valid
560 	 */
561 	INSTR_JMP_VALID,
562 
563 	/* jmpnv LABEL h.header
564 	 * Jump if header is invalid
565 	 */
566 	INSTR_JMP_INVALID,
567 
568 	/* jmph LABEL
569 	 * Jump if table lookup hit
570 	 */
571 	INSTR_JMP_HIT,
572 
573 	/* jmpnh LABEL
574 	 * Jump if table lookup miss
575 	 */
576 	INSTR_JMP_MISS,
577 
578 	/* jmpa LABEL ACTION
579 	 * Jump if action run
580 	 */
581 	INSTR_JMP_ACTION_HIT,
582 
583 	/* jmpna LABEL ACTION
584 	 * Jump if action not run
585 	 */
586 	INSTR_JMP_ACTION_MISS,
587 
588 	/* jmpeq LABEL a b
589 	 * Jump if a is equal to b
590 	 * a = HMEFT, b = HMEFTI
591 	 */
592 	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
593 	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
594 	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
595 	INSTR_JMP_EQ_HH, /* a = H, b = H */
596 	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
597 
598 	/* jmpneq LABEL a b
599 	 * Jump if a is not equal to b
600 	 * a = HMEFT, b = HMEFTI
601 	 */
602 	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
603 	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
604 	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
605 	INSTR_JMP_NEQ_HH, /* a = H, b = H */
606 	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
607 
608 	/* jmplt LABEL a b
609 	 * Jump if a is less than b
610 	 * a = HMEFT, b = HMEFTI
611 	 */
612 	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
613 	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
614 	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
615 	INSTR_JMP_LT_HH, /* a = H, b = H */
616 	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
617 	INSTR_JMP_LT_HI, /* a = H, b = I */
618 
619 	/* jmpgt LABEL a b
620 	 * Jump if a is greater than b
621 	 * a = HMEFT, b = HMEFTI
622 	 */
623 	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
624 	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
625 	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
626 	INSTR_JMP_GT_HH, /* a = H, b = H */
627 	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
628 	INSTR_JMP_GT_HI, /* a = H, b = I */
629 
630 	/* return
631 	 * Return from action
632 	 */
633 	INSTR_RETURN,
634 
635 	/* Start of custom instructions. */
636 	INSTR_CUSTOM_0,
637 };
638 
/* Generic instruction operand: a bit-field of n_bits located at byte offset
 * `offset` inside the struct identified by struct_id (see thread->structs and
 * instr_operand_hbo(), which indexes a uint8_t buffer with this offset).
 */
struct instr_operand {
	uint8_t struct_id; /* Index into thread->structs[]. */
	uint8_t n_bits;    /* Operand width in bits. */
	uint8_t offset;    /* Byte offset within the struct. */
	uint8_t pad;       /* Padding to 4 bytes. */
};
645 
646 struct instr_io {
647 	struct {
648 		union {
649 			struct {
650 				uint8_t offset;
651 				uint8_t n_bits;
652 				uint8_t pad[2];
653 			};
654 
655 			uint32_t val;
656 		};
657 	} io;
658 
659 	struct {
660 		uint8_t header_id[8];
661 		uint8_t struct_id[8];
662 		uint8_t n_bytes[8];
663 	} hdr;
664 };
665 
666 struct instr_hdr_validity {
667 	uint8_t header_id;
668 	uint8_t struct_id;
669 };
670 
671 struct instr_table {
672 	uint8_t table_id;
673 };
674 
675 struct instr_learn {
676 	uint8_t action_id;
677 	uint8_t mf_first_arg_offset;
678 	uint8_t mf_timeout_id_offset;
679 	uint8_t mf_timeout_id_n_bits;
680 };
681 
682 struct instr_extern_obj {
683 	uint8_t ext_obj_id;
684 	uint8_t func_id;
685 };
686 
687 struct instr_extern_func {
688 	uint8_t ext_func_id;
689 };
690 
691 struct instr_hash_func {
692 	uint8_t hash_func_id;
693 
694 	struct {
695 		uint8_t offset;
696 		uint8_t n_bits;
697 	} dst;
698 
699 	struct {
700 		uint8_t struct_id;
701 		uint16_t offset;
702 		uint16_t n_bytes;
703 	} src;
704 };
705 
706 struct instr_rss {
707 	uint8_t rss_obj_id;
708 
709 	struct {
710 		uint8_t offset;
711 		uint8_t n_bits;
712 	} dst;
713 
714 	struct {
715 		uint8_t struct_id;
716 		uint16_t offset;
717 		uint16_t n_bytes;
718 	} src;
719 };
720 
721 struct instr_dst_src {
722 	struct instr_operand dst;
723 	union {
724 		struct instr_operand src;
725 		uint64_t src_val;
726 	};
727 };
728 
729 struct instr_regarray {
730 	uint8_t regarray_id;
731 	uint8_t pad[3];
732 
733 	union {
734 		struct instr_operand idx;
735 		uint32_t idx_val;
736 	};
737 
738 	union {
739 		struct instr_operand dstsrc;
740 		uint64_t dstsrc_val;
741 	};
742 };
743 
744 struct instr_meter {
745 	uint8_t metarray_id;
746 	uint8_t pad[3];
747 
748 	union {
749 		struct instr_operand idx;
750 		uint32_t idx_val;
751 	};
752 
753 	struct instr_operand length;
754 
755 	union {
756 		struct instr_operand color_in;
757 		uint32_t color_in_val;
758 	};
759 
760 	struct instr_operand color_out;
761 };
762 
763 struct instr_dma {
764 	struct {
765 		uint8_t header_id[8];
766 		uint8_t struct_id[8];
767 	} dst;
768 
769 	struct {
770 		uint8_t offset[8];
771 	} src;
772 
773 	uint16_t n_bytes[8];
774 };
775 
776 struct instr_jmp {
777 	struct instruction *ip;
778 
779 	union {
780 		struct instr_operand a;
781 		uint8_t header_id;
782 		uint8_t action_id;
783 	};
784 
785 	union {
786 		struct instr_operand b;
787 		uint64_t b_val;
788 	};
789 };
790 
/* One pipeline instruction: the opcode plus a union of per-opcode operand
 * layouts; `type` selects which union member is valid.
 */
struct instruction {
	enum instruction_type type;
	union {
		struct instr_io io;
		struct instr_dst_src mirror;
		struct instr_hdr_validity valid;
		struct instr_dst_src mov;
		struct instr_regarray regarray;
		struct instr_meter meter;
		struct instr_dma dma;
		struct instr_dst_src alu;
		struct instr_table table;
		struct instr_learn learn;
		struct instr_extern_obj ext_obj;
		struct instr_extern_func ext_func;
		struct instr_hash_func hash_func;
		struct instr_rss rss;
		struct instr_jmp jmp;
	};
};
811 
/* Translation-time metadata kept alongside each instruction (not used on the
 * fast path).
 */
struct instruction_data {
	char label[RTE_SWX_NAME_SIZE];     /* Label attached to this instruction, if any. */
	char jmp_label[RTE_SWX_NAME_SIZE]; /* Target label when this is a jump instruction. */
	uint32_t n_users; /* user = jmp instruction to this instruction. */
	int invalid;      /* Non-zero when the instruction has been invalidated (e.g. optimized out) — presumed; confirm. */
};
818 
819 typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
820 
821 /*
822  * Action.
823  */
824 typedef void
825 (*action_func_t)(struct rte_swx_pipeline *p);
826 
827 struct action {
828 	TAILQ_ENTRY(action) node;
829 	char name[RTE_SWX_NAME_SIZE];
830 	struct struct_type *st;
831 	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
832 	struct instruction *instructions;
833 	struct instruction_data *instruction_data;
834 	uint32_t n_instructions;
835 	uint32_t id;
836 };
837 
838 TAILQ_HEAD(action_tailq, action);
839 
840 /*
841  * Table.
842  */
843 struct table_type {
844 	TAILQ_ENTRY(table_type) node;
845 	char name[RTE_SWX_NAME_SIZE];
846 	enum rte_swx_table_match_type match_type;
847 	struct rte_swx_table_ops ops;
848 };
849 
850 TAILQ_HEAD(table_type_tailq, table_type);
851 
852 struct match_field {
853 	enum rte_swx_table_match_type match_type;
854 	struct field *field;
855 };
856 
857 struct table {
858 	TAILQ_ENTRY(table) node;
859 	char name[RTE_SWX_NAME_SIZE];
860 	char args[RTE_SWX_NAME_SIZE];
861 	struct table_type *type; /* NULL when n_fields == 0. */
862 
863 	/* Match. */
864 	struct match_field *fields;
865 	uint32_t n_fields;
866 	struct header *header; /* Only valid when n_fields > 0. */
867 
868 	/* Action. */
869 	struct action **actions;
870 	struct action *default_action;
871 	uint8_t *default_action_data;
872 	uint32_t n_actions;
873 	int default_action_is_const;
874 	uint32_t action_data_size_max;
875 	int *action_is_for_table_entries;
876 	int *action_is_for_default_entry;
877 
878 	struct hash_func *hf;
879 	uint32_t size;
880 	uint32_t id;
881 };
882 
883 TAILQ_HEAD(table_tailq, table);
884 
885 struct table_runtime {
886 	rte_swx_table_lookup_t func;
887 	void *mailbox;
888 	uint8_t **key;
889 };
890 
891 struct table_statistics {
892 	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
893 	uint64_t *n_pkts_action;
894 };
895 
896 /*
897  * Selector.
898  */
899 struct selector {
900 	TAILQ_ENTRY(selector) node;
901 	char name[RTE_SWX_NAME_SIZE];
902 
903 	struct field *group_id_field;
904 	struct field **selector_fields;
905 	uint32_t n_selector_fields;
906 	struct header *selector_header;
907 	struct field *member_id_field;
908 
909 	uint32_t n_groups_max;
910 	uint32_t n_members_per_group_max;
911 
912 	uint32_t id;
913 };
914 
915 TAILQ_HEAD(selector_tailq, selector);
916 
917 struct selector_runtime {
918 	void *mailbox;
919 	uint8_t **group_id_buffer;
920 	uint8_t **selector_buffer;
921 	uint8_t **member_id_buffer;
922 };
923 
924 struct selector_statistics {
925 	uint64_t n_pkts;
926 };
927 
928 /*
929  * Learner table.
930  */
931 struct learner {
932 	TAILQ_ENTRY(learner) node;
933 	char name[RTE_SWX_NAME_SIZE];
934 
935 	/* Match. */
936 	struct field **fields;
937 	uint32_t n_fields;
938 	struct header *header;
939 
940 	/* Action. */
941 	struct action **actions;
942 	struct action *default_action;
943 	uint8_t *default_action_data;
944 	uint32_t n_actions;
945 	int default_action_is_const;
946 	uint32_t action_data_size_max;
947 	int *action_is_for_table_entries;
948 	int *action_is_for_default_entry;
949 
950 	struct hash_func *hf;
951 	uint32_t size;
952 	uint32_t timeout[RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX];
953 	uint32_t n_timeouts;
954 	uint32_t id;
955 };
956 
957 TAILQ_HEAD(learner_tailq, learner);
958 
959 struct learner_runtime {
960 	void *mailbox;
961 	uint8_t **key;
962 };
963 
964 struct learner_statistics {
965 	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
966 	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
967 	uint64_t n_pkts_rearm;
968 	uint64_t n_pkts_forget;
969 	uint64_t *n_pkts_action;
970 };
971 
972 /*
973  * Register array.
974  */
975 struct regarray {
976 	TAILQ_ENTRY(regarray) node;
977 	char name[RTE_SWX_NAME_SIZE];
978 	uint64_t init_val;
979 	uint32_t size;
980 	uint32_t id;
981 };
982 
983 TAILQ_HEAD(regarray_tailq, regarray);
984 
985 struct regarray_runtime {
986 	uint64_t *regarray;
987 	uint32_t size_mask;
988 };
989 
990 /*
991  * Meter array.
992  */
993 struct meter_profile {
994 	TAILQ_ENTRY(meter_profile) node;
995 	char name[RTE_SWX_NAME_SIZE];
996 	struct rte_meter_trtcm_params params;
997 	struct rte_meter_trtcm_profile profile;
998 	uint32_t n_users;
999 };
1000 
1001 TAILQ_HEAD(meter_profile_tailq, meter_profile);
1002 
1003 struct metarray {
1004 	TAILQ_ENTRY(metarray) node;
1005 	char name[RTE_SWX_NAME_SIZE];
1006 	uint32_t size;
1007 	uint32_t id;
1008 };
1009 
1010 TAILQ_HEAD(metarray_tailq, metarray);
1011 
1012 struct meter {
1013 	struct rte_meter_trtcm m;
1014 	struct meter_profile *profile;
1015 	enum rte_color color_mask;
1016 	uint8_t pad[20];
1017 
1018 	uint64_t n_pkts[RTE_COLORS];
1019 	uint64_t n_bytes[RTE_COLORS];
1020 };
1021 
1022 struct metarray_runtime {
1023 	struct meter *metarray;
1024 	uint32_t size_mask;
1025 };
1026 
1027 /*
1028  * Pipeline.
1029  */
/* Per-thread pipeline execution context: all run-time state needed to process
 * the current packet (headers, meta-data, table state, extern objects and the
 * instruction pointer).
 */
struct thread {
	/* Packet. */
	struct rte_swx_pkt pkt;
	uint8_t *ptr;
	uint32_t *mirroring_slots;
	uint64_t mirroring_slots_mask;
	int recirculate;
	uint32_t recirc_pass_id;

	/* Structures: pointer per struct ID (headers, meta-data, mailboxes,
	 * table action data); operands index this array via struct_id.
	 */
	uint8_t **structs;

	/* Packet headers. */
	struct header_runtime *headers; /* Extracted or generated headers. */
	struct header_out_runtime *headers_out; /* Emitted headers. */
	uint8_t *header_storage;
	uint8_t *header_out_storage;
	uint64_t valid_headers; /* Bit mask: bit i set when header i is valid (see HEADER_VALID). */
	uint32_t n_headers_out;

	/* Packet meta-data. */
	uint8_t *metadata;

	/* Tables. */
	struct table_runtime *tables;
	struct selector_runtime *selectors;
	struct learner_runtime *learners;
	struct rte_swx_table_state *table_state;
	uint64_t action_id;
	size_t entry_id;
	int hit; /* 0 = Miss, 1 = Hit. */
	uint32_t learner_id;
	uint64_t time;

	/* Extern objects and functions. */
	struct extern_obj_runtime *extern_objs;
	struct extern_func_runtime *extern_funcs;

	/* Instructions. */
	struct instruction *ip;  /* Next instruction to execute. */
	struct instruction *ret; /* Presumably the return target for INSTR_RETURN — confirm. */
};
1072 
/* 64-bit mask helpers: test, set and clear bit `pos` of `mask` (pure
 * expressions; the input mask is not modified).
 */
#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))

/* Non-zero when header `header_id` is currently valid for this thread. */
#define HEADER_VALID(thread, header_id) \
	MASK64_BIT_GET((thread)->valid_headers, header_id)
1079 
1080 static inline uint64_t
1081 instr_operand_hbo(struct thread *t, const struct instr_operand *x)
1082 {
1083 	uint8_t *x_struct = t->structs[x->struct_id];
1084 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
1085 	uint64_t x64 = *x64_ptr;
1086 	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
1087 
1088 	return x64 & x64_mask;
1089 }
1090 
1091 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1092 
1093 static inline uint64_t
1094 instr_operand_nbo(struct thread *t, const struct instr_operand *x)
1095 {
1096 	uint8_t *x_struct = t->structs[x->struct_id];
1097 	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
1098 	uint64_t x64 = *x64_ptr;
1099 
1100 	return ntoh64(x64) >> (64 - x->n_bits);
1101 }
1102 
1103 #else
1104 
1105 #define instr_operand_nbo instr_operand_hbo
1106 
1107 #endif
1108 
/* ALU(thread, ip, operator): dst = dst <operator> src with both operands in
 * Host Byte Order. Each operand is a 64-bit load at its byte offset, masked
 * to n_bits; the result is written back into dst's bit-field, preserving the
 * surrounding bits of the loaded 64-bit word.
 * NOTE(review): unaligned, type-punned loads from uint8_t buffers; assumes
 * 1 <= n_bits <= 64. `thread`, `ip` and `operator` are each expanded more
 * than once — pass only side-effect-free arguments.
 */
#define ALU(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
1127 
1128 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
1129 
/* ALU_MH: like ALU, but src is a header field in Network Byte Order — it is
 * byte-swapped and right-aligned (ntoh64 >> (64 - n_bits)) before the
 * operation; dst stays in Host Byte Order. Same multiple-expansion and
 * unaligned-load caveats as ALU.
 */
#define ALU_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}
1147 
/* ALU_HM: like ALU, but dst is a header field in Network Byte Order — it is
 * converted to host order for the computation, and the result is left-shifted
 * back into position and byte-swapped (hton64) before being merged into the
 * stored word. src stays in Host Byte Order. Same caveats as ALU.
 */
#define ALU_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}
1167 
1168 #define ALU_HM_FAST(thread, ip, operator)  \
1169 {                                                                                 \
1170 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
1171 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
1172 	uint64_t dst64 = *dst64_ptr;                                              \
1173 	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
1174 	uint64_t dst = dst64 & dst64_mask;                                        \
1175 										  \
1176 	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
1177 	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
1178 	uint64_t src64 = *src64_ptr;                                              \
1179 	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
1180 	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
1181 										  \
1182 	uint64_t result = dst operator src;                                       \
1183 										  \
1184 	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
1185 }
1186 
1187 #define ALU_HH(thread, ip, operator)  \
1188 {                                                                              \
1189 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
1190 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
1191 	uint64_t dst64 = *dst64_ptr;                                           \
1192 	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
1193 	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
1194 									       \
1195 	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
1196 	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
1197 	uint64_t src64 = *src64_ptr;                                           \
1198 	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
1199 									       \
1200 	uint64_t result = dst operator src;                                    \
1201 	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
1202 									       \
1203 	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
1204 }
1205 
1206 #define ALU_HH_FAST(thread, ip, operator)  \
1207 {                                                                                             \
1208 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
1209 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
1210 	uint64_t dst64 = *dst64_ptr;                                                          \
1211 	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
1212 	uint64_t dst = dst64 & dst64_mask;                                                    \
1213 											      \
1214 	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
1215 	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
1216 	uint64_t src64 = *src64_ptr;                                                          \
1217 	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
1218 											      \
1219 	uint64_t result = dst operator src;                                                   \
1220 											      \
1221 	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
1222 }
1223 
1224 #else
1225 
1226 #define ALU_MH ALU
1227 #define ALU_HM ALU
1228 #define ALU_HM_FAST ALU
1229 #define ALU_HH ALU
1230 #define ALU_HH_FAST ALU
1231 
1232 #endif
1233 
/*
 * ALU with host-endian field destination and immediate source (the immediate
 * value is pre-resolved into the instruction at translation time).
 */
#define ALU_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = dst64 & dst64_mask;                                     \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
}

/* Meta-data destination with immediate source: identical to ALU_I. */
#define ALU_MI ALU_I

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * ALU with header field destination (big-endian) and immediate source: the
 * destination is byte-swapped into host order for the operation, then the
 * result is swapped back into the stored position.
 */
#define ALU_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
									       \
	uint64_t src = (ip)->alu.src_val;                                      \
									       \
	uint64_t result = dst operator src;                                    \
	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
}

#else

/* Big-endian CPU: header fields are host-endian, no conversion needed. */
#define ALU_HI ALU_I

#endif
1274 
/*
 * Move between two host-endian fields: copy the low src.n_bits bits of the
 * source into the low dst.n_bits bits of the destination, preserving the
 * destination's neighboring bits within the same 64-bit word.
 */
#define MOV(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * Move with host-endian destination and big-endian header field source: the
 * source is byte-swapped into host order before the masked store.
 */
#define MOV_MH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}

/*
 * Move with big-endian header field destination and host-endian source: the
 * source value is byte-swapped and shifted into the destination's stored bit
 * position before being merged in.
 */
#define MOV_HM(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
	uint64_t src = src64 & src64_mask;                                     \
									       \
	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}

/*
 * Move between two big-endian header fields: the source's stored bits are
 * re-aligned to the destination's stored bit position with plain shifts, no
 * byte swap needed.
 */
#define MOV_HH(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
	uint64_t src64 = *src64_ptr;                                           \
									       \
	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
}

#else

/* Big-endian CPU: all move variants collapse to the generic MOV macro. */
#define MOV_MH MOV
#define MOV_HM MOV
#define MOV_HH MOV

#endif

/*
 * Move with immediate source (pre-resolved into the instruction).
 */
#define MOV_I(thread, ip)  \
{                                                                              \
	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
	uint64_t dst64 = *dst64_ptr;                                           \
	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
									       \
	uint64_t src = (ip)->mov.src_val;                                      \
									       \
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
}
1360 
/*
 * Conditional jump comparing two host-endian fields: the thread's instruction
 * pointer is set to the jump target when the comparison holds, otherwise it
 * advances to the next instruction.
 */
#define JMP_CMP(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * Conditional jump with host-endian operand a and big-endian header field
 * operand b (b is byte-swapped into host order before the compare).
 */
#define JMP_CMP_MH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/*
 * Conditional jump with big-endian header field operand a and host-endian
 * operand b (a is byte-swapped into host order before the compare).
 */
#define JMP_CMP_HM(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
	uint64_t b = b64 & b64_mask;                                           \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/*
 * Conditional jump with both operands being big-endian header fields: both
 * are byte-swapped into host order before the compare.
 */
#define JMP_CMP_HH(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/*
 * Faster JMP_CMP_HH variant: both operands are left-aligned in their stored
 * (big-endian) byte order without any byte swap, so magnitude ordering is not
 * normalized -- presumably only used with the == / != operators; confirm
 * against the instruction translation logic.
 */
#define JMP_CMP_HH_FAST(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
									       \
	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
	uint64_t b64 = *b64_ptr;                                               \
	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#else

/* Big-endian CPU: all compare-and-jump variants collapse to JMP_CMP. */
#define JMP_CMP_MH JMP_CMP
#define JMP_CMP_HM JMP_CMP
#define JMP_CMP_HH JMP_CMP
#define JMP_CMP_HH_FAST JMP_CMP

#endif

/*
 * Conditional jump comparing a host-endian field against an immediate.
 */
#define JMP_CMP_I(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
	uint64_t a = a64 & a64_mask;                                           \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

/* Meta-data operand with immediate: identical to JMP_CMP_I. */
#define JMP_CMP_MI JMP_CMP_I

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/*
 * Conditional jump comparing a big-endian header field (byte-swapped into
 * host order) against an immediate.
 */
#define JMP_CMP_HI(thread, ip, operator)  \
{                                                                              \
	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
	uint64_t a64 = *a64_ptr;                                               \
	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
									       \
	uint64_t b = (ip)->jmp.b_val;                                          \
									       \
	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
}

#else

/* Big-endian CPU: header field is host-endian, no conversion needed. */
#define JMP_CMP_HI JMP_CMP_I

#endif
1485 
/*
 * Read the n_bits-bit meta-data field located at byte offset "offset" as a
 * host-endian value (GNU statement expression). n_bits must be in [1, 64]:
 * a shift by 64 would be undefined behavior.
 */
#define METADATA_READ(thread, offset, n_bits)                                  \
({                                                                             \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
	(m64 & m64_mask);                                                      \
})

/*
 * Write the low n_bits bits of "value" into the meta-data field located at
 * byte offset "offset", preserving the neighboring bits of the underlying
 * 64-bit word. Same n_bits range requirement as METADATA_READ.
 */
#define METADATA_WRITE(thread, offset, n_bits, value)                          \
{                                                                              \
	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
	uint64_t m64 = *m64_ptr;                                               \
	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
									       \
	uint64_t m_new = value;                                                \
									       \
	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
}
1504 
/* Number of packet processing threads per pipeline. Must be a power of two:
 * thread ID advance uses mask-based wrap-around (see thread_yield()).
 */
#ifndef RTE_SWX_PIPELINE_THREADS_MAX
#define RTE_SWX_PIPELINE_THREADS_MAX 16
#endif

/* Upper limit for the number of entries in the instruction table. */
#ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
#define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 1024
#endif
1512 
/*
 * Pipeline instance: named object lists registered at specification time,
 * run-time data structures materialized at build time, and per-pipeline
 * state.
 */
struct rte_swx_pipeline {
	char name[RTE_SWX_NAME_SIZE]; /* Pipeline name. */

	/* Object lists registered before build. */
	struct struct_type_tailq struct_types;
	struct port_in_type_tailq port_in_types;
	struct port_in_tailq ports_in;
	struct port_out_type_tailq port_out_types;
	struct port_out_tailq ports_out;
	struct extern_type_tailq extern_types;
	struct extern_obj_tailq extern_objs;
	struct extern_func_tailq extern_funcs;
	struct hash_func_tailq hash_funcs;
	struct rss_tailq rss;
	struct header_tailq headers;
	struct struct_type *metadata_st; /* Struct type of the meta-data. */
	uint32_t metadata_struct_id;
	struct action_tailq actions;
	struct table_type_tailq table_types;
	struct table_tailq tables;
	struct selector_tailq selectors;
	struct learner_tailq learners;
	struct regarray_tailq regarrays;
	struct meter_profile_tailq meter_profiles;
	struct metarray_tailq metarrays;

	/* Run-time structures (arrays indexed by object ID). */
	struct port_in_runtime *in;
	struct port_out_runtime *out;
	struct mirroring_session *mirroring_sessions;
	struct instruction **action_instructions;
	action_func_t *action_funcs;
	struct rte_swx_table_state *table_state;
	struct table_statistics *table_stats;
	struct selector_statistics *selector_stats;
	struct learner_statistics *learner_stats;
	struct hash_func_runtime *hash_func_runtime;
	struct rss_runtime **rss_runtime;
	struct regarray_runtime *regarray_runtime;
	struct metarray_runtime *metarray_runtime;
	struct instruction *instructions;
	struct instruction_data *instruction_data;
	instr_exec_t *instruction_table;
	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
	void *lib; /* NOTE(review): presumably the handle of the generated
		    * shared object for custom instructions -- confirm.
		    */

	/* Object counts and pipeline state. */
	uint32_t n_structs;
	uint32_t n_ports_in;
	uint32_t n_ports_out;
	uint32_t n_mirroring_slots;
	uint32_t n_mirroring_sessions;
	uint32_t n_extern_objs;
	uint32_t n_extern_funcs;
	uint32_t n_hash_funcs;
	uint32_t n_rss;
	uint32_t n_actions;
	uint32_t n_tables;
	uint32_t n_selectors;
	uint32_t n_learners;
	uint32_t n_regarrays;
	uint32_t n_metarrays;
	uint32_t n_headers;
	uint32_t thread_id; /* Currently executing thread. */
	uint32_t port_id;   /* Next input port to poll. */
	uint32_t n_instructions;
	int build_done;
	int numa_node;
};
1579 
1580 /*
1581  * Instruction.
1582  */
1583 static inline void
1584 pipeline_port_inc(struct rte_swx_pipeline *p)
1585 {
1586 	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
1587 }
1588 
/* Reset the thread's instruction pointer to the pipeline's first instruction. */
static inline void
thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
{
	t->ip = p->instructions;
}

/* Point the thread's instruction pointer at the given instruction. */
static inline void
thread_ip_set(struct thread *t, struct instruction *ip)
{
	t->ip = ip;
}
1600 
1601 static inline void
1602 thread_ip_action_call(struct rte_swx_pipeline *p,
1603 		      struct thread *t,
1604 		      uint32_t action_id)
1605 {
1606 	t->ret = t->ip + 1;
1607 	t->ip = p->action_instructions[action_id];
1608 }
1609 
1610 static inline void
1611 thread_ip_inc(struct rte_swx_pipeline *p);
1612 
1613 static inline void
1614 thread_ip_inc(struct rte_swx_pipeline *p)
1615 {
1616 	struct thread *t = &p->threads[p->thread_id];
1617 
1618 	t->ip++;
1619 }
1620 
/* Branchless conditional advance: move the instruction pointer forward by
 * "cond" instructions; cond is expected to be 0 or 1.
 */
static inline void
thread_ip_inc_cond(struct thread *t, int cond)
{
	t->ip += cond;
}

/* Pass control to the next thread, round-robin. Relies on
 * RTE_SWX_PIPELINE_THREADS_MAX being a power of two (mask-based wrap).
 */
static inline void
thread_yield(struct rte_swx_pipeline *p)
{
	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}

/* Branchless conditional yield: pass control to the next thread only when
 * cond is non-zero; cond is expected to be 0 or 1.
 */
static inline void
thread_yield_cond(struct rte_swx_pipeline *p, int cond)
{
	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
}
1638 
1639 /*
1640  * rx.
1641  */
/*
 * Bring the next packet into the current thread.
 *
 * When the current packet is flagged for recirculation, it is kept and
 * re-entered into the pipeline: the pass counter is incremented, all headers
 * are invalidated and the table state is refreshed; the input port is not
 * polled and no port meta-data is written.
 *
 * Otherwise, a packet is requested from the current input port, the input
 * port ID is stored into the meta-data field designated by the instruction,
 * and the pipeline moves on to the next input port (round-robin).
 *
 * Returns non-zero when a packet is available for processing: always 1 for
 * recirculation, otherwise the value reported by the port's pkt_rx callback.
 */
static inline int
__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct port_in_runtime *port = &p->in[p->port_id];
	struct rte_swx_pkt *pkt = &t->pkt;
	int pkt_received;

	/* Recirculation: keep the current packet. */
	if (t->recirculate) {
		TRACE("[Thread %2u] rx - recirculate (pass %u)\n",
		      p->thread_id,
		      t->recirc_pass_id + 1);

		/* Packet. */
		t->ptr = &pkt->pkt[pkt->offset];
		t->mirroring_slots_mask = 0;
		t->recirculate = 0;
		t->recirc_pass_id++;

		/* Headers. */
		t->valid_headers = 0;
		t->n_headers_out = 0;

		/* Tables. */
		t->table_state = p->table_state;

		return 1;
	}

	/* Packet. */
	pkt_received = port->pkt_rx(port->obj, pkt);
	t->ptr = &pkt->pkt[pkt->offset];
	rte_prefetch0(t->ptr);

	TRACE("[Thread %2u] rx %s from port %u\n",
	      p->thread_id,
	      pkt_received ? "1 pkt" : "0 pkts",
	      p->port_id);

	t->mirroring_slots_mask = 0;
	t->recirc_pass_id = 0;

	/* Headers. */
	t->valid_headers = 0;
	t->n_headers_out = 0;

	/* Meta-data: store the input port ID into the designated field. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);

	/* Tables. */
	t->table_state = p->table_state;

	/* Thread: move on to the next input port. */
	pipeline_port_inc(p);

	return pkt_received;
}
1699 
/*
 * rx instruction handler: fetch the next packet, advance the instruction
 * pointer only when a packet was actually received (branchless), then yield
 * to the next thread.
 */
static inline void
instr_rx_exec(struct rte_swx_pipeline *p)
{
	struct thread *t = &p->threads[p->thread_id];
	struct instruction *ip = t->ip;
	int pkt_received;

	/* Packet. */
	pkt_received = __instr_rx_exec(p, t, ip);

	/* Thread. */
	thread_ip_inc_cond(t, pkt_received);
	thread_yield(p);
}
1714 
1715 /*
1716  * tx.
1717  */
1718 static inline void
1719 emit_handler(struct thread *t)
1720 {
1721 	struct header_out_runtime *h0 = &t->headers_out[0];
1722 	struct header_out_runtime *h1 = &t->headers_out[1];
1723 	uint32_t offset = 0, i;
1724 
1725 	/* No header change or header decapsulation. */
1726 	if ((t->n_headers_out == 1) &&
1727 	    (h0->ptr + h0->n_bytes == t->ptr)) {
1728 		TRACE("Emit handler: no header change or header decap.\n");
1729 
1730 		t->pkt.offset -= h0->n_bytes;
1731 		t->pkt.length += h0->n_bytes;
1732 
1733 		return;
1734 	}
1735 
1736 	/* Header encapsulation (optionally, with prior header decapsulation). */
1737 	if ((t->n_headers_out == 2) &&
1738 	    (h1->ptr + h1->n_bytes == t->ptr) &&
1739 	    (h0->ptr == h0->ptr0)) {
1740 		uint32_t offset;
1741 
1742 		TRACE("Emit handler: header encapsulation.\n");
1743 
1744 		offset = h0->n_bytes + h1->n_bytes;
1745 		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
1746 		t->pkt.offset -= offset;
1747 		t->pkt.length += offset;
1748 
1749 		return;
1750 	}
1751 
1752 	/* For any other case. */
1753 	TRACE("Emit handler: complex case.\n");
1754 
1755 	for (i = 0; i < t->n_headers_out; i++) {
1756 		struct header_out_runtime *h = &t->headers_out[i];
1757 
1758 		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
1759 		offset += h->n_bytes;
1760 	}
1761 
1762 	if (offset) {
1763 		memcpy(t->ptr - offset, t->header_out_storage, offset);
1764 		t->pkt.offset -= offset;
1765 		t->pkt.length += offset;
1766 	}
1767 }
1768 
/*
 * Transmit a clone of the packet to every mirroring session armed for this
 * packet. Iterates over the set bits of the thread's mirroring slots mask;
 * each armed slot holds a session ID that selects both the output port and
 * the clone method (fast clone vs. clone truncated to the session's length).
 */
static inline void
mirroring_handler(struct rte_swx_pipeline *p, struct thread *t, struct rte_swx_pkt *pkt)
{
	uint64_t slots_mask = t->mirroring_slots_mask, slot_mask;
	uint32_t slot_id;

	for (slot_id = 0, slot_mask = 1LLU ; slots_mask; slot_id++, slot_mask <<= 1)
		if (slot_mask & slots_mask) {
			struct port_out_runtime *port;
			struct mirroring_session *session;
			uint32_t port_id, session_id;

			session_id = t->mirroring_slots[slot_id];
			session = &p->mirroring_sessions[session_id];

			port_id = session->port_id;
			port = &p->out[port_id];

			if (session->fast_clone)
				port->pkt_fast_clone_tx(port->obj, pkt);
			else
				port->pkt_clone_tx(port->obj, pkt, session->truncation_length);

			/* Clear the processed slot so the loop terminates. */
			slots_mask &= ~slot_mask;
		}
}
1795 
/*
 * tx instruction: transmit the packet to the output port whose ID is read
 * from the meta-data field designated by the instruction. When the packet is
 * flagged for recirculation, headers are still emitted and mirroring still
 * takes place, but the packet is not handed to any output port.
 */
static inline void
__instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	/* Recirculation: keep the current packet. */
	if (t->recirculate) {
		TRACE("[Thread %2u]: tx 1 pkt - recirculate\n",
		      p->thread_id);

		/* Headers. */
		emit_handler(t);

		/* Packet. */
		mirroring_handler(p, t, pkt);

		return;
	}

	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	mirroring_handler(p, t, pkt);
	port->pkt_tx(port->obj, pkt);
}
1828 
/*
 * tx instruction with immediate operand: same as __instr_tx_exec(), except
 * the output port ID is taken directly from the instruction instead of from
 * meta-data.
 */
static inline void
__instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t port_id = ip->io.io.val;
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	/* Recirculation: keep the current packet. */
	if (t->recirculate) {
		TRACE("[Thread %2u]: tx (i) 1 pkt - recirculate\n",
		      p->thread_id);

		/* Headers. */
		emit_handler(t);

		/* Packet. */
		mirroring_handler(p, t, pkt);

		return;
	}

	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
	      p->thread_id,
	      (uint32_t)port_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	mirroring_handler(p, t, pkt);
	port->pkt_tx(port->obj, pkt);
}
1861 
/*
 * drop instruction: transmit the packet to the highest-numbered output port,
 * which serves as the drop port. Emitted headers and mirroring are still
 * processed before the drop.
 */
static inline void
__instr_drop_exec(struct rte_swx_pipeline *p,
		  struct thread *t,
		  const struct instruction *ip __rte_unused)
{
	uint64_t port_id = p->n_ports_out - 1;
	struct port_out_runtime *port = &p->out[port_id];
	struct rte_swx_pkt *pkt = &t->pkt;

	TRACE("[Thread %2u]: drop 1 pkt\n",
	      p->thread_id);

	/* Headers. */
	emit_handler(t);

	/* Packet. */
	mirroring_handler(p, t, pkt);
	port->pkt_tx(port->obj, pkt);
}
1881 
/*
 * mirror instruction: arm mirroring slot "slot_id" with session "session_id"
 * for the current packet; the actual clone happens at tx time in
 * mirroring_handler(). Both operands are read in host byte order and wrapped
 * with a mask, which assumes the slot and session counts are powers of two.
 */
static inline void
__instr_mirror_exec(struct rte_swx_pipeline *p,
		    struct thread *t,
		    const struct instruction *ip)
{
	uint64_t slot_id = instr_operand_hbo(t, &ip->mirror.dst);
	uint64_t session_id = instr_operand_hbo(t, &ip->mirror.src);

	slot_id &= p->n_mirroring_slots - 1;
	session_id &= p->n_mirroring_sessions - 1;

	TRACE("[Thread %2u]: mirror pkt (slot = %u, session = %u)\n",
	      p->thread_id,
	      (uint32_t)slot_id,
	      (uint32_t)session_id);

	t->mirroring_slots[slot_id] = session_id;
	t->mirroring_slots_mask |= 1LLU << slot_id;
}
1901 
/*
 * recirculate instruction: flag the current packet to be re-injected into
 * the pipeline; the flag is consumed by __instr_rx_exec() and
 * __instr_tx_exec() on the next rx/tx.
 */
static inline void
__instr_recirculate_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip __rte_unused)
{
	TRACE("[Thread %2u]: recirculate\n",
	      p->thread_id);

	t->recirculate = 1;
}

/*
 * recircid instruction: store the current recirculation pass ID into the
 * meta-data field designated by the instruction.
 */
static inline void
__instr_recircid_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u]: recircid (pass %u)\n",
	      p->thread_id,
	      t->recirc_pass_id);

	/* Meta-data. */
	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, t->recirc_pass_id);
}
1925 
1926 /*
1927  * extract.
1928  */
/*
 * Extract n_extract headers from the front of the packet, in instruction
 * order. Each header's struct pointer is aimed directly at its location
 * inside the packet buffer (zero-copy) and the header is marked valid; the
 * packet offset and pointer advance while the remaining length shrinks.
 *
 * NOTE(review): no check here that the remaining packet length covers the
 * extracted bytes -- presumably guaranteed by the instruction translation /
 * packet validation upstream; confirm.
 */
static inline void
__instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip,
			      uint32_t n_extract)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;
	uint32_t i;

	for (i = 0; i < n_extract; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];
		uint32_t n_bytes = ip->io.hdr.n_bytes[i];

		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
		      p->thread_id,
		      header_id,
		      n_bytes);

		/* Headers: point the header struct into the packet. */
		t->structs[struct_id] = ptr;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);

		/* Packet. */
		offset += n_bytes;
		length -= n_bytes;
		ptr += n_bytes;
	}

	/* Headers. */
	t->valid_headers = valid_headers;

	/* Packet. */
	t->pkt.offset = offset;
	t->pkt.length = length;
	t->ptr = ptr;
}
1969 
/*
 * Thin wrappers over __instr_hdr_extract_many_exec() for 1 to 8 fused
 * extract instructions (the instruction translator fuses consecutive
 * extracts into a single multi-header instruction).
 */
static inline void
__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	__instr_hdr_extract_many_exec(p, t, ip, 1);
}

static inline void
__instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 2);
}

static inline void
__instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 3);
}

static inline void
__instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 4);
}

static inline void
__instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 5);
}

static inline void
__instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 6);
}

static inline void
__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 7);
}

static inline void
__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_extract_many_exec(p, t, ip, 8);
}
2047 
/*
 * Extract a header with a variable-size trailing part: the fixed front size
 * comes from the instruction (n_bytes), while the size of the variable part
 * is read from a meta-data field at run-time.
 */
static inline void
__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip)
{
	uint64_t valid_headers = t->valid_headers;
	uint8_t *ptr = t->ptr;
	uint32_t offset = t->pkt.offset;
	uint32_t length = t->pkt.length;

	/* Run-time size of the variable part, read from meta-data. */
	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
	uint32_t header_id = ip->io.hdr.header_id[0];
	uint32_t struct_id = ip->io.hdr.struct_id[0];
	uint32_t n_bytes = ip->io.hdr.n_bytes[0];

	struct header_runtime *h = &t->headers[header_id];

	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
	      p->thread_id,
	      header_id,
	      n_bytes,
	      n_bytes_last);

	/* Total header size = fixed part + variable part. */
	n_bytes += n_bytes_last;

	/* Headers: map the header onto the packet, mark it valid and record its
	 * actual size for later use (e.g. by emit).
	 */
	t->structs[struct_id] = ptr;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	h->n_bytes = n_bytes;

	/* Packet: consume the extracted bytes. */
	t->pkt.offset = offset + n_bytes;
	t->pkt.length = length - n_bytes;
	t->ptr = ptr + n_bytes;
}
2083 
2084 static inline void
2085 __instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
2086 			   struct thread *t,
2087 			   const struct instruction *ip)
2088 {
2089 	uint64_t valid_headers = t->valid_headers;
2090 	uint8_t *ptr = t->ptr;
2091 
2092 	uint32_t header_id = ip->io.hdr.header_id[0];
2093 	uint32_t struct_id = ip->io.hdr.struct_id[0];
2094 
2095 	TRACE("[Thread %2u]: lookahead header %u\n",
2096 	      p->thread_id,
2097 	      header_id);
2098 
2099 	/* Headers. */
2100 	t->structs[struct_id] = ptr;
2101 	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
2102 }
2103 
2104 /*
2105  * emit.
2106  */
/*
 * Emit n_emit headers into the output header list (t->headers_out). Invalid
 * headers are skipped. Valid headers that are physically contiguous in memory
 * with the current output chunk are coalesced into the same headers_out[]
 * entry; a non-contiguous header closes the current chunk and opens a new one.
 */
static inline void
__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
			   struct thread *t,
			   const struct instruction *ip,
			   uint32_t n_emit)
{
	uint64_t valid_headers = t->valid_headers;
	uint32_t n_headers_out = t->n_headers_out;
	struct header_out_runtime *ho = NULL;
	uint8_t *ho_ptr = NULL;
	uint32_t ho_nbytes = 0, i;

	for (i = 0; i < n_emit; i++) {
		uint32_t header_id = ip->io.hdr.header_id[i];
		uint32_t struct_id = ip->io.hdr.struct_id[i];

		struct header_runtime *hi = &t->headers[header_id];
		uint8_t *hi_ptr0 = hi->ptr0;
		uint32_t n_bytes = hi->n_bytes;

		uint8_t *hi_ptr = t->structs[struct_id];

		/* Skip headers that are not currently valid. */
		if (!MASK64_BIT_GET(valid_headers, header_id)) {
			TRACE("[Thread %2u]: emit header %u (invalid)\n",
			      p->thread_id,
			      header_id);

			continue;
		}

		TRACE("[Thread %2u]: emit header %u (valid)\n",
		      p->thread_id,
		      header_id);

		/* First valid header seen by this call: start a new output
		 * list, or pick up the last existing chunk for appending.
		 */
		if (!ho) {
			if (!n_headers_out) {
				ho = &t->headers_out[0];

				ho->ptr0 = hi_ptr0;
				ho->ptr = hi_ptr;

				ho_ptr = hi_ptr;
				ho_nbytes = n_bytes;

				n_headers_out = 1;

				continue;
			} else {
				ho = &t->headers_out[n_headers_out - 1];

				ho_ptr = ho->ptr;
				ho_nbytes = ho->n_bytes;
			}
		}

		/* Contiguous with the current chunk: just grow it. Otherwise
		 * close the current chunk and start a new one for this header.
		 */
		if (ho_ptr + ho_nbytes == hi_ptr) {
			ho_nbytes += n_bytes;
		} else {
			ho->n_bytes = ho_nbytes;

			ho++;
			ho->ptr0 = hi_ptr0;
			ho->ptr = hi_ptr;

			ho_ptr = hi_ptr;
			ho_nbytes = n_bytes;

			n_headers_out++;
		}
	}

	/* Close the last chunk, if any header was emitted. */
	if (ho)
		ho->n_bytes = ho_nbytes;
	t->n_headers_out = n_headers_out;
}
2183 
/* Emit exactly one header (single-instruction form). */
static inline void
__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
		      struct thread *t,
		      const struct instruction *ip)
{
	__instr_hdr_emit_many_exec(p, t, ip, 1);
}
2191 
/*
 * Fused variants: emit N headers, then transmit the packet. Each replaces
 * N + 1 separate instructions (N x emit + 1 x tx).
 */
static inline void
__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
			 struct thread *t,
			 const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 1);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 2);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 3);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 4);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 5);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 6);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 7);
	__instr_tx_exec(p, t, ip);
}

static inline void
__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
			  struct thread *t,
			  const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);

	__instr_hdr_emit_many_exec(p, t, ip, 8);
	__instr_tx_exec(p, t, ip);
}
2279 
2280 /*
2281  * validate.
2282  */
/* Make a header valid, pointing its struct at the default storage (ptr0). */
static inline void
__instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
			  struct thread *t,
			  const struct instruction *ip)
{
	uint32_t header_id = ip->valid.header_id;
	uint32_t struct_id = ip->valid.struct_id;
	uint64_t valid_headers = t->valid_headers;
	struct header_runtime *h = &t->headers[header_id];

	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);

	/* If this header is already valid, then its associated t->structs[] element is also valid
	 * and therefore it should not be modified. It could point to the packet buffer (in case of
	 * extracted header) and setting it to the default location (h->ptr0) would be incorrect.
	 */
	if (MASK64_BIT_GET(valid_headers, header_id))
		return;

	/* Headers. */
	t->structs[struct_id] = h->ptr0;
	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
}
2306 
2307 /*
2308  * invalidate.
2309  */
/* Clear a header's valid bit; its t->structs[] entry is left untouched. */
static inline void
__instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint32_t header_id = ip->valid.header_id;

	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);

	/* Headers. */
	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
}
2322 
2323 /*
2324  * learn.
2325  */
/*
 * Add an entry to the current learner table. The action arguments are taken
 * from meta-data starting at mf_first_arg_offset, and the timeout profile ID
 * is read from a meta-data field at run-time.
 */
static inline void
__instr_learn_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip)
{
	uint64_t action_id = ip->learn.action_id;
	uint32_t mf_first_arg_offset = ip->learn.mf_first_arg_offset;
	uint32_t timeout_id = METADATA_READ(t, ip->learn.mf_timeout_id_offset,
		ip->learn.mf_timeout_id_n_bits);
	uint32_t learner_id = t->learner_id;
	/* Table state layout: regular tables, then selectors, then learners. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];
	uint32_t status;

	/* Table. */
	status = rte_swx_table_learner_add(ts->obj,
					   l->mailbox,
					   t->time,
					   action_id,
					   &t->metadata[mf_first_arg_offset],
					   timeout_id);

	TRACE("[Thread %2u] learner %u learn %s\n",
	      p->thread_id,
	      learner_id,
	      status ? "ok" : "error");

	/* Per-status packet counters. */
	stats->n_pkts_learn[status] += 1;
}
2357 
2358 /*
2359  * rearm.
2360  */
/* Rearm the current learner table entry, keeping its current timeout profile. */
static inline void
__instr_rearm_exec(struct rte_swx_pipeline *p,
		   struct thread *t,
		   const struct instruction *ip __rte_unused)
{
	uint32_t learner_id = t->learner_id;
	/* Table state layout: regular tables, then selectors, then learners. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_rearm(ts->obj, l->mailbox, t->time);

	TRACE("[Thread %2u] learner %u rearm\n",
	      p->thread_id,
	      learner_id);

	stats->n_pkts_rearm += 1;
}
2381 
/* Rearm the current learner table entry with a new timeout profile ID that is
 * read from a meta-data field at run-time.
 */
static inline void
__instr_rearm_new_exec(struct rte_swx_pipeline *p,
		       struct thread *t,
		       const struct instruction *ip)
{
	uint32_t timeout_id = METADATA_READ(t, ip->learn.mf_timeout_id_offset,
		ip->learn.mf_timeout_id_n_bits);
	uint32_t learner_id = t->learner_id;
	/* Table state layout: regular tables, then selectors, then learners. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_rearm_new(ts->obj, l->mailbox, t->time, timeout_id);

	TRACE("[Thread %2u] learner %u rearm with timeout ID %u\n",
	      p->thread_id,
	      learner_id,
	      timeout_id);

	stats->n_pkts_rearm += 1;
}
2405 
2406 /*
2407  * forget.
2408  */
/* Delete the current entry from the learner table. */
static inline void
__instr_forget_exec(struct rte_swx_pipeline *p,
		    struct thread *t,
		    const struct instruction *ip __rte_unused)
{
	uint32_t learner_id = t->learner_id;
	/* Table state layout: regular tables, then selectors, then learners. */
	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
		p->n_selectors + learner_id];
	struct learner_runtime *l = &t->learners[learner_id];
	struct learner_statistics *stats = &p->learner_stats[learner_id];

	/* Table. */
	rte_swx_table_learner_delete(ts->obj, l->mailbox);

	TRACE("[Thread %2u] learner %u forget\n",
	      p->thread_id,
	      learner_id);

	stats->n_pkts_forget += 1;
}
2429 
2430 /*
2431  * entryid.
2432  */
/* Write the current table entry ID (t->entry_id) into a meta-data field. */
static inline void
__instr_entryid_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u]: entryid\n",
	      p->thread_id);

	/* Meta-data. */
	METADATA_WRITE(t, ip->mov.dst.offset, ip->mov.dst.n_bits, t->entry_id);
}
2444 
2445 /*
2446  * extern.
2447  */
2448 static inline uint32_t
2449 __instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
2450 			struct thread *t,
2451 			const struct instruction *ip)
2452 {
2453 	uint32_t obj_id = ip->ext_obj.ext_obj_id;
2454 	uint32_t func_id = ip->ext_obj.func_id;
2455 	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
2456 	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
2457 	uint32_t done;
2458 
2459 	TRACE("[Thread %2u] extern obj %u member func %u\n",
2460 	      p->thread_id,
2461 	      obj_id,
2462 	      func_id);
2463 
2464 	done = func(obj->obj, obj->mailbox);
2465 
2466 	return done;
2467 }
2468 
2469 static inline uint32_t
2470 __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
2471 			 struct thread *t,
2472 			 const struct instruction *ip)
2473 {
2474 	uint32_t ext_func_id = ip->ext_func.ext_func_id;
2475 	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
2476 	rte_swx_extern_func_t func = ext_func->func;
2477 	uint32_t done;
2478 
2479 	TRACE("[Thread %2u] extern func %u\n",
2480 	      p->thread_id,
2481 	      ext_func_id);
2482 
2483 	done = func(ext_func->mailbox);
2484 
2485 	return done;
2486 }
2487 
2488 /*
2489  * hash.
2490  */
/*
 * Compute a hash over n_src_bytes of the source struct field and write the
 * result into a meta-data field.
 */
static inline void
__instr_hash_func_exec(struct rte_swx_pipeline *p,
		       struct thread *t,
		       const struct instruction *ip)
{
	uint32_t hash_func_id = ip->hash_func.hash_func_id;
	uint32_t dst_offset = ip->hash_func.dst.offset;
	uint32_t n_dst_bits = ip->hash_func.dst.n_bits;
	uint32_t src_struct_id = ip->hash_func.src.struct_id;
	uint32_t src_offset = ip->hash_func.src.offset;
	uint32_t n_src_bytes = ip->hash_func.src.n_bytes;

	struct hash_func_runtime *func = &p->hash_func_runtime[hash_func_id];
	uint8_t *src_ptr = t->structs[src_struct_id];
	uint32_t result;

	TRACE("[Thread %2u] hash %u\n",
	      p->thread_id,
	      hash_func_id);

	/* Seed is fixed to 0. */
	result = func->func(&src_ptr[src_offset], n_src_bytes, 0);
	METADATA_WRITE(t, dst_offset, n_dst_bits, result);
}
2514 
2515 /*
2516  * rss.
2517  */
/*
 * Toeplitz-style RSS hash over the input data, processed 32 bits at a time.
 * For every set bit of each data word, a 32-bit window of the key aligned to
 * that bit position is XOR-ed into the hash. Key and data sizes are treated
 * as whole 32-bit words (sizes are divided by 4; any tail bytes are ignored).
 */
static inline uint32_t
rss_func(void *rss_key, uint32_t rss_key_size, void *input_data, uint32_t input_data_size)
{
	uint32_t *key = (uint32_t *)rss_key;
	uint32_t *data = (uint32_t *)input_data;
	uint32_t n_key_words = rss_key_size >> 2;
	uint32_t n_data_words = input_data_size >> 2;
	uint32_t hash = 0, w;

	for (w = 0; w < n_data_words; w++) {
		uint32_t bits = data[w];

		/* Visit each set bit of the current data word, LSB first. */
		while (bits) {
			uint32_t pos = rte_bsf32(bits);
			uint32_t key_lo = key[w % n_key_words] << (31 - pos);
			uint32_t key_hi = key[(w + 1) % n_key_words] >> (pos + 1);

			hash ^= key_lo | key_hi;
			bits &= bits - 1; /* Clear the lowest set bit. */
		}
	}

	return hash;
}
2542 
/*
 * Compute the RSS hash of a source struct field using the key of the given
 * RSS object and write the result into a meta-data field.
 */
static inline void
__instr_rss_exec(struct rte_swx_pipeline *p,
		 struct thread *t,
		 const struct instruction *ip)
{
	uint32_t rss_obj_id = ip->rss.rss_obj_id;
	uint32_t dst_offset = ip->rss.dst.offset;
	uint32_t n_dst_bits = ip->rss.dst.n_bits;
	uint32_t src_struct_id = ip->rss.src.struct_id;
	uint32_t src_offset = ip->rss.src.offset;
	uint32_t n_src_bytes = ip->rss.src.n_bytes;

	struct rss_runtime *r = p->rss_runtime[rss_obj_id];
	uint8_t *src_ptr = t->structs[src_struct_id];
	uint32_t result;

	TRACE("[Thread %2u] rss %u\n",
	      p->thread_id,
	      rss_obj_id);

	result = rss_func(r->key, r->key_size, &src_ptr[src_offset], n_src_bytes);
	METADATA_WRITE(t, dst_offset, n_dst_bits, result);
}
2566 
2567 /*
2568  * mov.
2569  */
/*
 * mov variants: the suffix selects the MOV* macro matching the dst/src
 * operand kinds (see the MOV* macro definitions elsewhere in this file).
 */
static inline void
__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
		 struct thread *t,
		 const struct instruction *ip)
{
	TRACE("[Thread %2u] mov\n", p->thread_id);

	MOV(t, ip);
}

static inline void
__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);

	MOV_MH(t, ip);
}

static inline void
__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);

	MOV_HM(t, ip);
}

static inline void
__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);

	MOV_HH(t, ip);
}
2609 
2610 static inline void
2611 __instr_mov_dma_exec(struct rte_swx_pipeline *p __rte_unused,
2612 		     struct thread *t,
2613 		     const struct instruction *ip)
2614 {
2615 	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
2616 	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
2617 
2618 	uint32_t n_dst = ip->mov.dst.n_bits >> 3;
2619 	uint32_t n_src = ip->mov.src.n_bits >> 3;
2620 
2621 	TRACE("[Thread %2u] mov (dma) %u bytes\n", p->thread_id, n);
2622 
2623 	/* Both dst and src are in NBO format. */
2624 	if (n_dst > n_src) {
2625 		uint32_t n_dst_zero = n_dst - n_src;
2626 
2627 		/* Zero padding the most significant bytes in dst. */
2628 		memset(dst, 0, n_dst_zero);
2629 		dst += n_dst_zero;
2630 
2631 		/* Copy src to dst. */
2632 		memcpy(dst, src, n_src);
2633 	} else {
2634 		uint32_t n_src_skipped = n_src - n_dst;
2635 
2636 		/* Copy src to dst. */
2637 		src += n_src_skipped;
2638 		memcpy(dst, src, n_dst);
2639 	}
2640 }
2641 
2642 static inline void
2643 __instr_mov_128_exec(struct rte_swx_pipeline *p __rte_unused,
2644 		     struct thread *t,
2645 		     const struct instruction *ip)
2646 {
2647 	uint8_t *dst_struct = t->structs[ip->mov.dst.struct_id];
2648 	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->mov.dst.offset];
2649 
2650 	uint8_t *src_struct = t->structs[ip->mov.src.struct_id];
2651 	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->mov.src.offset];
2652 
2653 	TRACE("[Thread %2u] mov (128)\n", p->thread_id);
2654 
2655 	dst64_ptr[0] = src64_ptr[0];
2656 	dst64_ptr[1] = src64_ptr[1];
2657 }
2658 
/*
 * mov from a 32-bit source into a 128-bit destination: the first three 32-bit
 * words of dst are zeroed and the source word is written to the last slot
 * (NOTE(review): placement assumes network byte order operands, as in the
 * other wide mov variants -- confirm against the MOV* macro definitions).
 */
static inline void
__instr_mov_128_32_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;

	uint32_t *dst32 = (uint32_t *)dst;
	uint32_t *src32 = (uint32_t *)src;

	TRACE("[Thread %2u] mov (128 <- 32)\n", p->thread_id);

	dst32[0] = 0;
	dst32[1] = 0;
	dst32[2] = 0;
	dst32[3] = src32[0];
}
2677 
/* mov of an immediate value (ip->mov.src_val) into the destination field. */
static inline void
__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
		   struct thread *t,
		   const struct instruction *ip)
{
	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);

	MOV_I(t, ip);
}
2687 
2688 /*
2689  * dma.
2690  */
/*
 * Copy action data (struct ID 0) into n_dma headers. For each header, the
 * destination is its current location when it is already valid, otherwise its
 * default storage (ptr0); the header is then marked valid.
 */
static inline void
__instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
			 struct thread *t,
			 const struct instruction *ip,
			 uint32_t n_dma)
{
	uint8_t *action_data = t->structs[0];
	uint64_t valid_headers = t->valid_headers;
	uint32_t i;

	for (i = 0; i < n_dma; i++) {
		uint32_t header_id = ip->dma.dst.header_id[i];
		uint32_t struct_id = ip->dma.dst.struct_id[i];
		uint32_t offset = ip->dma.src.offset[i];
		uint32_t n_bytes = ip->dma.n_bytes[i];

		struct header_runtime *h = &t->headers[header_id];
		uint8_t *h_ptr0 = h->ptr0;
		uint8_t *h_ptr = t->structs[struct_id];

		/* Write in place when the header is valid, else to ptr0. */
		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
			h_ptr : h_ptr0;
		void *src = &action_data[offset];

		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);

		/* Headers. */
		memcpy(dst, src, n_bytes);
		t->structs[struct_id] = dst;
		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
	}

	t->valid_headers = valid_headers;
}
2725 
/* dma variants: copy action data into 1..8 headers with one instruction. */
static inline void
__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	__instr_dma_ht_many_exec(p, t, ip, 1);
}

static inline void
__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 2);
}

static inline void
__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 3);
}

static inline void
__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 4);
}

static inline void
__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 5);
}

static inline void
__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 6);
}

static inline void
__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 7);
}

static inline void
__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);

	__instr_dma_ht_many_exec(p, t, ip, 8);
}
2787 
2788 /*
2789  * alu.
2790  */
/*
 * ALU add/sub variants: the suffix selects the ALU* macro matching the
 * dst/src operand kinds (see the ALU* macro definitions elsewhere in this
 * file); the operator is passed into the macro.
 */
static inline void
__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] add\n", p->thread_id);

	ALU(t, ip, +);
}

static inline void
__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mh)\n", p->thread_id);

	ALU_MH(t, ip, +);
}

static inline void
__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hm)\n", p->thread_id);

	ALU_HM(t, ip, +);
}

static inline void
__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hh)\n", p->thread_id);

	ALU_HH(t, ip, +);
}

static inline void
__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (mi)\n", p->thread_id);

	ALU_MI(t, ip, +);
}

static inline void
__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] add (hi)\n", p->thread_id);

	ALU_HI(t, ip, +);
}

static inline void
__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] sub\n", p->thread_id);

	ALU(t, ip, -);
}

static inline void
__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);

	ALU_MH(t, ip, -);
}

static inline void
__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);

	ALU_HM(t, ip, -);
}

static inline void
__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);

	ALU_HH(t, ip, -);
}

static inline void
__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);

	ALU_MI(t, ip, -);
}

static inline void
__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);

	ALU_HI(t, ip, -);
}
2910 
/*
 * ALU shift-left/shift-right variants: the suffix selects the ALU* macro
 * matching the dst/src operand kinds; the shift operator is passed into the
 * macro.
 */
static inline void
__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shl\n", p->thread_id);

	ALU(t, ip, <<);
}

static inline void
__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);

	ALU_MH(t, ip, <<);
}

static inline void
__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);

	ALU_HM(t, ip, <<);
}

static inline void
__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);

	ALU_HH(t, ip, <<);
}

static inline void
__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);

	ALU_MI(t, ip, <<);
}

static inline void
__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);

	ALU_HI(t, ip, <<);
}

static inline void
__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] shr\n", p->thread_id);

	ALU(t, ip, >>);
}

static inline void
__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);

	ALU_MH(t, ip, >>);
}

static inline void
__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);

	ALU_HM(t, ip, >>);
}

static inline void
__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);

	ALU_HH(t, ip, >>);
}

static inline void
__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);

	/* Structs. */
	ALU_MI(t, ip, >>);
}

static inline void
__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);

	ALU_HI(t, ip, >>);
}
3031 
3032 static inline void
3033 __instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
3034 		     struct thread *t,
3035 		     const struct instruction *ip)
3036 {
3037 	TRACE("[Thread %2u] and\n", p->thread_id);
3038 
3039 	ALU(t, ip, &);
3040 }
3041 
3042 static inline void
3043 __instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
3044 			struct thread *t,
3045 			const struct instruction *ip)
3046 {
3047 	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
3048 
3049 	ALU_MH(t, ip, &);
3050 }
3051 
3052 static inline void
3053 __instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
3054 			struct thread *t,
3055 			const struct instruction *ip)
3056 {
3057 	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
3058 
3059 	ALU_HM_FAST(t, ip, &);
3060 }
3061 
3062 static inline void
3063 __instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
3064 			struct thread *t,
3065 			const struct instruction *ip)
3066 {
3067 	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
3068 
3069 	ALU_HH_FAST(t, ip, &);
3070 }
3071 
3072 static inline void
3073 __instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
3074 		       struct thread *t,
3075 		       const struct instruction *ip)
3076 {
3077 	TRACE("[Thread %2u] and (i)\n", p->thread_id);
3078 
3079 	ALU_I(t, ip, &);
3080 }
3081 
/* or: dst |= src, both operands regular (host-byte-order) struct fields. */
static inline void
__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
		    struct thread *t,
		    const struct instruction *ip)
{
	TRACE("[Thread %2u] or\n", p->thread_id);

	ALU(t, ip, |);
}

/* or (mh): dst is a regular field, src is a header (NBO) field. */
static inline void
__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (mh)\n", p->thread_id);

	ALU_MH(t, ip, |);
}

/* or (hm): dst is a header field, src a regular field; FAST variant is valid
 * because bitwise OR commutes with the byte swap.
 */
static inline void
__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, |);
}

/* or (hh): both operands are header fields. */
static inline void
__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] or (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, |);
}

/* or (i): dst is any field, src is an immediate value. */
static inline void
__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
		      struct thread *t,
		      const struct instruction *ip)
{
	TRACE("[Thread %2u] or (i)\n", p->thread_id);

	ALU_I(t, ip, |);
}
3131 
/* xor: dst ^= src, both operands regular (host-byte-order) struct fields. */
static inline void
__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
		     struct thread *t,
		     const struct instruction *ip)
{
	TRACE("[Thread %2u] xor\n", p->thread_id);

	ALU(t, ip, ^);
}

/* xor (mh): dst is a regular field, src is a header (NBO) field. */
static inline void
__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);

	ALU_MH(t, ip, ^);
}

/* xor (hm): dst is a header field, src a regular field; FAST variant is valid
 * because bitwise XOR commutes with the byte swap.
 */
static inline void
__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);

	ALU_HM_FAST(t, ip, ^);
}

/* xor (hh): both operands are header fields. */
static inline void
__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
			struct thread *t,
			const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);

	ALU_HH_FAST(t, ip, ^);
}

/* xor (i): dst is any field, src is an immediate value. */
static inline void
__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
		       struct thread *t,
		       const struct instruction *ip)
{
	TRACE("[Thread %2u] xor (i)\n", p->thread_id);

	ALU_I(t, ip, ^);
}
3181 
/* ckadd (field): incrementally fold a source field of up to 64 bits into the
 * 16-bit ones'-complement (Internet) checksum stored in the destination header
 * field, i.e. dst = dst '+ src in RFC 1071 arithmetic. The source is split
 * into two 32-bit halves so all the intermediate sums fit in 64 bits.
 */
static inline void
__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* The first input (r) is a 16-bit number. The second and the third
	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
	 * three numbers (output r) is a 34-bit number.
	 */
	r += (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is an 18-bit
	 * number. In the worst case scenario, the sum of the two numbers is a
	 * 19-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
	 * therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. In ones'-complement arithmetic
	 * 0xFFFF is "negative zero", so a zero checksum is emitted as 0xFFFF.
	 */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3239 
/* cksub (field): incrementally remove a source field of up to 64 bits from the
 * 16-bit ones'-complement (Internet) checksum stored in the destination header
 * field, i.e. dst = dst '- src (RFC 1624 incremental update). The subtraction
 * is performed in 2's complement with a pre-added multiple of the 0xFFFF
 * modulus to avoid underflow.
 */
static inline void
__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
			     struct thread *t,
			     const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint64_t *src64_ptr, src64, src64_mask, src;
	uint64_t r;

	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
	src64 = *src64_ptr;
	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
	src = src64 & src64_mask;

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
	 * the following sequence of operations in 2's complement arithmetic:
	 *    a '- b = (a - b) % 0xFFFF.
	 *
	 * In order to prevent an underflow for the below subtraction, in which
	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
	 * minuend), we first add a multiple of the 0xFFFF modulus to the
	 * minuend. The number we add to the minuend needs to be a 34-bit number
	 * or higher, so for readability reasons we picked the 36-bit multiple.
	 * We are effectively turning the 16-bit minuend into a 36-bit number:
	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
	 */
	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */

	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
	 * result (the output r) is a 36-bit number.
	 */
	r -= (src >> 32) + (src & 0xFFFFFFFF);

	/* The first input is a 16-bit number. The second input is a 20-bit
	 * number. Their sum is a 21-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. A zero checksum is emitted as
	 * 0xFFFF (ones'-complement negative zero).
	 */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3309 
/* ckadd (struct of 20 bytes): compute the 16-bit ones'-complement checksum
 * over a 20-byte source header (five 32-bit words) — typically an IPv4 header
 * without options — unrolled into two accumulators for instruction-level
 * parallelism. The destination field's current value is folded in first.
 */
static inline void
__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
				struct thread *t,
				const struct instruction *ip)
{
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint32_t *src32_ptr;
	uint64_t r0, r1;

	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Initialize the result with destination 1's complement. */
	r0 = dst;
	r0 = ~r0 & 0xFFFF;

	r0 += src32_ptr[0]; /* The output r0 is a 33-bit number. */
	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
	r0 += src32_ptr[2]; /* The output r0 is a 34-bit number. */
	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */

	/* The first input is a 16-bit number. The second input is a 19-bit
	 * number. Their sum is a 20-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r0 = (r0 & 0xFFFF) + (r0 >> 16);

	/* Apply 1's complement to the result. A zero checksum is emitted as
	 * 0xFFFF (ones'-complement negative zero).
	 */
	r0 = ~r0 & 0xFFFF;
	r0 = r0 ? r0 : 0xFFFF;

	*dst16_ptr = (uint16_t)r0;
}
3363 
/* ckadd (struct): compute the 16-bit ones'-complement checksum over an entire
 * source header of arbitrary size (in 32-bit word granules), folding in the
 * destination field's current value. The common 20-byte case is dispatched to
 * the unrolled __instr_alu_ckadd_struct20_exec() fast path.
 */
static inline void
__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
			      struct thread *t,
			      const struct instruction *ip)
{
	uint32_t src_header_id = ip->alu.src.n_bits; /* The src header ID is stored here. */
	uint32_t n_src_header_bytes = t->headers[src_header_id].n_bytes;
	uint8_t *dst_struct, *src_struct;
	uint16_t *dst16_ptr, dst;
	uint32_t *src32_ptr;
	uint64_t r;
	uint32_t i;

	if (n_src_header_bytes == 20) {
		__instr_alu_ckadd_struct20_exec(p, t, ip);
		return;
	}

	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);

	/* Structs. */
	dst_struct = t->structs[ip->alu.dst.struct_id];
	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
	dst = *dst16_ptr;

	src_struct = t->structs[ip->alu.src.struct_id];
	src32_ptr = (uint32_t *)&src_struct[0];

	/* Initialize the result with destination 1's complement. */
	r = dst;
	r = ~r & 0xFFFF;

	/* The max number of 32-bit words in a 32K-byte header is 2^13.
	 * Therefore, in the worst case scenario, a 45-bit number is added to a
	 * 16-bit number (the input r), so the output r is 46-bit number.
	 */
	for (i = 0; i < n_src_header_bytes / 4; i++, src32_ptr++)
		r += *src32_ptr;

	/* The first input is a 16-bit number. The second input is a 30-bit
	 * number. Their sum is a 31-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
	 * a 15-bit number (0 .. 0x7FFF). The sum is a 17-bit number (0 .. 0x17FFE).
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
	 * 0x17FFE), the output r is (0 .. 0x7FFF). So no carry bit can be
	 * generated, therefore the output r is always a 16-bit number.
	 */
	r = (r & 0xFFFF) + (r >> 16);

	/* Apply 1's complement to the result. A zero checksum is emitted as
	 * 0xFFFF (ones'-complement negative zero).
	 */
	r = ~r & 0xFFFF;
	r = r ? r : 0xFFFF;

	*dst16_ptr = (uint16_t)r;
}
3426 
3427 /*
3428  * Register array.
3429  */
3430 static inline uint64_t *
3431 instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
3432 {
3433 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3434 	return r->regarray;
3435 }
3436 
/* Register array index read from a regular (host-byte-order) struct field:
 * mask to the field width, then clip to the array size (size is a power of
 * two, so size_mask = size - 1).
 */
static inline uint64_t
instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];

	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
	uint64_t idx = idx64 & idx64_mask & r->size_mask;

	return idx;
}

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Same as above for a header (network-byte-order) field: byte-swap, then
 * right-align the top n_bits bits before clipping to the array size.
 */
static inline uint64_t
instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];

	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;

	return idx;
}

#else

/* On big-endian CPUs header fields are already in host byte order. */
#define instr_regarray_idx_nbo instr_regarray_idx_hbo

#endif
3471 
3472 static inline uint64_t
3473 instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3474 {
3475 	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
3476 
3477 	uint64_t idx = ip->regarray.idx_val & r->size_mask;
3478 
3479 	return idx;
3480 }
3481 
/* Source operand read from a regular (host-byte-order) struct field, masked to
 * the field width.
 */
static inline uint64_t
instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
	uint64_t src = src64 & src64_mask;

	return src;
}

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Source operand read from a header (network-byte-order) field: byte-swap,
 * then right-align the top n_bits bits.
 */
static inline uint64_t
instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);

	return src;
}

#else

/* On big-endian CPUs header fields are already in host byte order. */
#define instr_regarray_src_nbo instr_regarray_src_hbo

#endif
3512 
/* Write src into a regular (host-byte-order) destination field via
 * read-modify-write, preserving the bits outside the field.
 */
static inline void
instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
{
	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
	uint64_t dst64 = *dst64_ptr;
	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);

	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);

}

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Write a host-byte-order src into a header (network-byte-order) destination
 * field: byte-swap and left-align src first, then read-modify-write.
 */
static inline void
instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
{
	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
	uint64_t dst64 = *dst64_ptr;
	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);

	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
}

#else

/* On big-endian CPUs header fields are already in host byte order. */
#define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set

#endif
3544 
/* regprefetch r[h]: prefetch the register selected by a header (NBO) field
 * index into the cache, ahead of a later regrd/regwr/regadd.
 */
static inline void
__instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	rte_prefetch0(&regarray[idx]);
}

/* regprefetch r[m]: prefetch the register selected by a regular (HBO) field
 * index.
 */
static inline void
__instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
			    struct thread *t,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	rte_prefetch0(&regarray[idx]);
}

/* regprefetch r[i]: prefetch the register selected by an immediate index. */
static inline void
__instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
			    struct thread *t __rte_unused,
			    const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	rte_prefetch0(&regarray[idx]);
}
3586 
/* regrd h = r[h]: read a register (index from a header field) into a header
 * destination field.
 */
static inline void
__instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
		       struct thread *t,
		       const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
}

/* regrd h = r[m]: read a register (index from a regular field) into a header
 * destination field.
 */
static inline void
__instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
		       struct thread *t,
		       const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);

	/* Structs. */
	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
}

/* regrd m = r[h]: read a register (index from a header field) into a regular
 * destination field.
 */
static inline void
__instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
}

/* regrd m = r[m]: read a register (index from a regular field) into a regular
 * destination field.
 */
static inline void
__instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
}

/* regrd h = r[i]: read a register (immediate index) into a header field. */
static inline void
__instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
}

/* regrd m = r[i]: read a register (immediate index) into a regular field. */
static inline void
__instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx;

	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
}
3663 
/* regwr r[h] = h: write a header-field source into the register selected by a
 * header-field index.
 */
static inline void
__instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	src = instr_regarray_src_nbo(t, ip);
	regarray[idx] = src;
}

/* regwr r[h] = m: write a regular-field source into the register selected by
 * a header-field index.
 */
static inline void
__instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	src = instr_regarray_src_hbo(t, ip);
	regarray[idx] = src;
}

/* regwr r[m] = h: write a header-field source into the register selected by a
 * regular-field index.
 */
static inline void
__instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	src = instr_regarray_src_nbo(t, ip);
	regarray[idx] = src;
}

/* regwr r[m] = m: write a regular-field source into the register selected by
 * a regular-field index.
 */
static inline void
__instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	src = instr_regarray_src_hbo(t, ip);
	regarray[idx] = src;
}

/* regwr r[h] = i: write an immediate into the register selected by a
 * header-field index.
 */
static inline void
__instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	src = ip->regarray.dstsrc_val;
	regarray[idx] = src;
}

/* regwr r[m] = i: write an immediate into the register selected by a
 * regular-field index.
 */
static inline void
__instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	src = ip->regarray.dstsrc_val;
	regarray[idx] = src;
}

/* regwr r[i] = h: write a header-field source into the register selected by
 * an immediate index.
 */
static inline void
__instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	src = instr_regarray_src_nbo(t, ip);
	regarray[idx] = src;
}

/* regwr r[i] = m: write a regular-field source into the register selected by
 * an immediate index.
 */
static inline void
__instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	src = instr_regarray_src_hbo(t, ip);
	regarray[idx] = src;
}

/* regwr r[i] = i: write an immediate into the register selected by an
 * immediate index.
 */
static inline void
__instr_regwr_rii_exec(struct rte_swx_pipeline *p,
		       struct thread *t __rte_unused,
		       const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	src = ip->regarray.dstsrc_val;
	regarray[idx] = src;
}
3782 
/* regadd r[h] += h: add a header-field source to the register selected by a
 * header-field index.
 */
static inline void
__instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	src = instr_regarray_src_nbo(t, ip);
	regarray[idx] += src;
}

/* regadd r[h] += m: add a regular-field source to the register selected by a
 * header-field index.
 */
static inline void
__instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	src = instr_regarray_src_hbo(t, ip);
	regarray[idx] += src;
}

/* regadd r[m] += h: add a header-field source to the register selected by a
 * regular-field index.
 */
static inline void
__instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	src = instr_regarray_src_nbo(t, ip);
	regarray[idx] += src;
}

/* regadd r[m] += m: add a regular-field source to the register selected by a
 * regular-field index.
 */
static inline void
__instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	src = instr_regarray_src_hbo(t, ip);
	regarray[idx] += src;
}

/* regadd r[h] += i: add an immediate to the register selected by a
 * header-field index.
 */
static inline void
__instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_nbo(p, t, ip);
	src = ip->regarray.dstsrc_val;
	regarray[idx] += src;
}

/* regadd r[m] += i: add an immediate to the register selected by a
 * regular-field index.
 */
static inline void
__instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_hbo(p, t, ip);
	src = ip->regarray.dstsrc_val;
	regarray[idx] += src;
}

/* regadd r[i] += h: add a header-field source to the register selected by an
 * immediate index.
 */
static inline void
__instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	src = instr_regarray_src_nbo(t, ip);
	regarray[idx] += src;
}

/* regadd r[i] += m: add a regular-field source to the register selected by an
 * immediate index.
 */
static inline void
__instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	src = instr_regarray_src_hbo(t, ip);
	regarray[idx] += src;
}

/* regadd r[i] += i: add an immediate to the register selected by an immediate
 * index.
 */
static inline void
__instr_regadd_rii_exec(struct rte_swx_pipeline *p,
			struct thread *t __rte_unused,
			const struct instruction *ip)
{
	uint64_t *regarray, idx, src;

	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);

	regarray = instr_regarray_regarray(p, ip);
	idx = instr_regarray_idx_imm(p, ip);
	src = ip->regarray.dstsrc_val;
	regarray[idx] += src;
}
3901 
3902 /*
 * Meter array.
3904  */
/* Meter array entry selected by a regular (host-byte-order) field index:
 * mask to the field width, then clip to the array size (power of two).
 */
static inline struct meter *
instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];

	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
	uint64_t idx = idx64 & idx64_mask & r->size_mask;

	return &r->metarray[idx];
}

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Same as above for a header (network-byte-order) field index: byte-swap,
 * then right-align the top n_bits bits before clipping.
 */
static inline struct meter *
instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
{
	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];

	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
	uint64_t idx64 = *idx64_ptr;
	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;

	return &r->metarray[idx];
}

#else

/* On big-endian CPUs header fields are already in host byte order. */
#define instr_meter_idx_nbo instr_meter_idx_hbo

#endif
3939 
3940 static inline struct meter *
3941 instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
3942 {
3943 	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
3944 
3945 	uint64_t idx =  ip->meter.idx_val & r->size_mask;
3946 
3947 	return &r->metarray[idx];
3948 }
3949 
/* Packet length operand read from a regular (host-byte-order) field, masked
 * to the field width.
 */
static inline uint32_t
instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
	uint64_t src = src64 & src64_mask;

	return (uint32_t)src;
}

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN

/* Packet length operand read from a header (network-byte-order) field:
 * byte-swap, then right-align the top n_bits bits.
 */
static inline uint32_t
instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);

	return (uint32_t)src;
}

#else

/* On big-endian CPUs header fields are already in host byte order. */
#define instr_meter_length_nbo instr_meter_length_hbo

#endif
3980 
/* Input color operand read from a regular (host-byte-order) field, masked to
 * the field width and converted to the rte_color enum.
 */
static inline enum rte_color
instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
{
	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
	uint64_t src64 = *src64_ptr;
	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
	uint64_t src = src64 & src64_mask;

	return (enum rte_color)src;
}

/* Write the output color into a regular (host-byte-order) destination field
 * via read-modify-write, preserving the bits outside the field.
 */
static inline void
instr_meter_color_out_hbo_set(struct thread *t,
			      const struct instruction *ip,
			      enum rte_color color_out)
{
	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
	uint64_t dst64 = *dst64_ptr;
	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);

	uint64_t src = (uint64_t)color_out;

	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
}
4007 
/* metprefetch (h): prefetch the meter selected by a header (NBO) field index
 * into the cache, ahead of a later meter instruction.
 */
static inline void
__instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
			   struct thread *t,
			   const struct instruction *ip)
{
	struct meter *m;

	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);

	m = instr_meter_idx_nbo(p, t, ip);
	rte_prefetch0(m);
}

/* metprefetch (m): prefetch the meter selected by a regular (HBO) field
 * index.
 */
static inline void
__instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
			   struct thread *t,
			   const struct instruction *ip)
{
	struct meter *m;

	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);

	m = instr_meter_idx_hbo(p, t, ip);
	rte_prefetch0(m);
}

/* metprefetch (i): prefetch the meter selected by an immediate index. */
static inline void
__instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
			   struct thread *t __rte_unused,
			   const struct instruction *ip)
{
	struct meter *m;

	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);

	m = instr_meter_idx_imm(p, ip);
	rte_prefetch0(m);
}
4046 
4047 static inline void
4048 __instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4049 {
4050 	struct meter *m;
4051 	uint64_t time, n_pkts, n_bytes;
4052 	uint32_t length;
4053 	enum rte_color color_in, color_out;
4054 
4055 	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
4056 
4057 	m = instr_meter_idx_nbo(p, t, ip);
4058 	rte_prefetch0(m->n_pkts);
4059 	time = rte_get_tsc_cycles();
4060 	length = instr_meter_length_nbo(t, ip);
4061 	color_in = instr_meter_color_in_hbo(t, ip);
4062 
4063 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4064 		&m->profile->profile,
4065 		time,
4066 		length,
4067 		color_in);
4068 
4069 	color_out &= m->color_mask;
4070 
4071 	n_pkts = m->n_pkts[color_out];
4072 	n_bytes = m->n_bytes[color_out];
4073 
4074 	instr_meter_color_out_hbo_set(t, ip, color_out);
4075 
4076 	m->n_pkts[color_out] = n_pkts + 1;
4077 	m->n_bytes[color_out] = n_bytes + length;
4078 }
4079 
4080 static inline void
4081 __instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4082 {
4083 	struct meter *m;
4084 	uint64_t time, n_pkts, n_bytes;
4085 	uint32_t length;
4086 	enum rte_color color_in, color_out;
4087 
4088 	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
4089 
4090 	m = instr_meter_idx_nbo(p, t, ip);
4091 	rte_prefetch0(m->n_pkts);
4092 	time = rte_get_tsc_cycles();
4093 	length = instr_meter_length_nbo(t, ip);
4094 	color_in = (enum rte_color)ip->meter.color_in_val;
4095 
4096 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4097 		&m->profile->profile,
4098 		time,
4099 		length,
4100 		color_in);
4101 
4102 	color_out &= m->color_mask;
4103 
4104 	n_pkts = m->n_pkts[color_out];
4105 	n_bytes = m->n_bytes[color_out];
4106 
4107 	instr_meter_color_out_hbo_set(t, ip, color_out);
4108 
4109 	m->n_pkts[color_out] = n_pkts + 1;
4110 	m->n_bytes[color_out] = n_bytes + length;
4111 }
4112 
4113 static inline void
4114 __instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4115 {
4116 	struct meter *m;
4117 	uint64_t time, n_pkts, n_bytes;
4118 	uint32_t length;
4119 	enum rte_color color_in, color_out;
4120 
4121 	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
4122 
4123 	m = instr_meter_idx_nbo(p, t, ip);
4124 	rte_prefetch0(m->n_pkts);
4125 	time = rte_get_tsc_cycles();
4126 	length = instr_meter_length_hbo(t, ip);
4127 	color_in = instr_meter_color_in_hbo(t, ip);
4128 
4129 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4130 		&m->profile->profile,
4131 		time,
4132 		length,
4133 		color_in);
4134 
4135 	color_out &= m->color_mask;
4136 
4137 	n_pkts = m->n_pkts[color_out];
4138 	n_bytes = m->n_bytes[color_out];
4139 
4140 	instr_meter_color_out_hbo_set(t, ip, color_out);
4141 
4142 	m->n_pkts[color_out] = n_pkts + 1;
4143 	m->n_bytes[color_out] = n_bytes + length;
4144 }
4145 
4146 static inline void
4147 __instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4148 {
4149 	struct meter *m;
4150 	uint64_t time, n_pkts, n_bytes;
4151 	uint32_t length;
4152 	enum rte_color color_in, color_out;
4153 
4154 	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
4155 
4156 	m = instr_meter_idx_nbo(p, t, ip);
4157 	rte_prefetch0(m->n_pkts);
4158 	time = rte_get_tsc_cycles();
4159 	length = instr_meter_length_hbo(t, ip);
4160 	color_in = (enum rte_color)ip->meter.color_in_val;
4161 
4162 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4163 		&m->profile->profile,
4164 		time,
4165 		length,
4166 		color_in);
4167 
4168 	color_out &= m->color_mask;
4169 
4170 	n_pkts = m->n_pkts[color_out];
4171 	n_bytes = m->n_bytes[color_out];
4172 
4173 	instr_meter_color_out_hbo_set(t, ip, color_out);
4174 
4175 	m->n_pkts[color_out] = n_pkts + 1;
4176 	m->n_bytes[color_out] = n_bytes + length;
4177 }
4178 
4179 static inline void
4180 __instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4181 {
4182 	struct meter *m;
4183 	uint64_t time, n_pkts, n_bytes;
4184 	uint32_t length;
4185 	enum rte_color color_in, color_out;
4186 
4187 	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
4188 
4189 	m = instr_meter_idx_hbo(p, t, ip);
4190 	rte_prefetch0(m->n_pkts);
4191 	time = rte_get_tsc_cycles();
4192 	length = instr_meter_length_nbo(t, ip);
4193 	color_in = instr_meter_color_in_hbo(t, ip);
4194 
4195 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4196 		&m->profile->profile,
4197 		time,
4198 		length,
4199 		color_in);
4200 
4201 	color_out &= m->color_mask;
4202 
4203 	n_pkts = m->n_pkts[color_out];
4204 	n_bytes = m->n_bytes[color_out];
4205 
4206 	instr_meter_color_out_hbo_set(t, ip, color_out);
4207 
4208 	m->n_pkts[color_out] = n_pkts + 1;
4209 	m->n_bytes[color_out] = n_bytes + length;
4210 }
4211 
4212 static inline void
4213 __instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4214 {
4215 	struct meter *m;
4216 	uint64_t time, n_pkts, n_bytes;
4217 	uint32_t length;
4218 	enum rte_color color_in, color_out;
4219 
4220 	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
4221 
4222 	m = instr_meter_idx_hbo(p, t, ip);
4223 	rte_prefetch0(m->n_pkts);
4224 	time = rte_get_tsc_cycles();
4225 	length = instr_meter_length_nbo(t, ip);
4226 	color_in = (enum rte_color)ip->meter.color_in_val;
4227 
4228 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4229 		&m->profile->profile,
4230 		time,
4231 		length,
4232 		color_in);
4233 
4234 	color_out &= m->color_mask;
4235 
4236 	n_pkts = m->n_pkts[color_out];
4237 	n_bytes = m->n_bytes[color_out];
4238 
4239 	instr_meter_color_out_hbo_set(t, ip, color_out);
4240 
4241 	m->n_pkts[color_out] = n_pkts + 1;
4242 	m->n_bytes[color_out] = n_bytes + length;
4243 }
4244 
4245 static inline void
4246 __instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4247 {
4248 	struct meter *m;
4249 	uint64_t time, n_pkts, n_bytes;
4250 	uint32_t length;
4251 	enum rte_color color_in, color_out;
4252 
4253 	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
4254 
4255 	m = instr_meter_idx_hbo(p, t, ip);
4256 	rte_prefetch0(m->n_pkts);
4257 	time = rte_get_tsc_cycles();
4258 	length = instr_meter_length_hbo(t, ip);
4259 	color_in = instr_meter_color_in_hbo(t, ip);
4260 
4261 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4262 		&m->profile->profile,
4263 		time,
4264 		length,
4265 		color_in);
4266 
4267 	color_out &= m->color_mask;
4268 
4269 	n_pkts = m->n_pkts[color_out];
4270 	n_bytes = m->n_bytes[color_out];
4271 
4272 	instr_meter_color_out_hbo_set(t, ip, color_out);
4273 
4274 	m->n_pkts[color_out] = n_pkts + 1;
4275 	m->n_bytes[color_out] = n_bytes + length;
4276 }
4277 
4278 static inline void
4279 __instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4280 {
4281 	struct meter *m;
4282 	uint64_t time, n_pkts, n_bytes;
4283 	uint32_t length;
4284 	enum rte_color color_in, color_out;
4285 
4286 	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
4287 
4288 	m = instr_meter_idx_hbo(p, t, ip);
4289 	rte_prefetch0(m->n_pkts);
4290 	time = rte_get_tsc_cycles();
4291 	length = instr_meter_length_hbo(t, ip);
4292 	color_in = (enum rte_color)ip->meter.color_in_val;
4293 
4294 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4295 		&m->profile->profile,
4296 		time,
4297 		length,
4298 		color_in);
4299 
4300 	color_out &= m->color_mask;
4301 
4302 	n_pkts = m->n_pkts[color_out];
4303 	n_bytes = m->n_bytes[color_out];
4304 
4305 	instr_meter_color_out_hbo_set(t, ip, color_out);
4306 
4307 	m->n_pkts[color_out] = n_pkts + 1;
4308 	m->n_bytes[color_out] = n_bytes + length;
4309 }
4310 
4311 static inline void
4312 __instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4313 {
4314 	struct meter *m;
4315 	uint64_t time, n_pkts, n_bytes;
4316 	uint32_t length;
4317 	enum rte_color color_in, color_out;
4318 
4319 	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
4320 
4321 	m = instr_meter_idx_imm(p, ip);
4322 	rte_prefetch0(m->n_pkts);
4323 	time = rte_get_tsc_cycles();
4324 	length = instr_meter_length_nbo(t, ip);
4325 	color_in = instr_meter_color_in_hbo(t, ip);
4326 
4327 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4328 		&m->profile->profile,
4329 		time,
4330 		length,
4331 		color_in);
4332 
4333 	color_out &= m->color_mask;
4334 
4335 	n_pkts = m->n_pkts[color_out];
4336 	n_bytes = m->n_bytes[color_out];
4337 
4338 	instr_meter_color_out_hbo_set(t, ip, color_out);
4339 
4340 	m->n_pkts[color_out] = n_pkts + 1;
4341 	m->n_bytes[color_out] = n_bytes + length;
4342 }
4343 
4344 static inline void
4345 __instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4346 {
4347 	struct meter *m;
4348 	uint64_t time, n_pkts, n_bytes;
4349 	uint32_t length;
4350 	enum rte_color color_in, color_out;
4351 
4352 	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
4353 
4354 	m = instr_meter_idx_imm(p, ip);
4355 	rte_prefetch0(m->n_pkts);
4356 	time = rte_get_tsc_cycles();
4357 	length = instr_meter_length_nbo(t, ip);
4358 	color_in = (enum rte_color)ip->meter.color_in_val;
4359 
4360 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4361 		&m->profile->profile,
4362 		time,
4363 		length,
4364 		color_in);
4365 
4366 	color_out &= m->color_mask;
4367 
4368 	n_pkts = m->n_pkts[color_out];
4369 	n_bytes = m->n_bytes[color_out];
4370 
4371 	instr_meter_color_out_hbo_set(t, ip, color_out);
4372 
4373 	m->n_pkts[color_out] = n_pkts + 1;
4374 	m->n_bytes[color_out] = n_bytes + length;
4375 }
4376 
4377 static inline void
4378 __instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4379 {
4380 	struct meter *m;
4381 	uint64_t time, n_pkts, n_bytes;
4382 	uint32_t length;
4383 	enum rte_color color_in, color_out;
4384 
4385 	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
4386 
4387 	m = instr_meter_idx_imm(p, ip);
4388 	rte_prefetch0(m->n_pkts);
4389 	time = rte_get_tsc_cycles();
4390 	length = instr_meter_length_hbo(t, ip);
4391 	color_in = instr_meter_color_in_hbo(t, ip);
4392 
4393 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4394 		&m->profile->profile,
4395 		time,
4396 		length,
4397 		color_in);
4398 
4399 	color_out &= m->color_mask;
4400 
4401 	n_pkts = m->n_pkts[color_out];
4402 	n_bytes = m->n_bytes[color_out];
4403 
4404 	instr_meter_color_out_hbo_set(t, ip, color_out);
4405 
4406 	m->n_pkts[color_out] = n_pkts + 1;
4407 	m->n_bytes[color_out] = n_bytes + length;
4408 }
4409 
4410 static inline void
4411 __instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
4412 {
4413 	struct meter *m;
4414 	uint64_t time, n_pkts, n_bytes;
4415 	uint32_t length;
4416 	enum rte_color color_in, color_out;
4417 
4418 	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
4419 
4420 	m = instr_meter_idx_imm(p, ip);
4421 	rte_prefetch0(m->n_pkts);
4422 	time = rte_get_tsc_cycles();
4423 	length = instr_meter_length_hbo(t, ip);
4424 	color_in = (enum rte_color)ip->meter.color_in_val;
4425 
4426 	color_out = rte_meter_trtcm_color_aware_check(&m->m,
4427 		&m->profile->profile,
4428 		time,
4429 		length,
4430 		color_in);
4431 
4432 	color_out &= m->color_mask;
4433 
4434 	n_pkts = m->n_pkts[color_out];
4435 	n_bytes = m->n_bytes[color_out];
4436 
4437 	instr_meter_color_out_hbo_set(t, ip, color_out);
4438 
4439 	m->n_pkts[color_out] = n_pkts + 1;
4440 	m->n_bytes[color_out] = n_bytes + length;
4441 }
4442 
4443 #endif
4444