xref: /dpdk/drivers/event/dlb2/dlb2_sse.c (revision ffa46fc4a2b591d1c25862931a9f9f0a469016a4)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2022 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdbool.h>
7 
8 #include "dlb2_priv.h"
9 #include "dlb2_iface.h"
10 #include "dlb2_inline_fns.h"
11 
12 /*
13  * This source file is only used when the compiler on the build machine
14  * does not support AVX512VL.
15  */
16 
17 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
18 	{
19 		/* Load-balanced cmd bytes */
20 		[RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
21 		[RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
22 		[RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
23 	},
24 	{
25 		/* Directed cmd bytes */
26 		[RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
27 		[RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
28 		[RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
29 	},
30 };
31 
32 void
dlb2_event_build_hcws(struct dlb2_port * qm_port,const struct rte_event ev[],int num,uint8_t * sched_type,uint8_t * queue_id)33 dlb2_event_build_hcws(struct dlb2_port *qm_port,
34 		      const struct rte_event ev[],
35 		      int num,
36 		      uint8_t *sched_type,
37 		      uint8_t *queue_id)
38 {
39 	struct dlb2_enqueue_qe *qe;
40 	uint16_t sched_word[4];
41 	__m128i sse_qe[2];
42 	int i;
43 
44 	qe = qm_port->qe4;
45 
46 	sse_qe[0] = _mm_setzero_si128();
47 	sse_qe[1] = _mm_setzero_si128();
48 
49 	switch (num) {
50 	case 4:
51 		/* Construct the metadata portion of two HCWs in one 128b SSE
52 		 * register. HCW metadata is constructed in the SSE registers
53 		 * like so:
54 		 * sse_qe[0][63:0]:   qe[0]'s metadata
55 		 * sse_qe[0][127:64]: qe[1]'s metadata
56 		 * sse_qe[1][63:0]:   qe[2]'s metadata
57 		 * sse_qe[1][127:64]: qe[3]'s metadata
58 		 */
59 
60 		/* Convert the event operation into a command byte and store it
61 		 * in the metadata:
62 		 * sse_qe[0][63:56]   = cmd_byte_map[is_directed][ev[0].op]
63 		 * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
64 		 * sse_qe[1][63:56]   = cmd_byte_map[is_directed][ev[2].op]
65 		 * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
66 		 */
67 #define DLB2_QE_CMD_BYTE 7
68 		sse_qe[0] = _mm_insert_epi8(sse_qe[0],
69 				cmd_byte_map[qm_port->is_directed][ev[0].op],
70 				DLB2_QE_CMD_BYTE);
71 		sse_qe[0] = _mm_insert_epi8(sse_qe[0],
72 				cmd_byte_map[qm_port->is_directed][ev[1].op],
73 				DLB2_QE_CMD_BYTE + 8);
74 		sse_qe[1] = _mm_insert_epi8(sse_qe[1],
75 				cmd_byte_map[qm_port->is_directed][ev[2].op],
76 				DLB2_QE_CMD_BYTE);
77 		sse_qe[1] = _mm_insert_epi8(sse_qe[1],
78 				cmd_byte_map[qm_port->is_directed][ev[3].op],
79 				DLB2_QE_CMD_BYTE + 8);
80 
81 		/* Store priority, scheduling type, and queue ID in the sched
82 		 * word array because these values are re-used when the
83 		 * destination is a directed queue.
84 		 */
85 		sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
86 				sched_type[0] << 8 |
87 				queue_id[0];
88 		sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
89 				sched_type[1] << 8 |
90 				queue_id[1];
91 		sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
92 				sched_type[2] << 8 |
93 				queue_id[2];
94 		sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
95 				sched_type[3] << 8 |
96 				queue_id[3];
97 
98 		/* Store the event priority, scheduling type, and queue ID in
99 		 * the metadata:
100 		 * sse_qe[0][31:16] = sched_word[0]
101 		 * sse_qe[0][95:80] = sched_word[1]
102 		 * sse_qe[1][31:16] = sched_word[2]
103 		 * sse_qe[1][95:80] = sched_word[3]
104 		 */
105 #define DLB2_QE_QID_SCHED_WORD 1
106 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
107 					     sched_word[0],
108 					     DLB2_QE_QID_SCHED_WORD);
109 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
110 					     sched_word[1],
111 					     DLB2_QE_QID_SCHED_WORD + 4);
112 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
113 					     sched_word[2],
114 					     DLB2_QE_QID_SCHED_WORD);
115 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
116 					     sched_word[3],
117 					     DLB2_QE_QID_SCHED_WORD + 4);
118 
119 		/* If the destination is a load-balanced queue, store the lock
120 		 * ID. If it is a directed queue, DLB places this field in
121 		 * bytes 10-11 of the received QE, so we format it accordingly:
122 		 * sse_qe[0][47:32]  = dir queue ? sched_word[0] : flow_id[0]
123 		 * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
124 		 * sse_qe[1][47:32]  = dir queue ? sched_word[2] : flow_id[2]
125 		 * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
126 		 */
127 #define DLB2_QE_LOCK_ID_WORD 2
128 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
129 				(sched_type[0] == DLB2_SCHED_DIRECTED) ?
130 					sched_word[0] : ev[0].flow_id,
131 				DLB2_QE_LOCK_ID_WORD);
132 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
133 				(sched_type[1] == DLB2_SCHED_DIRECTED) ?
134 					sched_word[1] : ev[1].flow_id,
135 				DLB2_QE_LOCK_ID_WORD + 4);
136 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
137 				(sched_type[2] == DLB2_SCHED_DIRECTED) ?
138 					sched_word[2] : ev[2].flow_id,
139 				DLB2_QE_LOCK_ID_WORD);
140 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
141 				(sched_type[3] == DLB2_SCHED_DIRECTED) ?
142 					sched_word[3] : ev[3].flow_id,
143 				DLB2_QE_LOCK_ID_WORD + 4);
144 
145 		/* Store the event type and sub event type in the metadata:
146 		 * sse_qe[0][15:0]  = flow_id[0]
147 		 * sse_qe[0][79:64] = flow_id[1]
148 		 * sse_qe[1][15:0]  = flow_id[2]
149 		 * sse_qe[1][79:64] = flow_id[3]
150 		 */
151 #define DLB2_QE_EV_TYPE_WORD 0
152 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
153 					     ev[0].sub_event_type << 8 |
154 						ev[0].event_type,
155 					     DLB2_QE_EV_TYPE_WORD);
156 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
157 					     ev[1].sub_event_type << 8 |
158 						ev[1].event_type,
159 					     DLB2_QE_EV_TYPE_WORD + 4);
160 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
161 					     ev[2].sub_event_type << 8 |
162 						ev[2].event_type,
163 					     DLB2_QE_EV_TYPE_WORD);
164 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
165 					     ev[3].sub_event_type << 8 |
166 						ev[3].event_type,
167 					     DLB2_QE_EV_TYPE_WORD + 4);
168 
169 		/*
170 		 * Store the metadata to memory (use the double-precision
171 		 * _mm_storeh_pd because there is no integer function for
172 		 * storing the upper 64b):
173 		 * qe[0] metadata = sse_qe[0][63:0]
174 		 * qe[1] metadata = sse_qe[0][127:64]
175 		 * qe[2] metadata = sse_qe[1][63:0]
176 		 * qe[3] metadata = sse_qe[1][127:64]
177 		 */
178 		_mm_storel_epi64((__m128i *)&qe[0].u.opaque_data,
179 				 sse_qe[0]);
180 		_mm_storeh_pd((double *)&qe[1].u.opaque_data,
181 			      (__m128d)sse_qe[0]);
182 		_mm_storel_epi64((__m128i *)&qe[2].u.opaque_data,
183 				 sse_qe[1]);
184 		_mm_storeh_pd((double *)&qe[3].u.opaque_data,
185 				      (__m128d)sse_qe[1]);
186 
187 		qe[0].data = ev[0].u64;
188 		qe[1].data = ev[1].u64;
189 		qe[2].data = ev[2].u64;
190 		qe[3].data = ev[3].u64;
191 
192 		/* will only be set for DLB 2.5 + */
193 		if (qm_port->cq_weight) {
194 			qe[0].weight = ev[0].impl_opaque & 3;
195 			qe[1].weight = ev[1].impl_opaque & 3;
196 			qe[2].weight = ev[2].impl_opaque & 3;
197 			qe[3].weight = ev[3].impl_opaque & 3;
198 		}
199 
200 		break;
201 	case 3:
202 	case 2:
203 	case 1:
204 		for (i = 0; i < num; i++) {
205 			qe[i].cmd_byte =
206 				cmd_byte_map[qm_port->is_directed][ev[i].op];
207 			qe[i].sched_type = sched_type[i];
208 			qe[i].data = ev[i].u64;
209 			qe[i].qid = queue_id[i];
210 			qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
211 			qe[i].lock_id = ev[i].flow_id;
212 			if (sched_type[i] == DLB2_SCHED_DIRECTED) {
213 				struct dlb2_msg_info *info =
214 					(struct dlb2_msg_info *)&qe[i].lock_id;
215 
216 				info->qid = queue_id[i];
217 				info->sched_type = DLB2_SCHED_DIRECTED;
218 				info->priority = qe[i].priority;
219 			}
220 			qe[i].u.event_type.major = ev[i].event_type;
221 			qe[i].u.event_type.sub = ev[i].sub_event_type;
222 		}
223 		break;
224 	case 0:
225 		break;
226 	}
227 }
228