1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018-2023 NXP
3 */
4
5 #include <rte_memory.h>
6
7 #include "dpaax_iova_table.h"
8 #include "dpaax_logs.h"
9
10 /* Global table reference */
11 struct dpaax_iova_table *dpaax_iova_table_p;
12
13 static int dpaax_handle_memevents(void);
14
/* A structure representing the device-tree node available in /proc/device-tree.
 * Each 16-byte record of the memory node's "reg" property decodes into one of
 * these (see read_memory_node()).
 */
struct reg_node {
	phys_addr_t addr;	/* 8-byte start address of the memory region */
	size_t len;		/* 8-byte length of the region, in bytes */
};
21
/* A ntohll equivalent routine: converts one 8-byte big-endian (device-tree)
 * value to host order, in place, by swapping the two 32-bit halves and
 * byte-swapping each with ntohl.
 * XXX: This is only applicable for 64 bit environment.
 */
static void
rotate_8(unsigned char *arr)
{
	uint32_t lo, hi;

	/* memcpy instead of casting unsigned char * to uint32_t *: the cast
	 * violates strict aliasing and may be misaligned on some targets.
	 */
	memcpy(&lo, arr, sizeof(lo));
	memcpy(&hi, arr + 4, sizeof(hi));

	lo = ntohl(lo);
	hi = ntohl(hi);

	/* Halves swap places: result is a full 8-byte reversal on LE hosts */
	memcpy(arr, &hi, sizeof(hi));
	memcpy(arr + 4, &lo, sizeof(lo));
}
42
43 /* read_memory_nodes
44 * Memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088, LX2160)
45 * are populated by Uboot and available in device tree:
46 * /proc/device-tree/memory@<address>/reg <= register.
47 * Entries are of the form:
48 * (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>)..
49 *
50 * @param count
51 * OUT populate number of entries found in memory node
52 * @return
53 * Pointer to array of reg_node elements, count size
54 */
55 static struct reg_node *
read_memory_node(unsigned int * count)56 read_memory_node(unsigned int *count)
57 {
58 int fd, ret, i;
59 unsigned int j;
60 glob_t result = {0};
61 struct stat statbuf = {0};
62 char file_data[MEM_NODE_FILE_LEN];
63 struct reg_node *nodes = NULL;
64
65 *count = 0;
66
67 ret = glob(MEM_NODE_PATH_GLOB, 0, NULL, &result);
68 if (ret != 0)
69 ret = glob(MEM_NODE_PATH_GLOB_VM, 0, NULL, &result);
70
71 if (ret != 0) {
72 DPAAX_DEBUG("Unable to glob device-tree memory node (err: %d)",
73 ret);
74 goto out;
75 }
76
77 if (result.gl_pathc != 1) {
78 /* Either more than one memory@<addr> node found, or none.
79 * In either case, cannot work ahead.
80 */
81 DPAAX_DEBUG("Found (%zu) entries in device-tree. Not supported!",
82 result.gl_pathc);
83 goto out;
84 }
85
86 DPAAX_DEBUG("Opening and parsing device-tree node: (%s)",
87 result.gl_pathv[0]);
88 fd = open(result.gl_pathv[0], O_RDONLY);
89 if (fd < 0) {
90 DPAAX_DEBUG("Unable to open the device-tree node: (%s)(fd=%d)",
91 MEM_NODE_PATH_GLOB, fd);
92 goto cleanup;
93 }
94
95 /* Stat to get the file size */
96 ret = fstat(fd, &statbuf);
97 if (ret != 0) {
98 DPAAX_DEBUG("Unable to get device-tree memory node size.");
99 goto cleanup;
100 }
101
102 DPAAX_DEBUG("Size of device-tree mem node: %" PRIu64, statbuf.st_size);
103 if (statbuf.st_size > MEM_NODE_FILE_LEN) {
104 DPAAX_DEBUG("More memory nodes available than assumed.");
105 DPAAX_DEBUG("System may not work properly!");
106 }
107
108 ret = read(fd, file_data, statbuf.st_size > MEM_NODE_FILE_LEN ?
109 MEM_NODE_FILE_LEN : statbuf.st_size);
110 if (ret <= 0) {
111 DPAAX_DEBUG("Unable to read device-tree memory node: (%d)",
112 ret);
113 goto cleanup;
114 }
115
116 /* The reg node should be multiple of 16 bytes, 8 bytes each for addr
117 * and len.
118 */
119 *count = (statbuf.st_size / 16);
120 if ((*count) <= 0 || (statbuf.st_size % 16 != 0)) {
121 DPAAX_DEBUG("Invalid memory node values or count. (size=%" PRIu64 ")",
122 statbuf.st_size);
123 goto cleanup;
124 }
125
126 /* each entry is of 16 bytes, and size/16 is total count of entries */
127 nodes = malloc(sizeof(struct reg_node) * (*count));
128 if (!nodes) {
129 DPAAX_DEBUG("Failure in allocating working memory.");
130 goto cleanup;
131 }
132 memset(nodes, 0, sizeof(struct reg_node) * (*count));
133
134 for (i = 0, j = 0; i < (statbuf.st_size) && j < (*count); i += 16, j++) {
135 memcpy(&nodes[j], file_data + i, 16);
136 /* Rotate (ntohl) each 8 byte entry */
137 rotate_8((unsigned char *)(&(nodes[j].addr)));
138 rotate_8((unsigned char *)(&(nodes[j].len)));
139 }
140
141 DPAAX_DEBUG("Device-tree memory node data:");
142
143 while (j > 0) {
144 --j;
145 DPAAX_DEBUG(" %08" PRIx64 " %08zu",
146 nodes[j].addr, nodes[j].len);
147 }
148
149 cleanup:
150 close(fd);
151 globfree(&result);
152 out:
153 return nodes;
154 }
155
/* dpaax_iova_table_populate
 * Reads the platform memory layout from the device-tree and allocates the
 * singleton PA->VA translation table (dpaax_iova_table_p): table metadata,
 * one dpaax_iovat_element per memory node entry, followed by the per-entry
 * "pages" arrays (one uint64_t VA slot per DPAAX_MEM_SPLIT bytes of PA).
 *
 * @return
 *   0 on success (including when the table already exists), -1 on failure
 */
int
dpaax_iova_table_populate(void)
{
	int ret;
	unsigned int i, node_count;
	size_t tot_memory_size, total_table_size;
	struct reg_node *nodes;
	struct dpaax_iovat_element *entry;

	/* dpaax_iova_table_p is a singleton - only one instance should be
	 * created.
	 */
	if (dpaax_iova_table_p) {
		DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)",
			    dpaax_iova_table_p);
		/* This can be an error case as well - some path not cleaning
		 * up table - but, for now, it is assumed that if IOVA Table
		 * pointer is valid, table is allocated.
		 */
		return 0;
	}

	nodes = read_memory_node(&node_count);
	if (nodes == NULL) {
		DPAAX_WARN("PA->VA translation not available;");
		DPAAX_WARN("Expect performance impact.");
		return -1;
	}

	/* Sum of all region lengths sizes the flat "pages" area below */
	tot_memory_size = 0;
	for (i = 0; i < node_count; i++)
		tot_memory_size += nodes[i].len;

	DPAAX_DEBUG("Total available PA memory size: %zu", tot_memory_size);

	/* Total table size = meta data + tot_memory_size/8 */
	total_table_size = sizeof(struct dpaax_iova_table) +
			   (sizeof(struct dpaax_iovat_element) * node_count) +
			   ((tot_memory_size / DPAAX_MEM_SPLIT) * sizeof(uint64_t));

	/* TODO: This memory doesn't need to shared but needs to be always
	 * pinned to RAM (no swap out) - using hugepage rather than malloc
	 */
	dpaax_iova_table_p = rte_zmalloc(NULL, total_table_size, 0);
	if (dpaax_iova_table_p == NULL) {
		DPAAX_WARN("Unable to allocate memory for PA->VA Table;");
		DPAAX_WARN("PA->VA translation not available;");
		DPAAX_WARN("Expect performance impact.");
		free(nodes);
		return -1;
	}

	/* Initialize table */
	dpaax_iova_table_p->count = node_count;
	entry = dpaax_iova_table_p->entries;

	DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)entry);
	DPAAX_DEBUG("\t(entry),(start),(len),(next)");

	for (i = 0; i < node_count; i++) {
		/* Layout of the single allocation made above:
		 *
		 * dpaax_iova_table_p
		 *  | dpaax_iova_table_p->entries
		 *  |      |
		 *  |      |
		 *  V      V
		 * +------+------+-------+---+----------+---------+---
		 * |iova_ |entry | entry |   | pages    | pages   |
		 * |table |  1   |  2    |...| entry 1  | entry2  |
		 * +-----'+.-----+-------+---+;---------+;--------+---
		 *         \      \          /          /
		 *          `~~~~~~|~~~~~>pages        /
		 *                  \                 /
		 *                   `~~~~~~~~~~~>pages
		 */
		entry[i].start = nodes[i].addr;
		entry[i].len = nodes[i].len;
		/* First entry's pages array begins right after the last
		 * element; each subsequent entry's pages array follows the
		 * previous entry's (one slot per DPAAX_MEM_SPLIT bytes).
		 */
		if (i > 0)
			entry[i].pages = entry[i-1].pages +
					 ((entry[i-1].len/DPAAX_MEM_SPLIT));
		else
			entry[i].pages = (uint64_t *)((unsigned char *)entry +
					 (sizeof(struct dpaax_iovat_element) *
					 node_count));

		DPAAX_DEBUG("\t(%u),(%8"PRIx64"),(%8zu),(%8p)",
			    i, entry[i].start, entry[i].len, entry[i].pages);
	}

	/* Release memory associated with nodes array - not required now */
	free(nodes);

	DPAAX_DEBUG("Adding mem-event handler");
	ret = dpaax_handle_memevents();
	if (ret) {
		/* Best-effort: the table stays allocated and usable for
		 * already-pinned memory even if the handler registration
		 * fails.
		 */
		DPAAX_ERR("Unable to add mem-event handler");
		DPAAX_WARN("Cases with non-buffer pool mem won't work!");
	}

	return 0;
}
256
257 void
dpaax_iova_table_depopulate(void)258 dpaax_iova_table_depopulate(void)
259 {
260 rte_free(dpaax_iova_table_p);
261 dpaax_iova_table_p = NULL;
262
263 DPAAX_DEBUG("IOVA Table cleaned");
264 }
265
/* dpaax_iova_table_update
 * Records (or clears, when vaddr is NULL/0) the PA->VA mapping for a range
 * of physical memory in the translation table. Requests larger than
 * DPAAX_MEM_SPLIT fill multiple consecutive pages[] slots.
 *
 * @param paddr
 *   Physical address of the range (aligned down to DPAAX_MEM_SPLIT)
 * @param vaddr
 *   Corresponding virtual address; 0/NULL clears the slots (dealloc path)
 * @param length
 *   Length of the range, in bytes
 * @return
 *   0 on success, -1 if the table is absent or paddr falls outside every
 *   known memory node (not necessarily fatal - see comment below)
 */
int
dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length)
{
	int found = 0;
	unsigned int i;
	size_t req_length = length, e_offset;
	struct dpaax_iovat_element *entry;
	uintptr_t align_vaddr;
	phys_addr_t align_paddr;

	if (unlikely(dpaax_iova_table_p == NULL))
		return -1;

	/* Align both addresses down to the table's page granularity */
	align_paddr = paddr & DPAAX_MEM_SPLIT_MASK;
	align_vaddr = ((uintptr_t)vaddr & DPAAX_MEM_SPLIT_MASK);

	/* Check if paddr is available in table; entries are sorted by
	 * ascending start address (device-tree order).
	 */
	entry = dpaax_iova_table_p->entries;
	for (i = 0; i < dpaax_iova_table_p->count; i++) {
		if (align_paddr < entry[i].start) {
			/* Address lower than start, but not found in previous
			 * iteration shouldn't exist.
			 */
			DPAAX_ERR("Add: Incorrect entry for PA->VA Table"
				  "(%"PRIu64")", paddr);
			DPAAX_ERR("Add: Lowest address: %"PRIu64"",
				  entry[i].start);
			return -1;
		}

		/* NOTE(review): align_paddr == start + len passes this check
		 * and indexes pages[len/DPAAX_MEM_SPLIT], one past the slots
		 * sized for this entry - confirm whether '>' should be '>='.
		 */
		if (align_paddr > (entry[i].start + entry[i].len))
			continue;

		/* align_paddr >= start && align_paddr < (start + len) */
		found = 1;

		do {
			/* Slot index: one slot per DPAAX_MEM_SPLIT bytes */
			e_offset = ((align_paddr - entry[i].start) / DPAAX_MEM_SPLIT);
			/* TODO: Whatif something already exists at this
			 * location - is that an error? For now, ignoring the
			 * case.
			 */
			entry[i].pages[e_offset] = align_vaddr;
#ifdef RTE_COMMON_DPAAX_DEBUG
			DPAAX_DEBUG("Added: vaddr=%zu for Phy:%"PRIu64" at %zu"
				    " remaining len %zu", align_vaddr,
				    align_paddr, e_offset, req_length);
#endif
			/* Incoming request can be larger than the
			 * DPAAX_MEM_SPLIT size - in which case, multiple
			 * entries in entry->pages[] are filled up.
			 */
			if (req_length <= DPAAX_MEM_SPLIT)
				break;
			align_paddr += DPAAX_MEM_SPLIT;
			align_vaddr += DPAAX_MEM_SPLIT;
			req_length -= DPAAX_MEM_SPLIT;
		} while (1);

		break;
	}

	if (!found) {
		/* There might be case where the incoming physical address is
		 * beyond the address discovered in the memory node of
		 * device-tree. Specially if some malloc'd area is used by EAL
		 * and the memevent handlers passes that across. But, this is
		 * not necessarily an error.
		 */
		DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p),"
			    " phy(%"PRIu64")",
			    vaddr, paddr);
		return -1;
	}
#ifdef RTE_COMMON_DPAAX_DEBUG
	DPAAX_DEBUG("Add: Found slot at (%"PRIu64")[(%zu)] for vaddr:(%p),"
		    " phy(%"PRIu64"), len(%zu)", entry[i].start, e_offset,
		    vaddr, paddr, length);
#endif
	return 0;
}
347
348 /* dpaax_iova_table_dump
349 * Dump the table, with its entries, on screen. Only works in Debug Mode
350 * Not for weak hearted - the tables can get quite large
351 */
352 void
dpaax_iova_table_dump(void)353 dpaax_iova_table_dump(void)
354 {
355 unsigned int i, j;
356 struct dpaax_iovat_element *entry;
357
358 /* In case DEBUG is not enabled, some 'if' conditions might misbehave
359 * as they have nothing else in them except a DPAAX_DEBUG() which if
360 * tuned out would leave 'if' naked.
361 */
362 if (rte_log_get_global_level() < RTE_LOG_DEBUG) {
363 DPAAX_ERR("Set log level to Debug for PA->Table dump!");
364 return;
365 }
366
367 DPAAX_DEBUG(" === Start of PA->VA Translation Table ===");
368 if (dpaax_iova_table_p == NULL) {
369 DPAAX_DEBUG("\tNULL");
370 return;
371 }
372
373 entry = dpaax_iova_table_p->entries;
374 for (i = 0; i < dpaax_iova_table_p->count; i++) {
375 DPAAX_DEBUG("\t(%16i),(%16"PRIu64"),(%16zu),(%16p)",
376 i, entry[i].start, entry[i].len, entry[i].pages);
377 DPAAX_DEBUG("\t\t (PA), (VA)");
378 for (j = 0; j < (entry->len/DPAAX_MEM_SPLIT); j++) {
379 if (entry[i].pages[j] == 0)
380 continue;
381 DPAAX_DEBUG("\t\t(%16"PRIx64"),(%16"PRIx64")",
382 (entry[i].start + (j * sizeof(uint64_t))),
383 entry[i].pages[j]);
384 }
385 }
386 DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
387 }
388
389 static void
dpaax_memevent_cb(enum rte_mem_event type,const void * addr,size_t len,void * arg __rte_unused)390 dpaax_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
391 void *arg __rte_unused)
392 {
393 struct rte_memseg_list *msl;
394 struct rte_memseg *ms;
395 size_t cur_len = 0, map_len = 0;
396 phys_addr_t phys_addr;
397 void *virt_addr;
398 int ret;
399
400 DPAAX_DEBUG("Called with addr=%p, len=%zu", addr, len);
401
402 msl = rte_mem_virt2memseg_list(addr);
403
404 while (cur_len < len) {
405 const void *va = RTE_PTR_ADD(addr, cur_len);
406
407 ms = rte_mem_virt2memseg(va, msl);
408 phys_addr = rte_mem_virt2phy(ms->addr);
409 virt_addr = ms->addr;
410 map_len = ms->len;
411 #ifdef RTE_COMMON_DPAAX_DEBUG
412 DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p,"
413 "iova=%"PRIu64", map_len=%zu",
414 type == RTE_MEM_EVENT_ALLOC ?
415 "alloc" : "dealloc",
416 va, virt_addr, phys_addr, map_len);
417 #endif
418 if (type == RTE_MEM_EVENT_ALLOC)
419 ret = dpaax_iova_table_update(phys_addr, virt_addr,
420 map_len);
421 else
422 /* In case of mem_events for MEM_EVENT_FREE, complete
423 * hugepage is released and its PA entry is set to 0.
424 */
425 ret = dpaax_iova_table_update(phys_addr, 0, map_len);
426
427 if (ret != 0) {
428 DPAAX_DEBUG("PA-Table entry update failed. "
429 "Map=%d, addr=%p, len=%zu, err:(%d)",
430 type, va, map_len, ret);
431 return;
432 }
433
434 cur_len += map_len;
435 }
436 }
437
438 static int
dpaax_memevent_walk_memsegs(const struct rte_memseg_list * msl __rte_unused,const struct rte_memseg * ms,size_t len,void * arg __rte_unused)439 dpaax_memevent_walk_memsegs(const struct rte_memseg_list *msl __rte_unused,
440 const struct rte_memseg *ms, size_t len,
441 void *arg __rte_unused)
442 {
443 DPAAX_DEBUG("Walking for %p (pa=%"PRIu64") and len %zu",
444 ms->addr, ms->iova, len);
445 dpaax_iova_table_update(rte_mem_virt2phy(ms->addr), ms->addr, len);
446 return 0;
447 }
448
449 static int
dpaax_handle_memevents(void)450 dpaax_handle_memevents(void)
451 {
452 /* First, walk through all memsegs and pin them, before installing
453 * handler. This assures that all memseg which have already been
454 * identified/allocated by EAL, are already part of PA->VA Table. This
455 * is especially for cases where application allocates memory before
456 * the EAL or this is an externally allocated memory passed to EAL.
457 */
458 rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs, NULL);
459
460 return rte_mem_event_callback_register("dpaax_memevents_cb",
461 dpaax_memevent_cb, NULL);
462 }
463
464 RTE_LOG_REGISTER_DEFAULT(dpaax_logger, ERR);
465