xref: /dpdk/drivers/common/dpaax/dpaax_iova_table.c (revision 68a03efeed657e6e05f281479b33b51102797e15)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 NXP
3  */
4 
5 #include <rte_memory.h>
6 
7 #include "dpaax_iova_table.h"
8 #include "dpaax_logs.h"
9 
/* Global table reference
 * Singleton PA->VA translation table, created by dpaax_iova_table_populate()
 * and released by dpaax_iova_table_depopulate(). NULL when not populated.
 */
struct dpaax_iova_table *dpaax_iova_table_p;

/* Forward declaration: registers EAL memory-event handling (defined below). */
static int dpaax_handle_memevents(void);
14 
/* A structure representing the device-tree node available in /proc/device-tree.
 * Each "reg" entry is a pair of 8-byte values written by U-Boot in
 * big-endian (network) order; read_memory_node() converts them to host
 * order via rotate_8() after copying into this struct.
 */
struct reg_node {
	phys_addr_t addr;	/* physical base address of the memory region */
	size_t len;		/* length of the region, in bytes */
};
21 
/* A ntohll equivalent routine
 * Converts an 8-byte big-endian (network order) value at arr to host
 * order, in place.
 * XXX: This is only applicable for 64 bit environment.
 *
 * @param arr
 *    pointer to 8 bytes holding a big-endian 64-bit value
 */
static void
rotate_8(unsigned char *arr)
{
	uint32_t first_half;
	uint32_t second_half;

	/* Use memcpy rather than dereferencing (uint32_t *) casts of the
	 * byte pointer: arr need not be 4-byte aligned and such casts
	 * violate strict aliasing.
	 */
	memcpy(&first_half, arr, sizeof(first_half));
	memcpy(&second_half, arr + 4, sizeof(second_half));

	/* Swap the halves and byte-swap each: low 32 bits land first in
	 * memory, matching a host-order (little-endian) 64-bit read.
	 */
	first_half = ntohl(first_half);
	second_half = ntohl(second_half);

	memcpy(arr, &second_half, sizeof(second_half));
	memcpy(arr + 4, &first_half, sizeof(first_half));
}
42 
43 /* read_memory_nodes
44  * Memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088, LX2160)
45  * are populated by Uboot and available in device tree:
46  * /proc/device-tree/memory@<address>/reg <= register.
47  * Entries are of the form:
48  *  (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>)..
49  *
50  * @param count
51  *    OUT populate number of entries found in memory node
52  * @return
53  *    Pointer to array of reg_node elements, count size
54  */
55 static struct reg_node *
56 read_memory_node(unsigned int *count)
57 {
58 	int fd, ret, i;
59 	unsigned int j;
60 	glob_t result = {0};
61 	struct stat statbuf = {0};
62 	char file_data[MEM_NODE_FILE_LEN];
63 	struct reg_node *nodes = NULL;
64 
65 	*count = 0;
66 
67 	ret = glob(MEM_NODE_PATH_GLOB, 0, NULL, &result);
68 	if (ret != 0)
69 		ret = glob(MEM_NODE_PATH_GLOB_VM, 0, NULL, &result);
70 
71 	if (ret != 0) {
72 		DPAAX_DEBUG("Unable to glob device-tree memory node (err: %d)",
73 			ret);
74 		goto out;
75 	}
76 
77 	if (result.gl_pathc != 1) {
78 		/* Either more than one memory@<addr> node found, or none.
79 		 * In either case, cannot work ahead.
80 		 */
81 		DPAAX_DEBUG("Found (%zu) entries in device-tree. Not supported!",
82 			    result.gl_pathc);
83 		goto out;
84 	}
85 
86 	DPAAX_DEBUG("Opening and parsing device-tree node: (%s)",
87 		    result.gl_pathv[0]);
88 	fd = open(result.gl_pathv[0], O_RDONLY);
89 	if (fd < 0) {
90 		DPAAX_DEBUG("Unable to open the device-tree node: (%s)(fd=%d)",
91 			    MEM_NODE_PATH_GLOB, fd);
92 		goto cleanup;
93 	}
94 
95 	/* Stat to get the file size */
96 	ret = fstat(fd, &statbuf);
97 	if (ret != 0) {
98 		DPAAX_DEBUG("Unable to get device-tree memory node size.");
99 		goto cleanup;
100 	}
101 
102 	DPAAX_DEBUG("Size of device-tree mem node: %" PRIu64, statbuf.st_size);
103 	if (statbuf.st_size > MEM_NODE_FILE_LEN) {
104 		DPAAX_DEBUG("More memory nodes available than assumed.");
105 		DPAAX_DEBUG("System may not work properly!");
106 	}
107 
108 	ret = read(fd, file_data, statbuf.st_size > MEM_NODE_FILE_LEN ?
109 				  MEM_NODE_FILE_LEN : statbuf.st_size);
110 	if (ret <= 0) {
111 		DPAAX_DEBUG("Unable to read device-tree memory node: (%d)",
112 			    ret);
113 		goto cleanup;
114 	}
115 
116 	/* The reg node should be multiple of 16 bytes, 8 bytes each for addr
117 	 * and len.
118 	 */
119 	*count = (statbuf.st_size / 16);
120 	if ((*count) <= 0 || (statbuf.st_size % 16 != 0)) {
121 		DPAAX_DEBUG("Invalid memory node values or count. (size=%" PRIu64 ")",
122 			    statbuf.st_size);
123 		goto cleanup;
124 	}
125 
126 	/* each entry is of 16 bytes, and size/16 is total count of entries */
127 	nodes = malloc(sizeof(struct reg_node) * (*count));
128 	if (!nodes) {
129 		DPAAX_DEBUG("Failure in allocating working memory.");
130 		goto cleanup;
131 	}
132 	memset(nodes, 0, sizeof(struct reg_node) * (*count));
133 
134 	for (i = 0, j = 0; i < (statbuf.st_size) && j < (*count); i += 16, j++) {
135 		memcpy(&nodes[j], file_data + i, 16);
136 		/* Rotate (ntohl) each 8 byte entry */
137 		rotate_8((unsigned char *)(&(nodes[j].addr)));
138 		rotate_8((unsigned char *)(&(nodes[j].len)));
139 	}
140 
141 	DPAAX_DEBUG("Device-tree memory node data:");
142 	do {
143 		DPAAX_DEBUG("    %08" PRIx64 " %08zu",
144 			    nodes[j].addr, nodes[j].len);
145 	} while (--j);
146 
147 cleanup:
148 	close(fd);
149 	globfree(&result);
150 out:
151 	return nodes;
152 }
153 
/* dpaax_iova_table_populate
 * Builds the global PA->VA translation table (dpaax_iova_table_p) from the
 * memory regions discovered in the device-tree. The table is one contiguous
 * rte_zmalloc() region laid out as: table header, node_count entry
 * descriptors, then per-entry 'pages' arrays (one uint64_t VA slot per
 * DPAAX_MEM_SPLIT-sized chunk of the region). Finally registers EAL
 * memory-event handling so later allocations are reflected in the table.
 *
 * @return
 *   0 on success (also when the table already exists), -1 on failure
 */
int
dpaax_iova_table_populate(void)
{
	int ret;
	unsigned int i, node_count;
	size_t tot_memory_size, total_table_size;
	struct reg_node *nodes;
	struct dpaax_iovat_element *entry;

	/* dpaax_iova_table_p is a singleton - only one instance should be
	 * created.
	 */
	if (dpaax_iova_table_p) {
		DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)",
			    dpaax_iova_table_p);
		/* This can be an error case as well - some path not cleaning
		 * up table - but, for now, it is assumed that if IOVA Table
		 * pointer is valid, table is allocated.
		 */
		return 0;
	}

	/* Parse /proc/device-tree memory node; without it the driver can
	 * still run, just without fast PA->VA translation.
	 */
	nodes = read_memory_node(&node_count);
	if (nodes == NULL) {
		DPAAX_WARN("PA->VA translation not available;");
		DPAAX_WARN("Expect performance impact.");
		return -1;
	}

	/* Sum of all region lengths determines how many page slots the
	 * table needs in total.
	 */
	tot_memory_size = 0;
	for (i = 0; i < node_count; i++)
		tot_memory_size += nodes[i].len;

	DPAAX_DEBUG("Total available PA memory size: %zu", tot_memory_size);

	/* Total table size = meta data + tot_memory_size/8 */
	total_table_size = sizeof(struct dpaax_iova_table) +
			   (sizeof(struct dpaax_iovat_element) * node_count) +
			   ((tot_memory_size / DPAAX_MEM_SPLIT) * sizeof(uint64_t));

	/* TODO: This memory doesn't need to shared but needs to be always
	 * pinned to RAM (no swap out) - using hugepage rather than malloc
	 */
	dpaax_iova_table_p = rte_zmalloc(NULL, total_table_size, 0);
	if (dpaax_iova_table_p == NULL) {
		DPAAX_WARN("Unable to allocate memory for PA->VA Table;");
		DPAAX_WARN("PA->VA translation not available;");
		DPAAX_WARN("Expect performance impact.");
		free(nodes);
		return -1;
	}

	/* Initialize table */
	dpaax_iova_table_p->count = node_count;
	entry = dpaax_iova_table_p->entries;

	DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)entry);
	DPAAX_DEBUG("\t(entry),(start),(len),(next)");

	for (i = 0; i < node_count; i++) {
		/* dpaax_iova_table_p
		 * |   dpaax_iova_table_p->entries
		 * |      |
		 * |      |
		 * V      V
		 * +------+------+-------+---+----------+---------+---
		 * |iova_ |entry | entry |   | pages    | pages   |
		 * |table | 1    |  2    |...| entry 1  | entry2  |
		 * +-----'+.-----+-------+---+;---------+;--------+---
		 *         \      \          /          /
		 *          `~~~~~~|~~~~~>pages        /
		 *                  \                 /
		 *                   `~~~~~~~~~~~>pages
		 */
		entry[i].start = nodes[i].addr;
		entry[i].len = nodes[i].len;
		/* pages arrays are packed back-to-back after the entry
		 * descriptors, inside the same allocation; each entry's
		 * array begins where the previous entry's array ends.
		 */
		if (i > 0)
			entry[i].pages = entry[i-1].pages +
				((entry[i-1].len/DPAAX_MEM_SPLIT));
		else
			entry[i].pages = (uint64_t *)((unsigned char *)entry +
					 (sizeof(struct dpaax_iovat_element) *
					 node_count));

		DPAAX_DEBUG("\t(%u),(%8"PRIx64"),(%8zu),(%8p)",
			    i, entry[i].start, entry[i].len, entry[i].pages);
	}

	/* Release memory associated with nodes array - not required now */
	free(nodes);

	DPAAX_DEBUG("Adding mem-event handler");
	ret = dpaax_handle_memevents();
	if (ret) {
		/* Best-effort: the table still covers memory discovered so
		 * far, so this is reported but not treated as fatal.
		 */
		DPAAX_ERR("Unable to add mem-event handler");
		DPAAX_WARN("Cases with non-buffer pool mem won't work!");
	}

	return 0;
}
254 
255 void
256 dpaax_iova_table_depopulate(void)
257 {
258 	if (dpaax_iova_table_p == NULL)
259 		return;
260 
261 	rte_free(dpaax_iova_table_p->entries);
262 	dpaax_iova_table_p = NULL;
263 
264 	DPAAX_DEBUG("IOVA Table cleanedup");
265 }
266 
267 int
268 dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length)
269 {
270 	int found = 0;
271 	unsigned int i;
272 	size_t req_length = length, e_offset;
273 	struct dpaax_iovat_element *entry;
274 	uintptr_t align_vaddr;
275 	phys_addr_t align_paddr;
276 
277 	if (unlikely(dpaax_iova_table_p == NULL))
278 		return -1;
279 
280 	align_paddr = paddr & DPAAX_MEM_SPLIT_MASK;
281 	align_vaddr = ((uintptr_t)vaddr & DPAAX_MEM_SPLIT_MASK);
282 
283 	/* Check if paddr is available in table */
284 	entry = dpaax_iova_table_p->entries;
285 	for (i = 0; i < dpaax_iova_table_p->count; i++) {
286 		if (align_paddr < entry[i].start) {
287 			/* Address lower than start, but not found in previous
288 			 * iteration shouldn't exist.
289 			 */
290 			DPAAX_ERR("Add: Incorrect entry for PA->VA Table"
291 				  "(%"PRIu64")", paddr);
292 			DPAAX_ERR("Add: Lowest address: %"PRIu64"",
293 				  entry[i].start);
294 			return -1;
295 		}
296 
297 		if (align_paddr > (entry[i].start + entry[i].len))
298 			continue;
299 
300 		/* align_paddr >= start && align_paddr < (start + len) */
301 		found = 1;
302 
303 		do {
304 			e_offset = ((align_paddr - entry[i].start) / DPAAX_MEM_SPLIT);
305 			/* TODO: Whatif something already exists at this
306 			 * location - is that an error? For now, ignoring the
307 			 * case.
308 			 */
309 			entry[i].pages[e_offset] = align_vaddr;
310 #ifdef RTE_COMMON_DPAAX_DEBUG
311 			DPAAX_DEBUG("Added: vaddr=%zu for Phy:%"PRIu64" at %zu"
312 				    " remaining len %zu", align_vaddr,
313 				    align_paddr, e_offset, req_length);
314 #endif
315 			/* Incoming request can be larger than the
316 			 * DPAAX_MEM_SPLIT size - in which case, multiple
317 			 * entries in entry->pages[] are filled up.
318 			 */
319 			if (req_length <= DPAAX_MEM_SPLIT)
320 				break;
321 			align_paddr += DPAAX_MEM_SPLIT;
322 			align_vaddr += DPAAX_MEM_SPLIT;
323 			req_length -= DPAAX_MEM_SPLIT;
324 		} while (1);
325 
326 		break;
327 	}
328 
329 	if (!found) {
330 		/* There might be case where the incoming physical address is
331 		 * beyond the address discovered in the memory node of
332 		 * device-tree. Specially if some malloc'd area is used by EAL
333 		 * and the memevent handlers passes that across. But, this is
334 		 * not necessarily an error.
335 		 */
336 		DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p),"
337 			    " phy(%"PRIu64")",
338 			    vaddr, paddr);
339 		return -1;
340 	}
341 #ifdef RTE_COMMON_DPAAX_DEBUG
342 	DPAAX_DEBUG("Add: Found slot at (%"PRIu64")[(%zu)] for vaddr:(%p),"
343 		    " phy(%"PRIu64"), len(%zu)", entry[i].start, e_offset,
344 		    vaddr, paddr, length);
345 #endif
346 	return 0;
347 }
348 
349 /* dpaax_iova_table_dump
350  * Dump the table, with its entries, on screen. Only works in Debug Mode
351  * Not for weak hearted - the tables can get quite large
352  */
353 void
354 dpaax_iova_table_dump(void)
355 {
356 	unsigned int i, j;
357 	struct dpaax_iovat_element *entry;
358 
359 	/* In case DEBUG is not enabled, some 'if' conditions might misbehave
360 	 * as they have nothing else in them  except a DPAAX_DEBUG() which if
361 	 * tuned out would leave 'if' naked.
362 	 */
363 	if (rte_log_get_global_level() < RTE_LOG_DEBUG) {
364 		DPAAX_ERR("Set log level to Debug for PA->Table dump!");
365 		return;
366 	}
367 
368 	DPAAX_DEBUG(" === Start of PA->VA Translation Table ===");
369 	if (dpaax_iova_table_p == NULL)
370 		DPAAX_DEBUG("\tNULL");
371 
372 	entry = dpaax_iova_table_p->entries;
373 	for (i = 0; i < dpaax_iova_table_p->count; i++) {
374 		DPAAX_DEBUG("\t(%16i),(%16"PRIu64"),(%16zu),(%16p)",
375 			    i, entry[i].start, entry[i].len, entry[i].pages);
376 		DPAAX_DEBUG("\t\t          (PA),          (VA)");
377 		for (j = 0; j < (entry->len/DPAAX_MEM_SPLIT); j++) {
378 			if (entry[i].pages[j] == 0)
379 				continue;
380 			DPAAX_DEBUG("\t\t(%16"PRIx64"),(%16"PRIx64")",
381 				    (entry[i].start + (j * sizeof(uint64_t))),
382 				    entry[i].pages[j]);
383 		}
384 	}
385 	DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
386 }
387 
388 static void
389 dpaax_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
390 		  void *arg __rte_unused)
391 {
392 	struct rte_memseg_list *msl;
393 	struct rte_memseg *ms;
394 	size_t cur_len = 0, map_len = 0;
395 	phys_addr_t phys_addr;
396 	void *virt_addr;
397 	int ret;
398 
399 	DPAAX_DEBUG("Called with addr=%p, len=%zu", addr, len);
400 
401 	msl = rte_mem_virt2memseg_list(addr);
402 
403 	while (cur_len < len) {
404 		const void *va = RTE_PTR_ADD(addr, cur_len);
405 
406 		ms = rte_mem_virt2memseg(va, msl);
407 		phys_addr = rte_mem_virt2phy(ms->addr);
408 		virt_addr = ms->addr;
409 		map_len = ms->len;
410 #ifdef RTE_COMMON_DPAAX_DEBUG
411 		DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p,"
412 			    "iova=%"PRIu64", map_len=%zu",
413 			    type == RTE_MEM_EVENT_ALLOC ?
414 			    "alloc" : "dealloc",
415 			    va, virt_addr, phys_addr, map_len);
416 #endif
417 		if (type == RTE_MEM_EVENT_ALLOC)
418 			ret = dpaax_iova_table_update(phys_addr, virt_addr,
419 						      map_len);
420 		else
421 			/* In case of mem_events for MEM_EVENT_FREE, complete
422 			 * hugepage is released and its PA entry is set to 0.
423 			 */
424 			ret = dpaax_iova_table_update(phys_addr, 0, map_len);
425 
426 		if (ret != 0) {
427 			DPAAX_DEBUG("PA-Table entry update failed. "
428 				    "Map=%d, addr=%p, len=%zu, err:(%d)",
429 				    type, va, map_len, ret);
430 			return;
431 		}
432 
433 		cur_len += map_len;
434 	}
435 }
436 
/* dpaax_memevent_walk_memsegs
 * rte_memseg_contig_walk callback: adds one already-allocated contiguous
 * memseg run to the PA->VA table. Return value is ignored as failure to
 * map a segment is not fatal (see dpaax_iova_table_update()).
 */
static int
dpaax_memevent_walk_memsegs(const struct rte_memseg_list *msl __rte_unused,
			    const struct rte_memseg *ms, size_t len,
			    void *arg __rte_unused)
{
	DPAAX_DEBUG("Walking for %p (pa=%"PRIu64") and len %zu",
		    ms->addr, ms->iova, len);
	/* Best effort: the update's return code is intentionally ignored. */
	dpaax_iova_table_update(rte_mem_virt2phy(ms->addr), ms->addr, len);
	return 0;
}
447 
/* dpaax_handle_memevents
 * Seeds the PA->VA table with all memsegs EAL already knows about, then
 * registers a callback so future alloc/free events keep the table current.
 *
 * @return
 *   0 on success; negative value if callback registration fails
 */
static int
dpaax_handle_memevents(void)
{
	/* First, walk through all memsegs and pin them, before installing
	 * handler. This assures that all memseg which have already been
	 * identified/allocated by EAL, are already part of PA->VA Table. This
	 * is especially for cases where application allocates memory before
	 * the EAL or this is an externally allocated memory passed to EAL.
	 */
	rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs, NULL);

	return rte_mem_event_callback_register("dpaax_memevents_cb",
					       dpaax_memevent_cb, NULL);
}
462 
/* Driver log type "pmd.common.dpaax", default level ERR. */
RTE_LOG_REGISTER(dpaax_logger, pmd.common.dpaax, ERR);