1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 NXP 3 */ 4 5 #include <rte_memory.h> 6 7 #include "dpaax_iova_table.h" 8 #include "dpaax_logs.h" 9 10 /* Global table reference */ 11 struct dpaax_iova_table *dpaax_iova_table_p; 12 13 static int dpaax_handle_memevents(void); 14 15 /* A structure representing the device-tree node available in /proc/device-tree. 16 */ 17 struct reg_node { 18 phys_addr_t addr; 19 size_t len; 20 }; 21 22 /* A ntohll equivalent routine 23 * XXX: This is only applicable for 64 bit environment. 24 */ 25 static void 26 rotate_8(unsigned char *arr) 27 { 28 uint32_t temp; 29 uint32_t *first_half; 30 uint32_t *second_half; 31 32 first_half = (uint32_t *)(arr); 33 second_half = (uint32_t *)(arr + 4); 34 35 temp = *first_half; 36 *first_half = *second_half; 37 *second_half = temp; 38 39 *first_half = ntohl(*first_half); 40 *second_half = ntohl(*second_half); 41 } 42 43 /* read_memory_nodes 44 * Memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088, LX2160) 45 * are populated by Uboot and available in device tree: 46 * /proc/device-tree/memory@<address>/reg <= register. 47 * Entries are of the form: 48 * (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>).. 49 * 50 * @param count 51 * OUT populate number of entries found in memory node 52 * @return 53 * Pointer to array of reg_node elements, count size 54 */ 55 static struct reg_node * 56 read_memory_node(unsigned int *count) 57 { 58 int fd, ret, i; 59 unsigned int j; 60 glob_t result = {0}; 61 struct stat statbuf = {0}; 62 char file_data[MEM_NODE_FILE_LEN]; 63 struct reg_node *nodes = NULL; 64 65 *count = 0; 66 67 ret = glob(MEM_NODE_PATH_GLOB, 0, NULL, &result); 68 if (ret != 0) 69 ret = glob(MEM_NODE_PATH_GLOB_VM, 0, NULL, &result); 70 71 if (ret != 0) { 72 DPAAX_DEBUG("Unable to glob device-tree memory node (err: %d)", 73 ret); 74 goto out; 75 } 76 77 if (result.gl_pathc != 1) { 78 /* Either more than one memory@<addr> node found, or none. 79 * In either case, cannot work ahead. 80 */ 81 DPAAX_DEBUG("Found (%zu) entries in device-tree. Not supported!", 82 result.gl_pathc); 83 goto out; 84 } 85 86 DPAAX_DEBUG("Opening and parsing device-tree node: (%s)", 87 result.gl_pathv[0]); 88 fd = open(result.gl_pathv[0], O_RDONLY); 89 if (fd < 0) { 90 DPAAX_DEBUG("Unable to open the device-tree node: (%s)(fd=%d)", 91 MEM_NODE_PATH_GLOB, fd); 92 goto cleanup; 93 } 94 95 /* Stat to get the file size */ 96 ret = fstat(fd, &statbuf); 97 if (ret != 0) { 98 DPAAX_DEBUG("Unable to get device-tree memory node size."); 99 goto cleanup; 100 } 101 102 DPAAX_DEBUG("Size of device-tree mem node: %" PRIu64, statbuf.st_size); 103 if (statbuf.st_size > MEM_NODE_FILE_LEN) { 104 DPAAX_DEBUG("More memory nodes available than assumed."); 105 DPAAX_DEBUG("System may not work properly!"); 106 } 107 108 ret = read(fd, file_data, statbuf.st_size > MEM_NODE_FILE_LEN ? 109 MEM_NODE_FILE_LEN : statbuf.st_size); 110 if (ret <= 0) { 111 DPAAX_DEBUG("Unable to read device-tree memory node: (%d)", 112 ret); 113 goto cleanup; 114 } 115 116 /* The reg node should be multiple of 16 bytes, 8 bytes each for addr 117 * and len. 118 */ 119 *count = (statbuf.st_size / 16); 120 if ((*count) <= 0 || (statbuf.st_size % 16 != 0)) { 121 DPAAX_DEBUG("Invalid memory node values or count. (size=%" PRIu64 ")", 122 statbuf.st_size); 123 goto cleanup; 124 } 125 126 /* each entry is of 16 bytes, and size/16 is total count of entries */ 127 nodes = malloc(sizeof(struct reg_node) * (*count)); 128 if (!nodes) { 129 DPAAX_DEBUG("Failure in allocating working memory."); 130 goto cleanup; 131 } 132 memset(nodes, 0, sizeof(struct reg_node) * (*count)); 133 134 for (i = 0, j = 0; i < (statbuf.st_size) && j < (*count); i += 16, j++) { 135 memcpy(&nodes[j], file_data + i, 16); 136 /* Rotate (ntohl) each 8 byte entry */ 137 rotate_8((unsigned char *)(&(nodes[j].addr))); 138 rotate_8((unsigned char *)(&(nodes[j].len))); 139 } 140 141 DPAAX_DEBUG("Device-tree memory node data:"); 142 do { 143 DPAAX_DEBUG(" %08" PRIx64 " %08zu", 144 nodes[j].addr, nodes[j].len); 145 } while (--j); 146 147 cleanup: 148 close(fd); 149 globfree(&result); 150 out: 151 return nodes; 152 } 153 154 int 155 dpaax_iova_table_populate(void) 156 { 157 int ret; 158 unsigned int i, node_count; 159 size_t tot_memory_size, total_table_size; 160 struct reg_node *nodes; 161 struct dpaax_iovat_element *entry; 162 163 /* dpaax_iova_table_p is a singleton - only one instance should be 164 * created. 165 */ 166 if (dpaax_iova_table_p) { 167 DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)", 168 dpaax_iova_table_p); 169 /* This can be an error case as well - some path not cleaning 170 * up table - but, for now, it is assumed that if IOVA Table 171 * pointer is valid, table is allocated. 172 */ 173 return 0; 174 } 175 176 nodes = read_memory_node(&node_count); 177 if (nodes == NULL) { 178 DPAAX_WARN("PA->VA translation not available;"); 179 DPAAX_WARN("Expect performance impact."); 180 return -1; 181 } 182 183 tot_memory_size = 0; 184 for (i = 0; i < node_count; i++) 185 tot_memory_size += nodes[i].len; 186 187 DPAAX_DEBUG("Total available PA memory size: %zu", tot_memory_size); 188 189 /* Total table size = meta data + tot_memory_size/8 */ 190 total_table_size = sizeof(struct dpaax_iova_table) + 191 (sizeof(struct dpaax_iovat_element) * node_count) + 192 ((tot_memory_size / DPAAX_MEM_SPLIT) * sizeof(uint64_t)); 193 194 /* TODO: This memory doesn't need to shared but needs to be always 195 * pinned to RAM (no swap out) - using hugepage rather than malloc 196 */ 197 dpaax_iova_table_p = rte_zmalloc(NULL, total_table_size, 0); 198 if (dpaax_iova_table_p == NULL) { 199 DPAAX_WARN("Unable to allocate memory for PA->VA Table;"); 200 DPAAX_WARN("PA->VA translation not available;"); 201 DPAAX_WARN("Expect performance impact."); 202 free(nodes); 203 return -1; 204 } 205 206 /* Initialize table */ 207 dpaax_iova_table_p->count = node_count; 208 entry = dpaax_iova_table_p->entries; 209 210 DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)entry); 211 DPAAX_DEBUG("\t(entry),(start),(len),(next)"); 212 213 for (i = 0; i < node_count; i++) { 214 /* dpaax_iova_table_p 215 * | dpaax_iova_table_p->entries 216 * | | 217 * | | 218 * V V 219 * +------+------+-------+---+----------+---------+--- 220 * |iova_ |entry | entry | | pages | pages | 221 * |table | 1 | 2 |...| entry 1 | entry2 | 222 * +-----'+.-----+-------+---+;---------+;--------+--- 223 * \ \ / / 224 * `~~~~~~|~~~~~>pages / 225 * \ / 226 * `~~~~~~~~~~~>pages 227 */ 228 entry[i].start = nodes[i].addr; 229 entry[i].len = nodes[i].len; 230 if (i > 0) 231 entry[i].pages = entry[i-1].pages + 232 ((entry[i-1].len/DPAAX_MEM_SPLIT)); 233 else 234 entry[i].pages = (uint64_t *)((unsigned char *)entry + 235 (sizeof(struct dpaax_iovat_element) * 236 node_count)); 237 238 DPAAX_DEBUG("\t(%u),(%8"PRIx64"),(%8zu),(%8p)", 239 i, entry[i].start, entry[i].len, entry[i].pages); 240 } 241 242 /* Release memory associated with nodes array - not required now */ 243 free(nodes); 244 245 DPAAX_DEBUG("Adding mem-event handler"); 246 ret = dpaax_handle_memevents(); 247 if (ret) { 248 DPAAX_ERR("Unable to add mem-event handler"); 249 DPAAX_WARN("Cases with non-buffer pool mem won't work!"); 250 } 251 252 return 0; 253 } 254 255 void 256 dpaax_iova_table_depopulate(void) 257 { 258 if (dpaax_iova_table_p == NULL) 259 return; 260 261 rte_free(dpaax_iova_table_p->entries); 262 dpaax_iova_table_p = NULL; 263 264 DPAAX_DEBUG("IOVA Table cleanedup"); 265 } 266 267 int 268 dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length) 269 { 270 int found = 0; 271 unsigned int i; 272 size_t req_length = length, e_offset; 273 struct dpaax_iovat_element *entry; 274 uintptr_t align_vaddr; 275 phys_addr_t align_paddr; 276 277 if (unlikely(dpaax_iova_table_p == NULL)) 278 return -1; 279 280 align_paddr = paddr & DPAAX_MEM_SPLIT_MASK; 281 align_vaddr = ((uintptr_t)vaddr & DPAAX_MEM_SPLIT_MASK); 282 283 /* Check if paddr is available in table */ 284 entry = dpaax_iova_table_p->entries; 285 for (i = 0; i < dpaax_iova_table_p->count; i++) { 286 if (align_paddr < entry[i].start) { 287 /* Address lower than start, but not found in previous 288 * iteration shouldn't exist. 289 */ 290 DPAAX_ERR("Add: Incorrect entry for PA->VA Table" 291 "(%"PRIu64")", paddr); 292 DPAAX_ERR("Add: Lowest address: %"PRIu64"", 293 entry[i].start); 294 return -1; 295 } 296 297 if (align_paddr > (entry[i].start + entry[i].len)) 298 continue; 299 300 /* align_paddr >= start && align_paddr < (start + len) */ 301 found = 1; 302 303 do { 304 e_offset = ((align_paddr - entry[i].start) / DPAAX_MEM_SPLIT); 305 /* TODO: Whatif something already exists at this 306 * location - is that an error? For now, ignoring the 307 * case. 308 */ 309 entry[i].pages[e_offset] = align_vaddr; 310 #ifdef RTE_COMMON_DPAAX_DEBUG 311 DPAAX_DEBUG("Added: vaddr=%zu for Phy:%"PRIu64" at %zu" 312 " remaining len %zu", align_vaddr, 313 align_paddr, e_offset, req_length); 314 #endif 315 /* Incoming request can be larger than the 316 * DPAAX_MEM_SPLIT size - in which case, multiple 317 * entries in entry->pages[] are filled up. 318 */ 319 if (req_length <= DPAAX_MEM_SPLIT) 320 break; 321 align_paddr += DPAAX_MEM_SPLIT; 322 align_vaddr += DPAAX_MEM_SPLIT; 323 req_length -= DPAAX_MEM_SPLIT; 324 } while (1); 325 326 break; 327 } 328 329 if (!found) { 330 /* There might be case where the incoming physical address is 331 * beyond the address discovered in the memory node of 332 * device-tree. Specially if some malloc'd area is used by EAL 333 * and the memevent handlers passes that across. But, this is 334 * not necessarily an error. 335 */ 336 DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p)," 337 " phy(%"PRIu64")", 338 vaddr, paddr); 339 return -1; 340 } 341 #ifdef RTE_COMMON_DPAAX_DEBUG 342 DPAAX_DEBUG("Add: Found slot at (%"PRIu64")[(%zu)] for vaddr:(%p)," 343 " phy(%"PRIu64"), len(%zu)", entry[i].start, e_offset, 344 vaddr, paddr, length); 345 #endif 346 return 0; 347 } 348 349 /* dpaax_iova_table_dump 350 * Dump the table, with its entries, on screen. Only works in Debug Mode 351 * Not for weak hearted - the tables can get quite large 352 */ 353 void 354 dpaax_iova_table_dump(void) 355 { 356 unsigned int i, j; 357 struct dpaax_iovat_element *entry; 358 359 /* In case DEBUG is not enabled, some 'if' conditions might misbehave 360 * as they have nothing else in them except a DPAAX_DEBUG() which if 361 * tuned out would leave 'if' naked. 362 */ 363 if (rte_log_get_global_level() < RTE_LOG_DEBUG) { 364 DPAAX_ERR("Set log level to Debug for PA->Table dump!"); 365 return; 366 } 367 368 DPAAX_DEBUG(" === Start of PA->VA Translation Table ==="); 369 if (dpaax_iova_table_p == NULL) 370 DPAAX_DEBUG("\tNULL"); 371 372 entry = dpaax_iova_table_p->entries; 373 for (i = 0; i < dpaax_iova_table_p->count; i++) { 374 DPAAX_DEBUG("\t(%16i),(%16"PRIu64"),(%16zu),(%16p)", 375 i, entry[i].start, entry[i].len, entry[i].pages); 376 DPAAX_DEBUG("\t\t (PA), (VA)"); 377 for (j = 0; j < (entry->len/DPAAX_MEM_SPLIT); j++) { 378 if (entry[i].pages[j] == 0) 379 continue; 380 DPAAX_DEBUG("\t\t(%16"PRIx64"),(%16"PRIx64")", 381 (entry[i].start + (j * sizeof(uint64_t))), 382 entry[i].pages[j]); 383 } 384 } 385 DPAAX_DEBUG(" === End of PA->VA Translation Table ==="); 386 } 387 388 static void 389 dpaax_memevent_cb(enum rte_mem_event type, const void *addr, size_t len, 390 void *arg __rte_unused) 391 { 392 struct rte_memseg_list *msl; 393 struct rte_memseg *ms; 394 size_t cur_len = 0, map_len = 0; 395 phys_addr_t phys_addr; 396 void *virt_addr; 397 int ret; 398 399 DPAAX_DEBUG("Called with addr=%p, len=%zu", addr, len); 400 401 msl = rte_mem_virt2memseg_list(addr); 402 403 while (cur_len < len) { 404 const void *va = RTE_PTR_ADD(addr, cur_len); 405 406 ms = rte_mem_virt2memseg(va, msl); 407 phys_addr = rte_mem_virt2phy(ms->addr); 408 virt_addr = ms->addr; 409 map_len = ms->len; 410 #ifdef RTE_COMMON_DPAAX_DEBUG 411 DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p," 412 "iova=%"PRIu64", map_len=%zu", 413 type == RTE_MEM_EVENT_ALLOC ? 414 "alloc" : "dealloc", 415 va, virt_addr, phys_addr, map_len); 416 #endif 417 if (type == RTE_MEM_EVENT_ALLOC) 418 ret = dpaax_iova_table_update(phys_addr, virt_addr, 419 map_len); 420 else 421 /* In case of mem_events for MEM_EVENT_FREE, complete 422 * hugepage is released and its PA entry is set to 0. 423 */ 424 ret = dpaax_iova_table_update(phys_addr, 0, map_len); 425 426 if (ret != 0) { 427 DPAAX_DEBUG("PA-Table entry update failed. " 428 "Map=%d, addr=%p, len=%zu, err:(%d)", 429 type, va, map_len, ret); 430 return; 431 } 432 433 cur_len += map_len; 434 } 435 } 436 437 static int 438 dpaax_memevent_walk_memsegs(const struct rte_memseg_list *msl __rte_unused, 439 const struct rte_memseg *ms, size_t len, 440 void *arg __rte_unused) 441 { 442 DPAAX_DEBUG("Walking for %p (pa=%"PRIu64") and len %zu", 443 ms->addr, ms->iova, len); 444 dpaax_iova_table_update(rte_mem_virt2phy(ms->addr), ms->addr, len); 445 return 0; 446 } 447 448 static int 449 dpaax_handle_memevents(void) 450 { 451 /* First, walk through all memsegs and pin them, before installing 452 * handler. This assures that all memseg which have already been 453 * identified/allocated by EAL, are already part of PA->VA Table. This 454 * is especially for cases where application allocates memory before 455 * the EAL or this is an externally allocated memory passed to EAL. 456 */ 457 rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs, NULL); 458 459 return rte_mem_event_callback_register("dpaax_memevents_cb", 460 dpaax_memevent_cb, NULL); 461 } 462 463 RTE_LOG_REGISTER(dpaax_logger, pmd.common.dpaax, ERR); 464