xref: /freebsd-src/sys/dev/mlx5/mlx5_core/mlx5_fwdump.c (revision cbfe997563d24cdbfe77d1763f2582fbace3ee2f)
1 /*-
2  * Copyright (c) 2018, 2019 Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include "opt_rss.h"
27 #include "opt_ratelimit.h"
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/fcntl.h>
34 #include <dev/mlx5/driver.h>
35 #include <dev/mlx5/device.h>
36 #include <dev/mlx5/port.h>
37 #include <dev/mlx5/mlx5_core/mlx5_core.h>
38 #include <dev/mlx5/mlx5io.h>
39 #include <dev/mlx5/diagnostics.h>
40 
/*
 * Malloc type used in this file for the firmware dump register map
 * (dump_rege) and the dump data buffer (dump_data).
 */
41 static MALLOC_DEFINE(M_MLX5_DUMP, "MLX5DUMP", "MLX5 Firmware dump");
42 
43 static unsigned
44 mlx5_fwdump_getsize(const struct mlx5_crspace_regmap *rege)
45 {
46 	const struct mlx5_crspace_regmap *r;
47 	unsigned sz;
48 
49 	for (sz = 0, r = rege; r->cnt != 0; r++)
50 		sz += r->cnt;
51 	return (sz);
52 }
53 
54 static void
55 mlx5_fwdump_destroy_dd(struct mlx5_core_dev *mdev)
56 {
57 
58 	mtx_assert(&mdev->dump_lock, MA_OWNED);
59 	free(mdev->dump_data, M_MLX5_DUMP);
60 	mdev->dump_data = NULL;
61 }
62 
/*
 * Switch for firmware dump support, enabled by default.  When it is
 * zero, mlx5_fwdump_prep() logs a warning and performs no dump setup.
 * The sysctl is declared with CTLFLAG_NOFETCH; the tunable value is
 * fetched explicitly by TUNABLE_INT_FETCH() in mlx5_fwdump_prep().
 */
63 static int mlx5_fw_dump_enable = 1;
64 SYSCTL_INT(_hw_mlx5, OID_AUTO, fw_dump_enable, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
65     &mlx5_fw_dump_enable, 0,
66     "Enable fw dump setup and op");
67 
68 void
69 mlx5_fwdump_prep(struct mlx5_core_dev *mdev)
70 {
71 	device_t dev;
72 	int error, vsc_addr;
73 	unsigned i, sz;
74 	u32 addr, in, out, next_addr;
75 
76 	mdev->dump_data = NULL;
77 
78 	TUNABLE_INT_FETCH("hw.mlx5.fw_dump_enable", &mlx5_fw_dump_enable);
79 	if (!mlx5_fw_dump_enable) {
80 		mlx5_core_warn(mdev,
81 		    "Firmware dump administratively prohibited\n");
82 		return;
83 	}
84 
85 	DROP_GIANT();
86 
87 	error = mlx5_vsc_find_cap(mdev);
88 	if (error != 0) {
89 		/* Inability to create a firmware dump is not fatal. */
90 		mlx5_core_warn(mdev,
91 		    "Unable to find vendor-specific capability, error %d\n",
92 		    error);
93 		goto pickup_g;
94 	}
95 	error = mlx5_vsc_lock(mdev);
96 	if (error != 0)
97 		goto pickup_g;
98 	error = mlx5_vsc_set_space(mdev, MLX5_VSC_DOMAIN_SCAN_CRSPACE);
99 	if (error != 0) {
100 		mlx5_core_warn(mdev, "VSC scan space is not supported\n");
101 		goto unlock_vsc;
102 	}
103 	dev = mdev->pdev->dev.bsddev;
104 	vsc_addr = mdev->vsc_addr;
105 	if (vsc_addr == 0) {
106 		mlx5_core_warn(mdev, "Cannot read VSC, no address\n");
107 		goto unlock_vsc;
108 	}
109 
110 	in = 0;
111 	for (sz = 1, addr = 0;;) {
112 		MLX5_VSC_SET(vsc_addr, &in, address, addr);
113 		pci_write_config(dev, vsc_addr + MLX5_VSC_ADDR_OFFSET, in, 4);
114 		error = mlx5_vsc_wait_on_flag(mdev, 1);
115 		if (error != 0) {
116 			mlx5_core_warn(mdev,
117 		    "Failed waiting for read complete flag, error %d addr %#x\n",
118 			    error, addr);
119 			goto unlock_vsc;
120 		}
121 		pci_read_config(dev, vsc_addr + MLX5_VSC_DATA_OFFSET, 4);
122 		out = pci_read_config(dev, vsc_addr + MLX5_VSC_ADDR_OFFSET, 4);
123 		next_addr = MLX5_VSC_GET(vsc_addr, &out, address);
124 		if (next_addr == 0 || next_addr == addr)
125 			break;
126 		if (next_addr != addr + 4)
127 			sz++;
128 		addr = next_addr;
129 	}
130 	if (sz == 1) {
131 		mlx5_core_warn(mdev, "no output from scan space\n");
132 		goto unlock_vsc;
133 	}
134 
135 	/*
136 	 * We add a sentinel element at the end of the array to
137 	 * terminate the read loop in mlx5_fwdump(), so allocate sz + 1.
138 	 */
139 	mdev->dump_rege = malloc((sz + 1) * sizeof(struct mlx5_crspace_regmap),
140 	    M_MLX5_DUMP, M_WAITOK | M_ZERO);
141 
142 	for (i = 0, addr = 0;;) {
143 		mdev->dump_rege[i].cnt++;
144 		MLX5_VSC_SET(vsc_addr, &in, address, addr);
145 		pci_write_config(dev, vsc_addr + MLX5_VSC_ADDR_OFFSET, in, 4);
146 		error = mlx5_vsc_wait_on_flag(mdev, 1);
147 		if (error != 0) {
148 			mlx5_core_warn(mdev,
149 		    "Failed waiting for read complete flag, error %d addr %#x\n",
150 			    error, addr);
151 			free(mdev->dump_rege, M_MLX5_DUMP);
152 			mdev->dump_rege = NULL;
153 			goto unlock_vsc;
154 		}
155 		pci_read_config(dev, vsc_addr + MLX5_VSC_DATA_OFFSET, 4);
156 		out = pci_read_config(dev, vsc_addr + MLX5_VSC_ADDR_OFFSET, 4);
157 		next_addr = MLX5_VSC_GET(vsc_addr, &out, address);
158 		if (next_addr == 0 || next_addr == addr)
159 			break;
160 		if (next_addr != addr + 4) {
161 			if (++i == sz) {
162 				mlx5_core_err(mdev,
163 		    "Inconsistent hw crspace reads (1): sz %u i %u addr %#lx",
164 				    sz, i, (unsigned long)addr);
165 				break;
166 			}
167 			mdev->dump_rege[i].addr = next_addr;
168 		}
169 		addr = next_addr;
170 	}
171 	/* i == sz case already reported by loop above */
172 	if (i + 1 != sz && i != sz) {
173 		mlx5_core_err(mdev,
174 		    "Inconsistent hw crspace reads (2): sz %u i %u addr %#lx",
175 		    sz, i, (unsigned long)addr);
176 	}
177 
178 	mdev->dump_size = mlx5_fwdump_getsize(mdev->dump_rege);
179 	mdev->dump_data = malloc(mdev->dump_size * sizeof(uint32_t),
180 	    M_MLX5_DUMP, M_WAITOK | M_ZERO);
181 	mdev->dump_valid = false;
182 	mdev->dump_copyout = false;
183 
184 unlock_vsc:
185 	mlx5_vsc_unlock(mdev);
186 pickup_g:
187 	PICKUP_GIANT();
188 }
189 
190 int
191 mlx5_fwdump(struct mlx5_core_dev *mdev)
192 {
193 	const struct mlx5_crspace_regmap *r;
194 	uint32_t i, ri;
195 	int error;
196 
197 	mlx5_core_info(mdev, "Issuing FW dump\n");
198 	mtx_lock(&mdev->dump_lock);
199 	if (mdev->dump_data == NULL) {
200 		error = EIO;
201 		goto failed;
202 	}
203 	if (mdev->dump_valid) {
204 		/* only one dump */
205 		mlx5_core_warn(mdev,
206 		    "Only one FW dump can be captured aborting FW dump\n");
207 		error = EEXIST;
208 		goto failed;
209 	}
210 
211 	/* mlx5_vsc already warns, be silent. */
212 	error = mlx5_vsc_lock(mdev);
213 	if (error != 0)
214 		goto failed;
215 	error = mlx5_vsc_set_space(mdev, MLX5_VSC_DOMAIN_PROTECTED_CRSPACE);
216 	if (error != 0)
217 		goto unlock_vsc;
218 	for (i = 0, r = mdev->dump_rege; r->cnt != 0; r++) {
219 		for (ri = 0; ri < r->cnt; ri++) {
220 			error = mlx5_vsc_read(mdev, r->addr + ri * 4,
221 			    &mdev->dump_data[i]);
222 			if (error != 0)
223 				goto unlock_vsc;
224 			i++;
225 		}
226 	}
227 	mdev->dump_valid = true;
228 unlock_vsc:
229 	mlx5_vsc_unlock(mdev);
230 failed:
231 	mtx_unlock(&mdev->dump_lock);
232 	return (error);
233 }
234 
235 void
236 mlx5_fwdump_clean(struct mlx5_core_dev *mdev)
237 {
238 
239 	mtx_lock(&mdev->dump_lock);
240 	while (mdev->dump_copyout)
241 		msleep(&mdev->dump_copyout, &mdev->dump_lock, 0, "mlx5fwc", 0);
242 	mlx5_fwdump_destroy_dd(mdev);
243 	mtx_unlock(&mdev->dump_lock);
244 	free(mdev->dump_rege, M_MLX5_DUMP);
245 }
246 
247 static int
248 mlx5_fwdump_reset(struct mlx5_core_dev *mdev)
249 {
250 	int error;
251 
252 	error = 0;
253 	mtx_lock(&mdev->dump_lock);
254 	if (mdev->dump_data != NULL) {
255 		while (mdev->dump_copyout) {
256 			msleep(&mdev->dump_copyout, &mdev->dump_lock,
257 			    0, "mlx5fwr", 0);
258 		}
259 		mdev->dump_valid = false;
260 	} else {
261 		error = ENOENT;
262 	}
263 	mtx_unlock(&mdev->dump_lock);
264 	return (error);
265 }
266 
267 static int
268 mlx5_dbsf_to_core(const struct mlx5_tool_addr *devaddr,
269     struct mlx5_core_dev **mdev)
270 {
271 	device_t dev;
272 	struct pci_dev *pdev;
273 
274 	dev = pci_find_dbsf(devaddr->domain, devaddr->bus, devaddr->slot,
275 	    devaddr->func);
276 	if (dev == NULL)
277 		return (ENOENT);
278 	if (device_get_devclass(dev) != mlx5_core_driver.bsdclass)
279 		return (EINVAL);
280 	pdev = device_get_softc(dev);
281 	*mdev = pci_get_drvdata(pdev);
282 	if (*mdev == NULL)
283 		return (ENOENT);
284 	return (0);
285 }
286 
287 static int
288 mlx5_fwdump_copyout(struct mlx5_core_dev *mdev, struct mlx5_fwdump_get *fwg)
289 {
290 	const struct mlx5_crspace_regmap *r;
291 	struct mlx5_fwdump_reg rv, *urv;
292 	uint32_t i, ri;
293 	int error;
294 
295 	mtx_lock(&mdev->dump_lock);
296 	if (mdev->dump_data == NULL) {
297 		mtx_unlock(&mdev->dump_lock);
298 		return (ENOENT);
299 	}
300 	if (fwg->buf == NULL) {
301 		fwg->reg_filled = mdev->dump_size;
302 		mtx_unlock(&mdev->dump_lock);
303 		return (0);
304 	}
305 	if (!mdev->dump_valid) {
306 		mtx_unlock(&mdev->dump_lock);
307 		return (ENOENT);
308 	}
309 	mdev->dump_copyout = true;
310 	mtx_unlock(&mdev->dump_lock);
311 
312 	urv = fwg->buf;
313 	for (i = 0, r = mdev->dump_rege; r->cnt != 0; r++) {
314 		for (ri = 0; ri < r->cnt; ri++) {
315 			if (i >= fwg->reg_cnt)
316 				goto out;
317 			rv.addr = r->addr + ri * 4;
318 			rv.val = mdev->dump_data[i];
319 			error = copyout(&rv, urv, sizeof(rv));
320 			if (error != 0)
321 				return (error);
322 			urv++;
323 			i++;
324 		}
325 	}
326 out:
327 	fwg->reg_filled = i;
328 	mtx_lock(&mdev->dump_lock);
329 	mdev->dump_copyout = false;
330 	wakeup(&mdev->dump_copyout);
331 	mtx_unlock(&mdev->dump_lock);
332 	return (0);
333 }
334 
335 static int
336 mlx5_fw_reset(struct mlx5_core_dev *mdev)
337 {
338 	device_t dev, bus;
339 	int error;
340 
341 	error = -mlx5_set_mfrl_reg(mdev, MLX5_FRL_LEVEL3);
342 	if (error == 0) {
343 		dev = mdev->pdev->dev.bsddev;
344 		bus_topo_lock();
345 		bus = device_get_parent(dev);
346 		error = BUS_RESET_CHILD(device_get_parent(bus), bus,
347 		    DEVF_RESET_DETACH);
348 		bus_topo_unlock();
349 	}
350 	return (error);
351 }
352 
353 static int
354 mlx5_eeprom_copyout(struct mlx5_core_dev *dev, struct mlx5_eeprom_get *eeprom_info)
355 {
356 	struct mlx5_eeprom eeprom;
357 	int error;
358 
359 	eeprom.i2c_addr = MLX5_I2C_ADDR_LOW;
360 	eeprom.device_addr = 0;
361 	eeprom.page_num = MLX5_EEPROM_LOW_PAGE;
362 	eeprom.page_valid = 0;
363 
364 	/* Read three first bytes to get important info */
365 	error = mlx5_get_eeprom_info(dev, &eeprom);
366 	if (error != 0) {
367 		mlx5_core_err(dev,
368 		    "Failed reading EEPROM initial information\n");
369 		return (error);
370 	}
371 	eeprom_info->eeprom_info_page_valid = eeprom.page_valid;
372 	eeprom_info->eeprom_info_out_len = eeprom.len;
373 
374 	if (eeprom_info->eeprom_info_buf == NULL)
375 		return (0);
376 	/*
377 	 * Allocate needed length buffer and additional space for
378 	 * page 0x03
379 	 */
380 	eeprom.data = malloc(eeprom.len + MLX5_EEPROM_PAGE_LENGTH,
381 	    M_MLX5_EEPROM, M_WAITOK | M_ZERO);
382 
383 	/* Read the whole eeprom information */
384 	error = mlx5_get_eeprom(dev, &eeprom);
385 	if (error != 0) {
386 		mlx5_core_err(dev, "Failed reading EEPROM error = %d\n",
387 		    error);
388 		error = 0;
389 		/*
390 		 * Continue printing partial information in case of
391 		 * an error
392 		 */
393 	}
394 	error = copyout(eeprom.data, eeprom_info->eeprom_info_buf,
395 	    eeprom.len);
396 	free(eeprom.data, M_MLX5_EEPROM);
397 
398 	return (error);
399 }
400 
401 static int
402 mlx5_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
403     struct thread *td)
404 {
405 	struct mlx5_core_dev *mdev;
406 	struct mlx5_fwdump_get *fwg;
407 	struct mlx5_tool_addr *devaddr;
408 	struct mlx5_fw_update *fu;
409 	struct firmware fake_fw;
410 	struct mlx5_eeprom_get *eeprom_info;
411 	void *fw_data;
412 	int error;
413 
414 	error = 0;
415 	switch (cmd) {
416 	case MLX5_FWDUMP_GET:
417 		if ((fflag & FREAD) == 0) {
418 			error = EBADF;
419 			break;
420 		}
421 		fwg = (struct mlx5_fwdump_get *)data;
422 		devaddr = &fwg->devaddr;
423 		error = mlx5_dbsf_to_core(devaddr, &mdev);
424 		if (error != 0)
425 			break;
426 		error = mlx5_fwdump_copyout(mdev, fwg);
427 		break;
428 	case MLX5_FWDUMP_RESET:
429 		if ((fflag & FWRITE) == 0) {
430 			error = EBADF;
431 			break;
432 		}
433 		devaddr = (struct mlx5_tool_addr *)data;
434 		error = mlx5_dbsf_to_core(devaddr, &mdev);
435 		if (error == 0)
436 			error = mlx5_fwdump_reset(mdev);
437 		break;
438 	case MLX5_FWDUMP_FORCE:
439 		if ((fflag & FWRITE) == 0) {
440 			error = EBADF;
441 			break;
442 		}
443 		devaddr = (struct mlx5_tool_addr *)data;
444 		error = mlx5_dbsf_to_core(devaddr, &mdev);
445 		if (error != 0)
446 			break;
447 		error = mlx5_fwdump(mdev);
448 		break;
449 	case MLX5_FW_UPDATE:
450 		if ((fflag & FWRITE) == 0) {
451 			error = EBADF;
452 			break;
453 		}
454 		fu = (struct mlx5_fw_update *)data;
455 		if (fu->img_fw_data_len > 10 * 1024 * 1024) {
456 			error = EINVAL;
457 			break;
458 		}
459 		devaddr = &fu->devaddr;
460 		error = mlx5_dbsf_to_core(devaddr, &mdev);
461 		if (error != 0)
462 			break;
463 		fw_data = kmem_malloc(fu->img_fw_data_len, M_WAITOK);
464 		if (fake_fw.data == NULL) {
465 			error = ENOMEM;
466 			break;
467 		}
468 		error = copyin(fu->img_fw_data, fw_data, fu->img_fw_data_len);
469 		if (error == 0) {
470 			bzero(&fake_fw, sizeof(fake_fw));
471 			fake_fw.name = "umlx_fw_up";
472 			fake_fw.datasize = fu->img_fw_data_len;
473 			fake_fw.version = 1;
474 			fake_fw.data = fw_data;
475 			error = -mlx5_firmware_flash(mdev, &fake_fw);
476 		}
477 		kmem_free(fw_data, fu->img_fw_data_len);
478 		break;
479 	case MLX5_FW_RESET:
480 		if ((fflag & FWRITE) == 0) {
481 			error = EBADF;
482 			break;
483 		}
484 		devaddr = (struct mlx5_tool_addr *)data;
485 		error = mlx5_dbsf_to_core(devaddr, &mdev);
486 		if (error != 0)
487 			break;
488 		error = mlx5_fw_reset(mdev);
489 		break;
490 	case MLX5_EEPROM_GET:
491 		if ((fflag & FREAD) == 0) {
492 			error = EBADF;
493 			break;
494 		}
495 		eeprom_info = (struct mlx5_eeprom_get *)data;
496 		devaddr = &eeprom_info->devaddr;
497 		error = mlx5_dbsf_to_core(devaddr, &mdev);
498 		if (error != 0)
499 			break;
500 		error = mlx5_eeprom_copyout(mdev, eeprom_info);
501 		break;
502 	default:
503 		error = ENOTTY;
504 		break;
505 	}
506 	return (error);
507 }
508 
/*
 * Character device switch for the mlx5 control node.  Only the ioctl
 * entry point is provided; it is handled by mlx5_ctl_ioctl().
 */
509 static struct cdevsw mlx5_ctl_devsw = {
510 	.d_version =	D_VERSION,
511 	.d_ioctl =	mlx5_ctl_ioctl,
512 };
513 
/* Control device node: created by mlx5_ctl_init(), destroyed by mlx5_ctl_fini(). */
514 static struct cdev *mlx5_ctl_dev;
515 
516 int
517 mlx5_ctl_init(void)
518 {
519 	struct make_dev_args mda;
520 	int error;
521 
522 	make_dev_args_init(&mda);
523 	mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
524 	mda.mda_devsw = &mlx5_ctl_devsw;
525 	mda.mda_uid = UID_ROOT;
526 	mda.mda_gid = GID_OPERATOR;
527 	mda.mda_mode = 0640;
528 	error = make_dev_s(&mda, &mlx5_ctl_dev, "mlx5ctl");
529 	return (-error);
530 }
531 
532 void
533 mlx5_ctl_fini(void)
534 {
535 
536 	if (mlx5_ctl_dev != NULL)
537 		destroy_dev(mlx5_ctl_dev);
538 
539 }
540