1 /* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include "amdgpu_ras_eeprom.h" 25 #include "amdgpu.h" 26 #include "amdgpu_ras.h" 27 #include <linux/bits.h> 28 #include "atom.h" 29 30 #define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 31 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 32 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 33 34 /* 35 * The 2 macros bellow represent the actual size in bytes that 36 * those entities occupy in the EEPROM memory. 37 * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which 38 * uses uint64 to store 6b fields such as retired_page. 39 */ 40 #define EEPROM_TABLE_HEADER_SIZE 20 41 #define EEPROM_TABLE_RECORD_SIZE 24 42 43 #define EEPROM_ADDRESS_SIZE 0x2 44 45 /* Table hdr is 'AMDR' */ 46 #define EEPROM_TABLE_HDR_VAL 0x414d4452 47 #define EEPROM_TABLE_VER 0x00010000 48 49 /* Assume 2 Mbit size */ 50 #define EEPROM_SIZE_BYTES 256000 51 #define EEPROM_PAGE__SIZE_BYTES 256 52 #define EEPROM_HDR_START 0 53 #define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE) 54 #define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE) 55 #define EEPROM_ADDR_MSB_MASK GENMASK(17, 8) 56 57 #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev 58 59 static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev, 60 uint16_t *i2c_addr) 61 { 62 STUB(); 63 return false; 64 #ifdef notyet 65 struct atom_context *atom_ctx = adev->mode_info.atom_context; 66 67 if (!i2c_addr || !atom_ctx) 68 return false; 69 70 if (strnstr(atom_ctx->vbios_version, 71 "D342", 72 sizeof(atom_ctx->vbios_version))) 73 *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342; 74 else 75 *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS; 76 77 return true; 78 #endif 79 } 80 81 static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, 82 uint16_t *i2c_addr) 83 { 84 if (!i2c_addr) 85 return false; 86 87 switch (adev->asic_type) { 88 case CHIP_VEGA20: 89 *i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20; 90 break; 91 92 case CHIP_ARCTURUS: 93 return __get_eeprom_i2c_addr_arct(adev, i2c_addr); 94 95 default: 96 return false; 97 } 98 99 return true; 100 } 101 102 static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr, 103 unsigned char *buff) 104 { 105 uint32_t *pp = (uint32_t *) buff; 106 107 pp[0] = cpu_to_le32(hdr->header); 108 pp[1] = cpu_to_le32(hdr->version); 109 pp[2] = cpu_to_le32(hdr->first_rec_offset); 110 pp[3] = cpu_to_le32(hdr->tbl_size); 111 pp[4] = cpu_to_le32(hdr->checksum); 112 } 113 114 static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr, 115 unsigned char *buff) 116 { 117 uint32_t *pp = (uint32_t *)buff; 118 119 hdr->header = le32_to_cpu(pp[0]); 120 hdr->version = le32_to_cpu(pp[1]); 121 hdr->first_rec_offset = le32_to_cpu(pp[2]); 122 hdr->tbl_size = le32_to_cpu(pp[3]); 123 hdr->checksum = le32_to_cpu(pp[4]); 124 } 125 126 static int __update_table_header(struct amdgpu_ras_eeprom_control *control, 127 unsigned char *buff) 128 { 129 int ret = 0; 130 struct amdgpu_device *adev = to_amdgpu_device(control); 131 struct i2c_msg msg = { 132 .addr = 0, 133 .flags = 0, 134 .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, 135 .buf = buff, 136 }; 137 138 139 *(uint16_t *)buff = EEPROM_HDR_START; 140 __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); 141 142 msg.addr = control->i2c_address; 143 144 ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); 145 if (ret < 1) 146 DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); 147 148 return ret; 149 } 150 151 static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) 152 { 153 int i; 154 uint32_t tbl_sum = 0; 155 156 /* Header checksum, skip checksum field in the calculation */ 157 for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++) 158 tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); 159 160 return tbl_sum; 161 } 162 163 static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records, 164 int num) 165 { 166 int i, j; 167 uint32_t tbl_sum = 0; 168 169 /* Records checksum */ 170 for (i = 0; i < num; i++) { 171 struct eeprom_table_record *record = &records[i]; 172 173 for (j = 0; j < sizeof(*record); j++) { 174 tbl_sum += *(((unsigned char *)record) + j); 175 } 176 } 177 178 return tbl_sum; 179 } 180 181 static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control, 182 struct eeprom_table_record *records, int num) 183 { 184 return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num); 185 } 186 187 /* Checksum = 256 -((sum of all table entries) mod 256) */ 188 static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, 189 struct eeprom_table_record *records, int num, 190 uint32_t old_hdr_byte_sum) 191 { 192 /* 193 * This will update the table sum with new records. 194 * 195 * TODO: What happens when the EEPROM table is to be wrapped around 196 * and old records from start will get overridden. 197 */ 198 199 /* need to recalculate updated header byte sum */ 200 control->tbl_byte_sum -= old_hdr_byte_sum; 201 control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num); 202 203 control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); 204 } 205 206 /* table sum mod 256 + checksum must equals 256 */ 207 static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, 208 struct eeprom_table_record *records, int num) 209 { 210 control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); 211 212 if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) { 213 DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum); 214 return false; 215 } 216 217 return true; 218 } 219 220 int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) 221 { 222 unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; 223 struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; 224 int ret = 0; 225 226 mutex_lock(&control->tbl_mutex); 227 228 hdr->header = EEPROM_TABLE_HDR_VAL; 229 hdr->version = EEPROM_TABLE_VER; 230 hdr->first_rec_offset = EEPROM_RECORD_START; 231 hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; 232 233 control->tbl_byte_sum = 0; 234 __update_tbl_checksum(control, NULL, 0, 0); 235 control->next_addr = EEPROM_RECORD_START; 236 237 ret = __update_table_header(control, buff); 238 239 mutex_unlock(&control->tbl_mutex); 240 241 return ret; 242 243 } 244 245 int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) 246 { 247 int ret = 0; 248 struct amdgpu_device *adev = to_amdgpu_device(control); 249 unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; 250 struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; 251 struct i2c_msg msg = { 252 .addr = 0, 253 .flags = I2C_M_RD, 254 .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, 255 .buf = buff, 256 }; 257 258 /* Verify i2c adapter is initialized */ 259 if (!adev->pm.smu_i2c.algo) 260 return -ENOENT; 261 262 if (!__get_eeprom_i2c_addr(adev, &control->i2c_address)) 263 return -EINVAL; 264 265 rw_init(&control->tbl_mutex, "rastbl"); 266 267 msg.addr = control->i2c_address; 268 /* Read/Create table header from EEPROM address 0 */ 269 ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); 270 if (ret < 1) { 271 DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); 272 return ret; 273 } 274 275 __decode_table_header_from_buff(hdr, &buff[2]); 276 277 if (hdr->header == EEPROM_TABLE_HDR_VAL) { 278 control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / 279 EEPROM_TABLE_RECORD_SIZE; 280 control->tbl_byte_sum = __calc_hdr_byte_sum(control); 281 control->next_addr = EEPROM_RECORD_START; 282 283 DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", 284 control->num_recs); 285 286 } else { 287 DRM_INFO("Creating new EEPROM table"); 288 289 ret = amdgpu_ras_eeprom_reset_table(control); 290 } 291 292 return ret == 1 ? 0 : -EIO; 293 } 294 295 static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control, 296 struct eeprom_table_record *record, 297 unsigned char *buff) 298 { 299 __le64 tmp = 0; 300 int i = 0; 301 302 /* Next are all record fields according to EEPROM page spec in LE foramt */ 303 buff[i++] = record->err_type; 304 305 buff[i++] = record->bank; 306 307 tmp = cpu_to_le64(record->ts); 308 memcpy(buff + i, &tmp, 8); 309 i += 8; 310 311 tmp = cpu_to_le64((record->offset & 0xffffffffffff)); 312 memcpy(buff + i, &tmp, 6); 313 i += 6; 314 315 buff[i++] = record->mem_channel; 316 buff[i++] = record->mcumc_id; 317 318 tmp = cpu_to_le64((record->retired_page & 0xffffffffffff)); 319 memcpy(buff + i, &tmp, 6); 320 } 321 322 static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control, 323 struct eeprom_table_record *record, 324 unsigned char *buff) 325 { 326 __le64 tmp = 0; 327 int i = 0; 328 329 /* Next are all record fields according to EEPROM page spec in LE foramt */ 330 record->err_type = buff[i++]; 331 332 record->bank = buff[i++]; 333 334 memcpy(&tmp, buff + i, 8); 335 record->ts = le64_to_cpu(tmp); 336 i += 8; 337 338 memcpy(&tmp, buff + i, 6); 339 record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); 340 i += 6; 341 342 record->mem_channel = buff[i++]; 343 record->mcumc_id = buff[i++]; 344 345 memcpy(&tmp, buff + i, 6); 346 record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); 347 } 348 349 /* 350 * When reaching end of EEPROM memory jump back to 0 record address 351 * When next record access will go beyond EEPROM page boundary modify bits A17/A8 352 * in I2C selector to go to next page 353 */ 354 static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) 355 { 356 uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE; 357 358 /* When all EEPROM memory used jump back to 0 address */ 359 if (next_address > EEPROM_SIZE_BYTES) { 360 DRM_INFO("Reached end of EEPROM memory, jumping to 0 " 361 "and overriding old record"); 362 return EEPROM_RECORD_START; 363 } 364 365 /* 366 * To check if we overflow page boundary compare next address with 367 * current and see if bits 17/8 of the EEPROM address will change 368 * If they do start from the next 256b page 369 * 370 * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 5.1.2 371 */ 372 if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) { 373 DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx", 374 (next_address & EEPROM_ADDR_MSB_MASK)); 375 376 return (next_address & EEPROM_ADDR_MSB_MASK); 377 } 378 379 return curr_address; 380 } 381 382 int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, 383 struct eeprom_table_record *records, 384 bool write, 385 int num) 386 { 387 int i, ret = 0; 388 struct i2c_msg *msgs, *msg; 389 unsigned char *buffs, *buff; 390 struct eeprom_table_record *record; 391 struct amdgpu_device *adev = to_amdgpu_device(control); 392 393 if (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS) 394 return 0; 395 396 buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE, 397 GFP_KERNEL); 398 if (!buffs) 399 return -ENOMEM; 400 401 mutex_lock(&control->tbl_mutex); 402 403 msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL); 404 if (!msgs) { 405 ret = -ENOMEM; 406 goto free_buff; 407 } 408 409 /* In case of overflow just start from beginning to not lose newest records */ 410 if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES)) 411 control->next_addr = EEPROM_RECORD_START; 412 413 414 /* 415 * TODO Currently makes EEPROM writes for each record, this creates 416 * internal fragmentation. Optimized the code to do full page write of 417 * 256b 418 */ 419 for (i = 0; i < num; i++) { 420 buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; 421 record = &records[i]; 422 msg = &msgs[i]; 423 424 control->next_addr = __correct_eeprom_dest_address(control->next_addr); 425 426 /* 427 * Update bits 16,17 of EEPROM address in I2C address by setting them 428 * to bits 1,2 of Device address byte 429 */ 430 msg->addr = control->i2c_address | 431 ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); 432 msg->flags = write ? 0 : I2C_M_RD; 433 msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE; 434 msg->buf = buff; 435 436 /* Insert the EEPROM dest addess, bits 0-15 */ 437 buff[0] = ((control->next_addr >> 8) & 0xff); 438 buff[1] = (control->next_addr & 0xff); 439 440 /* EEPROM table content is stored in LE format */ 441 if (write) 442 __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE); 443 444 /* 445 * The destination EEPROM address might need to be corrected to account 446 * for page or entire memory wrapping 447 */ 448 control->next_addr += EEPROM_TABLE_RECORD_SIZE; 449 } 450 451 ret = i2c_transfer(&adev->pm.smu_i2c, msgs, num); 452 if (ret < 1) { 453 DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret); 454 455 /* TODO Restore prev next EEPROM address ? */ 456 goto free_msgs; 457 } 458 459 460 if (!write) { 461 for (i = 0; i < num; i++) { 462 buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; 463 record = &records[i]; 464 465 __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE); 466 } 467 } 468 469 if (write) { 470 uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control); 471 472 /* 473 * Update table header with size and CRC and account for table 474 * wrap around where the assumption is that we treat it as empty 475 * table 476 * 477 * TODO - Check the assumption is correct 478 */ 479 control->num_recs += num; 480 control->num_recs %= EEPROM_MAX_RECORD_NUM; 481 control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num; 482 if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES) 483 control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE + 484 control->num_recs * EEPROM_TABLE_RECORD_SIZE; 485 486 __update_tbl_checksum(control, records, num, old_hdr_byte_sum); 487 488 __update_table_header(control, buffs); 489 } else if (!__validate_tbl_checksum(control, records, num)) { 490 DRM_WARN("EEPROM Table checksum mismatch!"); 491 /* TODO Uncomment when EEPROM read/write is relliable */ 492 /* ret = -EIO; */ 493 } 494 495 free_msgs: 496 kfree(msgs); 497 498 free_buff: 499 kfree(buffs); 500 501 mutex_unlock(&control->tbl_mutex); 502 503 return ret == num ? 0 : -EIO; 504 } 505 506 /* Used for testing if bugs encountered */ 507 #if 0 508 void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control) 509 { 510 int i; 511 struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL); 512 513 if (!recs) 514 return; 515 516 for (i = 0; i < 1 ; i++) { 517 recs[i].address = 0xdeadbeef; 518 recs[i].retired_page = i; 519 } 520 521 if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) { 522 523 memset(recs, 0, sizeof(*recs) * 1); 524 525 control->next_addr = EEPROM_RECORD_START; 526 527 if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) { 528 for (i = 0; i < 1; i++) 529 DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu", 530 recs[i].address, recs[i].retired_page); 531 } else 532 DRM_ERROR("Failed in reading from table"); 533 534 } else 535 DRM_ERROR("Failed in writing to table"); 536 } 537 #endif 538