/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Brad Volkin <bradley.d.volkin@intel.com>
 *
 */

#include "i915_drv.h"
#include "intel_ringbuffer.h"

/**
 * DOC: batch buffer command parser
 *
 * Motivation:
 * Certain OpenGL features (e.g. transform feedback, performance monitoring)
 * require userspace code to submit batches containing commands such as
 * MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some
 * generations of the hardware will noop these commands in "unsecure" batches
 * (which includes all userspace batches submitted via i915) even though the
 * commands may be safe and represent the intended programming model of the
 * device.
 *
 * The software command parser is similar in operation to the command parsing
 * done in hardware for unsecure batches. However, the software parser allows
 * some operations that would be noop'd by hardware, if the parser determines
 * the operation is safe, and submits the batch as "secure" to prevent
 * hardware parsing.
 *
 * Threats:
 * At a high level, the hardware (and software) checks attempt to prevent
 * granting userspace undue privileges. There are three categories of
 * privilege.
 *
 * First, commands which are explicitly defined as privileged or which should
 * only be used by the kernel driver. The parser rejects such commands.
 *
 * Second, commands which access registers. To support correct/enhanced
 * userspace functionality, particularly certain OpenGL extensions, the parser
 * provides a whitelist of registers which userspace may safely access.
 *
 * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
 * The parser always rejects such commands.
 *
 * The majority of the problematic commands fall in the MI_* range, with only
 * a few specific commands on each engine (e.g. PIPE_CONTROL and MI_FLUSH_DW).
 *
 * Implementation:
 * Each engine maintains tables of commands and registers which the parser
 * uses in scanning batch buffers submitted to that engine.
 *
 * Since the set of commands that the parser must check for is significantly
 * smaller than the number of commands supported, the parser tables contain
 * only those commands required by the parser. This generally works because
 * command opcode ranges have standard command length encodings, so the parser
 * can easily skip the commands it does not need to check. This is implemented
 * via a per-engine length decoding vfunc.
 *
 * Unfortunately, there are a number of commands that do not follow the
 * standard length encoding for their opcode range, primarily amongst the
 * MI_* commands. To handle this, the parser provides a way to define explicit
 * "skip" entries in the per-engine command tables.
 *
 * Other command table entries map fairly directly to the high level
 * categories mentioned above: rejected, register whitelist. The parser
 * implements a number of checks, including the privileged memory checks,
 * via a general bitmasking mechanism.
 */
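
/*
 * An illustrative sketch (not part of any table) of the kind of dword stream
 * the parser scans: an MI_LOAD_REGISTER_IMM writing one register is three
 * dwords - a header, a register offset and a value, e.g.
 *
 *	batch[0] = MI_LOAD_REGISTER_IMM(1);	header, opcode in bits 31:23
 *	batch[1] = 0x2358;			register offset; here the render
 *						engine's RING_TIMESTAMP, which
 *						is whitelisted below
 *	batch[2] = 0xffffffff;			value to write
 *
 * The parser looks the header up in the command tables, decodes the command
 * length, checks the register offset against the engine's whitelist and, if
 * everything passes, moves on to batch[3].
 */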

/*
 * A command that requires special handling by the command parser.
 */
struct drm_i915_cmd_descriptor {
	/*
	 * Flags describing how the command parser processes the command.
	 *
	 * CMD_DESC_FIXED: The command has a fixed length if this is set,
	 *                 a length mask if not set
	 * CMD_DESC_SKIP: The command is allowed but does not follow the
	 *                standard length encoding for the opcode range in
	 *                which it falls
	 * CMD_DESC_REJECT: The command is never allowed
	 * CMD_DESC_REGISTER: The command should be checked against the
	 *                    register whitelist for the appropriate ring
	 */
	u32 flags;
#define CMD_DESC_FIXED    (1<<0)
#define CMD_DESC_SKIP     (1<<1)
#define CMD_DESC_REJECT   (1<<2)
#define CMD_DESC_REGISTER (1<<3)
#define CMD_DESC_BITMASK  (1<<4)

	/*
	 * The command's unique identification bits and the bitmask to get
	 * them. This isn't strictly the opcode field as defined in the spec
	 * and may also include type, subtype, and/or subop fields.
	 */
	struct {
		u32 value;
		u32 mask;
	} cmd;

	/*
	 * The command's length. The command is either fixed length (i.e. does
	 * not include a length field) or has a length field mask. The flag
	 * CMD_DESC_FIXED indicates a fixed length. Otherwise, the command has
	 * a length mask. All command entries in a command table must include
	 * length information.
	 */
	union {
		u32 fixed;
		u32 mask;
	} length;

	/*
	 * Describes where to find a register address in the command to check
	 * against the ring's register whitelist. Only valid if flags has the
	 * CMD_DESC_REGISTER bit set.
	 *
	 * A non-zero step value implies that the command may access multiple
	 * registers in sequence (e.g. LRI), in that case step gives the
	 * distance in dwords between individual offset fields.
	 */
	struct {
		u32 offset;
		u32 mask;
		u32 step;
	} reg;

#define MAX_CMD_DESC_BITMASKS 3
	/*
	 * Describes command checks where a particular dword is masked and
	 * compared against an expected value. If the command does not match
	 * the expected value, the parser rejects it. Only valid if flags has
	 * the CMD_DESC_BITMASK bit set. Only entries where mask is non-zero
	 * are valid.
	 *
	 * If the check specifies a non-zero condition_mask then the parser
	 * only performs the check when the bits specified by condition_mask
	 * are non-zero.
	 */
	struct {
		u32 offset;
		u32 mask;
		u32 expected;
		u32 condition_offset;
		u32 condition_mask;
	} bits[MAX_CMD_DESC_BITMASKS];
};

/*
 * A table of commands requiring special handling by the command parser.
 *
 * Each engine has an array of tables. Each table consists of an array of
 * command descriptors, which must be sorted with command opcodes in
 * ascending order.
 */
struct drm_i915_cmd_table {
	const struct drm_i915_cmd_descriptor *table;
	int count;
};

#define STD_MI_OPCODE_SHIFT  (32 - 9)
#define STD_3D_OPCODE_SHIFT  (32 - 16)
#define STD_2D_OPCODE_SHIFT  (32 - 10)
#define STD_MFX_OPCODE_SHIFT (32 - 16)
#define MIN_OPCODE_SHIFT 16

#define CMD(op, opm, f, lm, fl, ...)				\
	{							\
		.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),	\
		.cmd = { (op & ~0u << (opm)), ~0u << (opm) },	\
		.length = { (lm) },				\
		__VA_ARGS__					\
	}

/* Convenience macros to compress the tables */
#define SMI STD_MI_OPCODE_SHIFT
#define S3D STD_3D_OPCODE_SHIFT
#define S2D STD_2D_OPCODE_SHIFT
#define SMFX STD_MFX_OPCODE_SHIFT
#define F true
#define S CMD_DESC_SKIP
#define R CMD_DESC_REJECT
#define W CMD_DESC_REGISTER
#define B CMD_DESC_BITMASK
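
/*
 * For illustration, a table entry written as
 *
 *	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
 *
 * expands to roughly
 *
 *	{
 *		.flags = CMD_DESC_SKIP | CMD_DESC_FIXED,
 *		.cmd = { MI_NOOP & (~0u << SMI), ~0u << SMI },
 *		.length = { 1 },
 *	}
 *
 * i.e. a fixed-length one-dword command, matched on its top 9 opcode bits
 * and skipped (allowed) by the parser.
 */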

/*            Command                          Mask   Fixed Len   Action
	      ---------------------------------------------------------- */
static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
	CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      R  ),
	CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
	CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
	CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
	CMD(  MI_SEMAPHORE_MBOX,                SMI,   !F,  0xFF,   R  ),
	CMD(  MI_STORE_DWORD_INDEX,             SMI,   !F,  0xFF,   R  ),
	CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 }    ),
	CMD(  MI_STORE_REGISTER_MEM,            SMI,    F,  3,     W | B,
	      .reg = { .offset = 1, .mask = 0x007FFFFC },
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_LOAD_REGISTER_MEM,             SMI,    F,  3,     W | B,
	      .reg = { .offset = 1, .mask = 0x007FFFFC },
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	/*
	 * MI_BATCH_BUFFER_START requires some special handling. It's not
	 * really a 'skip' action but it doesn't seem like it's worth adding
	 * a new action. See intel_engine_cmd_parser().
	 */
	CMD(  MI_BATCH_BUFFER_START,            SMI,   !F,  0xFF,   S  ),
};

static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
	CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
	CMD(  MI_TOPOLOGY_FILTER,               SMI,    F,  1,      S  ),
	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
	CMD(  MI_SET_CONTEXT,                   SMI,   !F,  0xFF,   R  ),
	CMD(  MI_URB_CLEAR,                     SMI,   !F,  0xFF,   S  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0xFF,   R  ),
	CMD(  MI_CLFLUSH,                       SMI,   !F,  0x3FF,  B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_REPORT_PERF_COUNT,             SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 1,
			.mask = MI_REPORT_PERF_COUNT_GGTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  GFX_OP_3DSTATE_VF_STATISTICS,     S3D,    F,  1,      S  ),
	CMD(  PIPELINE_SELECT,                  S3D,    F,  1,      S  ),
	CMD(  MEDIA_VFE_STATE,                  S3D,   !F,  0xFFFF, B,
	      .bits = {{
			.offset = 2,
			.mask = MEDIA_VFE_STATE_MMIO_ACCESS_MASK,
			.expected = 0,
	      }},						       ),
	CMD(  GPGPU_OBJECT,                     S3D,   !F,  0xFF,   S  ),
	CMD(  GPGPU_WALKER,                     S3D,   !F,  0xFF,   S  ),
	CMD(  GFX_OP_3DSTATE_SO_DECL_LIST,      S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_PIPE_CONTROL(5),           S3D,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 1,
			.mask = (PIPE_CONTROL_MMIO_WRITE | PIPE_CONTROL_NOTIFY),
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = (PIPE_CONTROL_GLOBAL_GTT_IVB |
				 PIPE_CONTROL_STORE_DATA_INDEX),
			.expected = 0,
			.condition_offset = 1,
			.condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK,
	      }},						       ),
};

static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
	CMD(  MI_SET_PREDICATE,                 SMI,    F,  1,      S  ),
	CMD(  MI_RS_CONTROL,                    SMI,    F,  1,      S  ),
	CMD(  MI_URB_ATOMIC_ALLOC,              SMI,    F,  1,      S  ),
	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
	CMD(  MI_RS_CONTEXT,                    SMI,    F,  1,      S  ),
	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   R  ),
	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
	CMD(  MI_LOAD_REGISTER_REG,             SMI,   !F,  0xFF,   W,
	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),
	CMD(  MI_RS_STORE_DATA_IMM,             SMI,   !F,  0xFF,   S  ),
	CMD(  MI_LOAD_URB_MEM,                  SMI,   !F,  0xFF,   S  ),
	CMD(  MI_STORE_URB_MEM,                 SMI,   !F,  0xFF,   S  ),
	CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_VS,  S3D,   !F,  0x7FF,  S  ),
	CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_PS,  S3D,   !F,  0x7FF,  S  ),

	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS, S3D,  !F, 0x1FF, S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS, S3D,  !F, 0x1FF, S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS, S3D,  !F, 0x1FF, S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS, S3D,  !F, 0x1FF, S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D,  !F, 0x1FF, S  ),
};

static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	/*
	 * MFX_WAIT doesn't fit the way we handle length for most commands.
	 * It has a length field but it uses a non-standard length bias.
	 * It is always 1 dword though, so just treat it as fixed length.
	 */
	CMD(  MFX_WAIT,                         SMFX,   F,  1,      S  ),
};

static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
};

static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  COLOR_BLT,                        S2D,   !F,  0x3F,   S  ),
	CMD(  SRC_COPY_BLT,                     S2D,   !F,  0x3F,   S  ),
};

static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   R  ),
	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
};

/*
 * For Gen9 we can still rely on the h/w to enforce cmd security, and only
 * need to re-enforce the register access checks. We therefore only need to
 * teach the cmdparser how to find the end of each command, and identify
 * register accesses. The table doesn't need to reject any commands, and so
 * the only commands listed here are:
 *   1) Those that touch registers
 *   2) Those that do not have the default 8-bit length
 *
 * Note that the default MI length mask chosen for this table is 0xFF, not
 * the 0x3F used on older devices. This is because the vast majority of MI
 * cmds on Gen9 use a standard 8-bit Length field.
 * All the Gen9 blitter instructions are standard 0xFF length mask, and
 * none allow access to non-general registers, so in fact no BLT cmds are
 * included in the table at all.
 */
static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
	CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      S  ),
	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      S  ),
	CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
	CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
	CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      S  ),
	CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   S  ),
	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   S  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  S  ),
	CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 }    ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3FF,  S  ),
	CMD(  MI_STORE_REGISTER_MEM_GEN8,       SMI,    F,  4,      W,
	      .reg = { .offset = 1, .mask = 0x007FFFFC }	       ),
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   S  ),
	CMD(  MI_LOAD_REGISTER_MEM_GEN8,        SMI,    F,  4,      W,
	      .reg = { .offset = 1, .mask = 0x007FFFFC }	       ),
	CMD(  MI_LOAD_REGISTER_REG,             SMI,   !F,  0xFF,   W,
	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),

	/*
	 * We allow BB_START but apply further checks. We just sanitize the
	 * basic fields here.
	 */
#define MI_BB_START_OPERAND_MASK   GENMASK(SMI-1, 0)
#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1)
	CMD(  MI_BATCH_BUFFER_START_GEN8,       SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_BB_START_OPERAND_MASK,
			.expected = MI_BB_START_OPERAND_EXPECT,
	      }},						       ),
};

static const struct drm_i915_cmd_descriptor noop_desc =
	CMD(MI_NOOP, SMI, F, 1, S);

#undef CMD
#undef SMI
#undef S3D
#undef S2D
#undef SMFX
#undef F
#undef S
#undef R
#undef W
#undef B

static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
	{ gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
};

static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
	{ gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
	{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
};

static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
	{ gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
};

static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
	{ gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
};

static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
	{ gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
};

static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
	{ gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
	{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
};

static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
	{ gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
};

/*
 * Register whitelists, sorted by increasing register offset.
 */

/*
 * An individual whitelist entry granting access to register addr. If
 * mask is non-zero the argument of immediate register writes will be
 * AND-ed with mask, and the command will be rejected if the result
 * doesn't match value.
 *
 * Registers with non-zero mask are only allowed to be written using
 * LRI.
 */
struct drm_i915_reg_descriptor {
	i915_reg_t addr;
	u32 mask;
	u32 value;
};

/* Convenience macro for adding 32-bit registers. */
#define REG32(_reg, ...) \
	{ .addr = (_reg), __VA_ARGS__ }

/*
 * Convenience macro for adding 64-bit registers.
 *
 * Some registers that userspace accesses are 64 bits. The register
 * access commands only allow 32-bit accesses. Hence, we have to include
 * entries for both halves of the 64-bit registers.
 */
#define REG64(_reg) \
	{ .addr = _reg }, \
	{ .addr = _reg ## _UDW }

#define REG64_IDX(_reg, idx) \
	{ .addr = _reg(idx) }, \
	{ .addr = _reg ## _UDW(idx) }
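
/*
 * For illustration, REG64(PS_INVOCATION_COUNT) expands to the two 32-bit
 * entries
 *
 *	{ .addr = PS_INVOCATION_COUNT },
 *	{ .addr = PS_INVOCATION_COUNT_UDW },
 *
 * so that a 32-bit SRM/LRM/LRI can touch either half of the 64-bit counter.
 */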

static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
	REG64(GPGPU_THREADS_DISPATCHED),
	REG64(HS_INVOCATION_COUNT),
	REG64(DS_INVOCATION_COUNT),
	REG64(IA_VERTICES_COUNT),
	REG64(IA_PRIMITIVES_COUNT),
	REG64(VS_INVOCATION_COUNT),
	REG64(GS_INVOCATION_COUNT),
	REG64(GS_PRIMITIVES_COUNT),
	REG64(CL_INVOCATION_COUNT),
	REG64(CL_PRIMITIVES_COUNT),
	REG64(PS_INVOCATION_COUNT),
	REG64(PS_DEPTH_COUNT),
	REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
	REG64(MI_PREDICATE_SRC0),
	REG64(MI_PREDICATE_SRC1),
	REG32(GEN7_3DPRIM_END_OFFSET),
	REG32(GEN7_3DPRIM_START_VERTEX),
	REG32(GEN7_3DPRIM_VERTEX_COUNT),
	REG32(GEN7_3DPRIM_INSTANCE_COUNT),
	REG32(GEN7_3DPRIM_START_INSTANCE),
	REG32(GEN7_3DPRIM_BASE_VERTEX),
	REG32(GEN7_GPGPU_DISPATCHDIMX),
	REG32(GEN7_GPGPU_DISPATCHDIMY),
	REG32(GEN7_GPGPU_DISPATCHDIMZ),
	REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0),
	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1),
	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2),
	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 3),
	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 0),
	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 1),
	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 2),
	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 3),
	REG32(GEN7_SO_WRITE_OFFSET(0)),
	REG32(GEN7_SO_WRITE_OFFSET(1)),
	REG32(GEN7_SO_WRITE_OFFSET(2)),
	REG32(GEN7_SO_WRITE_OFFSET(3)),
	REG32(GEN7_L3SQCREG1),
	REG32(GEN7_L3CNTLREG2),
	REG32(GEN7_L3CNTLREG3),
	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};

static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
	REG64_IDX(HSW_CS_GPR, 0),
	REG64_IDX(HSW_CS_GPR, 1),
	REG64_IDX(HSW_CS_GPR, 2),
	REG64_IDX(HSW_CS_GPR, 3),
	REG64_IDX(HSW_CS_GPR, 4),
	REG64_IDX(HSW_CS_GPR, 5),
	REG64_IDX(HSW_CS_GPR, 6),
	REG64_IDX(HSW_CS_GPR, 7),
	REG64_IDX(HSW_CS_GPR, 8),
	REG64_IDX(HSW_CS_GPR, 9),
	REG64_IDX(HSW_CS_GPR, 10),
	REG64_IDX(HSW_CS_GPR, 11),
	REG64_IDX(HSW_CS_GPR, 12),
	REG64_IDX(HSW_CS_GPR, 13),
	REG64_IDX(HSW_CS_GPR, 14),
	REG64_IDX(HSW_CS_GPR, 15),
	REG32(HSW_SCRATCH1,
	      .mask = ~HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE,
	      .value = 0),
	REG32(HSW_ROW_CHICKEN3,
	      .mask = ~(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE << 16 |
			HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
	      .value = 0),
};
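
/*
 * A worked example of the mask/value mechanism above: the HSW_SCRATCH1
 * entry has mask = ~HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE and value = 0,
 * so an LRI payload is accepted only if every bit other than the L3
 * atomics-disable bit is zero, i.e. userspace may toggle that single
 * chicken bit and nothing else in the register.
 */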

static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
	REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
	REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
	REG32(BCS_SWCTRL),
	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};

static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
	REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
	REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
	REG32(BCS_SWCTRL),
	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
	REG64_IDX(BCS_GPR, 0),
	REG64_IDX(BCS_GPR, 1),
	REG64_IDX(BCS_GPR, 2),
	REG64_IDX(BCS_GPR, 3),
	REG64_IDX(BCS_GPR, 4),
	REG64_IDX(BCS_GPR, 5),
	REG64_IDX(BCS_GPR, 6),
	REG64_IDX(BCS_GPR, 7),
	REG64_IDX(BCS_GPR, 8),
	REG64_IDX(BCS_GPR, 9),
	REG64_IDX(BCS_GPR, 10),
	REG64_IDX(BCS_GPR, 11),
	REG64_IDX(BCS_GPR, 12),
	REG64_IDX(BCS_GPR, 13),
	REG64_IDX(BCS_GPR, 14),
	REG64_IDX(BCS_GPR, 15),
};

#undef REG64
#undef REG32

struct drm_i915_reg_table {
	const struct drm_i915_reg_descriptor *regs;
	int num_regs;
};

static const struct drm_i915_reg_table ivb_render_reg_tables[] = {
	{ gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
};

static const struct drm_i915_reg_table ivb_blt_reg_tables[] = {
	{ gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
};

static const struct drm_i915_reg_table hsw_render_reg_tables[] = {
	{ gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
	{ hsw_render_regs, ARRAY_SIZE(hsw_render_regs) },
};

static const struct drm_i915_reg_table hsw_blt_reg_tables[] = {
	{ gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
};

static const struct drm_i915_reg_table gen9_blt_reg_tables[] = {
	{ gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) },
};

static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
{
	u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
	u32 subclient =
		(cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT;

	if (client == INSTR_MI_CLIENT)
		return 0x3F;
	else if (client == INSTR_RC_CLIENT) {
		if (subclient == INSTR_MEDIA_SUBCLIENT)
			return 0xFFFF;
		else
			return 0xFF;
	}

	DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header);
	return 0;
}

static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header)
{
	u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
	u32 subclient =
		(cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT;
	u32 op = (cmd_header & INSTR_26_TO_24_MASK) >> INSTR_26_TO_24_SHIFT;

	if (client == INSTR_MI_CLIENT)
		return 0x3F;
	else if (client == INSTR_RC_CLIENT) {
		if (subclient == INSTR_MEDIA_SUBCLIENT) {
			if (op == 6)
				return 0xFFFF;
			else
				return 0xFFF;
		} else
			return 0xFF;
	}

	DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header);
	return 0;
}

static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
{
	u32 client = cmd_header >> INSTR_CLIENT_SHIFT;

	if (client == INSTR_MI_CLIENT)
		return 0x3F;
	else if (client == INSTR_BC_CLIENT)
		return 0xFF;

	DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
	return 0;
}

static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
{
	u32 client = cmd_header >> INSTR_CLIENT_SHIFT;

	if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
		return 0xFF;

	DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
	return 0;
}
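
/*
 * These per-engine vfuncs are what let the parser skip commands that are
 * not in its tables. As a sketch of the arithmetic: for a header h whose
 * client field selects mask m = get_cmd_length_mask(h), the command
 * occupies (h & m) + LENGTH_BIAS dwords (the hardware length field is
 * biased by 2), unless a table entry overrides that with a fixed length
 * or a non-standard mask.
 */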

static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
				 const struct drm_i915_cmd_table *cmd_tables,
				 int cmd_table_count)
{
	int i;
	bool ret = true;

	if (!cmd_tables || cmd_table_count == 0)
		return true;

	for (i = 0; i < cmd_table_count; i++) {
		const struct drm_i915_cmd_table *table = &cmd_tables[i];
		u32 previous = 0;
		int j;

		for (j = 0; j < table->count; j++) {
			const struct drm_i915_cmd_descriptor *desc =
				&table->table[j];
			u32 curr = desc->cmd.value & desc->cmd.mask;

			if (curr < previous) {
				DRM_ERROR("CMD: %s [%d] command table not sorted: "
					  "table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
					  engine->name, engine->id,
					  i, j, curr, previous);
				ret = false;
			}

			previous = curr;
		}
	}

	return ret;
}

static bool check_sorted(const struct intel_engine_cs *engine,
			 const struct drm_i915_reg_descriptor *reg_table,
			 int reg_count)
{
	int i;
	u32 previous = 0;
	bool ret = true;

	for (i = 0; i < reg_count; i++) {
		u32 curr = i915_mmio_reg_offset(reg_table[i].addr);

		if (curr < previous) {
			DRM_ERROR("CMD: %s [%d] register table not sorted: "
				  "entry=%d reg=0x%08X prev=0x%08X\n",
				  engine->name, engine->id,
				  i, curr, previous);
			ret = false;
		}

		previous = curr;
	}

	return ret;
}

static bool validate_regs_sorted(struct intel_engine_cs *engine)
{
	int i;
	const struct drm_i915_reg_table *table;

	for (i = 0; i < engine->reg_table_count; i++) {
		table = &engine->reg_tables[i];
		if (!check_sorted(engine, table->regs, table->num_regs))
			return false;
	}

	return true;
}

struct cmd_node {
	const struct drm_i915_cmd_descriptor *desc;
	struct hlist_node node;
};

/*
 * Different command ranges have different numbers of bits for the opcode. For
 * example, MI commands use bits 31:23 while 3D commands use bits 31:16. The
 * problem is that, for example, MI commands use bits 22:16 for other fields
 * such as GGTT vs PPGTT bits. If we include those bits in the mask then when
 * we mask a command from a batch it could hash to the wrong bucket due to
 * non-opcode bits being set. But if we don't include those bits, some 3D
 * commands may hash to the same bucket due to not including opcode bits that
 * make the command unique. For now, we will risk hashing to the same bucket.
 */
static inline u32 cmd_header_key(u32 x)
{
	switch (x >> INSTR_CLIENT_SHIFT) {
	default:
	case INSTR_MI_CLIENT:
		return x >> STD_MI_OPCODE_SHIFT;
	case INSTR_RC_CLIENT:
		return x >> STD_3D_OPCODE_SHIFT;
	case INSTR_BC_CLIENT:
		return x >> STD_2D_OPCODE_SHIFT;
	}
}
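
/*
 * For example, two MI_STORE_DWORD_IMM headers that differ only in the
 * MI_GLOBAL_GTT bit (bit 22) both key to 0x20 here, since only bits 31:23
 * survive the shift: the GGTT flag never selects a different bucket, and
 * both headers find the same descriptor. Conversely, two 3D commands that
 * differ only below bit 23 share a bucket and are disambiguated by the
 * full cmd.mask compare in find_cmd_in_table().
 */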

static int init_hash_table(struct intel_engine_cs *engine,
			   const struct drm_i915_cmd_table *cmd_tables,
			   int cmd_table_count)
{
	int i, j;

	hash_init(engine->cmd_hash);

	for (i = 0; i < cmd_table_count; i++) {
		const struct drm_i915_cmd_table *table = &cmd_tables[i];

		for (j = 0; j < table->count; j++) {
			const struct drm_i915_cmd_descriptor *desc =
				&table->table[j];
			struct cmd_node *desc_node =
				kmalloc(sizeof(*desc_node), GFP_KERNEL);

			if (!desc_node)
				return -ENOMEM;

			desc_node->desc = desc;
			hash_add(engine->cmd_hash, &desc_node->node,
				 cmd_header_key(desc->cmd.value));
		}
	}

	return 0;
}

static void fini_hash_table(struct intel_engine_cs *engine)
{
	struct hlist_node *tmp;
	struct cmd_node *desc_node;
	int i;

	hash_for_each_safe(engine->cmd_hash, i, tmp, desc_node, node) {
		hash_del(&desc_node->node);
		kfree(desc_node);
	}
}

/**
 * intel_engine_init_cmd_parser() - set cmd parser related fields for an engine
 * @engine: the engine to initialize
 *
 * Optionally initializes fields related to batch buffer command parsing in the
 * struct intel_engine_cs based on whether the platform requires software
 * command parsing.
 */
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
{
	const struct drm_i915_cmd_table *cmd_tables;
	int cmd_table_count;
	int ret;

	if (!IS_GEN7(engine->i915) &&
	    !(IS_GEN9(engine->i915) && engine->id == BCS))
		return;

	switch (engine->id) {
	case RCS:
		if (IS_HASWELL(engine->i915)) {
			cmd_tables = hsw_render_ring_cmd_table;
			cmd_table_count =
				ARRAY_SIZE(hsw_render_ring_cmd_table);
		} else {
			cmd_tables = gen7_render_cmd_table;
			cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
		}

		if (IS_HASWELL(engine->i915)) {
			engine->reg_tables = hsw_render_reg_tables;
			engine->reg_table_count = ARRAY_SIZE(hsw_render_reg_tables);
		} else {
			engine->reg_tables = ivb_render_reg_tables;
			engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables);
		}
		engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
		break;
	case VCS:
		cmd_tables = gen7_video_cmd_table;
		cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
		engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
		break;
	case BCS:
		engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
		if (IS_GEN9(engine->i915)) {
			cmd_tables = gen9_blt_cmd_table;
			cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
			engine->get_cmd_length_mask =
				gen9_blt_get_cmd_length_mask;

			/* BCS Engine unsafe without parser */
			engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER;
		} else if (IS_HASWELL(engine->i915)) {
			cmd_tables = hsw_blt_ring_cmd_table;
			cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
		} else {
			cmd_tables = gen7_blt_cmd_table;
			cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
		}

		if (IS_GEN9(engine->i915)) {
			engine->reg_tables = gen9_blt_reg_tables;
			engine->reg_table_count =
				ARRAY_SIZE(gen9_blt_reg_tables);
		} else if (IS_HASWELL(engine->i915)) {
			engine->reg_tables = hsw_blt_reg_tables;
			engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables);
		} else {
			engine->reg_tables = ivb_blt_reg_tables;
			engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables);
		}
		break;
	case VECS:
		cmd_tables = hsw_vebox_cmd_table;
		cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
		/* VECS can use the same length_mask function as VCS */
		engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
		break;
	default:
		MISSING_CASE(engine->id);
		return;
	}

	if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
		DRM_ERROR("%s: command descriptions are not sorted\n",
			  engine->name);
		return;
	}
	if (!validate_regs_sorted(engine)) {
		DRM_ERROR("%s: registers are not sorted\n", engine->name);
		return;
	}

	ret = init_hash_table(engine, cmd_tables, cmd_table_count);
	if (ret) {
		DRM_ERROR("%s: initialisation failed!\n", engine->name);
		fini_hash_table(engine);
		return;
	}

	engine->flags |= I915_ENGINE_USING_CMD_PARSER;
}

/**
 * intel_engine_cleanup_cmd_parser() - clean up cmd parser related fields
 * @engine: the engine to clean up
 *
 * Releases any resources related to command parsing that may have been
 * initialized for the specified engine.
 */
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
{
	if (!intel_engine_using_cmd_parser(engine))
		return;

	fini_hash_table(engine);
}

static const struct drm_i915_cmd_descriptor*
find_cmd_in_table(struct intel_engine_cs *engine,
		  u32 cmd_header)
{
	struct cmd_node *desc_node;

	hash_for_each_possible(engine->cmd_hash, desc_node, node,
			       cmd_header_key(cmd_header)) {
		const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
		if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
			return desc;
	}

	return NULL;
}

/*
 * Returns a pointer to a descriptor for the command specified by cmd_header.
 *
 * The caller must supply space for a default descriptor via the default_desc
 * parameter. If no descriptor for the specified command exists in the engine's
 * command parser tables, this function fills in default_desc based on the
 * engine's default length encoding and returns default_desc.
 */
static const struct drm_i915_cmd_descriptor*
find_cmd(struct intel_engine_cs *engine,
	 u32 cmd_header,
	 const struct drm_i915_cmd_descriptor *desc,
	 struct drm_i915_cmd_descriptor *default_desc)
{
	u32 mask;

	if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
		return desc;

	desc = find_cmd_in_table(engine, cmd_header);
	if (desc)
		return desc;

	mask = engine->get_cmd_length_mask(cmd_header);
	if (!mask)
		return NULL;

	default_desc->cmd.value = cmd_header;
	default_desc->cmd.mask = ~0u << MIN_OPCODE_SHIFT;
	default_desc->length.mask = mask;
	default_desc->flags = CMD_DESC_SKIP;
	return default_desc;
}

static const struct drm_i915_reg_descriptor *
__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
{
	int start = 0, end = count;

	while (start < end) {
		int mid = start + (end - start) / 2;
		int ret = addr - i915_mmio_reg_offset(table[mid].addr);

		if (ret < 0)
			end = mid;
		else if (ret > 0)
			start = mid + 1;
		else
			return &table[mid];
	}

	return NULL;
}

static const struct drm_i915_reg_descriptor *
find_reg(const struct intel_engine_cs *engine, u32 addr)
{
	const struct drm_i915_reg_table *table = engine->reg_tables;
	const struct drm_i915_reg_descriptor *reg = NULL;
	int count = engine->reg_table_count;

	for (; !reg && (count > 0); ++table, --count)
		reg = __find_reg(table->regs, table->num_regs, addr);

	return reg;
}

/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
		       struct drm_i915_gem_object *src_obj,
		       u32 batch_start_offset,
		       u32 batch_len,
		       bool *needs_clflush_after)
{
	unsigned int src_needs_clflush;
	unsigned int dst_needs_clflush;
	void *dst, *src;
	int ret;

	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
	if (ret)
		return ERR_PTR(ret);

	ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
	if (ret) {
		dst = ERR_PTR(ret);
		goto unpin_src;
	}

	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
	if (IS_ERR(dst))
		goto unpin_dst;

	src = ERR_PTR(-ENODEV);
	if (src_needs_clflush &&
	    i915_can_memcpy_from_wc(NULL, batch_start_offset, 0)) {
		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
		if (!IS_ERR(src)) {
			i915_memcpy_from_wc(dst,
					    src + batch_start_offset,
					    roundup2(batch_len, 16));
			i915_gem_object_unpin_map(src_obj);
		}
	}
	if (IS_ERR(src)) {
		void *ptr;
		int offset, n;

		offset = offset_in_page(batch_start_offset);

		/*
		 * We can avoid clflushing partial cachelines before the write
		 * if we only ever write full cache-lines. Since we know that
		 * both the source and destination are in multiples of
		 * PAGE_SIZE, we can simply round up to the next cacheline.
		 * We don't care about copying too much here as we only
		 * validate up to the end of the batch.
		 */
		if (dst_needs_clflush & CLFLUSH_BEFORE)
			batch_len = roundup(batch_len,
					    curcpu()->ci_cflushsz);

		ptr = dst;
		for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
			int len = min_t(int, batch_len, PAGE_SIZE - offset);

			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
			if (src_needs_clflush)
				drm_clflush_virt_range(src + offset, len);
			memcpy(ptr, src + offset, len);
			kunmap_atomic(src);

			ptr += len;
			batch_len -= len;
			offset = 0;
		}
	}

	/* dst_obj is returned with vmap pinned */
	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;

unpin_dst:
	i915_gem_obj_finish_shmem_access(dst_obj);
unpin_src:
	i915_gem_obj_finish_shmem_access(src_obj);
	return dst;
}

static bool check_cmd(const struct intel_engine_cs *engine,
		      const struct drm_i915_cmd_descriptor *desc,
		      const u32 *cmd, u32 length)
{
	if (desc->flags & CMD_DESC_SKIP)
		return true;

	if (desc->flags & CMD_DESC_REJECT) {
		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
		return false;
	}

	if (desc->flags & CMD_DESC_REGISTER) {
		/*
		 * Get the distance between individual register offset
		 * fields if the command can perform more than one
		 * access at a time.
		 */
		const u32 step = desc->reg.step ? desc->reg.step : length;
		u32 offset;

		for (offset = desc->reg.offset; offset < length;
		     offset += step) {
			const u32 reg_addr = cmd[offset] & desc->reg.mask;
			const struct drm_i915_reg_descriptor *reg =
				find_reg(engine, reg_addr);

			if (!reg) {
				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
						 reg_addr, *cmd, engine->name);
				return false;
			}

			/*
			 * Check the value written to the register against the
			 * allowed mask/value pair given in the whitelist entry.
			 */
			if (reg->mask) {
				if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
					DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n",
							 reg_addr);
					return false;
				}

				if (desc->cmd.value == MI_LOAD_REGISTER_REG) {
					DRM_DEBUG_DRIVER("CMD: Rejected LRR to masked register 0x%08X\n",
							 reg_addr);
					return false;
				}

				if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
				    (offset + 2 > length ||
				     (cmd[offset + 1] & reg->mask) != reg->value)) {
					DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
							 reg_addr);
					return false;
				}
			}
		}
	}

	if (desc->flags & CMD_DESC_BITMASK) {
		int i;

		for (i = 0; i < MAX_CMD_DESC_BITMASKS; i++) {
			u32 dword;

			if (desc->bits[i].mask == 0)
				break;

			if (desc->bits[i].condition_mask != 0) {
				u32 offset =
					desc->bits[i].condition_offset;
				u32 condition = cmd[offset] &
					desc->bits[i].condition_mask;

				if (condition == 0)
					continue;
			}

			if (desc->bits[i].offset >= length) {
				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n",
						 *cmd, engine->name);
				return false;
			}

			dword = cmd[desc->bits[i].offset] &
				desc->bits[i].mask;

			if (dword != desc->bits[i].expected) {
				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
						 *cmd,
						 desc->bits[i].mask,
						 desc->bits[i].expected,
						 dword, engine->name);
				return false;
			}
		}
	}

	return true;
}
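
/*
 * A worked example of the bitmask checks above, using the MI_FLUSH_DW entry
 * from the gen7 tables: one check rejects any use of MI_FLUSH_DW_NOTIFY; a
 * second, gated by condition_mask = MI_FLUSH_DW_OP_MASK, fires only when the
 * command requests a post-sync write, and then insists the address dword has
 * MI_FLUSH_DW_USE_GTT clear - i.e. the write must target PPGTT rather than
 * privileged GGTT memory.
 */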

static int check_bbstart(const struct i915_gem_context *ctx,
			 u32 *cmd, u32 offset, u32 length,
			 u32 batch_len,
			 u64 batch_start,
			 u64 shadow_batch_start)
{
	u64 jump_offset, jump_target;
	u32 target_cmd_offset, target_cmd_index;

	/* For igt compatibility on older platforms */
	if (CMDPARSER_USES_GGTT(ctx->i915)) {
		DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
		return -EACCES;
	}

	if (length != 3) {
		DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
			  length);
		return -EINVAL;
	}

	jump_target = *(u64 *)(cmd + 1);
	jump_offset = jump_target - batch_start;

	/*
	 * Any underflow of jump_target is guaranteed to be outside the range
	 * of a u32, so >= test catches both too large and too small
	 */
	if (jump_offset >= batch_len) {
		DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
			  jump_target);
		return -EINVAL;
	}

	/*
	 * This cannot overflow a u32 because we already checked jump_offset
	 * is within the BB, and the batch_len is a u32
	 */
	target_cmd_offset = lower_32_bits(jump_offset);
	target_cmd_index = target_cmd_offset / sizeof(u32);

	*(u64 *)(cmd + 1) = shadow_batch_start + target_cmd_offset;

	if (target_cmd_index == offset)
		return 0;

	if (ctx->jump_whitelist_cmds <= target_cmd_index) {
		DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
		return -EINVAL;
	} else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
		DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
			  jump_target);
		return -EINVAL;
	}

	return 0;
}
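
/*
 * The jump whitelist below backs the check_bbstart() logic: while parsing,
 * the dword offset of every command that passes validation is marked in a
 * per-context bitmap (one bit per dword of the batch), and a second-level
 * BB_START may only land on a marked offset. As a sketch:
 *
 *	set_bit(offset, ctx->jump_whitelist);		as each cmd passes
 *	...
 *	test_bit(target_cmd_index, ctx->jump_whitelist);  on a later BB_START
 *
 * so a jump into the middle of a previously validated instruction (which
 * could re-interpret operand dwords as fresh command headers) is rejected.
 */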

static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
{
	const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
	const u32 exact_size = BITS_TO_LONGS(batch_cmds);
	u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
	unsigned long *next_whitelist;

	if (CMDPARSER_USES_GGTT(ctx->i915))
		return;

	if (batch_cmds <= ctx->jump_whitelist_cmds) {
		bitmap_zero(ctx->jump_whitelist, batch_cmds);
		return;
	}

again:
	next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
	if (next_whitelist) {
		kfree(ctx->jump_whitelist);
		ctx->jump_whitelist = next_whitelist;
		ctx->jump_whitelist_cmds =
			next_size * BITS_PER_BYTE * sizeof(long);
		return;
	}

	if (next_size > exact_size) {
		next_size = exact_size;
		goto again;
	}

	DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
	bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);

	return;
}

#define LENGTH_BIAS 2

/**
 * intel_engine_cmd_parser() - parse a submitted batch buffer for privilege violations
 * @ctx: the context in which the batch is to execute
 * @engine: the engine on which the batch is to execute
 * @batch_obj: the batch buffer in question
 * @batch_start: Canonical base address of batch
 * @batch_start_offset: byte offset in the batch at which execution starts
 * @batch_len: length of the commands in batch_obj
 * @shadow_batch_obj: copy of the batch buffer in question
 * @shadow_batch_start: Canonical base address of shadow_batch_obj
 *
 * Parses the specified batch buffer looking for privilege violations as
 * described in the overview.
 *
 * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
 * if the batch appears legal but should use hardware parsing
 */
int intel_engine_cmd_parser(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    struct drm_i915_gem_object *batch_obj,
			    u64 batch_start,
			    u32 batch_start_offset,
			    u32 batch_len,
			    struct drm_i915_gem_object *shadow_batch_obj,
			    u64 shadow_batch_start)
{
	u32 *cmd, *batch_end, offset = 0;
	struct drm_i915_cmd_descriptor default_desc = noop_desc;
	const struct drm_i915_cmd_descriptor *desc = &default_desc;
	bool needs_clflush_after = false;
	int ret = 0;

	cmd = copy_batch(shadow_batch_obj, batch_obj,
			 batch_start_offset, batch_len,
			 &needs_clflush_after);
	if (IS_ERR(cmd)) {
		DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
		return PTR_ERR(cmd);
	}

	init_whitelist(ctx, batch_len);

	/*
	 * We use the batch length as size because the shadow object is as
	 * large or larger and copy_batch() will write MI_NOPs to the extra
	 * space. Parsing should be faster in some cases this way.
	 */
	batch_end = cmd + (batch_len / sizeof(*batch_end));
	do {
		u32 length;

		if (*cmd == MI_BATCH_BUFFER_END)
			break;

		desc = find_cmd(engine, *cmd, desc, &default_desc);
		if (!desc) {
			DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
					 *cmd);
			ret = -EINVAL;
			goto err;
		}

		if (desc->flags & CMD_DESC_FIXED)
			length = desc->length.fixed;
		else
			length = ((*cmd & desc->length.mask) + LENGTH_BIAS);

		if ((batch_end - cmd) < length) {
			DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
					 *cmd,
					 length,
					 batch_end - cmd);
			ret = -EINVAL;
			goto err;
		}

		if (!check_cmd(engine, desc, cmd, length)) {
			ret = -EACCES;
			goto err;
		}

		if (desc->cmd.value == MI_BATCH_BUFFER_START) {
			ret = check_bbstart(ctx, cmd, offset, length,
					    batch_len, batch_start,
					    shadow_batch_start);

			if (ret)
				goto err;
			break;
		}

		if (ctx->jump_whitelist_cmds > offset)
			set_bit(offset, ctx->jump_whitelist);

		cmd += length;
		offset += length;
		if (cmd >= batch_end) {
			DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
			ret = -EINVAL;
			goto err;
		}
	} while (1);

	if (needs_clflush_after) {
		void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);

		drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
	}

err:
	i915_gem_object_unpin_map(shadow_batch_obj);
	return ret;
}

/**
 * i915_cmd_parser_get_version() - get the cmd parser version number
 * @dev_priv: i915 device private
 *
 * The cmd parser maintains a simple increasing integer version number suitable
 * for passing to userspace clients to determine what operations are permitted.
 *
 * Return: the current version number of the cmd parser
 */
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool active = false;

	/* If the command parser is not enabled, report 0 - unsupported */
	for_each_engine(engine, dev_priv, id) {
		if (intel_engine_using_cmd_parser(engine)) {
			active = true;
			break;
		}
	}
	if (!active)
		return 0;

	/*
	 * Command parser version history
	 *
	 * 1. Initial version. Checks batches and reports violations, but leaves
	 *    hardware parsing enabled (so does not allow new use cases).
	 * 2. Allow access to the MI_PREDICATE_SRC0 and
	 *    MI_PREDICATE_SRC1 registers.
	 * 3. Allow access to the GPGPU_THREADS_DISPATCHED register.
	 * 4. L3 atomic chicken bits of HSW_SCRATCH1 and HSW_ROW_CHICKEN3.
	 * 5. GPGPU dispatch compute indirect registers.
	 * 6. TIMESTAMP register and Haswell CS GPR registers
	 * 7. Allow MI_LOAD_REGISTER_REG between whitelisted registers.
	 * 8. Don't report cmd_check() failures as EINVAL errors to userspace;
	 *    rely on the HW to NOOP disallowed commands as it would without
	 *    the parser enabled.
	 * 9. Don't whitelist or handle oacontrol specially, as ownership
	 *    for oacontrol state is moving to i915-perf.
	 * 10. Support for Gen9 BCS Parsing
	 */
	return 10;
}