/* /openbsd-src/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c (revision 7350f337b9e3eb4461d99580e625c7ef148d107c) */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"

/*
 * GPU scratch register helper functions.
 */
/**
 * amdgpu_gfx_scratch_get - Allocate a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Allocate a CP scratch register for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
{
	int i;

	i = ffs(adev->gfx.scratch.free_mask);
	if (i != 0 && i <= adev->gfx.scratch.num_reg) {
		i--;
		adev->gfx.scratch.free_mask &= ~(1u << i);
		*reg = adev->gfx.scratch.reg_base + i;
		return 0;
	}
	return -EINVAL;
}

/**
 * amdgpu_gfx_scratch_free - Free a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Free a CP scratch register allocated for use by the driver (all asics).
 */
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
{
	adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
}
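
/*
 * Usage sketch (illustrative only; the real callers live in the per-ASIC
 * gfx code, e.g. the ring tests): a scratch register is borrowed, written
 * and returned roughly like this:
 *
 *	uint32_t scratch;
 *	int r = amdgpu_gfx_scratch_get(adev, &scratch);
 *	if (r)
 *		return r;
 *	WREG32(scratch, 0xCAFEDEAD);
 *	...
 *	amdgpu_gfx_scratch_free(adev, scratch);
 */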

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
	unsigned se, sh, cu;
	const char *p;

	memset(mask, 0, sizeof(*mask) * max_se * max_sh);

	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
		return;

#ifdef notyet
	p = amdgpu_disable_cu;
	for (;;) {
		char *next;
		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
		if (ret < 3) {
			DRM_ERROR("amdgpu: could not parse disable_cu\n");
			return;
		}

		if (se < max_se && sh < max_sh && cu < 16) {
			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
			mask[se * max_sh + sh] |= 1u << cu;
		} else {
			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
				  se, sh, cu);
		}

		next = strchr(p, ',');
		if (!next)
			break;
		p = next + 1;
	}
#endif
}
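
/*
 * Illustration (the parser above is compiled out under "notyet" in this
 * port): the accepted format is a comma-separated list of se.sh.cu triples,
 * so a hypothetical setting such as
 *
 *	disable_cu=0.0.3,1.0.7
 *
 * would set bit 3 in mask[0 * max_sh + 0] and bit 7 in mask[1 * max_sh + 0],
 * disabling CU 3 of SE0/SH0 and CU 7 of SE1/SH0.
 */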

static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
{
	if (amdgpu_compute_multipipe != -1) {
		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
			 amdgpu_compute_multipipe);
		return amdgpu_compute_multipipe == 1;
	}

	/* FIXME: spreading the queues across pipes causes perf regressions
	 * on POLARIS11 compute workloads */
	if (adev->asic_type == CHIP_POLARIS11)
		return false;

	return adev->gfx.mec.num_mec > 1;
}

void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe, mec;
	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);

	/* policy for amdgpu compute queue ownership */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		queue = i % adev->gfx.mec.num_queue_per_pipe;
		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
			% adev->gfx.mec.num_pipe_per_mec;
		mec = (i / adev->gfx.mec.num_queue_per_pipe)
			/ adev->gfx.mec.num_pipe_per_mec;

		/* we've run out of HW */
		if (mec >= adev->gfx.mec.num_mec)
			break;

		if (multipipe_policy) {
			/* policy: amdgpu owns the first two queues of the first MEC */
			if (mec == 0 && queue < 2)
				set_bit(i, adev->gfx.mec.queue_bitmap);
		} else {
			/* policy: amdgpu owns all queues in the first pipe */
			if (mec == 0 && pipe == 0)
				set_bit(i, adev->gfx.mec.queue_bitmap);
		}
	}

	/* update the number of active compute rings */
	adev->gfx.num_compute_rings =
		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* If you hit this case and edited the policy, you probably just
	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
}
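
/*
 * Worked example (assumed configuration, for illustration): with
 * num_queue_per_pipe = 8 and num_pipe_per_mec = 4, the linear index i = 13
 * decomposes as queue = 13 % 8 = 5, pipe = (13 / 8) % 4 = 1 and
 * mec = (13 / 8) / 4 = 0, i.e. MEC0/pipe1/queue5.  Under the multipipe
 * policy only bits with mec == 0 && queue < 2 are set, so bit 13 stays
 * clear; under the single-pipe policy only MEC0/pipe0 (bits 0-7 in this
 * layout) is taken.
 */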

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int queue_bit;
	int mec, pipe, queue;

	queue_bit = adev->gfx.mec.num_mec
		    * adev->gfx.mec.num_pipe_per_mec
		    * adev->gfx.mec.num_queue_per_pipe;

	/* walk from the highest queue bit down to 0 */
	while (--queue_bit >= 0) {
		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
			continue;

		amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);

		/*
		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
		 *    can only be issued on queue 0.
		 */
		if ((mec == 1 && pipe > 1) || queue != 0)
			continue;

		ring->me = mec + 1;
		ring->pipe = pipe;
		ring->queue = queue;

		return 0;
	}

	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	return -EINVAL;
}
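
/*
 * For example (assumed configuration, for illustration): with 2 MECs,
 * 4 pipes per MEC and 8 queues per pipe the loop scans bits 63..0.  A bit
 * is eligible only if it is not already claimed in queue_bitmap, maps to
 * queue 0 of its pipe, and does not land on pipe 2/3 of the second MEC
 * (mec == 1); the highest eligible bit found wins, and ring->me is stored
 * 1-based as mec + 1.
 */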

int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
			     struct amdgpu_ring *ring,
			     struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	mtx_init(&kiq->ring_lock, IPL_TTY);

	r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;

	r = amdgpu_gfx_kiq_acquire(adev, ring);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
			      struct amdgpu_irq_src *irq)
{
	amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
			unsigned hpd_size)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, hpd_size);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/* create MQD for each compute queue */
int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
				   unsigned mqd_size)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		/* Originally the KIQ MQD was placed in the GTT domain, but for
		 * SRIOV the VRAM domain is a must; otherwise the hypervisor's
		 * SAVE_VF fails after the driver has been unloaded, because by
		 * then the MQD has been deallocated and gart_unbind has run.
		 * To avoid that divergence we use the VRAM domain for the KIQ
		 * MQD on both SRIOV and bare-metal.
		 */
		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}
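
/*
 * Note on the backup slots: compute ring i uses mec.mqd_backup[i], while
 * the KIQ uses the extra slot at index AMDGPU_MAX_COMPUTE_RINGS.  A minimal
 * restore sketch (illustrative only; the real users live in the per-ASIC
 * gfx code) would look like:
 *
 *	if (adev->gfx.mec.mqd_backup[i])
 *		memcpy(ring->mqd_ptr, adev->gfx.mec.mqd_backup[i], mqd_size);
 */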

void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}