/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
24*1e12ee3bSFrançois Tigeot
25*1e12ee3bSFrançois Tigeot #include <linux/kernel.h>
26*1e12ee3bSFrançois Tigeot #include <asm/fpu/api.h>
27*1e12ee3bSFrançois Tigeot
28*1e12ee3bSFrançois Tigeot #include "i915_drv.h"
29*1e12ee3bSFrançois Tigeot
/*
 * Static key selecting the MOVNTDQA copy path.  It starts out false and is
 * switched on by i915_memcpy_init_early() when the CPU reports
 * X86_FEATURE_XMM4_1.
 */
static DEFINE_STATIC_KEY_FALSE(has_movntdqa);

#ifdef CONFIG_AS_MOVNTDQA
/*
 * __memcpy_ntdqa - copy from @src to @dst using MOVNTDQA loads
 * @dst: destination pointer, stored to with MOVAPS
 * @src: source pointer, loaded from with MOVNTDQA
 * @len: number of bytes to copy; only whole 16-byte chunks are transferred
 *
 * Chunks are moved four at a time (64 bytes, xmm0-xmm3) for as long as at
 * least four remain, and the remainder is moved one chunk at a time through
 * xmm0.  Because the XMM registers are used directly, the whole copy runs
 * between kernel_fpu_begin() and kernel_fpu_end().
 *
 * No alignment checking is done here; i915_memcpy_from_wc() validates the
 * pointers and length before calling in.
 */
static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
{
	/* Work in units of one XMM register (16 bytes). */
	unsigned long chunks = len >> 4;

	kernel_fpu_begin();

	/* Bulk phase: 64 bytes per iteration. */
	for (; chunks >= 4; chunks -= 4) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movntdqa 16(%0), %%xmm1\n"
		    "movntdqa 32(%0), %%xmm2\n"
		    "movntdqa 48(%0), %%xmm3\n"
		    "movaps %%xmm0, (%1)\n"
		    "movaps %%xmm1, 16(%1)\n"
		    "movaps %%xmm2, 32(%1)\n"
		    "movaps %%xmm3, 48(%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 64;
		dst += 64;
	}

	/* Tail phase: at most three leftover 16-byte chunks. */
	for (; chunks; chunks--) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movaps %%xmm0, (%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 16;
		dst += 16;
	}

	kernel_fpu_end();
}
#endif
63*1e12ee3bSFrançois Tigeot
/**
 * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
 * @dst: destination pointer
 * @src: source pointer
 * @len: how many bytes to copy
 *
 * Copies @len bytes from @src to @dst with non-temporal loads when the
 * accelerated path has been enabled.  Both @src and @dst must be aligned
 * to 16 bytes and @len must be a multiple of 16; otherwise nothing is
 * copied.
 *
 * To test whether accelerated reads from WC are supported without copying
 * anything, use i915_memcpy_from_wc(NULL, NULL, 0);
 *
 * Returns true if the accelerated path is available and the copy (if any)
 * was carried out.  Returns false, leaving @dst untouched, if the alignment
 * preconditions are not met or the accelerated path is unavailable; the
 * caller then has to copy by other means.
 */
bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	/* Any of the low four bits set in either pointer or in the length. */
	const unsigned long misaligned =
		((unsigned long)dst | (unsigned long)src | len) & 15;

	if (unlikely(misaligned))
		return false;

#ifdef CONFIG_AS_MOVNTDQA
	if (!static_branch_likely(&has_movntdqa))
		return false;

	/* A zero-length call is only a capability probe; skip the copy. */
	if (likely(len))
		__memcpy_ntdqa(dst, src, len);

	return true;
#else
	return false;
#endif
}
96*1e12ee3bSFrançois Tigeot
i915_memcpy_init_early(struct drm_i915_private * dev_priv)97*1e12ee3bSFrançois Tigeot void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
98*1e12ee3bSFrançois Tigeot {
99*1e12ee3bSFrançois Tigeot if (static_cpu_has(X86_FEATURE_XMM4_1))
100*1e12ee3bSFrançois Tigeot static_branch_enable(&has_movntdqa);
101*1e12ee3bSFrançois Tigeot }
102