/*	$NetBSD: altivec_subr.S,v 1.3 2020/07/06 09:34:17 rin Exp $	*/
/*-
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

RCSID("$NetBSD: altivec_subr.S,v 1.3 2020/07/06 09:34:17 rin Exp $")

#ifdef _KERNEL_OPT
#include "opt_altivec.h"
#endif

#ifdef ALTIVEC
/*
 * LINTSTUB: void vec_load_from_vreg(const struct vreg *vreg);
 */
ENTRY(vec_load_from_vreg)
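	/* %r3 = vreg save area (the sole argument, per the ELF ABI) */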
	/*
	 * Restore VSCR by first loading it into a vector register and then
	 * moving it into VSCR.  This needs to be done before loading the
	 * user's vector registers, since we need a scratch vector register.
	 */
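	/*
	 * VSCR can only be written from a vector register (mtvscr), so
	 * zero %v0 with vxor and pull in the saved VSCR word with lvewx.
	 */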
	vxor %v0,%v0,%v0
	li %r4,VREG_VSCR; lvewx %v0,%r3,%r4
	mtvscr %v0

	/*
	 * Now load the vector registers.  Each load in a group uses its own
	 * index register so that a superscalar CPU can get some concurrency.
	 */
	li %r4,VREG_V0; lvx %v0,%r3,%r4
	li %r5,VREG_V1; lvx %v1,%r3,%r5
	li %r6,VREG_V2; lvx %v2,%r3,%r6
	li %r7,VREG_V3; lvx %v3,%r3,%r7

	li %r4,VREG_V4; lvx %v4,%r3,%r4
	li %r5,VREG_V5; lvx %v5,%r3,%r5
	li %r6,VREG_V6; lvx %v6,%r3,%r6
	li %r7,VREG_V7; lvx %v7,%r3,%r7

	li %r4,VREG_V8; lvx %v8,%r3,%r4
	li %r5,VREG_V9; lvx %v9,%r3,%r5
	li %r6,VREG_V10; lvx %v10,%r3,%r6
	li %r7,VREG_V11; lvx %v11,%r3,%r7

	li %r4,VREG_V12; lvx %v12,%r3,%r4
	li %r5,VREG_V13; lvx %v13,%r3,%r5
	li %r6,VREG_V14; lvx %v14,%r3,%r6
	li %r7,VREG_V15; lvx %v15,%r3,%r7

	li %r4,VREG_V16; lvx %v16,%r3,%r4
	li %r5,VREG_V17; lvx %v17,%r3,%r5
	li %r6,VREG_V18; lvx %v18,%r3,%r6
	li %r7,VREG_V19; lvx %v19,%r3,%r7

	li %r4,VREG_V20; lvx %v20,%r3,%r4
	li %r5,VREG_V21; lvx %v21,%r3,%r5
	li %r6,VREG_V22; lvx %v22,%r3,%r6
	li %r7,VREG_V23; lvx %v23,%r3,%r7

	li %r4,VREG_V24; lvx %v24,%r3,%r4
	li %r5,VREG_V25; lvx %v25,%r3,%r5
	li %r6,VREG_V26; lvx %v26,%r3,%r6
	li %r7,VREG_V27; lvx %v27,%r3,%r7

	li %r4,VREG_V28; lvx %v28,%r3,%r4
	li %r5,VREG_V29; lvx %v29,%r3,%r5
	li %r6,VREG_V30; lvx %v30,%r3,%r6
	li %r7,VREG_V31; lvx %v31,%r3,%r7

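	/*
	 * isync is context-synchronizing: the loads above and the mtvscr
	 * complete before execution continues in the caller.
	 */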
	isync
	blr
END(vec_load_from_vreg)

/*
 * LINTSTUB: void vec_unload_to_vreg(struct vreg *vreg);
 */
ENTRY(vec_unload_to_vreg)
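	/* %r3 = vreg save area to fill (the sole argument) */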
	/*
	 * Store the vector registers.  Each store in a group uses its own
	 * index register so that a superscalar CPU can get some concurrency.
	 */
	li %r4,VREG_V0; stvx %v0,%r3,%r4
	li %r5,VREG_V1; stvx %v1,%r3,%r5
	li %r6,VREG_V2; stvx %v2,%r3,%r6
	li %r7,VREG_V3; stvx %v3,%r3,%r7

	li %r4,VREG_V4; stvx %v4,%r3,%r4
	li %r5,VREG_V5; stvx %v5,%r3,%r5
	li %r6,VREG_V6; stvx %v6,%r3,%r6
	li %r7,VREG_V7; stvx %v7,%r3,%r7

	li %r4,VREG_V8; stvx %v8,%r3,%r4
	li %r5,VREG_V9; stvx %v9,%r3,%r5
	li %r6,VREG_V10; stvx %v10,%r3,%r6
	li %r7,VREG_V11; stvx %v11,%r3,%r7

	li %r4,VREG_V12; stvx %v12,%r3,%r4
	li %r5,VREG_V13; stvx %v13,%r3,%r5
	li %r6,VREG_V14; stvx %v14,%r3,%r6
	li %r7,VREG_V15; stvx %v15,%r3,%r7

	li %r4,VREG_V16; stvx %v16,%r3,%r4
	li %r5,VREG_V17; stvx %v17,%r3,%r5
	li %r6,VREG_V18; stvx %v18,%r3,%r6
	li %r7,VREG_V19; stvx %v19,%r3,%r7

	li %r4,VREG_V20; stvx %v20,%r3,%r4
	li %r5,VREG_V21; stvx %v21,%r3,%r5
	li %r6,VREG_V22; stvx %v22,%r3,%r6
	li %r7,VREG_V23; stvx %v23,%r3,%r7

	li %r4,VREG_V24; stvx %v24,%r3,%r4
	li %r5,VREG_V25; stvx %v25,%r3,%r5
	li %r6,VREG_V26; stvx %v26,%r3,%r6
	li %r7,VREG_V27; stvx %v27,%r3,%r7

	li %r4,VREG_V28; stvx %v28,%r3,%r4
	li %r5,VREG_V29; stvx %v29,%r3,%r5
	li %r6,VREG_V30; stvx %v30,%r3,%r6
	li %r7,VREG_V31; stvx %v31,%r3,%r7

	/*
	 * Save VSCR, remembering to restore the vector register we use to
	 * hold it.
	 */
	mfvscr %v31
	li %r4,VREG_VSCR; stvewx %v31,%r3,%r4	/* low word only */

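	/* %r7 still holds VREG_V31 from the last store group above. */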
	lvx %v31,%r3,%r7	/* restore v31 */

	isync
	blr
END(vec_unload_to_vreg)
#endif /* ALTIVEC */