/*	$NetBSD: altivec_subr.S,v 1.3 2020/07/06 09:34:17 rin Exp $	*/
/*-
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

RCSID("$NetBSD: altivec_subr.S,v 1.3 2020/07/06 09:34:17 rin Exp $")

#ifdef _KERNEL_OPT
#include "opt_altivec.h"
#endif

#ifdef ALTIVEC
/*
 * LINTSTUB: void vec_load_from_vreg(const struct vreg *vreg);
 */
ENTRY(vec_load_from_vreg)
	/*
	 * Restore VSCR by first loading it into a vector register and then
	 * into VSCR.  (This needs to be done before loading the user's
	 * vector registers since we need to use a scratch vector register.)
	 */
	vxor %v0,%v0,%v0
	li %r4,VREG_VSCR; lvewx %v0,%r3,%r4
	mtvscr %v0

	/*
	 * Now load the vector registers.  We do it this way so that, on a
	 * superscalar CPU, we can get some concurrency.
	 */
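	/*
	 * Each VREG_Vn constant is the byte offset of the saved copy of
	 * %vN within struct vreg.  lvx forms its address as %r3 plus the
	 * offset materialized by the preceding li; rotating through
	 * %r4-%r7 keeps the li/lvx pairs independent of one another so a
	 * superscalar implementation can overlap them.
	 */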
	li %r4,VREG_V0; lvx %v0,%r3,%r4
	li %r5,VREG_V1; lvx %v1,%r3,%r5
	li %r6,VREG_V2; lvx %v2,%r3,%r6
	li %r7,VREG_V3; lvx %v3,%r3,%r7

	li %r4,VREG_V4; lvx %v4,%r3,%r4
	li %r5,VREG_V5; lvx %v5,%r3,%r5
	li %r6,VREG_V6; lvx %v6,%r3,%r6
	li %r7,VREG_V7; lvx %v7,%r3,%r7

	li %r4,VREG_V8; lvx %v8,%r3,%r4
	li %r5,VREG_V9; lvx %v9,%r3,%r5
	li %r6,VREG_V10; lvx %v10,%r3,%r6
	li %r7,VREG_V11; lvx %v11,%r3,%r7

	li %r4,VREG_V12; lvx %v12,%r3,%r4
	li %r5,VREG_V13; lvx %v13,%r3,%r5
	li %r6,VREG_V14; lvx %v14,%r3,%r6
	li %r7,VREG_V15; lvx %v15,%r3,%r7

	li %r4,VREG_V16; lvx %v16,%r3,%r4
	li %r5,VREG_V17; lvx %v17,%r3,%r5
	li %r6,VREG_V18; lvx %v18,%r3,%r6
	li %r7,VREG_V19; lvx %v19,%r3,%r7

	li %r4,VREG_V20; lvx %v20,%r3,%r4
	li %r5,VREG_V21; lvx %v21,%r3,%r5
	li %r6,VREG_V22; lvx %v22,%r3,%r6
	li %r7,VREG_V23; lvx %v23,%r3,%r7

	li %r4,VREG_V24; lvx %v24,%r3,%r4
	li %r5,VREG_V25; lvx %v25,%r3,%r5
	li %r6,VREG_V26; lvx %v26,%r3,%r6
	li %r7,VREG_V27; lvx %v27,%r3,%r7

	li %r4,VREG_V28; lvx %v28,%r3,%r4
	li %r5,VREG_V29; lvx %v29,%r3,%r5
	li %r6,VREG_V30; lvx %v30,%r3,%r6
	li %r7,VREG_V31; lvx %v31,%r3,%r7

	isync
	blr
END(vec_load_from_vreg)

/*
 * LINTSTUB: void vec_unload_to_vreg(struct vreg *vreg);
 */
ENTRY(vec_unload_to_vreg)
	/*
	 * Store the vector registers.  We do it this way so that, on a
	 * superscalar CPU, we can get some concurrency.
	 */
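	/*
	 * Same pattern as the load path above, but with stvx.  VSCR is
	 * saved afterwards (below), using %v31 as a scratch register;
	 * %v31 is then reloaded from the area just written via the
	 * VREG_V31 offset still held in %r7.
	 */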
	li %r4,VREG_V0; stvx %v0,%r3,%r4
	li %r5,VREG_V1; stvx %v1,%r3,%r5
	li %r6,VREG_V2; stvx %v2,%r3,%r6
	li %r7,VREG_V3; stvx %v3,%r3,%r7

	li %r4,VREG_V4; stvx %v4,%r3,%r4
	li %r5,VREG_V5; stvx %v5,%r3,%r5
	li %r6,VREG_V6; stvx %v6,%r3,%r6
	li %r7,VREG_V7; stvx %v7,%r3,%r7

	li %r4,VREG_V8; stvx %v8,%r3,%r4
	li %r5,VREG_V9; stvx %v9,%r3,%r5
	li %r6,VREG_V10; stvx %v10,%r3,%r6
	li %r7,VREG_V11; stvx %v11,%r3,%r7

	li %r4,VREG_V12; stvx %v12,%r3,%r4
	li %r5,VREG_V13; stvx %v13,%r3,%r5
	li %r6,VREG_V14; stvx %v14,%r3,%r6
	li %r7,VREG_V15; stvx %v15,%r3,%r7

	li %r4,VREG_V16; stvx %v16,%r3,%r4
	li %r5,VREG_V17; stvx %v17,%r3,%r5
	li %r6,VREG_V18; stvx %v18,%r3,%r6
	li %r7,VREG_V19; stvx %v19,%r3,%r7

	li %r4,VREG_V20; stvx %v20,%r3,%r4
	li %r5,VREG_V21; stvx %v21,%r3,%r5
	li %r6,VREG_V22; stvx %v22,%r3,%r6
	li %r7,VREG_V23; stvx %v23,%r3,%r7

	li %r4,VREG_V24; stvx %v24,%r3,%r4
	li %r5,VREG_V25; stvx %v25,%r3,%r5
	li %r6,VREG_V26; stvx %v26,%r3,%r6
	li %r7,VREG_V27; stvx %v27,%r3,%r7

	li %r4,VREG_V28; stvx %v28,%r3,%r4
	li %r5,VREG_V29; stvx %v29,%r3,%r5
	li %r6,VREG_V30; stvx %v30,%r3,%r6
	li %r7,VREG_V31; stvx %v31,%r3,%r7

	/*
	 * Save VSCR, but remember to restore the vector register that was
	 * used to save it.
	 */
	mfvscr %v31
	li %r4,VREG_VSCR; stvewx %v31,%r3,%r4	/* low word only */

	lvx %v31,%r3,%r7			/* restore v31 */

	isync
	blr
END(vec_unload_to_vreg)
#endif /* ALTIVEC */