/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
2991b08277SJerry Yu #ifndef __AARCH64_MULTIBINARY_H__
3091b08277SJerry Yu #define __AARCH64_MULTIBINARY_H__
3191b08277SJerry Yu #ifndef __aarch64__
3291b08277SJerry Yu #error "This file is for aarch64 only"
3391b08277SJerry Yu #endif
3491b08277SJerry Yu #include <asm/hwcap.h>
3591b08277SJerry Yu #ifdef __ASSEMBLY__
/**
 * # mbin_interface : the wrapper layer for isa-l api
 *
 * ## references:
 * * https://sourceware.org/git/gitweb.cgi?p=glibc.git;a=blob;f=sysdeps/aarch64/dl-trampoline.S
 * * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
 * * https://static.docs.arm.com/ihi0057/b/IHI0057B_aadwarf64.pdf?_ga=2.80574487.1870739014.1564969896-1634778941.1548729310
 *
 * ## Usage:
 * 1. Define the dispatcher function
 * 2. Its name must be \name\()_dispatcher
 * 3. Its prototype should be *"void * \name\()_dispatcher(void)"*
 * 4. The dispatcher should return the pointer of the function to run
 *
 * ## Generated symbols:
 * - \name\()_dispatcher_info : 8-byte data slot, initially pointing at
 *   \name\()_mbinit
 * - \name\()_mbinit : first-call resolver. It preserves x0-x9 and q0-q7,
 *   calls the dispatcher, stores the returned pointer into the slot and
 *   then falls through into \name
 * - \name : public entry. It loads the slot through the GOT and branches
 *   to the pointer found there
 **/
.macro mbin_interface name:req
	.extern \name\()_dispatcher
	.section .data
	.balign 8
	.global \name\()_dispatcher_info
	.type   \name\()_dispatcher_info,%object

	\name\()_dispatcher_info:
		.quad   \name\()_mbinit         //func_entry

	.size   \name\()_dispatcher_info,. - \name\()_dispatcher_info

	.balign 8
	.text
	\name\()_mbinit:
		//save lr and fp, allocate a 224 byte frame
		.cfi_startproc
		stp     x29, x30, [sp, -224]!

		//add cfi directive to avoid GDB bt cmds error
		//set cfi(Call Frame Information)
		.cfi_def_cfa_offset     224
		.cfi_offset             29, -224
		.cfi_offset             30, -216

		//save parameter/result/indirect result registers
		//x9 holds the address of \name\()_dispatcher_info (set by \name)
		stp     x8, x9, [sp, 16]
		.cfi_offset             8, -208
		.cfi_offset             9, -200
		stp     x0, x1, [sp, 32]
		.cfi_offset             0, -192
		.cfi_offset             1, -184
		stp     x2, x3, [sp, 48]
		.cfi_offset             2, -176
		.cfi_offset             3, -168
		stp     x4, x5, [sp, 64]
		.cfi_offset             4, -160
		.cfi_offset             5, -152
		stp     x6, x7, [sp, 80]
		.cfi_offset             6, -144
		.cfi_offset             7, -136
		stp     q0, q1, [sp, 96]
		.cfi_offset             64, -128
		.cfi_offset             65, -112
		stp     q2, q3, [sp, 128]
		.cfi_offset             66, -96
		.cfi_offset             67, -80
		stp     q4, q5, [sp, 160]
		.cfi_offset             68, -64
		.cfi_offset             69, -48
		stp     q6, q7, [sp, 192]
		.cfi_offset             70, -32
		.cfi_offset             71, -16

		/**
		 * The dispatcher functions have the following prototype:
		 *      void * function_dispatcher(void)
		 * The selected function pointer comes back in x0 and is
		 * written to \name\()_dispatcher_info below.
		 */


		bl      \name\()_dispatcher
		//restore temp/indirect result registers
		ldp     x8, x9, [sp, 16]
		.cfi_restore            8
		.cfi_restore            9

		// save function entry into \name\()_dispatcher_info
		str     x0, [x9]

		//restore parameter/result registers
		ldp     x0, x1, [sp, 32]
		.cfi_restore            0
		.cfi_restore            1
		ldp     x2, x3, [sp, 48]
		.cfi_restore            2
		.cfi_restore            3
		ldp     x4, x5, [sp, 64]
		.cfi_restore            4
		.cfi_restore            5
		ldp     x6, x7, [sp, 80]
		.cfi_restore            6
		.cfi_restore            7
		ldp     q0, q1, [sp, 96]
		.cfi_restore            64
		.cfi_restore            65
		ldp     q2, q3, [sp, 128]
		.cfi_restore            66
		.cfi_restore            67
		ldp     q4, q5, [sp, 160]
		.cfi_restore            68
		.cfi_restore            69
		ldp     q6, q7, [sp, 192]
		.cfi_restore            70
		.cfi_restore            71
		//restore lr, fp and sp
		ldp     x29, x30, [sp], 224
		//restore cfi setting
		.cfi_restore            30
		.cfi_restore            29
		.cfi_def_cfa_offset     0
		.cfi_endproc

		//no return here: execution falls through into \name, which
		//reloads the freshly stored pointer and branches to it

	.global \name
	.type \name,%function
	.align  2
	\name\():
		adrp    x9, :got:\name\()_dispatcher_info
		ldr     x9, [x9, #:got_lo12:\name\()_dispatcher_info]
		ldr     x10,[x9]
		br      x10
	.size \name,. - \name

.endm
16491b08277SJerry Yu
/**
 * mbin_interface_base is used for the interfaces which have only
 * noarch implementation
 *
 * \name\()_dispatcher_info is initialised directly with the address of
 * \base, so no dispatcher is called; \name simply loads that slot through
 * the GOT and branches to it.
 */
.macro mbin_interface_base name:req, base:req
	.extern \base
	.section .data
	.balign 8
	.global \name\()_dispatcher_info
	.type   \name\()_dispatcher_info,%object

	\name\()_dispatcher_info:
		.quad   \base         //func_entry
	.size   \name\()_dispatcher_info,. - \name\()_dispatcher_info

	.balign 8
	.text
	.global \name
	.type \name,%function
	.align  2
	\name\():
		//x9 = address of the slot, x10 = function pointer stored in it
		adrp    x9, :got:\name\()_dispatcher_info
		ldr     x9, [x9, #:got_lo12:\name\()_dispatcher_info]
		ldr     x10,[x9]
		br      x10
	.size \name,. - \name

.endm
19391b08277SJerry Yu
19491b08277SJerry Yu #else /* __ASSEMBLY__ */
#include <stdint.h>
#include <sys/auxv.h>
19691b08277SJerry Yu
19791b08277SJerry Yu
19891b08277SJerry Yu
/**
 * Emit the signature of the dispatcher for interface @name:
 *     void *name_dispatcher(void)
 * Follow the macro with the function body (or a semicolon for a prototype).
 * The name and prototype match what the mbin_interface assembly macro calls.
 */
#define DEFINE_INTERFACE_DISPATCHER(name)                               \
	void * name##_dispatcher(void)
20191b08277SJerry Yu
/**
 * Provider for the generic implementation of interface @name, i.e. the
 * function called name_base.
 */
#define PROVIDER_BASIC(name)                                            \
	PROVIDER_INFO(name##_base)
20491b08277SJerry Yu
/*
 * Helpers for emitting "#pragma GCC diagnostic ..." from inside macros.
 * The DIGNOSTIC spelling is kept as-is because it is the existing interface.
 */
/* Stringify the argument and hand it to the _Pragma operator. */
#define DO_PRAGMA(x) _Pragma (#x)
/* Ignore a warning given by its full option name, e.g. -Wnested-externs. */
#define DIGNOSTIC_IGNORE(x) DO_PRAGMA(GCC diagnostic ignored #x)
/*
 * Ignore a warning given without the leading -W, e.g. nested-externs.
 * The previous definition expanded to "_Pragma GCC ..." with no
 * parenthesised string literal, which cannot compile.
 */
#define DO_DIGNOSTIC(x) DIGNOSTIC_IGNORE(-W##x)
#define DIGNOSTIC_PUSH() DO_PRAGMA(GCC diagnostic push)
#define DIGNOSTIC_POP() DO_PRAGMA(GCC diagnostic pop)
21091b08277SJerry Yu
21191b08277SJerry Yu
/**
 * Evaluate to the function @_func_entry, declared on the spot.
 *
 * This is a GNU statement expression: it declares
 * "extern void _func_entry(void)" at block scope, with -Wnested-externs
 * silenced around the declaration, and yields the function designator so
 * a dispatcher can return it.
 */
#define PROVIDER_INFO(_func_entry)                                      \
	({                                                              \
		DIGNOSTIC_PUSH()                                        \
		DIGNOSTIC_IGNORE(-Wnested-externs)                      \
		extern void _func_entry(void);                          \
		DIGNOSTIC_POP()                                         \
		_func_entry;                                            \
	})
21991b08277SJerry Yu
/**
 * Micro-architecture definitions
 * Reference: https://developer.arm.com/docs/ddi0595/f/aarch64-system-registers/midr_el1
 */

/* Implementer codes: placed in bits [31:24] by MICRO_ARCH_ID */
#define CPU_IMPLEMENTER_RESERVE	0x00
#define CPU_IMPLEMENTER_ARM	0x41


/* Part numbers: placed in bits [15:4] by MICRO_ARCH_ID */
#define CPU_PART_CORTEX_A57	0xD07
#define CPU_PART_CORTEX_A72	0xD08
#define CPU_PART_NEOVERSE_N1	0xD0C

/**
 * Build the value get_micro_arch_id() returns for a given core.
 * @imp and @part are the suffixes of a CPU_IMPLEMENTER_ and a CPU_PART_
 * constant, e.g. MICRO_ARCH_ID(ARM, CORTEX_A57).
 * The masks are unsigned so that implementer codes with the top bit set
 * do not overflow a signed int when shifted by 24.
 */
#define MICRO_ARCH_ID(imp,part)                                         \
	(((CPU_IMPLEMENTER_##imp & 0xffu) << 24) |                      \
	 ((CPU_PART_##part & 0xfffu) << 4))
235*c96e767eSJerry Yu
/*
 * Fallback for system headers that do not define HWCAP_CPUID.
 * get_micro_arch_id() tests this AT_HWCAP bit before reading MIDR_EL1.
 */
#ifndef HWCAP_CPUID
#define HWCAP_CPUID	(1<<11)
#endif
239*c96e767eSJerry Yu
240*c96e767eSJerry Yu /**
241*c96e767eSJerry Yu * @brief get_micro_arch_id
242*c96e767eSJerry Yu * read micro-architector register instruction if possible.This function
243*c96e767eSJerry Yu * provides microarchitecture information and make microarchitecture optimization
244*c96e767eSJerry Yu * possible. It will trap into kernel due to mrs instruction. So it should
245*c96e767eSJerry Yu * be called only in dispatcher, that will be called only once in program
246*c96e767eSJerry Yu * lifecycle. And HWCAP must be match,That will make sure there are no
247*c96e767eSJerry Yu * illegal instruction errors.
248*c96e767eSJerry Yu *
249*c96e767eSJerry Yu * NOTICE:
250*c96e767eSJerry Yu * - HWCAP_CPUID should be available. Otherwise it returns zero
251*c96e767eSJerry Yu * - It MUST be called inside dispather.
252*c96e767eSJerry Yu * - It MUST meet the HWCAP requirements
253*c96e767eSJerry Yu *
254*c96e767eSJerry Yu * Example:
255*c96e767eSJerry Yu * DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
256*c96e767eSJerry Yu * {
257*c96e767eSJerry Yu * unsigned long auxval = getauxval(AT_HWCAP);
258*c96e767eSJerry Yu * // MUST do the judgement is MUST.
259*c96e767eSJerry Yu * if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
260*c96e767eSJerry Yu * switch (get_micro_arch_id()) {
261*c96e767eSJerry Yu * case MICRO_ARCH_ID(ARM, CORTEX_A57):
262*c96e767eSJerry Yu * return PROVIDER_INFO(crc32_pmull_crc_for_a57);
263*c96e767eSJerry Yu * case MICRO_ARCH_ID(ARM, CORTEX_A72):
264*c96e767eSJerry Yu * return PROVIDER_INFO(crc32_pmull_crc_for_a72);
265*c96e767eSJerry Yu * case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
266*c96e767eSJerry Yu * return PROVIDER_INFO(crc32_pmull_crc_for_n1);
267*c96e767eSJerry Yu * case default:
268*c96e767eSJerry Yu * return PROVIDER_INFO(crc32_pmull_crc_for_others);
269*c96e767eSJerry Yu * }
270*c96e767eSJerry Yu * }
271*c96e767eSJerry Yu * return PROVIDER_BASIC(crc32_iscsi);
272*c96e767eSJerry Yu * }
273*c96e767eSJerry Yu * KNOWN ISSUE:
274*c96e767eSJerry Yu * On a heterogeneous system (big.LITTLE), it will work but the performance
275*c96e767eSJerry Yu * might not be the best one as expected.
276*c96e767eSJerry Yu *
277*c96e767eSJerry Yu * If this function is called on the big core, it will return the function
278*c96e767eSJerry Yu * optimized for the big core.
279*c96e767eSJerry Yu *
280*c96e767eSJerry Yu * If execution is then scheduled to the little core. It will still work (1),
281*c96e767eSJerry Yu * but the function won't be optimized for the little core, thus the performance
282*c96e767eSJerry Yu * won't be as expected.
283*c96e767eSJerry Yu *
284*c96e767eSJerry Yu * References:
285*c96e767eSJerry Yu * - [CPU Feature detection](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/arm64/cpu-feature-registers.rst?h=v5.5)
286*c96e767eSJerry Yu *
287*c96e767eSJerry Yu */
get_micro_arch_id(void)288*c96e767eSJerry Yu static inline uint32_t get_micro_arch_id(void)
289*c96e767eSJerry Yu {
290*c96e767eSJerry Yu uint32_t id=CPU_IMPLEMENTER_RESERVE;
291*c96e767eSJerry Yu if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
292*c96e767eSJerry Yu
293*c96e767eSJerry Yu asm("mrs %0, MIDR_EL1 " : "=r" (id));
294*c96e767eSJerry Yu }
295*c96e767eSJerry Yu return id&0xff00fff0;
296*c96e767eSJerry Yu }
297*c96e767eSJerry Yu
298*c96e767eSJerry Yu
299*c96e767eSJerry Yu
30091b08277SJerry Yu #endif /* __ASSEMBLY__ */
30191b08277SJerry Yu #endif
302