1*87aa9c9eSJonas Devlieghere #include <cstdint>
2*87aa9c9eSJonas Devlieghere
// One 256-bit value, expressed as four 64-bit lanes in memory order
// (a is at the lowest address, d at the highest).
//
// The 32-byte alignment makes every element of an array of ymm_t start on a
// 32-byte boundary, which the aligned vmovaps loads in main() rely on.
struct alignas(32) ymm_t {
  uint64_t a;
  uint64_t b;
  uint64_t c;
  uint64_t d;
};
6*87aa9c9eSJonas Devlieghere
main()7*87aa9c9eSJonas Devlieghere int main() {
8*87aa9c9eSJonas Devlieghere constexpr ymm_t ymm[] = {
9*87aa9c9eSJonas Devlieghere { 0x0706050403020100, 0x0F0E0D0C0B0A0908,
10*87aa9c9eSJonas Devlieghere 0x1716151413121110, 0x1F1E1D1C1B1A1918, },
11*87aa9c9eSJonas Devlieghere { 0x0807060504030201, 0x100F0E0D0C0B0A09,
12*87aa9c9eSJonas Devlieghere 0x1817161514131211, 0x201F1E1D1C1B1A19, },
13*87aa9c9eSJonas Devlieghere { 0x0908070605040302, 0x11100F0E0D0C0B0A,
14*87aa9c9eSJonas Devlieghere 0x1918171615141312, 0x21201F1E1D1C1B1A, },
15*87aa9c9eSJonas Devlieghere { 0x0A09080706050403, 0x1211100F0E0D0C0B,
16*87aa9c9eSJonas Devlieghere 0x1A19181716151413, 0x2221201F1E1D1C1B, },
17*87aa9c9eSJonas Devlieghere { 0x0B0A090807060504, 0x131211100F0E0D0C,
18*87aa9c9eSJonas Devlieghere 0x1B1A191817161514, 0x232221201F1E1D1C, },
19*87aa9c9eSJonas Devlieghere { 0x0C0B0A0908070605, 0x14131211100F0E0D,
20*87aa9c9eSJonas Devlieghere 0x1C1B1A1918171615, 0x24232221201F1E1D, },
21*87aa9c9eSJonas Devlieghere { 0x0D0C0B0A09080706, 0x1514131211100F0E,
22*87aa9c9eSJonas Devlieghere 0x1D1C1B1A19181716, 0x2524232221201F1E, },
23*87aa9c9eSJonas Devlieghere { 0x0E0D0C0B0A090807, 0x161514131211100F,
24*87aa9c9eSJonas Devlieghere 0x1E1D1C1B1A191817, 0x262524232221201F, },
25*87aa9c9eSJonas Devlieghere #if defined(__x86_64__) || defined(_M_X64)
26*87aa9c9eSJonas Devlieghere { 0x0F0E0D0C0B0A0908, 0x1716151413121110,
27*87aa9c9eSJonas Devlieghere 0x1F1E1D1C1B1A1918, 0x2726252423222120, },
28*87aa9c9eSJonas Devlieghere { 0x100F0E0D0C0B0A09, 0x1817161514131211,
29*87aa9c9eSJonas Devlieghere 0x201F1E1D1C1B1A19, 0x2827262524232221, },
30*87aa9c9eSJonas Devlieghere { 0x11100F0E0D0C0B0A, 0x1918171615141312,
31*87aa9c9eSJonas Devlieghere 0x21201F1E1D1C1B1A, 0x2928272625242322, },
32*87aa9c9eSJonas Devlieghere { 0x1211100F0E0D0C0B, 0x1A19181716151413,
33*87aa9c9eSJonas Devlieghere 0x2221201F1E1D1C1B, 0x2A29282726252423, },
34*87aa9c9eSJonas Devlieghere { 0x131211100F0E0D0C, 0x1B1A191817161514,
35*87aa9c9eSJonas Devlieghere 0x232221201F1E1D1C, 0x2B2A292827262524, },
36*87aa9c9eSJonas Devlieghere { 0x14131211100F0E0D, 0x1C1B1A1918171615,
37*87aa9c9eSJonas Devlieghere 0x24232221201F1E1D, 0x2C2B2A2928272625, },
38*87aa9c9eSJonas Devlieghere { 0x1514131211100F0E, 0x1D1C1B1A19181716,
39*87aa9c9eSJonas Devlieghere 0x2524232221201F1E, 0x2D2C2B2A29282726, },
40*87aa9c9eSJonas Devlieghere { 0x161514131211100F, 0x1E1D1C1B1A191817,
41*87aa9c9eSJonas Devlieghere 0x262524232221201F, 0x2E2D2C2B2A292827, },
42*87aa9c9eSJonas Devlieghere #endif
43*87aa9c9eSJonas Devlieghere };
44*87aa9c9eSJonas Devlieghere
45*87aa9c9eSJonas Devlieghere asm volatile(
46*87aa9c9eSJonas Devlieghere "vmovaps 0x000(%0), %%ymm0\n\t"
47*87aa9c9eSJonas Devlieghere "vmovaps 0x020(%0), %%ymm1\n\t"
48*87aa9c9eSJonas Devlieghere "vmovaps 0x040(%0), %%ymm2\n\t"
49*87aa9c9eSJonas Devlieghere "vmovaps 0x060(%0), %%ymm3\n\t"
50*87aa9c9eSJonas Devlieghere "vmovaps 0x080(%0), %%ymm4\n\t"
51*87aa9c9eSJonas Devlieghere "vmovaps 0x0A0(%0), %%ymm5\n\t"
52*87aa9c9eSJonas Devlieghere "vmovaps 0x0C0(%0), %%ymm6\n\t"
53*87aa9c9eSJonas Devlieghere "vmovaps 0x0E0(%0), %%ymm7\n\t"
54*87aa9c9eSJonas Devlieghere #if defined(__x86_64__) || defined(_M_X64)
55*87aa9c9eSJonas Devlieghere "vmovaps 0x100(%0), %%ymm8\n\t"
56*87aa9c9eSJonas Devlieghere "vmovaps 0x120(%0), %%ymm9\n\t"
57*87aa9c9eSJonas Devlieghere "vmovaps 0x140(%0), %%ymm10\n\t"
58*87aa9c9eSJonas Devlieghere "vmovaps 0x160(%0), %%ymm11\n\t"
59*87aa9c9eSJonas Devlieghere "vmovaps 0x180(%0), %%ymm12\n\t"
60*87aa9c9eSJonas Devlieghere "vmovaps 0x1A0(%0), %%ymm13\n\t"
61*87aa9c9eSJonas Devlieghere "vmovaps 0x1C0(%0), %%ymm14\n\t"
62*87aa9c9eSJonas Devlieghere "vmovaps 0x1E0(%0), %%ymm15\n\t"
63*87aa9c9eSJonas Devlieghere #endif
64*87aa9c9eSJonas Devlieghere "\n\t"
65*87aa9c9eSJonas Devlieghere "int3\n\t"
66*87aa9c9eSJonas Devlieghere :
67*87aa9c9eSJonas Devlieghere : "b"(ymm)
68*87aa9c9eSJonas Devlieghere : "%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6", "%ymm7"
69*87aa9c9eSJonas Devlieghere #if defined(__x86_64__) || defined(_M_X64)
70*87aa9c9eSJonas Devlieghere , "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13", "%ymm14",
71*87aa9c9eSJonas Devlieghere "%ymm15"
72*87aa9c9eSJonas Devlieghere #endif
73*87aa9c9eSJonas Devlieghere );
74*87aa9c9eSJonas Devlieghere
75*87aa9c9eSJonas Devlieghere return 0;
76*87aa9c9eSJonas Devlieghere }
77