xref: /llvm-project/polly/docs/experiments/matmul/matmul.polly.interchanged.s (revision 3b4d331d8cbd72078ae9fff1b81ca96c0d55ecb8)
1	.text
2	.file	"matmul.c"
3	.section	.rodata.cst8,"aM",@progbits,8
4	.p2align	3               # -- Begin function init_array
5.LCPI0_0:
6	.quad	4602678819172646912     # double 0.5 (scale factor loaded into xmm0 by init_array)
7	.text
#
# init_array
#   Fills the global arrays A and B with identical single-precision values.
#   For row i and column j, both in 0..1535, the stored element is
#   (float)(0.5 * (((i * j) & 1023) + 1)).
#   Reads no argument registers. Apart from the rbp frame it only touches
#   rax, rcx, rdx, rsi, rdi, r8, r9, xmm0 and xmm1.
#   The inner loop handles two adjacent columns (even, then odd) per pass.
#
8	.globl	init_array
9	.p2align	4, 0x90
10	.type	init_array,@function
11init_array:                             # @init_array
12	.cfi_startproc
13# %bb.0:                                # %entry
14	pushq	%rbp
15	.cfi_def_cfa_offset 16
16	.cfi_offset %rbp, -16
17	movq	%rsp, %rbp
18	.cfi_def_cfa_register %rbp
19	leaq	B(%rip), %rax           # rax = start of the current row of B
20	leaq	A(%rip), %rcx           # rcx = start of the current row of A
21	xorl	%r8d, %r8d              # r8d = 2*i, the amount edx grows per column pair
22	movsd	.LCPI0_0(%rip), %xmm0   # xmm0 = mem[0],zero (the constant 0.5)
23	xorl	%r9d, %r9d              # r9 = i, the row index
24	.p2align	4, 0x90
25.LBB0_1:                                # %polly.loop_header
26                                        # =>This Loop Header: Depth=1
27                                        #     Child Loop BB0_2 Depth 2
28	movl	$1, %edi                # rdi = odd column index of the pair (1, 3, ..., 1535)
29	xorl	%edx, %edx              # edx = i * (even column index), 0 at the row start
30	.p2align	4, 0x90
31.LBB0_2:                                # %polly.loop_header1
32                                        #   Parent Loop BB0_1 Depth=1
33                                        # =>  This Inner Loop Header: Depth=2
34	movl	%edx, %esi              # esi = product for the even column
35	andl	$1022, %esi             # imm = 0x3FE
36	orl	$1, %esi                # edx is always even here, so this is (edx & 1023) + 1
37	xorps	%xmm1, %xmm1            # clear xmm1 before the scalar convert writes its low lane
38	cvtsi2sdl	%esi, %xmm1
39	mulsd	%xmm0, %xmm1            # scale by 0.5 in double precision
40	cvtsd2ss	%xmm1, %xmm1            # narrow to single precision
41	movss	%xmm1, -4(%rcx,%rdi,4)  # A[i][rdi-1]
42	movss	%xmm1, -4(%rax,%rdi,4)  # B[i][rdi-1]
43	leal	(%r9,%rdx), %esi        # esi = i + i*(rdi-1) = i * rdi, product for the odd column
44	andl	$1023, %esi             # imm = 0x3FF
45	addl	$1, %esi
46	xorps	%xmm1, %xmm1
47	cvtsi2sdl	%esi, %xmm1
48	mulsd	%xmm0, %xmm1
49	cvtsd2ss	%xmm1, %xmm1
50	movss	%xmm1, (%rcx,%rdi,4)    # A[i][rdi]
51	movss	%xmm1, (%rax,%rdi,4)    # B[i][rdi]
52	addq	$2, %rdi                # advance to the next column pair
53	addl	%r8d, %edx              # edx += 2*i
54	cmpq	$1537, %rdi             # imm = 0x601, reached after the pair ending at column 1535
55	jne	.LBB0_2
56# %bb.3:                                # %polly.loop_exit3
57                                        #   in Loop: Header=BB0_1 Depth=1
58	addq	$1, %r9                 # next row
59	addq	$6144, %rax             # imm = 0x1800, one row of 1536 four-byte elements
60	addq	$6144, %rcx             # imm = 0x1800
61	addl	$2, %r8d                # keep r8d equal to 2 * r9
62	cmpq	$1536, %r9              # imm = 0x600
63	jne	.LBB0_1
64# %bb.4:                                # %polly.exiting
65	popq	%rbp
66	.cfi_def_cfa %rsp, 8
67	retq
68.Lfunc_end0:
69	.size	init_array, .Lfunc_end0-init_array
70	.cfi_endproc
71                                        # -- End function
#
# print_array
#   Prints every element of C to stdout: 1536 rows of 1536 four-byte floats.
#   Each element is widened to double and passed to fprintf with the format
#   string .L.str. A newline (character 10) is written with fputc after each
#   column j where j % 80 == 79, and once more at the end of every row.
#   Reads no argument registers. Saves and restores rbx and r12-r15.
#   The seven pushes plus the return address total 64 bytes, so rsp is
#   16-byte aligned at each call.
#   Register roles: r13 = current row of C, rbx = column index j,
#   r12 = divide-by-80 multiplier, r14 = format string, r15d = scratch,
#   -48(%rbp) = row counter (spilled across the calls).
#
72	.globl	print_array             # -- Begin function print_array
73	.p2align	4, 0x90
74	.type	print_array,@function
75print_array:                            # @print_array
76	.cfi_startproc
77# %bb.0:                                # %entry
78	pushq	%rbp
79	.cfi_def_cfa_offset 16
80	.cfi_offset %rbp, -16
81	movq	%rsp, %rbp
82	.cfi_def_cfa_register %rbp
83	pushq	%r15
84	pushq	%r14
85	pushq	%r13
86	pushq	%r12
87	pushq	%rbx
88	pushq	%rax                    # reserves the 8-byte slot at -48(%rbp) used for the spill below
89	.cfi_offset %rbx, -56
90	.cfi_offset %r12, -48
91	.cfi_offset %r13, -40
92	.cfi_offset %r14, -32
93	.cfi_offset %r15, -24
94	leaq	C(%rip), %r13           # r13 = start of the current row of C
95	xorl	%eax, %eax              # rax = row counter
96	movl	$3435973837, %r12d      # imm = 0xCCCCCCCD, multiplier for the divide-by-80 below
97	leaq	.L.str(%rip), %r14      # r14 = format string
98	.p2align	4, 0x90
99.LBB1_1:                                # %for.cond1.preheader
100                                        # =>This Loop Header: Depth=1
101                                        #     Child Loop BB1_2 Depth 2
102	movq	%rax, -48(%rbp)         # 8-byte Spill (row counter, rax is not preserved by the calls)
103	movq	stdout(%rip), %rsi      # rsi = stdout stream for the first fprintf of this row
104	xorl	%ebx, %ebx              # rbx = j, the column index
105	.p2align	4, 0x90
106.LBB1_2:                                # %for.body3
107                                        #   Parent Loop BB1_1 Depth=1
108                                        # =>  This Inner Loop Header: Depth=2
109	movl	%ebx, %eax              # zero-extended copy of j
110	imulq	%r12, %rax
111	shrq	$38, %rax               # rax = j / 80 (unsigned divide via multiply and shift)
112	leal	(%rax,%rax,4), %r15d    # r15d = 5 * (j / 80)
113	shll	$4, %r15d               # r15d = 80 * (j / 80)
114	addl	$79, %r15d              # r15d = last column index in the group of 80 containing j
115	movss	(%r13,%rbx,4), %xmm0    # xmm0 = mem[0],zero,zero,zero
116	cvtss2sd	%xmm0, %xmm0            # widen the element to double for the variadic call
117	movb	$1, %al                 # al = 1, one vector register holds a variadic argument
118	movq	%rsi, %rdi              # arg 1: the stdout stream loaded earlier
119	movq	%r14, %rsi              # arg 2: format string .L.str
120	callq	fprintf                 # note: no @PLT suffix here, unlike the fputc calls
121	cmpl	%ebx, %r15d             # equal only when j % 80 == 79
122	jne	.LBB1_4
123# %bb.3:                                # %if.then
124                                        #   in Loop: Header=BB1_2 Depth=2
125	movq	stdout(%rip), %rsi      # arg 2: stream
126	movl	$10, %edi               # arg 1: character 10 (newline)
127	callq	fputc@PLT
128.LBB1_4:                                # %for.inc
129                                        #   in Loop: Header=BB1_2 Depth=2
130	addq	$1, %rbx
131	movq	stdout(%rip), %rsi      # reload stdout for the next fprintf, or for the fputc after the loop
132	cmpq	$1536, %rbx             # imm = 0x600
133	jne	.LBB1_2
134# %bb.5:                                # %for.end
135                                        #   in Loop: Header=BB1_1 Depth=1
136	movl	$10, %edi               # end-of-row newline, stream is already in rsi
137	callq	fputc@PLT
138	movq	-48(%rbp), %rax         # 8-byte Reload
139	addq	$1, %rax                # next row
140	addq	$6144, %r13             # imm = 0x1800, one row of 1536 four-byte elements
141	cmpq	$1536, %rax             # imm = 0x600
142	jne	.LBB1_1
143# %bb.6:                                # %for.end12
144	addq	$8, %rsp                # drop the spill slot created by pushq %rax
145	popq	%rbx
146	popq	%r12
147	popq	%r13
148	popq	%r14
149	popq	%r15
150	popq	%rbp
151	.cfi_def_cfa %rsp, 8
152	retq
153.Lfunc_end1:
154	.size	print_array, .Lfunc_end1-print_array
155	.cfi_endproc
156                                        # -- End function
#
# main
#   Calls init_array, clears all 9437184 bytes of C with memset, then
#   accumulates C[i][j] += A[i][k] * B[k][j] over i, k, j in 0..1535 and
#   returns 0 in eax.
#   Loop order is i (outer), k (middle), j (inner). The j loop processes
#   16 floats per pass using four 4-float SSE operations.
#   The movaps/addps memory operands require 16-byte alignment. A, B and C
#   are declared with 16-byte alignment by their .comm directives, and every
#   row stride (6144) and column step (64 bytes) is a multiple of 16.
#   Register roles: r14 = i, rsi = k, rdi = j + 12, rbx = row i of C,
#   rdx = row k of B, rax = base of B, rcx = base of A, xmm0 = A[i][k] in
#   all four lanes.
#
157	.globl	main                    # -- Begin function main
158	.p2align	4, 0x90
159	.type	main,@function
160main:                                   # @main
161	.cfi_startproc
162# %bb.0:                                # %entry
163	pushq	%rbp
164	.cfi_def_cfa_offset 16
165	.cfi_offset %rbp, -16
166	movq	%rsp, %rbp
167	.cfi_def_cfa_register %rbp
168	pushq	%r14
169	pushq	%rbx
170	.cfi_offset %rbx, -32
171	.cfi_offset %r14, -24
172	callq	init_array
173	leaq	C(%rip), %rbx           # rbx = start of the current row of C
174	xorl	%r14d, %r14d            # r14 = i
175	xorl	%esi, %esi              # memset arg 2: fill value 0
176	movl	$9437184, %edx          # imm = 0x900000, memset arg 3: byte count
177	movq	%rbx, %rdi              # memset arg 1: C
178	callq	memset@PLT
179	leaq	B(%rip), %rax           # rax = base of B
180	leaq	A(%rip), %rcx           # rcx = base of A
181	.p2align	4, 0x90
182.LBB2_1:                                # %polly.loop_header8
183                                        # =>This Loop Header: Depth=1
184                                        #     Child Loop BB2_2 Depth 2
185                                        #       Child Loop BB2_3 Depth 3
186	movq	%rax, %rdx              # rdx = row k of B, restarted at row 0 for every i
187	xorl	%esi, %esi              # rsi = k
188	.p2align	4, 0x90
189.LBB2_2:                                # %polly.loop_header14
190                                        #   Parent Loop BB2_1 Depth=1
191                                        # =>  This Loop Header: Depth=2
192                                        #       Child Loop BB2_3 Depth 3
193	leaq	(%r14,%r14,2), %rdi     # rdi = 3 * i
194	shlq	$11, %rdi               # rdi = 6144 * i, byte offset of row i
195	addq	%rcx, %rdi              # rdi = address of A[i][0] (depends only on r14 and rcx, not on k)
196	movss	(%rdi,%rsi,4), %xmm0    # xmm0 = mem[0],zero,zero,zero
197	shufps	$0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
198	movl	$12, %edi               # rdi = j + 12, the -48 byte displacement below cancels the bias
199	.p2align	4, 0x90
200.LBB2_3:                                # %vector.body
201                                        #   Parent Loop BB2_1 Depth=1
202                                        #     Parent Loop BB2_2 Depth=2
203                                        # =>    This Inner Loop Header: Depth=3
204	movaps	-48(%rdx,%rdi,4), %xmm1 # B[k][j .. j+3]
205	mulps	%xmm0, %xmm1
206	movaps	-32(%rdx,%rdi,4), %xmm2 # B[k][j+4 .. j+7]
207	mulps	%xmm0, %xmm2
208	addps	-48(%rbx,%rdi,4), %xmm1 # add the current C[i][j .. j+3]
209	addps	-32(%rbx,%rdi,4), %xmm2
210	movaps	%xmm1, -48(%rbx,%rdi,4) # store back to C[i][j .. j+3]
211	movaps	%xmm2, -32(%rbx,%rdi,4)
212	movaps	-16(%rdx,%rdi,4), %xmm1 # B[k][j+8 .. j+11]
213	mulps	%xmm0, %xmm1
214	movaps	(%rdx,%rdi,4), %xmm2    # B[k][j+12 .. j+15]
215	mulps	%xmm0, %xmm2
216	addps	-16(%rbx,%rdi,4), %xmm1
217	addps	(%rbx,%rdi,4), %xmm2
218	movaps	%xmm1, -16(%rbx,%rdi,4)
219	movaps	%xmm2, (%rbx,%rdi,4)
220	addq	$16, %rdi               # 16 columns handled per pass
221	cmpq	$1548, %rdi             # imm = 0x60C, i.e. j has reached 1536
222	jne	.LBB2_3
223# %bb.4:                                # %polly.loop_exit22
224                                        #   in Loop: Header=BB2_2 Depth=2
225	addq	$1, %rsi                # next k
226	addq	$6144, %rdx             # imm = 0x1800, advance to the next row of B
227	cmpq	$1536, %rsi             # imm = 0x600
228	jne	.LBB2_2
229# %bb.5:                                # %polly.loop_exit16
230                                        #   in Loop: Header=BB2_1 Depth=1
231	addq	$1, %r14                # next i
232	addq	$6144, %rbx             # imm = 0x1800, advance to the next row of C
233	cmpq	$1536, %r14             # imm = 0x600
234	jne	.LBB2_1
235# %bb.6:                                # %polly.exiting
236	xorl	%eax, %eax              # return value 0
237	popq	%rbx
238	popq	%r14
239	popq	%rbp
240	.cfi_def_cfa %rsp, 8
241	retq
242.Lfunc_end2:
243	.size	main, .Lfunc_end2-main
244	.cfi_endproc
245                                        # -- End function
# Global data. A, B and C are common symbols of 9437184 bytes each
# (1536 * 1536 * 4) with 16-byte alignment. .L.str is the local, mergeable
# format string used by print_array.
246	.type	A,@object               # @A
247	.comm	A,9437184,16            # size in bytes, alignment in bytes
248	.type	B,@object               # @B
249	.comm	B,9437184,16
250	.type	.L.str,@object          # @.str
251	.section	.rodata.str1.1,"aMS",@progbits,1
252.L.str:
253	.asciz	"%lf "
254	.size	.L.str, 5               # four characters plus the terminating NUL
255
256	.type	C,@object               # @C
257	.comm	C,9437184,16
258
259	.ident	"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
260	.section	".note.GNU-stack","",@progbits
261