# ----------------------------------------------------------------------
# matmul.s -- dense single-precision matrix multiply, C += A * B
#
# Syntax: GNU as, AT&T operand order (op src, dst)
# Target: x86-64, System V AMD64 calling convention, ELF
#
# Exports: init_array, print_array, main
# Data:    A, B, C -- three MAT_DIM x MAT_DIM arrays of 4-byte floats,
#          stored row-major, 16-byte aligned (common symbols).
#
# Basic-block labels (.LBBn_m) and the "%name" remarks are the ones the
# compiler emitted (see .ident at the bottom); they are kept so the code
# can still be matched against the original listing.
# ----------------------------------------------------------------------
        .text
        .file   "matmul.c"

        .set    MAT_DIM, 1536                      # rows == columns
        .set    MAT_ROW_BYTES, MAT_DIM*4           # 6144 bytes per row of floats
        .set    MAT_BYTES, MAT_DIM*MAT_ROW_BYTES   # 9437184 bytes per matrix

        .section        .rodata.cst8,"aM",@progbits,8
        .p2align        3               # -- Begin function init_array
.LCPI0_0:
        .quad   4602678819172646912     # double 0.5
        .text

# ----------------------------------------------------------------------
# void init_array(void)
#
# Fills A and B with the same values:
#     A[i][j] = B[i][j] = (float)((1 + (i*j) % 1024) * 0.5)
# Two adjacent columns (j, j+1) are written per inner iteration.
#
# In:    nothing
# Out:   nothing (writes A and B)
# Clobb: rax, rcx, rdx, rsi, rdi, r8, r9, xmm0, xmm1, flags
#
# Register roles:
#     rax = &B[i][0]          rcx = &A[i][0]
#     r9  = i                 r8d = 2*i (step of i*j when j advances by 2)
#     rdi = j + 1 for the even column j of the current pair
#     edx = i * j for that even column
#     xmm0 = 0.5 (double)
# ----------------------------------------------------------------------
        .globl  init_array
        .p2align        4, 0x90
        .type   init_array,@function
init_array:                             # @init_array
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        leaq    B(%rip), %rax
        leaq    A(%rip), %rcx
        xorl    %r8d, %r8d              # 2*i = 0
        movsd   .LCPI0_0(%rip), %xmm0   # xmm0 = 0.5
        xorl    %r9d, %r9d              # i = 0
        .p2align        4, 0x90
.LBB0_1:                                # %polly.loop_header (row loop, over i)
        movl    $1, %edi                # j + 1 = 1
        xorl    %edx, %edx              # i * j = 0
        .p2align        4, 0x90
.LBB0_2:                                # %polly.loop_header1 (column-pair loop)
        # Even column j: i*j is even here, so (i*j & 1022) | 1
        # equals 1 + (i*j) % 1024.
        movl    %edx, %esi
        andl    $1022, %esi             # imm = 0x3FE
        orl     $1, %esi
        xorps   %xmm1, %xmm1            # break the dependency on old xmm1
        cvtsi2sdl       %esi, %xmm1
        mulsd   %xmm0, %xmm1
        cvtsd2ss        %xmm1, %xmm1
        movss   %xmm1, -4(%rcx,%rdi,4)  # A[i][j]
        movss   %xmm1, -4(%rax,%rdi,4)  # B[i][j]
        # Odd column j+1: i*(j+1) = i*j + i.
        leal    (%r9,%rdx), %esi
        andl    $1023, %esi             # imm = 0x3FF
        addl    $1, %esi
        xorps   %xmm1, %xmm1
        cvtsi2sdl       %esi, %xmm1
        mulsd   %xmm0, %xmm1
        cvtsd2ss        %xmm1, %xmm1
        movss   %xmm1, (%rcx,%rdi,4)    # A[i][j+1]
        movss   %xmm1, (%rax,%rdi,4)    # B[i][j+1]
        addq    $2, %rdi
        addl    %r8d, %edx              # i*j += 2*i
        cmpq    $MAT_DIM+1, %rdi        # rdi runs 1, 3, ..., MAT_DIM+1
        jne     .LBB0_2
# %bb.3:                                # %polly.loop_exit3
        addq    $1, %r9
        addq    $MAT_ROW_BYTES, %rax    # next row of B
        addq    $MAT_ROW_BYTES, %rcx    # next row of A
        addl    $2, %r8d
        cmpq    $MAT_DIM, %r9
        jne     .LBB0_1
# %bb.4:                                # %polly.exiting
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end0:
        .size   init_array, .Lfunc_end0-init_array
        .cfi_endproc
                                        # -- End function

# ----------------------------------------------------------------------
# void print_array(void)
#
# Writes every element of C to stdout with fprintf(stdout, "%lf ", ...),
# row by row.  A newline is written after each column j for which
# j == (j / 80) * 80 + 79, and once more at the end of every row.
#
# In:    nothing
# Out:   nothing (I/O on stdout)
# Stack: rbp frame, five callee-saved pushes plus one 8-byte slot; with
#        the return address that is 64 bytes, so rsp is 16-byte aligned
#        at every call.
#
# Register roles (all callee-saved, so they survive the libc calls):
#     r13 = &C[i][0]          rbx = j
#     r12 = 0xCCCCCCCD        r14 = &"%lf "
#     r15d = (j / 80) * 80 + 79
#     -48(%rbp) = i (spilled around the inner loop)
# ----------------------------------------------------------------------
        .globl  print_array             # -- Begin function print_array
        .p2align        4, 0x90
        .type   print_array,@function
print_array:                            # @print_array
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        pushq   %rax                    # reserves the spill slot at -48(%rbp)
        .cfi_offset %rbx, -56
        .cfi_offset %r12, -48
        .cfi_offset %r13, -40
        .cfi_offset %r14, -32
        .cfi_offset %r15, -24
        leaq    C(%rip), %r13
        xorl    %eax, %eax              # i = 0
        movl    $3435973837, %r12d      # imm = 0xCCCCCCCD, reciprocal for /80
        leaq    .L.str(%rip), %r14
        .p2align        4, 0x90
.LBB1_1:                                # %for.cond1.preheader (row loop, over i)
        movq    %rax, -48(%rbp)         # 8-byte Spill
        movq    stdout(%rip), %rsi
        xorl    %ebx, %ebx              # j = 0
        .p2align        4, 0x90
.LBB1_2:                                # %for.body3 (column loop, over j)
        movl    %ebx, %eax
        imulq   %r12, %rax
        shrq    $38, %rax               # rax = j / 80
        leal    (%rax,%rax,4), %r15d    # * 5
        shll    $4, %r15d               # * 16  -> (j / 80) * 80
        addl    $79, %r15d
        movss   (%r13,%rbx,4), %xmm0    # xmm0 = C[i][j]
        cvtss2sd        %xmm0, %xmm0    # varargs take a double
        movb    $1, %al                 # one vector register used by varargs
        movq    %rsi, %rdi              # stream = stdout
        movq    %r14, %rsi              # format = "%lf "
        callq   fprintf@PLT
        cmpl    %ebx, %r15d
        jne     .LBB1_4
# %bb.3:                                # %if.then
        movq    stdout(%rip), %rsi
        movl    $10, %edi               # '\n'
        callq   fputc@PLT
.LBB1_4:                                # %for.inc
        addq    $1, %rbx
        movq    stdout(%rip), %rsi      # reloaded: rsi is not preserved by calls
        cmpq    $MAT_DIM, %rbx
        jne     .LBB1_2
# %bb.5:                                # %for.end
        movl    $10, %edi               # '\n' at the end of the row
        callq   fputc@PLT
        movq    -48(%rbp), %rax         # 8-byte Reload
        addq    $1, %rax
        addq    $MAT_ROW_BYTES, %r13    # next row of C
        cmpq    $MAT_DIM, %rax
        jne     .LBB1_1
# %bb.6:                                # %for.end12
        addq    $8, %rsp
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end1:
        .size   print_array, .Lfunc_end1-print_array
        .cfi_endproc
                                        # -- End function

# ----------------------------------------------------------------------
# int main(void)
#
# Calls init_array, zeroes C with memset, then accumulates
#     C[i][j] += A[i][k] * B[k][j]
# for all i, k, j in [0, MAT_DIM).  The j loop is vectorised: each pass
# handles 16 floats with aligned 16-byte loads and stores.
#
# Out:   eax = 0
# Stack: rbp + two callee-saved pushes; with the return address that is
#        32 bytes, so rsp is 16-byte aligned at both calls.
#
# Register roles in the loop nest (no calls are made inside it):
#     r14 = i                 rsi = k
#     rbx = &C[i][0]          rcx = &A[i][0]
#     rax = &B[0][0]          rdx = &B[k][0]
#     rdi = j + 12 for the first float of the current 16-float group
#     xmm0 = A[i][k] broadcast to all four lanes
# ----------------------------------------------------------------------
        .globl  main                    # -- Begin function main
        .p2align        4, 0x90
        .type   main,@function
main:                                   # @main
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        pushq   %r14
        pushq   %rbx
        .cfi_offset %rbx, -32
        .cfi_offset %r14, -24
        callq   init_array
        leaq    C(%rip), %rbx
        xorl    %r14d, %r14d            # i = 0
        xorl    %esi, %esi              # fill byte 0
        movl    $MAT_BYTES, %edx
        movq    %rbx, %rdi
        callq   memset@PLT
        leaq    B(%rip), %rax
        leaq    A(%rip), %rcx           # row pointer into A, advanced once per i
        .p2align        4, 0x90
.LBB2_1:                                # %polly.loop_header8 (over i)
        movq    %rax, %rdx              # restart at row 0 of B
        xorl    %esi, %esi              # k = 0
        .p2align        4, 0x90
.LBB2_2:                                # %polly.loop_header14 (over k)
        movss   (%rcx,%rsi,4), %xmm0    # xmm0 = A[i][k]
        shufps  $0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
        movl    $12, %edi               # biased index: -48(...,%rdi,4) is j = 0
        .p2align        4, 0x90
.LBB2_3:                                # %vector.body (over j, 16 floats per pass)
        movaps  -48(%rdx,%rdi,4), %xmm1
        mulps   %xmm0, %xmm1
        movaps  -32(%rdx,%rdi,4), %xmm2
        mulps   %xmm0, %xmm2
        addps   -48(%rbx,%rdi,4), %xmm1
        addps   -32(%rbx,%rdi,4), %xmm2
        movaps  %xmm1, -48(%rbx,%rdi,4)
        movaps  %xmm2, -32(%rbx,%rdi,4)
        movaps  -16(%rdx,%rdi,4), %xmm1
        mulps   %xmm0, %xmm1
        movaps  (%rdx,%rdi,4), %xmm2
        mulps   %xmm0, %xmm2
        addps   -16(%rbx,%rdi,4), %xmm1
        addps   (%rbx,%rdi,4), %xmm2
        movaps  %xmm1, -16(%rbx,%rdi,4)
        movaps  %xmm2, (%rbx,%rdi,4)
        addq    $16, %rdi
        cmpq    $MAT_DIM+12, %rdi       # imm = 0x60C
        jne     .LBB2_3
# %bb.4:                                # %polly.loop_exit22
        addq    $1, %rsi
        addq    $MAT_ROW_BYTES, %rdx    # next row of B
        cmpq    $MAT_DIM, %rsi
        jne     .LBB2_2
# %bb.5:                                # %polly.loop_exit16
        addq    $1, %r14
        addq    $MAT_ROW_BYTES, %rbx    # next row of C
        addq    $MAT_ROW_BYTES, %rcx    # next row of A
        cmpq    $MAT_DIM, %r14
        jne     .LBB2_1
# %bb.6:                                # %polly.exiting
        xorl    %eax, %eax              # return 0
        popq    %rbx
        popq    %r14
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end2:
        .size   main, .Lfunc_end2-main
        .cfi_endproc
                                        # -- End function

# ----------------------------------------------------------------------
# Data
# ----------------------------------------------------------------------
        .type   A,@object               # @A
        .comm   A,MAT_BYTES,16
        .type   B,@object               # @B
        .comm   B,MAT_BYTES,16
        .type   .L.str,@object          # @.str
        .section        .rodata.str1.1,"aMS",@progbits,1
.L.str:
        .asciz  "%lf "
        .size   .L.str, 5

        .type   C,@object               # @C
        .comm   C,MAT_BYTES,16

        .ident  "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
        .section        ".note.GNU-stack","",@progbits