1*5971e316Smrg for (int c0 = 0; c0 < -(n % 8) + n; c0 += 8) { 2*5971e316Smrg for (int c1 = 0; c1 < -(n % 8) + n; c1 += 8) 3*5971e316Smrg for (int c2 = 0; c2 <= 7; c2 += 1) 4*5971e316Smrg for (int c3 = 0; c3 <= 7; c3 += 1) 5*5971e316Smrg A(c0 + c2, c1 + c3); 6*5971e316Smrg for (int c2 = 0; c2 <= 7; c2 += 1) 7*5971e316Smrg for (int c3 = 0; c3 < n % 8; c3 += 1) 8*5971e316Smrg A(c0 + c2, -(n % 8) + n + c3); 9*5971e316Smrg } 10*5971e316Smrg for (int c1 = 0; c1 < n; c1 += 8) 11*5971e316Smrg for (int c2 = 0; c2 < n % 8; c2 += 1) 12*5971e316Smrg for (int c3 = 0; c3 <= min(7, n - c1 - 1); c3 += 1) 13*5971e316Smrg A(-(n % 8) + n + c2, c1 + c3); 14