/*
 * Compiler-specific tuning directives (GCC pragmas).
 *
 * The first pragma passes a list of optimization option names to
 * `#pragma GCC optimize`. NOTE(review): "-fopt-info-vec-all" looks like a
 * diagnostic-output option rather than an optimization; confirm it is
 * accepted in this position.
 */
#pragma GCC optimize("Ofast", "unroll-loops", "omit-frame-pointer", "inline", \
"-ftree-vectorize", "-fopt-info-vec-all")
/*
 * NOTE(review): `#pragma GCC option` does not appear to be one of GCC's
 * documented pragmas (optimize / target / push_options / pop_options /
 * reset_options); verify whether this line has any effect.
 */
#pragma GCC option("arch=native", "tune=native", "no-zero-upper")
/* Names the x86 instruction-set extensions passed to `#pragma GCC target`. */
#pragma GCC target("sse,sse2,sse3,sse4.1,sse4.2,popcnt,abm,mmx,avx,avx2")
/*
 * Flat offset of element (row, col) in a row-major matrix.
 * The row stride is not a macro argument: the expansion uses whatever
 * identifier `n` is in scope at the point of use.
 */
#define idx(row, col) ((row) * n + (col))
/**
 * Accumulate a square matrix product into C:  C += A * B.
 *
 * A, B and C are n x n matrices stored row-major in flat arrays of
 * n * n doubles. C is added to, not overwritten, so the caller must
 * zero it first to obtain the plain product.
 *
 * @param n  Matrix dimension; any value is accepted, and n <= 0 is a no-op.
 * @param A  Left operand (read only).
 * @param B  Right operand (read only).
 * @param C  Accumulator, updated in place.
 */
void matrix_multiply(int n, const double *A, const double *B, double *C) {
    /*
     * k-i-j loop order: the innermost loop walks one row of B and one row
     * of C contiguously while A[i][k] stays fixed.
     *
     * Rows are addressed by stepping pointers by n instead of computing
     * i * n, so no index product can overflow `int`.
     */
    const double *b_row = B;
    for (int k = 0; k < n; ++k) {
        const double *a_row = A;
        double *c_row = C;
        for (int i = 0; i < n; ++i) {
            const double a_ik = a_row[k];
            /*
             * Plain stride-1 loop: correct for every n, including values
             * that are not a multiple of an unroll factor. Unrolling and
             * vectorization are left to the compiler.
             */
            for (int j = 0; j < n; ++j) {
                c_row[j] += a_ik * b_row[j];
            }
            a_row += n;
            c_row += n;
        }
        b_row += n;
    }
}
/*
 * Online-judge result recorded for this submission:
 *   Compilation | N/A        | N/A         | Compile OK | Score: N/A
 *   Testcase #1 | 742.787 ms | 8 MB + 8 KB | Accepted   | Score: 100
 */