#include <cstdlib>
#define idx(i, j) ((i) * n + (j))
void matrix_multiply_small(int n, const double *A, const double *B, double *C) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
C[idx(i, j)] = 0;
}
}
for (int i = 0; i < n; i++) {
for (int k = 0; k < n; k++) {
for (int j = 0; j < n; j++) {
C[idx(i, j)] += A[idx(i, k)] * B[idx(k, j)];
}
}
}
}
void matrix_multiply(int n, const double *A, const double *B, double *C) {
if (n <= 64) {
matrix_multiply_small(n, A, B, C);
return;
}
double *X[14], *M[7];
int m = n >> 1;
for (int index = 0; index < 14; index++)
X[index] = (double*) malloc(sizeof(double) * m * m);
for (int index = 0; index < 7; index++)
M[index] = (double*) malloc(sizeof(double) * m * m);
double **Y = X + 10;
for (int i = 0, id = 0; i < m; i++)
for (int j = 0; j < m; j++) {
X[0][id] = B[idx(i, j + m)] - B[idx(i + m, j + m)];
X[1][id] = A[idx(i, j)] + A[idx(i, j + m)];
X[2][id] = A[idx(i + m, j)] + A[idx(i + m, j + m)];
X[3][id] = B[idx(i + m, j)] - B[idx(i, j)];
X[4][id] = A[idx(i, j)] + A[idx(i + m, j + m)];
X[5][id] = B[idx(i, j)] + B[idx(i + m, j + m)];
X[6][id] = A[idx(i, j + m)] - A[idx(i + m, j + m)];
X[7][id] = B[idx(i + m, j)] + B[idx(i + m, j + m)];
X[8][id] = A[idx(i, j)] - A[idx(i + m, j)];
X[9][id] = B[idx(i, j)] + B[idx(i, j + m)];
Y[0][id] = A[idx(i, j)];
Y[1][id] = A[idx(i + m, j + m)];
Y[2][id] = B[idx(i, j)];
Y[3][id] = B[idx(i + m, j + m)];
++id;
}
matrix_multiply(m, Y[0], X[0], M[0]);
matrix_multiply(m, X[1], Y[3], M[1]);
matrix_multiply(m, X[2], Y[2], M[2]);
matrix_multiply(m, Y[1], X[3], M[3]);
matrix_multiply(m, X[4], X[5], M[4]);
matrix_multiply(m, X[6], X[7], M[5]);
matrix_multiply(m, X[8], X[9], M[6]);
for (int i = 0, id = 0; i < m; i++)
for (int j = 0; j < m; j++) {
C[idx(i, j)] = M[4][id] + M[3][id] - M[1][id] + M[5][id];
C[idx(i, j + m)] = M[0][id] + M[1][id];
C[idx(i + m, j)] = M[2][id] + M[3][id];
C[idx(i + m, j + m)] = M[4][id] + M[0][id] - M[2][id] - M[6][id];
++id;
}
for (int i = 0; i < 14; i++)
free(X[i]);
for (int i = 0; i < 7; i++)
free(M[i]);
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 426.132 ms | 63 MB + 812 KB | Accepted | Score: 100 | 显示更多 |