#include <cstdlib>
#define idx(i, j) ((i) * n + (j))
void matrix_multiply_small(int n, const double *A, const double *B, double *C) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
C[idx(i, j)] = 0;
}
}
for (int i = 0; i < n; i++) {
for (int k = 0; k < n; k++) {
for (int j = 0; j < n; j++) {
C[idx(i, j)] += A[idx(i, k)] * B[idx(k, j)];
}
}
}
}
void construct_sub_matrix(int n, const double *A, double **X) {
int m = n >> 1;
for (int i = 0, id = 0; i < m; i++)
for (int j = 0; j < m; j++) {
X[0][id] = A[idx(i, j)];
X[1][id] = A[idx(i, j + m)];
X[2][id] = A[idx(i + m, j)];
X[3][id] = A[idx(i + m, j + m)];
++id;
}
}
void matrix_multiply(int n, const double *A, const double *B, double *C) {
if (n <= 64) {
matrix_multiply_small(n, A, B, C);
return;
}
double *X[4], *Y[4], *S[4], *T[4], *M[7];
int m = n >> 1;
for (int index = 0; index < 4; index++) {
X[index] = (double*) malloc(sizeof(double) * m * m);
Y[index] = (double*) malloc(sizeof(double) * m * m);
S[index] = (double*) malloc(sizeof(double) * m * m);
T[index] = (double*) malloc(sizeof(double) * m * m);
}
for (int index = 0; index < 7; index++)
M[index] = (double*) malloc(sizeof(double) * m * m);
construct_sub_matrix(n, A, X);
construct_sub_matrix(n, B, Y);
for (int i = 0, id = 0; i < m; i++)
for (int j = 0; j < m; j++) {
S[0][id] = X[2][id] + X[3][id];
S[1][id] = S[0][id] - X[0][id];
S[2][id] = X[0][id] - X[2][id];
S[3][id] = X[1][id] - S[1][id];
T[0][id] = Y[1][id] - Y[0][id];
T[1][id] = Y[3][id] - T[0][id];
T[2][id] = Y[3][id] - Y[1][id];
T[3][id] = T[1][id] - Y[2][id];
++id;
}
matrix_multiply(m, X[0], Y[0], M[0]);
matrix_multiply(m, X[1], Y[2], M[1]);
matrix_multiply(m, S[3], Y[3], M[2]);
matrix_multiply(m, X[3], T[3], M[3]);
matrix_multiply(m, S[0], T[0], M[4]);
matrix_multiply(m, S[1], T[1], M[5]);
matrix_multiply(m, S[2], T[2], M[6]);
for (int i = 0, id = 0; i < m; i++)
for (int j = 0; j < m; j++) {
double U0 = M[0][id] + M[1][id];
double U1 = M[0][id] + M[5][id];
double U2 = U1 + M[6][id];
double U3 = U1 + M[4][id];
double U4 = U3 + M[2][id];
double U5 = U2 - M[3][id];
double U6 = U2 + M[4][id];
C[idx(i, j)] = U0;
C[idx(i, j + m)] = U4;
C[idx(i + m, j)] = U5;
C[idx(i + m, j + m)] = U6;
++id;
}
for (int i = 0; i < 4; i++)
free(X[i]), free(Y[i]), free(S[i]), free(T[i]);
for (int i = 0; i < 7; i++)
free(M[i]);
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 424.579 ms | 69 MB + 108 KB | Accepted | Score: 100 | 显示更多 |