#pragma GCC optimize("Ofast")
#pragma GCC target("avx2,fma")
#include <stdint.h>
#include <string.h>
/* Side length of the square, row-major matrices this kernel is hard-wired for. */
enum { MATRIX_DIM = 1024 };

/* log2 of the tile edge at which the recursion stops and multiplies directly. */
enum { LEAF_LOG2 = 5 };

/* Flat offset of element (row, col) in a row-major matrix MATRIX_DIM wide. */
static inline int idx(int row, int col) {
    return row * MATRIX_DIM + col;
}

/*
 * Accumulates one m x m x m tile of the product into C:
 *   C[x+i][z+k] += A[x+i][y+j] * B[y+j][z+k]   for 0 <= i, j, k < m.
 * k is the innermost loop so that C and B are walked along contiguous rows.
 */
static inline void multiply_tile(int m, int x, int y, int z, const double* A, const double* B, double* C) {
    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < m; ++j) {
            for (int k = 0; k < m; ++k) {
                C[idx(x + i, z + k)] += A[idx(x + i, y + j)] * B[idx(y + j, z + k)];
            }
        }
    }
}

/*
 * Recursively accumulates the block product over the cube of (i, j, k) index
 * triples with edge 2^s whose lowest-index corner is (x, y, z), where i is a
 * row of A and C, j a column of A / row of B, and k a column of B and C.
 *
 * s             log2 of the cube edge; must be non-negative. Cubes with
 *               s <= LEAF_LOG2 are multiplied directly.
 * x, y, z       lowest-index corner of the cube.
 * (dx,  dy,  dz )
 * (dx2, dy2, dz2)
 * (dx3, dy3, dz3)
 *               three traversal directions. They are expected to be signed
 *               unit axis vectors: the only caller passes (1,0,0), (0,1,0),
 *               (0,0,1) and the recursion only permutes and negates them.
 * A, B          input matrices, row-major, MATRIX_DIM x MATRIX_DIM.
 * C             output matrix, accumulated into (not cleared here).
 *
 * The eight half-size octants are visited in the order
 *   0, d1, d1+d2, d2, d2+d3, d1+d2+d3, d1+d3, d3
 * so consecutive octants differ by a step along a single direction; each
 * child receives a permuted / negated set of directions. Presumably this
 * space-filling-curve style ordering is chosen for cache locality.
 */
static void gao(int s, int x, int y, int z, int dx, int dy, int dz, int dx2, int dy2, int dz2, int dx3, int dy3, int dz3, const double* A, const double* B, double* C) {
    if (s == LEAF_LOG2) {
        /* Hot path: the edge is a compile-time constant here. */
        multiply_tile(1 << LEAF_LOG2, x, y, z, A, B, C);
        return;
    }
    if (s < LEAF_LOG2) {
        /* Smaller cubes would never hit the base case; multiply them directly. */
        multiply_tile(1 << s, x, y, z, A, B, C);
        return;
    }

    --s;
    const int half = 1 << s;

    /*
     * Per-direction steps of one half edge. Multiplication is used instead of
     * `d << s` because direction components can be -1 and left-shifting a
     * negative value is undefined behaviour in C.
     */
    const int ax = dx * half, ay = dy * half, az = dz * half;
    const int bx = dx2 * half, by = dy2 * half, bz = dz2 * half;
    const int cx = dx3 * half, cy = dy3 * half, cz = dz3 * half;

    /*
     * Along an axis that is traversed in the negative direction the walk
     * starts in the upper half, so move the base corner up by one half edge;
     * adding the (negative) step later brings it back to the lower half.
     */
    if (dx < 0) x -= ax;
    if (dy < 0) y -= ay;
    if (dz < 0) z -= az;
    if (dx2 < 0) x -= bx;
    if (dy2 < 0) y -= by;
    if (dz2 < 0) z -= bz;
    if (dx3 < 0) x -= cx;
    if (dy3 < 0) y -= cy;
    if (dz3 < 0) z -= cz;

    /* octant 0: directions (d2, d3, d1) */
    gao(s, x, y, z, dx2, dy2, dz2, dx3, dy3, dz3, dx, dy, dz, A, B, C);
    /* octants d1 and d1+d2: directions (d3, d1, d2) */
    gao(s, x + ax, y + ay, z + az, dx3, dy3, dz3, dx, dy, dz, dx2, dy2, dz2, A, B, C);
    gao(s, x + ax + bx, y + ay + by, z + az + bz, dx3, dy3, dz3, dx, dy, dz, dx2, dy2, dz2, A, B, C);
    /* octants d2 and d2+d3: directions (-d1, -d2, d3) */
    gao(s, x + bx, y + by, z + bz, -dx, -dy, -dz, -dx2, -dy2, -dz2, dx3, dy3, dz3, A, B, C);
    gao(s, x + bx + cx, y + by + cy, z + bz + cz, -dx, -dy, -dz, -dx2, -dy2, -dz2, dx3, dy3, dz3, A, B, C);
    /* octants d1+d2+d3 and d1+d3: directions (-d3, d1, -d2) */
    gao(s, x + ax + bx + cx, y + ay + by + cy, z + az + bz + cz, -dx3, -dy3, -dz3, dx, dy, dz, -dx2, -dy2, -dz2, A, B, C);
    gao(s, x + ax + cx, y + ay + cy, z + az + cz, -dx3, -dy3, -dz3, dx, dy, dz, -dx2, -dy2, -dz2, A, B, C);
    /* octant d3: directions (d2, -d3, -d1) */
    gao(s, x + cx, y + cy, z + cz, dx2, dy2, dz2, -dx3, -dy3, -dz3, -dx, -dy, -dz, A, B, C);
}
/*
 * Zeroes C and then accumulates the product A * B into it.
 *
 * All three matrices are treated as row-major 1024 x 1024 arrays of double;
 * the kernel is hard-wired to that size.
 *
 * ignored   not consulted by this implementation.
 *           NOTE(review): presumably the matrix dimension supplied by the
 *           caller - confirm against the caller; any value other than 1024
 *           would still be processed as 1024 x 1024.
 * A, B      input matrices (read only).
 * C         output matrix; overwritten.
 */
void matrix_multiply(int ignored, const double* A, const double* B, double* C) {
    /* The parameter is named because omitting it is not valid C before C23. */
    (void)ignored;
    memset(C, 0, (size_t)1024 * 1024 * sizeof *C);
    /* 2^10 = 1024: cover the whole index cube, starting along the +i, +j, +k axes. */
    gao(10, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, A, B, C);
}
/*
 * Online-judge result recorded for this submission:
 *
 * | Compilation | N/A        | N/A         | Compile OK | Score: N/A |
 * | Testcase #1 | 338.004 ms | 8 MB + 8 KB | Accepted   | Score: 100 |
 */