#ifdef memset0
# include <bits/stdc++.h>
// Side length of the square matrices used by the local test harness.
const int n = 1024;
// Largest absolute element-wise difference accepted when the two results are compared in main().
const double eps = 3 * n * n * DBL_EPSILON;
// Implementation under test; its definition follows the #endif that closes this harness.
void matrix_multiply(int, const double*, const double*, double*);
// Straightforward triple-loop implementation used as the reference result and timing baseline.
void simple_matrix_multiply(int, const double*, const double*, double*);
// Shared random engine; the seed mixes a fixed constant with the steady clock's current tick count.
std::mt19937 rng(20040602 ^ std::chrono::steady_clock::now().time_since_epoch().count());
// Draws one integer from the closed range [l, r] using the shared `rng` engine.
template <typename T>
inline T rand(T l, T r) {
  std::uniform_int_distribution<T> dist(l, r);
  return dist(rng);
}
// Maps a (row, column) pair to its offset in a row-major array whose rows hold `n` elements.
inline int id(int i, int j) {
  const int row_start = i * n;
  return row_start + j;
}
// Reference matrix product C = A * B using the textbook triple loop.
//
// n : number of rows and columns of every matrix.
// A : left operand, n*n doubles in row-major order.
// B : right operand, n*n doubles in row-major order.
// C : output, n*n doubles in row-major order; every element is overwritten.
//
// Offsets are computed from the `n` argument itself, so the function is
// correct for any dimension and not only for the harness-wide constant.
void simple_matrix_multiply(int n, const double* A, const double* B, double* C) {
  for (int i = 0; i < n; i++) {
    const std::size_t row = static_cast<std::size_t>(i) * n;
    for (int j = 0; j < n; j++) {
      C[row + j] = 0;
      for (int k = 0; k < n; k++) {
        C[row + j] += A[row + k] * B[static_cast<std::size_t>(k) * n + j];
      }
    }
  }
}
int main() {
double A[n * n], B[n * n], C[n * n], D[n * n];
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++) {
A[id(i, j)] = rng() / (double)rng.max();
B[id(i, j)] = rng() / (double)rng.max();
}
uint32_t simple_timer = 0;
simple_timer -= clock();
simple_matrix_multiply(n, A, B, D);
simple_timer += clock();
uint32_t timer = 0;
timer -= clock();
matrix_multiply(n, A, B, C);
timer += clock();
double delta = 0;
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++) {
double t = C[id(i, j)] - D[id(i, j)];
if (t < 0) {
delta = std::max(delta, -t);
} else {
delta = std::max(delta, t);
}
}
if (delta > eps) {
printf("Wrong Answer (%.12lf)\n", delta);
return 0;
}
printf("Accepted (time=%.12lf, %.2lf%%)\n", timer / (double)CLOCKS_PER_SEC, timer / (double)simple_timer);
return 0;
}
#endif
#pragma GCC target("avx")
#pragma GCC target("popcnt")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("no-stack-protector")
#include <stdint.h>
#include <string.h>

#include <vector>
#define n 1024
#define idx(i, j) ((i)*n + (j))
/**
 * Multiplies two square row-major matrices of doubles: C = A * B.
 *
 * @param size Number of rows and columns of every matrix. Values <= 0 leave
 *             C untouched.
 * @param A    Left operand, size*size doubles in row-major order.
 * @param _B   Right operand, size*size doubles in row-major order.
 * @param C    Output, size*size doubles in row-major order. Must not overlap
 *             A, because A is still being read while C is written.
 */
void matrix_multiply(int size, const double* A, const double* _B, double* C) {
  if (size <= 0) return;
  const size_t dim = static_cast<size_t>(size);

  // Transposed copy of _B, so that both operands of every dot product are
  // read contiguously. It is heap-allocated: at 1024 x 1024 it takes 8 MiB,
  // which is too much for an automatic array on a default-sized stack.
  std::vector<double> transposed(dim * dim);
  for (size_t r = 0; r < dim; r++) {
    for (size_t c = 0; c < dim; c++) transposed[c * dim + r] = _B[r * dim + c];
  }
  const double* const bt = transposed.data();

  // Columns below this bound are handled four at a time; the rest one by one.
  const size_t unrolled_end = dim - dim % 4;

  for (size_t i = 0; i < dim; i++) {
    // Row starts come from ordinary pointer arithmetic. OR-ing an element
    // offset into the raw address is only equivalent to this when the offset
    // is in bytes and the base address has all of those bits clear.
    const double* const a_row = A + i * dim;
    for (size_t j = 0; j < dim; j++) {
      const double* const b_row = bt + j * dim;
      // Four independent accumulators shorten the floating-point add chain.
      double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
      size_t k = 0;
      for (; k < unrolled_end; k += 4) {
        s0 += a_row[k] * b_row[k];
        s1 += a_row[k + 1] * b_row[k + 1];
        s2 += a_row[k + 2] * b_row[k + 2];
        s3 += a_row[k + 3] * b_row[k + 3];
      }
      // Leftover columns when the dimension is not a multiple of four.
      double tail = 0;
      for (; k < dim; k++) tail += a_row[k] * b_row[k];
      C[i * dim + j] = s0 + s1 + s2 + s3 + tail;
    }
  }
}
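// Online-judge result table pasted after the source (kept as comments so the file still compiles):
// Compilation | N/A | N/A | Compile OK | Score: N/A | Show more |
// Testcase #1 | 327.56 ms | 16 MB + 8 KB | Wrong Answer | Score: 0 | Show more |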