#ifdef memset0
# include <bits/stdc++.h>
// Matrix dimension used by the local test harness; id() uses it as the row
// stride and main() as the loop bound.
const int n = 1024;
// Largest absolute element-wise difference main() tolerates between the two
// products before reporting "Wrong Answer".
const double eps = 3 * n * n * DBL_EPSILON;
// Implementation under test (defined after the #endif of this harness).
void matrix_multiply(int, const double*, const double*, double*);
// Straightforward reference implementation, defined below.
void simple_matrix_multiply(int, const double*, const double*, double*);
// Shared pseudo-random generator; the fixed constant is XOR-ed with the
// steady_clock tick count, so the seed differs from run to run.
std::mt19937 rng(20040602 ^ std::chrono::steady_clock::now().time_since_epoch().count());
// Draws a uniformly distributed integer from the closed range [l, r] using
// the shared generator `rng`.
template <typename T>
inline T rand(T l, T r) {
  std::uniform_int_distribution<T> pick(l, r);
  return pick(rng);
}
// Flat row-major index of element (i, j) in a matrix whose rows hold `n`
// entries (the harness-wide dimension).
inline int id(int i, int j) {
  const int row_start = i * n;
  return row_start + j;
}
/**
 * Reference O(n^3) matrix product used to validate the optimized kernel.
 *
 * Computes C = A * B for dense, row-major n-by-n matrices.
 *
 * The row stride is taken from the `n` argument. The previous version indexed
 * through id(), which uses the global dimension, so the argument was only
 * honoured when it happened to equal that global value.
 *
 * @param n  Number of rows/columns; values <= 0 leave C untouched.
 * @param A  Left operand, n*n doubles, row-major.
 * @param B  Right operand, n*n doubles, row-major.
 * @param C  Output, n*n doubles, row-major; must not overlap A or B.
 */
void simple_matrix_multiply(int n, const double* A, const double* B, double* C) {
  for (int i = 0; i < n; i++) {
    const double* a_row = A + static_cast<std::size_t>(i) * n;
    double* c_row = C + static_cast<std::size_t>(i) * n;
    for (int j = 0; j < n; j++) {
      // Accumulate in a local and store once; the terms are added in the
      // same k-order as before, so the rounding is unchanged.
      double sum = 0;
      for (int k = 0; k < n; k++) {
        sum += a_row[k] * B[static_cast<std::size_t>(k) * n + j];
      }
      c_row[j] = sum;
    }
  }
}
/**
 * Local benchmark driver.
 *
 * Fills two n-by-n matrices with random values in [0, 1], multiplies them with
 * both the reference and the optimized implementation, and compares the
 * results element-wise against `eps`. Prints either "Wrong Answer" with the
 * largest deviation, or "Accepted" with the optimized run time in seconds and
 * that time expressed as a percentage of the reference run time.
 *
 * @return Always 0, regardless of the verdict.
 */
int main() {
  const std::size_t cells = static_cast<std::size_t>(n) * n;

  // Heap-backed storage: four n*n double arrays as automatic variables would
  // need 32 MiB of stack for n = 1024, which exceeds common default limits.
  std::vector<double> A(cells);
  std::vector<double> B(cells);
  std::vector<double> C(cells);  // result of the implementation under test
  std::vector<double> D(cells);  // result of the reference implementation

  const double rng_max = static_cast<double>(rng.max());
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < n; j++) {
      A[id(i, j)] = rng() / rng_max;
      B[id(i, j)] = rng() / rng_max;
    }
  }

  const std::clock_t simple_begin = std::clock();
  simple_matrix_multiply(n, A.data(), B.data(), D.data());
  const double simple_ticks = static_cast<double>(std::clock() - simple_begin);

  const std::clock_t fast_begin = std::clock();
  matrix_multiply(n, A.data(), B.data(), C.data());
  const double fast_ticks = static_cast<double>(std::clock() - fast_begin);

  // Largest absolute element-wise difference between the two products.
  double delta = 0;
  for (std::size_t p = 0; p < cells; ++p) {
    const double diff = std::fabs(C[p] - D[p]);
    if (std::isnan(diff)) {
      // std::max and '>' silently ignore NaN; surface it as a failure instead.
      delta = diff;
      break;
    }
    delta = std::max(delta, diff);
  }

  // Written as !(<=) so that a NaN delta is rejected as well.
  if (!(delta <= eps)) {
    printf("Wrong Answer (%.12lf)\n", delta);
    return 0;
  }

  // The second figure is labelled with '%', so scale the ratio by 100.
  printf("Accepted (time=%.12lf, %.2lf%%)\n", fast_ticks / static_cast<double>(CLOCKS_PER_SEC),
         100.0 * fast_ticks / simple_ticks);
  return 0;
}
#endif
// GCC-specific code-generation settings for the functions defined below:
// allow AVX and POPCNT instructions, and request loop unrolling, function
// inlining, and no stack-protector instrumentation.
#pragma GCC target("avx")
#pragma GCC target("popcnt")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("no-stack-protector")
#include <stdint.h>
#include <string.h>

#include <vector>
// Fixed matrix dimension (rows == columns).
#define n 1024
// Flat row-major index of element (i, j) in an n-by-n matrix.
#define idx(i, j) ((i)*n + (j))
/**
 * Dense square matrix product C = A * B on row-major double matrices.
 *
 * B is first copied into a transposed scratch buffer so that every dot
 * product walks two contiguous rows. Each dot product is accumulated in two
 * independent partial sums (even and odd positions) that are added at the end.
 *
 * Changes from the previous version:
 *  - the scratch copy of B lives on the heap instead of being an 8 MiB
 *    automatic array, which could overflow the stack;
 *  - the size argument is honoured instead of assuming 1024, and odd sizes
 *    are handled; for even sizes the summation order is unchanged, so
 *    results for 1024 are bit-for-bit the same;
 *  - the transpose is fused into the copy instead of memcpy + in-place swap.
 *
 * @param _   Number of rows/columns of every matrix; values <= 0 are a no-op.
 * @param A   Left operand, _*_ doubles, row-major.
 * @param _B  Right operand, _*_ doubles, row-major; not modified.
 * @param C   Output, _*_ doubles, row-major; must not overlap A.
 */
void matrix_multiply(int _, const double* A, const double* _B, double* C) {
  if (_ <= 0) {
    return;
  }
  const size_t dim = static_cast<size_t>(_);

  // transposed[c * dim + r] == _B[r * dim + c]
  std::vector<double> transposed(dim * dim);
  for (size_t r = 0; r < dim; ++r) {
    const double* src_row = _B + r * dim;
    for (size_t c = 0; c < dim; ++c) {
      transposed[c * dim + r] = src_row[c];
    }
  }

  // Largest even count not exceeding dim: the part handled two at a time.
  const size_t paired = dim - (dim % 2);

  for (size_t i = 0; i < dim; ++i) {
    const double* a_row = A + i * dim;
    double* c_row = C + i * dim;
    for (size_t j = 0; j < dim; ++j) {
      const double* b_col = transposed.data() + j * dim;
      double even_sum = 0;
      double odd_sum = 0;
      for (size_t k = 0; k < paired; k += 2) {
        even_sum += a_row[k] * b_col[k];
        odd_sum += a_row[k + 1] * b_col[k + 1];
      }
      if (paired != dim) {
        // Odd dimension: the last term has an even position.
        even_sum += a_row[paired] * b_col[paired];
      }
      c_row[j] = even_sum + odd_sum;
    }
  }
}
// Compilation | N/A | N/A | Compile OK | Score: N/A | Show more |
// Testcase #1 | 687.962 ms | 16 MB + 8 KB | Accepted | Score: 100 | Show more |