提交记录 13453


用户 题目 状态 得分 用时 内存 语言 代码长度
memset0 mmmd1k. 测测你的双精度矩阵乘法-1k Compile Error 0 0 ns 0 KB C 2.56 KB
提交时间 评测时间
2020-08-05 00:04:43 2020-08-05 00:04:44
#ifdef memset0
#	include <bits/stdc++.h>

// Side length of the square matrices used by the test driver.
constexpr int n = 1024;
// Largest element-wise difference main() accepts between the two results.
constexpr double eps = 3 * n * n * DBL_EPSILON;

// Implementation under test; main() calls it as (n, A, B, C).
void matrix_multiply(int size, const double* lhs, const double* rhs, double* out);
// Reference implementation that main() uses as the baseline.
void simple_matrix_multiply(int size, const double* lhs, const double* rhs, double* out);

// Random engine seeded from a fixed constant XOR-ed with the steady-clock
// tick count at start-up.
std::mt19937 rng(std::chrono::steady_clock::now().time_since_epoch().count() ^ 20040602);

// Draws an integer uniformly from the closed range [l, r] using rng.
template<class T>
inline T rand(T l, T r) {
	std::uniform_int_distribution<T> dist(l, r);
	return dist(rng);
}

// Maps (row i, column j) to the offset inside a row-major n x n array.
inline int id(int i, int j) {
	const int row_start = i * n;
	return row_start + j;
}

/**
 * Reference matrix product C = A * B, computed with the plain triple loop.
 *
 * @param n  Side length of the square matrices; values <= 0 leave C untouched.
 * @param A  Left operand: n*n doubles in row-major order.
 * @param B  Right operand: n*n doubles in row-major order.
 * @param C  Output: n*n doubles in row-major order, every element overwritten.
 *           Must not overlap A or B.
 */
void simple_matrix_multiply(int n, const double* A, const double* B, double* C) {
	// Index with the `n` argument rather than the file-level id() helper: id()
	// always uses a row stride of 1024, which is only right when n == 1024.
	const std::size_t dim = n > 0 ? static_cast<std::size_t>(n) : 0;
	for (std::size_t i = 0; i < dim; i++) {
		const double* a_row = A + i * dim;
		for (std::size_t j = 0; j < dim; j++) {
			// Accumulate in k order, starting from zero, then store once.
			double sum = 0;
			for (std::size_t k = 0; k < dim; k++) sum += a_row[k] * B[k * dim + j];
			C[i * dim + j] = sum;
		}
	}
}

/**
 * Test driver: fills two n x n matrices with random values in [0, 1], multiplies
 * them with both simple_matrix_multiply() and matrix_multiply(), and compares
 * the results element by element.
 *
 * Prints "Wrong Answer (<max difference>)" when the largest absolute difference
 * exceeds eps (or is NaN); otherwise prints "Accepted" with the CPU time of
 * matrix_multiply() and that time as a percentage of the reference time.
 *
 * @return 0 in every case.
 */
int main() {
	// Each matrix is n*n doubles = 8 MiB, 32 MiB for all four. As automatic
	// arrays that is likely to exceed a default-sized stack, so use the heap.
	const std::size_t cells = static_cast<std::size_t>(n) * n;
	std::vector<double> A(cells), B(cells), C(cells), D(cells);
	for (int i = 0; i < n; i++)
		for (int j = 0; j < n; j++) {
			A[id(i, j)] = rng() / static_cast<double>(rng.max());
			B[id(i, j)] = rng() / static_cast<double>(rng.max());
		}

	// Reference run, timed in clock() ticks.
	const clock_t simple_start = clock();
	simple_matrix_multiply(n, A.data(), B.data(), D.data());
	const clock_t simple_ticks = clock() - simple_start;

	// Run under test.
	const clock_t start = clock();
	matrix_multiply(n, A.data(), B.data(), C.data());
	const clock_t ticks = clock() - start;

	// Largest absolute element-wise difference. A NaN difference must fail the
	// check: std::max() would silently discard it, so stop and keep the NaN.
	double delta = 0;
	for (std::size_t p = 0; p < cells; p++) {
		const double diff = std::fabs(C[p] - D[p]);
		if (std::isnan(diff)) {
			delta = diff;
			break;
		}
		delta = std::max(delta, diff);
	}
	// Written as !(delta <= eps) so that a NaN delta is rejected as well.
	if (!(delta <= eps)) {
		printf("Wrong Answer (%.12lf)\n", delta);
		return 0;
	}

	// The second number is followed by '%', so scale the ratio by 100.
	printf("Accepted (time=%.12lf, %.2lf%%)\n", ticks / static_cast<double>(CLOCKS_PER_SEC),
	       100.0 * ticks / static_cast<double>(simple_ticks));
	return 0;
}
#endif

#pragma GCC target("avx")
#pragma GCC target("popcnt")

#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("no-stack-protector")

#include <stdint.h>
#include <string.h>
/* Largest matrix side length the transposed scratch buffer can hold. */
#define n 1024
/* Row-major offset of (i, j) in a matrix whose side length is exactly n. */
#define idx(i, j) ((i)*n + (j))
/**
 * Computes C = A * B for square matrices of doubles stored in row-major order.
 *
 * _B is first copied into a transposed scratch buffer so that every dot
 * product walks both operands contiguously; each dot product is accumulated
 * in four independent partial sums that are added at the end.
 *
 * The scratch buffer is static, so the function is not reentrant and must not
 * be called from several threads at once.
 *
 * @param _   Side length of the matrices; values <= 0 leave C untouched.
 *            Sizes above n (1024) are still computed, by a plain triple loop
 *            without the transposed copy.
 * @param A   Left operand: _*_ doubles.
 * @param _B  Right operand: _*_ doubles.
 * @param C   Output: _*_ doubles, every element overwritten. Must not overlap
 *            A or _B.
 */
void matrix_multiply(int _, const double* A, const double* _B, double* C) {
	/* n*n doubles is 8 MiB, too large to rely on as an automatic array, hence
	   static storage. */
	static double Bt[n * n];
	const size_t dim = _ > 0 ? (size_t)_ : 0;

	if (dim > (size_t)n) {
		/* The transpose does not fit into Bt: read _B directly. */
		for (size_t i = 0; i < dim; i++) {
			for (size_t j = 0; j < dim; j++) {
				double s = 0;
				for (size_t k = 0; k < dim; k++) s += A[i * dim + k] * _B[k * dim + j];
				C[i * dim + j] = s;
			}
		}
		return;
	}

	/* Bt(j, k) = _B(k, j) */
	for (size_t k = 0; k < dim; k++) {
		for (size_t j = 0; j < dim; j++) Bt[j * dim + k] = _B[k * dim + j];
	}

	/* Number of leading elements that the 4-way unrolled loop can process. */
	const size_t unrolled = dim - dim % 4;
	for (size_t i = 0; i < dim; i++) {
		/* Rows are located by ordinary pointer arithmetic; no assumption is
		   made about the alignment of A or Bt. */
		const double* a = A + i * dim;
		for (size_t j = 0; j < dim; j++) {
			const double* b = Bt + j * dim;
			double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
			size_t k = 0;
			for (; k < unrolled; k += 4) {
				s0 += a[k] * b[k];
				s1 += a[k + 1] * b[k + 1];
				s2 += a[k + 2] * b[k + 2];
				s3 += a[k + 3] * b[k + 3];
			}
			/* Leftover elements when dim is not a multiple of 4. */
			for (; k < dim; k++) s0 += a[k] * b[k];
			C[i * dim + j] = s0 + s1 + s2 + s3;
		}
	}
}

Compilation | N/A | N/A | Compile Error | Score: N/A


Judge Duck Online | 评测鸭在线
Server Time: 2024-04-20 02:53:44 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用