提交记录 13473


用户 题目 状态 得分 用时 内存 语言 代码长度
memset0 mmmd1k. 测测你的双精度矩阵乘法-1k Accepted 100 575.528 ms 16392 KB C 2.82 KB
提交时间 评测时间
2020-08-05 11:58:12 2020-08-05 11:58:15
#ifdef memset0
#	include <bits/stdc++.h>

// Side length of the square matrices exercised by the local test harness.
const int n = 1024;
// Largest absolute per-element difference between the two results that main() still accepts.
const double eps = 3 * n * n * DBL_EPSILON;
// Implementation under test; its definition follows the harness in this file.
void matrix_multiply(int, const double*, const double*, double*);
// Plain triple-loop reference, used both to validate the result and as the timing baseline.
void simple_matrix_multiply(int, const double*, const double*, double*);

// Mersenne Twister engine seeded with a fixed constant XORed with the steady-clock
// tick count, so the seed varies from run to run.
std::mt19937 rng(20040602 /*❤️*/ ^ std::chrono::steady_clock::now().time_since_epoch().count());
// Draws one integer of type T from the closed range [l, r] using the global engine `rng`.
template<class T>
inline T rand(T l, T r) {
	std::uniform_int_distribution<T> dist(l, r);
	return dist(rng);
}

// Flat offset of element (i, j) in a row-major matrix whose rows hold `n` entries.
inline int id(int i, int j) {
	const int row_start = i * n;
	return row_start + j;
}

// Reference matrix product: C = A * B for row-major n x n matrices.
//
// n        side length of all three matrices (n <= 0 leaves C untouched)
// A, B     input matrices, each holding n * n doubles
// C        output matrix with room for n * n doubles; every element is overwritten
//
// Offsets are computed from the `n` parameter rather than through id(): id()
// reads the file-level constant, which this parameter shadows, so any call
// with a size other than that constant would have addressed the wrong cells.
void simple_matrix_multiply(int n, const double* A, const double* B, double* C) {
	for (int i = 0; i < n; i++) {
		for (int j = 0; j < n; j++) {
			double& cell = C[i * n + j];
			cell = 0;
			// Dot product of row i of A with column j of B, accumulated in k order.
			for (int k = 0; k < n; k++) cell += A[i * n + k] * B[k * n + j];
		}
	}
}

// Local validation and timing driver (only compiled when `memset0` is defined).
//
// Fills two n x n matrices with random values in [0, 1], multiplies them with
// both the reference routine and matrix_multiply(), and prints either
// "Wrong Answer (<max abs error>)" or "Accepted (time=<seconds>, <percent>%)",
// where <percent> is the optimized CPU time as a percentage of the reference
// CPU time. Always returns 0.
int main() {
	// Four n x n double matrices occupy 32 MiB in total; as automatic arrays
	// they overflow a typical default stack, so they are heap-allocated.
	const std::size_t cells = static_cast<std::size_t>(n) * n;
	std::vector<double> A(cells), B(cells), C(cells), D(cells);
	for (int i = 0; i < n; i++)
		for (int j = 0; j < n; j++) {
			A[id(i, j)] = rng() / static_cast<double>(rng.max());
			B[id(i, j)] = rng() / static_cast<double>(rng.max());
		}

	// CPU time of the reference implementation (result goes to D).
	const std::clock_t simple_begin = std::clock();
	simple_matrix_multiply(n, A.data(), B.data(), D.data());
	const std::clock_t simple_ticks = std::clock() - simple_begin;

	// CPU time of the implementation under test (result goes to C).
	const std::clock_t fast_begin = std::clock();
	matrix_multiply(n, A.data(), B.data(), C.data());
	const std::clock_t fast_ticks = std::clock() - fast_begin;

	// Largest absolute element-wise difference. A NaN difference is made
	// sticky: ordinary comparisons against NaN are false, so without the
	// explicit check a NaN result would never raise delta and would pass.
	double delta = 0;
	for (std::size_t c = 0; c < cells; c++) {
		const double diff = std::fabs(C[c] - D[c]);
		if (diff > delta || std::isnan(diff)) delta = diff;
	}
	// Written as a negated <= so that a NaN delta is rejected as well.
	if (!(delta <= eps)) {
		printf("Wrong Answer (%.12lf)\n", delta);
		return 0;
	}

	const double fast_seconds = fast_ticks / static_cast<double>(CLOCKS_PER_SEC);
	// Scaled by 100 because the format string labels this value with '%'.
	const double percent_of_simple = 100.0 * fast_ticks / static_cast<double>(simple_ticks);
	printf("Accepted (time=%.12lf, %.2lf%%)\n", fast_seconds, percent_of_simple);
	return 0;
}
#endif

/* GCC-specific: request code generation for the AVX and POPCNT instruction-set
 * extensions in the functions that follow. */
#pragma GCC target("avx")
#pragma GCC target("popcnt")

/* GCC-specific optimization options for the functions that follow: loop
 * unrolling, function inlining, and no stack-protector instrumentation. */
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("no-stack-protector")

#include <stdint.h>
#include <string.h>
/* Fixed matrix side length used by matrix_multiply below. Being a macro, it
 * also textually replaces every later use of the identifier `n`. */
#define n 1024
/* Row-major offset of element (i, j) in an n x n matrix. */
#define idx(i, j) ((i)*n + (j))
/*
 * Computes C = A * _B for row-major n x n matrices of doubles, where n is the
 * file-level macro (1024).
 *
 *   _   size argument supplied by the caller; not consulted, the dimension is
 *       fixed by the macro n
 *   A   left operand, n * n doubles
 *   _B  right operand, n * n doubles (not modified)
 *   C   output, n * n doubles; every element is overwritten
 *
 * The right operand is first copied and transposed so that each output element
 * becomes a dot product of two contiguous rows. The inner loop consumes eight
 * elements per iteration and therefore relies on n being a multiple of 8.
 *
 * The scratch copy has static storage duration, so this function is neither
 * reentrant nor safe to call from several threads at once.
 */
void matrix_multiply(int _, const double* A, const double* _B, double* C) {
	/* 8 MiB scratch buffer: kept out of automatic storage, where it would by
	 * itself fill a typical default stack. */
	static double B[n * n];
	(void)_;
	memcpy(B, _B, sizeof(B));

	/* In-place transpose: swap every element above the diagonal with its
	 * mirror image. Plain indexing keeps all addresses inside B. */
	for (uint32_t i = 0; i + 1 < n; i++) {
		for (uint32_t j = i + 1; j < n; j++) {
			const double t = B[idx(i, j)];
			B[idx(i, j)] = B[idx(j, i)];
			B[idx(j, i)] = t;
		}
	}

	for (uint32_t i = 0; i < n; i++) {
		const double* a = A + idx(i, 0);
		for (uint32_t j = 0; j < n; j++) {
			/* Row j of the transposed copy is column j of the original operand. */
			const double* b = B + idx(j, 0);
			/* Eight independent partial sums (plain scalars rather than a
			 * `register` array, which may not be subscripted portably). */
			double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
			double s4 = 0, s5 = 0, s6 = 0, s7 = 0;
			for (uint32_t k = 0; k < n; k += 8) {
				s0 += a[k] * b[k];
				s1 += a[k + 1] * b[k + 1];
				s2 += a[k + 2] * b[k + 2];
				s3 += a[k + 3] * b[k + 3];
				s4 += a[k + 4] * b[k + 4];
				s5 += a[k + 5] * b[k + 5];
				s6 += a[k + 6] * b[k + 6];
				s7 += a[k + 7] * b[k + 7];
			}
			/* Combined left to right, matching the original summation order. */
			C[idx(i, j)] = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
		}
	}
}

Compilation N/A N/A Compile OK Score: N/A

Testcase #1 575.528 ms 16 MB + 8 KB Accepted Score: 100


Judge Duck Online | 评测鸭在线
Server Time: 2024-12-05 10:38:07 | Loaded in 0 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠