提交记录 13577


用户 题目 状态 得分 用时 内存 语言 代码长度
wys mmmd1k. 测测你的双精度矩阵乘法-1k Accepted 100 140.232 ms 8200 KB C++ 1.74 KB
提交时间 评测时间
2020-08-05 16:12:12 2020-08-05 16:12:15
// 32 KiB

#pragma GCC target("avx2,fma")

#include <string.h>
#include <x86intrin.h>

// dst += src * k
static inline void vector_mul_add(int n, double *dst, const double *src, double k) {
	__m256d K = _mm256_set1_pd(k);
	
	#define DST(i) (* (__m256d *) (dst + (i)))
	#define SRC(i) (* (__m256d *) (src + (i)))
	
	// assume 4096-aligned
	for (int i = 0; i < n; i += 32) {
		DST(i + 4 * 0) = _mm256_fmadd_pd(SRC(i + 4 * 0), K, DST(i + 4 * 0));
		DST(i + 4 * 1) = _mm256_fmadd_pd(SRC(i + 4 * 1), K, DST(i + 4 * 1));
		DST(i + 4 * 2) = _mm256_fmadd_pd(SRC(i + 4 * 2), K, DST(i + 4 * 2));
		DST(i + 4 * 3) = _mm256_fmadd_pd(SRC(i + 4 * 3), K, DST(i + 4 * 3));
		DST(i + 4 * 4) = _mm256_fmadd_pd(SRC(i + 4 * 4), K, DST(i + 4 * 4));
		DST(i + 4 * 5) = _mm256_fmadd_pd(SRC(i + 4 * 5), K, DST(i + 4 * 5));
		DST(i + 4 * 6) = _mm256_fmadd_pd(SRC(i + 4 * 6), K, DST(i + 4 * 6));
		DST(i + 4 * 7) = _mm256_fmadd_pd(SRC(i + 4 * 7), K, DST(i + 4 * 7));
	}
}

void matrix_multiply(int n, const double *A, const double *B, double *C) {
	const int i_step = 16;
	const int k_step = 16;
	const int j_step = 256;
	
	memset(C, 0, n * n * sizeof(double));
	
	for (int i_start = 0; i_start < n; i_start += i_step) {
		int i_end = i_start + i_step;
		
		for (int k_start = 0; k_start < n; k_start += k_step) {
			int k_end = k_start + k_step <= n ? k_start + k_step : n;
			
			for (int j_start = 0; j_start < n; j_start += j_step) {
				
				for (int i = i_start; i < i_end; i++) {
					const double *ai = A + i * n;
					double *ci = C + i * n;
					
					// bk[] footprint: 8 * 16 * 256 = 32 KiB
					for (int k = k_start; k < k_end; k++) {
						const double *bk = B + k * n;
						const double aik = ai[k];
						
						vector_mul_add(j_step, ci + j_start, bk + j_start, aik);
					}
				}
			}
		}
	}
}

CompilationN/AN/ACompile OKScore: N/A

Testcase #1140.232 ms8 MB + 8 KBAcceptedScore: 100


Judge Duck Online | 评测鸭在线
Server Time: 2026-03-24 02:45:45 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠