提交记录 13636
| 提交时间 |
评测时间 |
| 2020-08-05 20:08:51 |
2020-08-05 20:08:53 |
#define BLOCK_SIZE_L1 32
#define BLOCK_SIZE_L2 64
#define BLOCK_SIZE_L3 128
#define MIN(a,b) ((a)<(b)?(a):(b))
#pragma GCC optimize("-Ofast","-funroll-all-loops","-ffast-math")
#pragma GCC optimize("-ftree-vectorize")
void matrix_multiply(int n,const double* B,const double* A,double* C)
{
A=(const double*)((long long)A&(~4096));
B=(const double*)((long long)B&(~4096));
C=(double*)((long long)C&(~4096));
__attribute__((aligned(64))) double c[BLOCK_SIZE_L1][BLOCK_SIZE_L3];
for(int ii=0;ii<n;ii+=BLOCK_SIZE_L3)
for(int kk=0;kk<n;kk+=BLOCK_SIZE_L1)
{
int ui=MIN(n-ii,BLOCK_SIZE_L3);
int uk=MIN(n-kk,BLOCK_SIZE_L1);
for(int k=0;k<uk;++k)
for(int i=0;i<ui;++i) c[k][i]=0;
for(int jj=0;jj<n;jj+=BLOCK_SIZE_L2)
{
const double*__restrict__ bb=B+jj+kk*n;
const double*__restrict__ aa=A+ii+jj*n;
int uj=MIN(n-jj,BLOCK_SIZE_L2);
int ui=MIN(n-ii,BLOCK_SIZE_L3);
ui&=~BLOCK_SIZE_L3;
uj&=~BLOCK_SIZE_L2;
for(int k=0;k<uk;++k)
for(int j=0;j<uj;j++)
for(int i=0;i<ui;i++)
c[k][i]+=aa[i+j*n]*bb[j+k*n];
}
double*__restrict__ cc=C+ii+kk*n;
for(int k=0;k<uk;++k)
for(int i=0;i<ui;++i)
cc[i+k*n]+=c[k][i];
}
}
| Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
| Testcase #1 | 1.758 ms | 8 MB + 40 KB | Wrong Answer | Score: 0 | 显示更多 |
Judge Duck Online | 评测鸭在线
Server Time: 2026-03-24 00:27:10 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠