#pragma GCC target("sse2")
#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
#include<algorithm>
#include<cstdio>
#include<cmath>
#include<immintrin.h>
#define get(a,i)((double*)&a)[i]
typedef unsigned u32;
struct ano{
char a[1<<20],b[1<<21];
char*s,*t;
ano():s(a),t(b){
fread(a,1,sizeof a,stdin);
}
~ano(){fwrite(b,1,t-b,stdout);}
operator u32(){
u32 x=0;
while(*s<48)++s;
while(*s>32)
x=x*10+*s++-48;
return x;
}
void out(u32 x){
static char c[12];
char*i=c;
if(!x)*t++=48;
else{
while(x){
u32 y=x/10;
*i++=x-y*10+48,x=y;
}
while(i!=c)*t++=*--i;
}
*t++=32;
}
}buf;
typedef __m128d vec;
inline vec mul(vec a,vec b){
vec u=_mm_mul_pd(a,b);
vec v=_mm_mul_pd(a,_mm_loadr_pd((double*)&b));
return _mm_setr_pd(get(u,0)-get(u,1),get(v,0)+get(v,1));
}
using namespace std;
const u32 N=1<<17;
const double pi=acos(-1.);
inline vec conj(vec a){
return _mm_setr_pd(get(a,0),-get(a,1));
}
vec w[N/2],a[N],b[N],c[N];
void fft(vec*a,u32 n){
for(u32 i=0,j=0;i<n;++i){
if(i<j)
swap(a[i],a[j]);
for(u32 k=n>>1;;k>>=1)
if((j^=k)>=k)break;
}
w[0]=_mm_set_sd(1);
for(u32 i=1;i<n;i<<=1){
vec s=_mm_setr_pd(cos(pi/i),sin(pi/i));
for(int j=i-2;j>0;j-=2)
w[j]=w[j>>1];
for(u32 j=1;j<i;j+=2)
w[j]=mul(s,w[j-1]);
for(u32 j=0;j<n;j+=i<<1){
vec*b=a+j,*c=b+i;
for(u32 k=0;k<i;++k){
vec v=mul(w[k],c[k]);
c[k]=_mm_sub_pd(b[k],v);
b[k]=_mm_add_pd(b[k],v);
}
}
}
}
void read(vec*a,u32 n){
for(u32 i=0;i<n/2;++i){
u32 x=buf,y=buf;
a[i]=_mm_setr_pd(x,y);
}
if(n&1)
a[n/2]=_mm_set_sd(buf);
}
signed main(){
static u32 n=buf+1,m=buf+1,l=1<<__lg(n+m-1);
if(n<=1000000/m){
static u32 a[N],c[N];
for(u32 i=0;i<n;++i)
a[i]=buf;
for(u32 i=0;i<m;++i){
u32 x=buf;
for(u32 j=0;j<n;++j)
c[i+j]+=x*a[j];
}
for(u32 i=0;i<n+m-1;++i)
buf.out(c[i]);
exit(0);
}
read(a,n);
read(b,m);
fft(a,l);
fft(b,l);
for(u32 i=0;i<l;++i){
u32 j=l-i&l-1;
c[j]=mul(_mm_setr_pd(0,.25),_mm_sub_pd(mul(conj(mul(a[j],b[j])),_mm_set_sd(4)),mul(mul(_mm_sub_pd(conj(a[j]),a[i]),_mm_sub_pd(conj(b[j]),b[i])),_mm_add_pd(i<l/2?w[i]:mul(w[i-l/2],_mm_set_sd(-1)),_mm_set_sd(1)))));
}
fft(c,l);
for(u32 i=0;i<n+m-1;++i)
buf.out(get(c[i>>1],~i&1)/l+.5);
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Subtask #1 Testcase #1 | 12.41 us | 44 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #2 | 14.907 ms | 10 MB + 260 KB | Accepted | Score: 100 | 显示更多 |
Subtask #1 Testcase #3 | 1.177 ms | 1 MB + 164 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #4 | 1.143 ms | 1 MB + 532 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #5 | 12.13 us | 44 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #6 | 11.43 us | 44 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #7 | 11.73 us | 44 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #8 | 14.217 ms | 9 MB + 680 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #9 | 14.185 ms | 9 MB + 680 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #10 | 13.505 ms | 9 MB + 76 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #11 | 15.124 ms | 10 MB + 420 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #12 | 12.111 ms | 8 MB + 176 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #13 | 11.59 us | 44 KB | Accepted | Score: 0 | 显示更多 |