#pragma GCC target("sse2")
#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
#include<algorithm>
#include<cstdio>
#include<cmath>
#include<immintrin.h>
#define get(a,i)((double*)&a)[i]
typedef unsigned u32;
typedef __m128d vec;
inline vec mul(vec a,vec b){
vec u=_mm_mul_pd(a,b);
vec v=_mm_mul_pd(a,_mm_loadr_pd((double*)&b));
return _mm_setr_pd(get(u,0)-get(u,1),get(v,0)+get(v,1));
}
using namespace std;
const u32 N=1<<20;
const double pi=acos(-1.);
inline vec conj(vec a){
return _mm_setr_pd(get(a,0),-get(a,1));
}
vec w[N],a[N],b[N],c[N];
void fft(vec*a,u32 n){
for(u32 i=0,j=0;i<n;++i){
if(i<j)
swap(a[i],a[j]);
for(u32 k=n>>1;;k>>=1)
if((j^=k)>=k)break;
}
for(u32 i=1;i<n;i<<=1){
vec*w=::w+i;
for(u32 j=0;j<n;j+=i<<1){
vec*b=a+j,*c=b+i;
for(u32 k=0;k<i;++k){
vec v=mul(w[k],c[k]);
c[k]=_mm_sub_pd(b[k],v);
b[k]=_mm_add_pd(b[k],v);
}
}
}
}
void read(u32*f,u32 n,vec*a){
for(u32 i=0;i<n/2;++i)
a[i]=_mm_setr_pd(f[i*2],f[i*2+1]);
if(n&1)
a[n/2]=_mm_set_sd(f[n-1]);
}
void poly_multiply(u32*a,int n,u32*b,int m,u32*c){
++n,++m;
if(min(n,m)<=20){
fill_n(c,n+m-1,0);
for(u32 i=0;i<m;++i)
for(u32 j=0;j<n;++j)
c[i+j]+=b[i]*a[j];
}else{
u32 l=1<<__lg(n+m-2);
w[1]=_mm_set_sd(1);
for(u32 i=2;i<l;i<<=1){
vec*w1=w+i,*w0=w+i/2;
vec s=_mm_setr_pd(cos(pi/i),sin(pi/i));
for(u32 j=0;j<i;j+=2){
w1[j]=w0[j>>1];
w1[j+1]=mul(s,w1[j]);
}
}
read(a,n,::a);
read(b,m,::b);
vec*a=::a,*b=::b;
fft(a,l);
fft(b,l);
vec*w=::w+l/2;
for(u32 i=0;i<l;++i){
u32 j=l-i&l-1;
::c[j]=mul(_mm_setr_pd(0,.25),_mm_sub_pd(mul(conj(mul(a[j],b[j])),_mm_set_sd(4)),mul(mul(_mm_sub_pd(conj(a[j]),a[i]),_mm_sub_pd(conj(b[j]),b[i])),_mm_add_pd(i<l/2?w[i]:mul(w[i-l/2],_mm_set_sd(-1)),_mm_set_sd(1)))));
}
fft(::c,l);
for(u32 i=0;i<n+m-1;++i)
c[i]=get(::c[i>>1],~i&1)/l+.5;
}
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 185.076 ms | 71 MB + 656 KB | Accepted | Score: 100 | 显示更多 |