#include<cstdio>
#include<cstring>
#include<cmath>
#include<immintrin.h>
#define re register
#define ui unsigned int
#define ull unsigned long long
#define com __m128d
#define geta(a)((double*)&a)[0]
#define getb(a)((double*)&a)[1]
#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
const double pi=3.14159265358979323846264338327950;
const int maxn=(1<<20)+1;
com w[maxn],T1[maxn],xx1[maxn],xx2[maxn];
inline com mul(const com&A,const com&B)
{
re com x=_mm_mul_pd(A,B);
re com y=_mm_mul_pd(A,_mm_loadr_pd((double*)&B));
return _mm_setr_pd(geta(x)-getb(x),geta(y)+getb(y));
}ui x1[maxn],x2[maxn];
inline com conj(const com&A){return _mm_setr_pd(geta(A),-getb(A));}
void fft(com*a,re int len,re bool dft)
{
re ui i1=0;
re com x1;
for(re ui i=0,j=0;i<len;i++)
{
if(i<j)x1=a[i],a[i]=a[j],a[j]=x1;
for(ui k=len>>1;;k>>=1)if((j^=k)>=k)break;
}w[0]=_mm_setr_pd(1,0);
if(len>=2)
{
for(re ui i=0;i<len;i+=2)x1=a[i+1],a[i+1]=_mm_sub_pd(a[i],x1),a[i]+=x1;
}
if(len>=4){for(re ui i=0;i<len;i+=4)
{
x1=a[i+2],a[i+2]=_mm_sub_pd(a[i],x1),a[i]+=x1;
x1=dft?_mm_setr_pd(-getb(a[i+3]),geta(a[i+3])):_mm_setr_pd(getb(a[i+3]),-geta(a[i+3]));
a[i+3]=_mm_sub_pd(a[i+1],x1),a[i+1]+=x1;
}
w[1]=_mm_setr_pd(0,dft?1:-1);w[2]=_mm_setr_pd(-1,0);
}else w[1]=_mm_setr_pd(-1,0);
for(re ui i=8;i<=len;i<<=1)
{
i1=i>>1;
com s=_mm_setr_pd(cos(pi/i1),dft?sin(pi/i1):-sin(pi/i1));
for(int j=i-2;j>0;j-=2)
w[j]=w[j>>1];
for(ui j=1;j<i;j+=2)
w[j]=mul(s,w[j-1]);
for(re ui j=0;j<len;j+=i)
{
for(re com*aa=a+j,*bb=aa+i1,*ww=w,*aa1=bb;aa!=aa1;aa++,bb++,ww++)
{
x1=_mm_mul_pd(*ww,*bb);
re com y=_mm_mul_pd(*ww,_mm_loadr_pd((double*)&(*bb)));
x1=_mm_setr_pd(geta(x1)-getb(x1),geta(y)+getb(y));
*bb=(*aa)-x1;
*aa+=x1;
}
}
}
}
void DFT(ui *a,com *b,int n,int len) {
for(re int i=0;i<n+1>>1;++i)b[i]=_mm_setr_pd(a[i<<1],a[i<<1|1]);
fft(b,len,1);
}
void DFTMul(com *R1,com *S1,int len) {
for(re int i=0;i<len;++i) {
re int j=len-1&len-i;
com tmp=(i&len>>1)?_mm_setr_pd(1,0)-w[i^len>>1]:w[i]+_mm_setr_pd(1,0);
T1[i]=mul(R1[i],S1[i])-(mul(mul(R1[i]-conj(R1[j]),S1[i]-conj(S1[j])),tmp)*0.25);
}
}
void IDFT(ui *__ans,int r,int len) {
fft(T1,len,0);for(re int i=0;i<len;i++)T1[i]/=len;
for(re int i=0;i<r;++i)
{
__ans[i]=((i&1)?(ull)(getb(T1[i>>1])+0.5):(ull)(geta(T1[i>>1])+0.5));
}
}
void poly_multiply(unsigned *a, int n, unsigned *b, int m, unsigned *c)
{
re int len=getlen((n>m?n:m)+1),x;
DFT(a,xx1,n+1,len);DFT(b,xx2,m+1,len);
DFTMul(xx1,xx2,len);
IDFT(c,n+m+1,len);
}
| Compilation | N/A | N/A | Compile Error | Score: N/A | 显示更多 |