#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
#include<bits/stdc++.h>
using namespace std;
const int mod=998244353,gen=3;
namespace math{
// fast power , (x and ans) must in [-mod,mod], y must in [-mod+1,mod-1]
typedef int i32;
typedef unsigned int u32;
typedef long long i64;
typedef unsigned long long u64;
template<typename T=int>
inline T fsp(i64 x,int y,i64 ans=1){
for(y<0?y+=mod-1:0;y;y>>=1,x=x*x%mod)
y&1?ans=ans*x%mod:0;
return ans;
}
namespace fast_number_theory_transform{
const int maxbit=22;
const u32 modm2=mod+mod;
template<class T>
inline void butterfly(T* p,int bit) {
for(u32 i=0,j=0;i<(1u<<bit);i++){
// j=rev[i]=(rev[i>>1]>>1)^((i&1)<<(bit-1));
if(i>j)swap(p[i],p[j]);
for(u32 l=1u<<(bit-1);(j^=l)<l;l>>=1);
}
}
u32 *_p0[maxbit+1],*_p1[maxbit+1];
inline void prep(int bit){
static int k=0;
for(;k<bit;k++){
u32 *p,*q,nl=1<<k;
u64 g=fsp(3,mod>>(k+1));
p=_p0[k]=new u32[nl<<1];q=p+nl;
for(int i=p[0]=1;i<nl;i++)p[i]=p[i-1]*g%mod;
butterfly(p,k);
for(int i=0;i<nl;i++)q[i]=(u64(p[i])<<32)/mod;
g=fsp(g,-1);
p=_p1[k]=new u32[nl<<1];q=p+nl;
for(int i=p[0]=1;i<nl;i++)p[i]=p[i-1]*g%mod;
butterfly(p,k);
for(int i=0;i<nl;i++)q[i]=(u64(p[i])<<32)/mod;
}
}inline u32 ntt_mul(u32 x,u64 p,u64 q){return x*p-(q*x>>32)*mod;}
void ntt(u32* a,int bit,bool f=0){
prep(bit);
for(int k=bit;k-->0;){
u32 *_p=_p0[bit-k-1],*_q=_p+(1<<(bit-k-1));
u32 *_a0=a,*_a1=a+(1<<k);
for(int i=0;i<1<<(bit-k-1);i++,_a0+=1<<(k+1),_a1+=1<<(k+1))
for(int j=0;j<(1<<k);++j){
u32 x=_a0[j]-(_a0[j]>=modm2)*modm2,y=ntt_mul(_a1[j],_p[i],_q[i]);
_a0[j]=x+y;_a1[j]=x+modm2-y;
}
}for(int i=0;i<(1<<bit);i++){
a[i]-=(a[i]>=modm2)*modm2;
a[i]-=(a[i]>=mod)*mod;
}if(f)butterfly(a,bit);
}
void intt(u32* a,int bit,bool f=0){
prep(bit);
if(f)butterfly(a,bit);
for(int k=0;k<bit;k++){
u32 *_p=_p1[bit-k-1],*_q=_p+(1<<(bit-k-1));
u32 *_a0=a,*_a1=a+(1<<k);
for(int i=0;i<1<<(bit-k-1);i++,_a0+=1<<(k+1),_a1+=1<<(k+1))
for(int j=0;j<(1<<k);++j){
u32 x=_a0[j],y=_a1[j];
_a0[j]=x+y-(x+y>=modm2)*modm2;
_a1[j]=ntt_mul(x+modm2-y,_p[i],_q[i]);
}
}
u64 iv=mod;iv<<=bit;iv=(iv-mod+1)>>bit;
for(int i=0;i<(1<<bit);i++)
a[i]=a[i]*iv%mod;
}
}using fast_number_theory_transform::ntt;
using fast_number_theory_transform::intt;
}
using math::fsp;
using math::ntt;
using math::intt;
unsigned f[1<<21],g[1<<21];
void poly_multiply(unsigned *a, int n, unsigned *b, int m, unsigned *c){
int bit=__lg(n+m)+1;
memcpy(f,a,4*(n+1));
memcpy(g,b,4*(m+1));
ntt(f,bit);ntt(g,bit);
for(int i=0;i<(1<<bit);i++)f[i]=1ll*f[i]*g[i]%mod;
intt(f,bit);
memcpy(c,f,4*(n+m+1));
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 138.731 ms | 55 MB + 680 KB | Accepted | Score: 100 | 显示更多 |