#include<bits/stdc++.h>
#define LL long long
#define LLL __int128
#define uint unsigned
#define ldb long double
#define uLL unsigned long long
using namespace std;
typedef vector<int> poly;
typedef vector<int> Vec;
typedef vector<Vec> Mat;
typedef tuple<poly,poly> Vec2;
typedef tuple<poly,poly,poly,poly> Mat2;
mt19937 rng(chrono::system_clock::now().time_since_epoch().count());
const int Mod=998244353,G=3,img=86583718;
vector<uLL>Grt,iGrt;
poly frc({1,1}),inv({0,1}),ivf({1,1});vector<poly>Tr,Ts;
inline int qpow(int x,int y,int z=1){
for(;y;(y>>=1)&&(x=(LL)x*x%Mod))if(y&1)z=(LL)z*x%Mod;return z;
}
inline uLL reduce(const uLL&x){
constexpr uLL A=-(uLL)Mod/Mod+1;
constexpr uLL Q=(((__uint128_t)(-(uLL)Mod%Mod)<<64)+Mod-1)/Mod;
return x*A+(uLL)((__uint128_t)x*Q>>64)+1;
}
inline uLL mul(const uLL&x,const uLL&y){
return x*y*(__uint128_t)Mod>>64;
}
inline uLL add(uLL x,const uLL&y){
return (LL)((x+=y)-Mod)>=0?x-Mod:x;
}
inline uLL sub(uLL x,const uLL&y){
return (LL)(x-=y)<0?x+Mod:x;
}
inline void extend(const int&n){
if(Grt.empty())Grt.emplace_back(reduce(1)),iGrt.emplace_back(reduce(1));
if(Grt.size()<n){
int L=Grt.size();
Grt.resize(n),iGrt.resize(n);
for(;L<n;L*=2){
const int w=qpow(G,Mod/(L*4)),iw=qpow(w,Mod-2);
for(int i=0;i<L;++i)Grt[i+L]=reduce(mul(Grt[i],w)),iGrt[i+L]=reduce(mul(iGrt[i],iw));
}
}
}
template<int A,int B,int C=0,class fun>inline void Butterrep(int i,int j,int k,fun F){
if(A!=C)F(i,j+C/B,k+C*2-C%B),Butterrep<A,B,C+(C<A)>(i,j,k,F);
}
template<class fun>inline void ButterX(int i,int n,fun F){
for(int j=0;2*i*j<n;++j)for(int k=0;k<i;k+=32)Butterrep<32,32>(i,j,k+2*i*j,F);
}
template<int i,class fun>inline void ButterY(int n,fun F){
if(n>=32)for(int j=0;2*i*j<n;j+=32/i)Butterrep<32,i>(i,j,2*i*j,F);
else if(i<n)for(int j=0;2*i*j<n;++j)for(int k=0;k<i;++k)F(i,j,k+2*i*j);
}
inline void DFT(poly&P){
const int n=P.size();
extend(n);
const auto F=[&](int x,int y,int z){
const uLL a=P[z],b=mul(P[z+x],Grt[y]);
P[z]=add(a,b),P[z+x]=sub(a,b);
};
for(int i=n>>1;i>16;i>>=1)ButterX(i,n,F);
ButterY<16>(n,F),ButterY<8>(n,F),ButterY<4>(n,F),ButterY<2>(n,F),ButterY<1>(n,F);
}
inline void IDFT(poly&P){
const int n=P.size();
extend(n);
const auto F=[&](int x,int y,int z){
const uLL a=P[z],b=P[z+x];
P[z]=add(a,b),P[z+x]=mul(a-b+Mod,iGrt[y]);
};
ButterY<1>(n,F),ButterY<2>(n,F),ButterY<4>(n,F),ButterY<8>(n,F),ButterY<16>(n,F);
for(int i=32;i<n;i<<=1)ButterX(i,n,F);
}
inline poly Mul(poly P,poly Q){
if(P.empty()||Q.empty())return poly();
const int pn=P.size(),qn=Q.size(),rn=pn+qn-1;
const int m=2<<__lg(max(1,rn-1));
P.resize(m),Q.resize(m);
DFT(P),DFT(Q);
const uLL mi=reduce(qpow(m,Mod-2));
for(int i=m;i--;)P[i]=mul((uLL)P[i]*Q[i]%Mod,mi);
IDFT(P);
return P.resize(rn),P;
}
void poly_multiply(unsigned *a, int n, unsigned *b, int m, unsigned *c)
{
poly P=Mul(poly(a,a+n+1),poly(b,b+m+1));
copy(P.begin(),P.end(),c);
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 106.215 ms | 63 MB + 304 KB | Accepted | Score: 100 | 显示更多 |