提交记录 20779


用户 题目 状态 得分 用时 内存 语言 代码长度
Min_25 1002i. 【模板题】多项式乘法 Compile Error 0 0 ns 0 KB C++11 19.21 KB
提交时间 评测时间
2024-01-05 18:59:03 2024-01-05 18:59:05
#pragma GCC target("avx512f")
#include <immintrin.h>
#include <array>
#include <cstring>
#include <cstdint>
#include <iostream>
using i32=int32_t;
using i64=int64_t;
using u32=uint32_t;
using u64=uint64_t;
using idt=std::size_t;
using I128=__m128i;
using I256=__m256i;
using I512=__m512i;
struct LMont_32{
    u32 M,M2,niv,R,R2;
    LMont_32()=default;
    LMont_32(u32 m):M{m},M2{m*2},niv{2+m},R{-m%m},R2((-u64(m))%m){
        for(int i=0;i<4;++i){niv*=2+m*niv;}
    }
    template<bool cond=true>u32 shr(u32 x)const{
        if constexpr(cond){
            return std::min(x,x-M);
        }
        return x;
    }
    u32 shr2(u32 x)const{
        return std::min(x,x-M2);
    }
    u32 dil(u32 x)const{
        return std::min(x,x+M);
    }
    u32 dil2(u32 x)const{
        return std::min(x,x+M2);
    }
    u32 reduce(u64 x)const{
        return (x+u64(u32(x)*niv)*M)>>32;
    }
    template<bool shrk=false>u32 mul(u32 x,u32 y)const{
        return shr<shrk>(reduce(u64(x)*y));
    }
    template<bool shrk=false>u32 qpw(u32 a,u32 b,u32 r)const{
        for(;b;b>>=1,a=mul(a,a)){
            b&1?r=mul(r,a):r;
        }
        return shr<shrk>(r);
    }
    template<bool shrk=false>u32 qpw(u32 a,u32 b)const{
        return qpw<shrk>(a,b,R);
    }
    template<bool shrk=false>u32 inv(u32 a)const{
        return qpw<shrk>(a,M-2);
    }
    template<bool shrk=false>u32 in(u32 x)const{
        return mul<shrk>(x,R2);
    }
    u32 add(u32 x,u32 y)const{
        return shr2(x+y);
    }
    u32 sub(u32 x,u32 y)const{
        return dil2(x-y);
    }
    u32 Ladd(u32 x,u32 y)const{
        return x+y;
    }
    u32 Lsub(u32 x,u32 y)const{
        return x+M2-y;
    }
    u32 neg(u32 x)const{
        return M2-x;
    }
};
struct LMont_32_I512{
    I512 M,M2,niv;
    LMont_32_I512()=default;
    LMont_32_I512(LMont_32 mt):M{_mm512_set1_epi32(mt.M)},M2{_mm512_set1_epi32(mt.M2)},niv{_mm512_set1_epi32(mt.niv)}{}
    template<bool cond=true>I512 shr(I512 x)const{
        if constexpr(cond){
            return _mm512_min_epu32(x,_mm512_sub_epi32(x,M));
        }
        return x;
    }
    I512 shr2(I512 x)const{
        return _mm512_min_epu32(x,_mm512_sub_epi32(x,M2));
    }
    I512 dil(I512 x)const{
        return _mm512_min_epu32(x,_mm512_add_epi32(x,M));
    }
    I512 dil2(I512 x)const{
        return _mm512_min_epu32(x,_mm512_add_epi32(x,M2));
    }
    I512 _mul_hi(I512 x,I512 y)const{
        I512 a=_mm512_mul_epu32(x,y);
        return _mm512_add_epi64(a,_mm512_mul_epu32(_mm512_mul_epu32(a,niv),M));
    }
    template<bool shrk=false>I512 mul(I512 x,I512 y)const{
        I512 a=_mm512_mul_epu32(x,y),b=_mm512_mul_epu32(_mm512_srli_epi64(x,32),_mm512_srli_epi64(y,32));
        I512 c=_mm512_mul_epu32(a,niv),d=_mm512_mul_epu32(b,niv);
        c=_mm512_mul_epu32(c,M),d=_mm512_mul_epu32(d,M);
        return shr<shrk>(_mm512_mask_blend_epi32(0xaaaa,_mm512_srli_epi64(_mm512_add_epi64(a,c),32),_mm512_add_epi64(b,d)));
    }
    template<bool shrk=false>I512 mul_sm(I512 x,I512 y)const{
        I512 a=_mm512_mul_epu32(x,y),b=_mm512_mul_epu32(_mm512_srli_epi64(x,32),y);
        I512 c=_mm512_mul_epu32(a,niv),d=_mm512_mul_epu32(b,niv);
        c=_mm512_mul_epu32(c,M),d=_mm512_mul_epu32(d,M);
        return shr<shrk>(_mm512_mask_blend_epi32(0xaaaa,_mm512_srli_epi64(_mm512_add_epi64(a,c),32),_mm512_add_epi64(b,d)));
    }
    template<bool shrk=false>I512 mul_hi(I512 x,I512 y)const{
        return shr<shrk>(_mm512_srli_epi64(_mul_hi(x,y),32));
    }
    I512 add(I512 x,I512 y)const{
        return shr2(_mm512_add_epi32(x,y));
    }
    I512 sub(I512 x,I512 y)const{
        return dil2(_mm512_sub_epi32(x,y));
    }
    static I512 Ladd(I512 x,I512 y){
        return _mm512_add_epi32(x,y);
    }
    I512 Lsub(I512 x,I512 y)const{
        return _mm512_add_epi32(_mm512_sub_epi32(x,y),M2);
    }
    I512 neg(I512 x)const{
        return _mm512_sub_epi32(M2,x);
    }
    I512 neg_s(I512 x)const{
        return _mm512_sub_epi32(M,x);
    }
    template<int z>I512 neg_m(I512 x)const{
        return _mm512_mask_sub_epi32(x,z,M2,x);
    }
};
template<class T>concept trivialT=std::is_trivial_v<T>;
inline u32*alc(idt n){
    return new(std::align_val_t(64))u32[n];
}
inline void fre(u32*p){
    ::operator delete[](p,std::align_val_t(64));
}
template<trivialT T>inline T*cpy(T*f,const T*g,idt n){
    return (T*)memcpy(f,g,n*sizeof(T));
}
template<trivialT T>constexpr inline T*clr(T*f,idt n){
    return (T*)memset(f,0,n*sizeof(T));
}
struct conv_ntt_mod{
    static constexpr int mxlg=26;
    LMont_32 mt;
    u32 iv2;
    LMont_32_I512 ms;
    I512 img16,fx;
    alignas(64) std::array<u64,8> rt3[mxlg-2],bwb,st[mxlg>>1];
    alignas(64) static constexpr int
    id0x[]={0,2,0,4,0,2,0,4,0,2,0,4,0,2,0,4},
    id0i[]={8,10,8,12,8,10,8,12,8,10,8,12,8,10,8,12},
    id1x[]={7,3,7,3,7,3,7,3,7,11,7,11,7,11,7,11},
    id2x[]={8,0,9,1,10,2,11,3,12,4,13,5,14,6,15,7},
    id2i[]={0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15},
    id3x[]={8,0,10,2,12,4,14,6,9,1,11,3,13,5,15,7},
    id4x[]={4,0,6,2,5,1,7,3,12,8,14,10,13,9,15,11};
    conv_ntt_mod(u32 M,int k,u32 _g):mt{M},iv2{mt.in((M+1)>>1)},ms{mt}{
        u32 rt1[mxlg-1],rt1i[mxlg-1];
        rt1[k-2]=_g,rt1i[k-2]=mt.inv(_g);
        for(int i=k-2;i>0;--i){
            rt1[i-1]=mt.mul(rt1[i],rt1[i]);
            rt1i[i-1]=mt.mul(rt1i[i],rt1i[i]);
        }
        const u32 IE=mt.R,img=mt.shr(rt1[0]),nR=mt.M-IE;
        img16=_mm512_set1_epi32(img);
        u32 pr=IE,pri=IE;
        bwb={IE,IE,nR,img,IE,IE,nR};
        for(int i=0;i<k-2;++i){
            u32 r=mt.mul<true>(pr,rt1[i+1]),ri=mt.mul<true>(pri,rt1i[i+1]);
            u32 r2=mt.mul<true>(r,r),r2i=mt.mul<true>(ri,ri);
            u32 r3=mt.mul<true>(r,r2),r3i=mt.mul<true>(ri,r2i);
            rt3[i]={r,r2,r3,r,ri,r2i,r3i};
            pr=mt.mul(pr,rt1i[i+1]),pri=mt.mul(pri,rt1[i+1]);
        }
    }
    template<bool first>static void __butterfly4(I512*p0,I512*p1,I512*p2,I512*p3,I512 img,I512 r1,I512 r2,I512 nr3,const LMont_32_I512&ms){
        if constexpr(first){
            auto f1=ms.shr2(_mm512_load_si512(p1));
            auto f3=ms.shr2(_mm512_load_si512(p3));
            auto g1=ms.add(f1,f3);
            auto g3=ms.mul_sm(ms.Lsub(f1,f3),img);
            auto f0=ms.shr2(_mm512_load_si512(p0));
            auto f2=ms.shr2(_mm512_load_si512(p2));
            auto g0=ms.add(f0,f2);
            auto g2=ms.sub(f0,f2);
            _mm512_store_si512(p0,ms.Ladd(g0,g1));
            _mm512_store_si512(p1,ms.Lsub(g0,g1));
            _mm512_store_si512(p2,ms.Ladd(g2,g3));
            _mm512_store_si512(p3,ms.Lsub(g2,g3));
            return;
        }
        auto f1=ms.mul_sm(_mm512_load_si512(p1),r1);
        auto nf3=ms.mul_sm(_mm512_load_si512(p3),nr3);
        auto g1=ms.sub(f1,nf3);
        auto f0=ms.shr2(_mm512_load_si512(p0));
        auto f2=ms.mul_sm(_mm512_load_si512(p2),r2);
        auto g3=ms.mul_sm(ms.Ladd(f1,nf3),img);
        auto g0=ms.add(f0,f2);
        auto g2=ms.sub(f0,f2);
        _mm512_store_si512(p0,ms.Ladd(g0,g1));
        _mm512_store_si512(p1,ms.Lsub(g0,g1));
        _mm512_store_si512(p2,ms.Ladd(g2,g3));
        _mm512_store_si512(p3,ms.Lsub(g2,g3));
    }
    template<bool first,bool shrk=false>static void __ylfrettub4(I512*p0,I512*p1,I512*p2,I512*p3,I512 img,I512 r1,I512 r2,I512 nr3,const LMont_32_I512&ms){
        if constexpr(first){
            auto f2=_mm512_load_si512(p2);
            auto f3=_mm512_load_si512(p3);
            auto g3=ms.mul_sm(ms.Lsub(f3,f2),img);
            auto g2=ms.add(f2,f3);
            auto f0=_mm512_load_si512(p0);
            auto f1=_mm512_load_si512(p1);
            auto g0=ms.add(f0,f1);
            auto g1=ms.sub(f0,f1);
            _mm512_store_si512(p0,ms.shr<shrk>(ms.add(g0,g2)));
            _mm512_store_si512(p1,ms.shr<shrk>(ms.add(g1,g3)));
            _mm512_store_si512(p2,ms.shr<shrk>(ms.sub(g0,g2)));
            _mm512_store_si512(p3,ms.shr<shrk>(ms.sub(g1,g3)));
            return;
        }
        auto f0=_mm512_load_si512(p0);
        auto f1=_mm512_load_si512(p1);
        auto nf2=ms.neg(_mm512_load_si512(p2));
        auto f3=_mm512_load_si512(p3);
        auto g0=ms.add(f0,f1);
        auto ng2=ms.sub(nf2,f3);
        auto g3=ms.mul_sm(ms.Ladd(nf2,f3),img);
        _mm512_store_si512(p2,ms.mul_sm<shrk>(ms.Ladd(g0,ng2),r2));
        _mm512_store_si512(p0,ms.shr<shrk>(ms.sub(g0,ng2)));
        auto g1=ms.sub(f0,f1);
        _mm512_store_si512(p1,ms.mul_sm<shrk>(ms.Ladd(g1,g3),r1));
        _mm512_store_si512(p3,ms.mul_sm<shrk>(ms.Lsub(g3,g1),nr3));
    }
    //a = a*b*fx mod x^16-w
    static void __conv16(I512*a,I512*b,I512 w,I512 fx,const LMont_32_I512&ms){
        auto aa=ms.shr(ms.shr2(_mm512_load_si512(a))),bb=ms.mul_sm<true>(_mm512_load_si512(b),fx);
        auto ix=_mm512_set1_epi64(i64(8)<<32),al1=_mm512_set1_epi32(1);
        auto a1=_mm512_permutexvar_epi32(_mm512_load_si512(id2x),aa),a0=_mm512_shuffle_epi32(a1,_MM_PERM_CDAB);
        auto a1w=ms.mul_sm<true>(a1,w),a0w=_mm512_shuffle_epi32(a1w,_MM_PERM_CDAB);
        auto res0=_mm512_setzero_si512(),res1=_mm512_setzero_si512();
        auto res2=_mm512_setzero_si512(),res3=_mm512_setzero_si512();
        #pragma GCC unroll 8
        for(int i=0;i<8;++i){
            auto b0=_mm512_permutexvar_epi32(ix,bb),b1=_mm512_shuffle_epi32(b0,_MM_PERM_CDAB);
            auto pr1=(i==0)?a1:_mm512_alignr_epi64(a1,a0,8-i);
            auto pr2=(i==0)?a0:_mm512_alignr_epi64(a0,a1w,8-i);
            auto pr3=(i==0)?a1w:_mm512_alignr_epi64(a1w,a0w,8-i);
            res0=_mm512_add_epi64(res0,_mm512_mul_epu32(b0,pr2));
            res1=_mm512_add_epi64(res1,_mm512_mul_epu32(b0,pr1));
            res2=_mm512_add_epi64(res2,_mm512_mul_epu32(b1,pr3));
            res3=_mm512_add_epi64(res3,_mm512_mul_epu32(b1,pr2));
            ix=_mm512_add_epi32(ix,al1);
        }
        res0=_mm512_add_epi64(res0,res2),res1=_mm512_add_epi64(res1,res3);
        res2=_mm512_sub_epi64(res0,ms.M2),res3=_mm512_sub_epi64(res1,ms.M2);
        res0=_mm512_min_epu64(res0,res2),res1=_mm512_min_epu64(res1,res3);
        res2=_mm512_mul_epu32(res0,ms.niv),res3=_mm512_mul_epu32(res1,ms.niv);
        res2=_mm512_mul_epu32(res2,ms.M),res3=_mm512_mul_epu32(res3,ms.M);
        res0=_mm512_add_epi64(res0,res2),res1=_mm512_add_epi64(res1,res3);
        res0=_mm512_mask_blend_epi32(0xaaaa,_mm512_srli_epi64(res0,32),res1);
        _mm512_store_si512(a,_mm512_permutexvar_epi32(_mm512_load_si512(id2i),ms.shr2(res0)));
    }
    template<bool first,bool shrk=false>void __conv(I512*f,I512*g,idt lm,int k,int p){
        if constexpr(first){st[p]=bwb;}
        if(lm==4){
            const auto ms=this->ms;
            auto rt=_mm512_load_si512(st+p);
            auto r1=_mm512_permutexvar_epi32(_mm512_load_si512(id0x),rt),img=img16;
            auto r2=_mm512_shuffle_epi32(r1,_MM_PERM_BBBB);
            auto nr3=_mm512_shuffle_epi32(r1,_MM_PERM_DDDD);
            __butterfly4<first>(f+0,f+1,f+2,f+3,img,r1,r2,nr3,ms);
            __butterfly4<first>(g+0,g+1,g+2,g+3,img,r1,r2,nr3,ms);
            __conv16(f+0,g+0,r1,fx,ms);
            __conv16(f+1,g+1,ms.neg_s(r1),fx,ms);
            r1=_mm512_permutexvar_epi32(_mm512_set1_epi32(6),rt);
            __conv16(f+2,g+2,r1,fx,ms);
            __conv16(f+3,g+3,ms.neg_s(r1),fx,ms);
            r1=_mm512_permutexvar_epi32(_mm512_load_si512(id0i),rt);
            r2=_mm512_shuffle_epi32(r1,_MM_PERM_BBBB);
            nr3=_mm512_shuffle_epi32(r1,_MM_PERM_DDDD);
            __ylfrettub4<first,shrk>(f+0,f+1,f+2,f+3,img,r1,r2,nr3,ms);
            _mm512_store_si512(st+p,ms.mul_hi<true>(rt,_mm512_load_si512(rt3+k)));
            return;
        }
        idt qlm=lm>>2;
        {
            const auto ms=this->ms;
            auto rt=_mm512_load_si512(st+p),img=img16;
            auto r1=_mm512_permutexvar_epi32(_mm512_load_si512(id0x),rt);
            auto r2=_mm512_shuffle_epi32(r1,_MM_PERM_BBBB);
            auto nr3=_mm512_shuffle_epi32(r1,_MM_PERM_DDDD);
            for(idt j=0;j<qlm;++j){
                __butterfly4<first>(f+j+qlm*0,f+j+qlm*1,f+j+qlm*2,f+j+qlm*3,img,r1,r2,nr3,ms); 
                __butterfly4<first>(g+j+qlm*0,g+j+qlm*1,g+j+qlm*2,g+j+qlm*3,img,r1,r2,nr3,ms); 
            }
        }
        __conv<first>(f+qlm*0,g+qlm*0,qlm,0,p+1);
        __conv<false>(f+qlm*1,g+qlm*1,qlm,1,p+1);
        __conv<false>(f+qlm*2,g+qlm*2,qlm,0,p+1);
        __conv<false>(f+qlm*3,g+qlm*3,qlm,k+2,p+1);
        {
            const auto ms=this->ms;
            auto rt=_mm512_load_si512(st+p),img=img16;
            auto r1=_mm512_permutexvar_epi32(_mm512_load_si512(id0i),rt);
            auto r2=_mm512_shuffle_epi32(r1,_MM_PERM_BBBB);
            auto nr3=_mm512_shuffle_epi32(r1,_MM_PERM_DDDD);
            for(idt j=0;j<qlm;++j){
                __ylfrettub4<first,shrk>(f+j+qlm*0,f+j+qlm*1,f+j+qlm*2,f+j+qlm*3,img,r1,r2,nr3,ms); 
            }
            _mm512_store_si512(st+p,ms.mul_hi<true>(rt,_mm512_load_si512(rt3+k)));
        }
    }
    template<bool shrk=true>void conv(u32*f,u32*g,idt lm){
        if(lm<=32){
            u64 buf[32];
            memset(buf,0,lm*sizeof(u64));
            for(idt i=0;i<lm;++i){
                for(idt j=0;j<lm;++j){
                    buf[(i+j)&(lm-1)]+=u64(f[i])*g[j];
                }
                if(i==(lm-1)){
                    for(idt j=0;j<lm;++j){
                        f[j]=buf[j]%mt.M;
                    }
                }
                else if(((i&15)==15)){
                    for(idt i=0;i<lm;++i){
                        buf[i]%=mt.M;
                    }
                }
            }
            return;
        }
        int p=__builtin_ctzll(lm);
        fx=_mm512_set1_epi32(mt.qpw<true>(iv2,p-4,mt.R2));
        auto F=(I512*)f,G=(I512*)g;
        if(p&1){
            idt hlm=lm>>5;
            {
                const auto ms=this->ms;
                for(idt i=0;i<hlm;++i){
                    auto x=_mm512_load_si512(F+i);
                    auto y=_mm512_load_si512(F+hlm+i);
                    auto a=_mm512_load_si512(G+i);
                    auto b=_mm512_load_si512(G+hlm+i);
                    _mm512_store_si512(F+i,ms.Ladd(x,y));
                    _mm512_store_si512(F+hlm+i,ms.Lsub(x,y));
                    _mm512_store_si512(G+i,ms.Ladd(a,b));
                    _mm512_store_si512(G+hlm+i,ms.Lsub(a,b));
                }
            }
            __conv<true>(F,G,hlm,0,0);
            __conv<false>(F+hlm,G+hlm,hlm,1,0);
            {
                const auto ms=this->ms;
                for(idt i=0;i<hlm;++i){
                    auto x=_mm512_load_si512(F+i);
                    auto y=_mm512_load_si512(F+hlm+i);
                    _mm512_store_si512(F+i,ms.shr<shrk>(ms.add(x,y)));
                    _mm512_store_si512(F+hlm+i,ms.shr<shrk>(ms.sub(x,y)));
                }
            }
        }   
        else{
            __conv<true,shrk>(F,G,lm>>4,0,0);
        }
    }
};

#include <chrono>
#include <iostream>
struct auto_timer{
	std::chrono::system_clock::time_point lst;
    std::string nam;
	auto_timer(std::string name):lst(std::chrono::system_clock::now()),nam(name){
		
	}
	~auto_timer(){
		std::chrono::duration<long double,std::milli> tott=std::chrono::system_clock::now()-lst;
		std::clog<<nam<<":"<<tott.count()<<"ms"<<std::endl;
	}
};
constexpr idt bcl(idt x){
    return x<2?1:idt(2)<<std::__lg(x-1);
}
#include <sys/mman.h>
#include <sys/stat.h>
namespace QIO_base{
    constexpr std::size_t O_buffer_default_size=1<<18;
	constexpr std::size_t O_buffer_flush_threshold=40;
	constexpr std::size_t O_string_copy_threshold=512;
	constexpr u64 E16=1e16,E12=1e12;
	constexpr u32 E8=1e8,E4=1e4;
	struct ict4{
		int num[E4];
		constexpr ict4():num{}{
			int j=0;
			for(int e0=(48<<0);e0<(58<<0);e0+=(1<<0)){
				for(int e1=(48<<8);e1<(58<<8);e1+=(1<<8)){
					for(int e2=(48<<16);e2<(58<<16);e2+=(1<<16)){
						for(int e3=(48<<24);e3<(58<<24);e3+=(1<<24)){
							num[j]=e0^e1^e2^e3,++j;
						}
					}
				}
			}
		}
        const char*get(u32 p)const{
            return (const char*)(num+p);
        }
	};
	constexpr ict4 ot={};
}
namespace QIO_I {
	using namespace QIO_base;
	struct Qinf{
		FILE *f;
		char *bg,*ed,*p;
		struct stat Fl;
		Qinf(FILE *fi):f(fi){
			int fd=fileno(f);
			fstat(fd,&Fl);
			bg=(char*)mmap(0,Fl.st_size+1,PROT_READ,MAP_PRIVATE,fd,0);
			p=bg,ed=bg+Fl.st_size;
		}
		~Qinf(){
			munmap(bg,Fl.st_size+1);
		}
		void skip_space(){
			while(*p<=' '){
				++p;
			}
		}
		char get(){
			return *p++;
		}
		char seek()const{
			return *p;
		}
		Qinf& read(char *s,std::size_t count){
			memcpy(s,p,count),p+=count;
			return *this;
		}
		template<std::unsigned_integral T>Qinf&operator >> (T&x){
			skip_space(),x=0;
			for(;*p>' ';++p){
				x=x*10+(*p&0xf);
			}
			return *this;
		}
		template<std::signed_integral T>Qinf& operator >> (T&x){
			skip_space();
			if(*p=='-'){
                ++p;
                for(x=48-*p++;*p>' ';++p){
					x=x*10-(*p&0xf);
				}	
			}
			else{
				for(x=*p++-48;*p>' ';++p){
					x=x*10+(*p&0xf);
				}
			}
			return *this;
		}
		std::string_view read_token(){
			skip_space();
            const char*bg=p;
			while(*p>' '){
                ++p;
			}
			return {bg,p};
		}
	}qin(stdin);
}
namespace QIO_O{
	using namespace QIO_base;
	struct Qoutf{
		FILE *f;
		char *bg,*ed,*p;
		char *ed_thre;
		Qoutf(FILE *fo,std::size_t sz=O_buffer_default_size):f(fo),bg(new char[sz]),ed(bg+sz),p(bg),ed_thre(ed-O_buffer_flush_threshold){}
		void flush(){
			fwrite_unlocked(bg,1,p-bg,f),p=bg;
		}
		void chk(){
			if(p>ed_thre)[[unlikely]]{
				flush();
			}
		}
		~Qoutf(){
			flush();
			delete[] bg;
		}
        void putb(u32 x){
            memcpy(p,ot.get(x),4),p+=4;
        }
		void put4(u32 x){
			auto C=ot.get(x);
			if(x>99){
				if(x>999){
					memcpy(p,C,4),p+=4;
				}
				else{
					memcpy(p,C+1,3),p+=3;
				}	
			} 
			else{
				if(x>9){
					memcpy(p,C+2,2),p+=2;
				}
				else{
					*p++=x^48;
				}
			}
		}
		void put2(u32 x){
			if(x>9){	
				memcpy(p,ot.get(x)+2,2),p+=2;
			}
			else{
				*p++=x^48;
			}
		}
		Qoutf &write(const char *s,std::size_t count){
			if(count>O_string_copy_threshold){
				flush();
				fwrite_unlocked(s,1,count,f);
				return *this;
            }
			if((p+count)>ed_thre){
				flush();
			}
			memcpy(p,s,count),p+=count;
			return *this;
		}
		Qoutf &operator << (char ch){
			*p++=ch;
			return *this;
		}
		Qoutf &operator << (u32 x){
			if(x>=E8){
				put2(x/E8),x%=E8;
                putb(x/E4),putb(x%E4);
			}
            else if(x>=E4) {
				put4(x/E4),putb(x%E4);
			}
            else{
				put4(x);
			}
			chk();
			return *this;
		}
		Qoutf& operator << (int x){
			if(x<0){
				*p++='-',x=-x;
			}
			return *this<<static_cast<u32>(x);
		}
		Qoutf& operator << (u64 x){
				if(x>=E4){
					put4(x/E4),putb(x%E4);
				}
                else{
					put4(x);
				}
			chk();
			return *this;
		}
		Qoutf& operator << (i64 x){
			if(x<0){
				*p++='-',x=-x;
			}
			return *this<<static_cast<u64>(x);
		}
		Qoutf& operator << (std::string_view s){
			return write(s.data(),s.size());
		}
	}qout(stdout);
}
namespace QIO{
	using QIO_I::Qinf;
	using QIO_I::qin;
	using QIO_O::Qoutf;
	using QIO_O::qout;
}
using namespace QIO;
void solve(){
    idt n,m;
    qin>>n>>m;
    ++n,++m;
    idt lm=bcl(n+m-1);
    auto f=alc(lm),g=alc(lm);
    for(idt i=0;i<n;++i){
        qin>>f[i];
    }
    clr(f+n,lm-n);
    for(idt i=0;i<m;++i){
        qin>>g[i];
    }
    clr(g+m,lm-m);
    conv_ntt_mod nt{998244353,23,752838388};
    nt.conv(f,g,lm);
    for(idt i=0;i<n+m-1;++i)
        qout<<f[i]<<' ';
}
int main(){
    std::ios::sync_with_stdio(false);
    std::cin.tie(nullptr);
    solve();
    return 0;
}

CompilationN/AN/ACompile ErrorScore: N/A


Judge Duck Online | 评测鸭在线
Server Time: 2025-06-07 00:41:35 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠