#include <algorithm>
#include <cstring>
typedef unsigned int uint;
typedef long long unsigned int uint64;
constexpr uint Max_size = 1 << 21 | 5;
constexpr uint g = 3, Mod = 998244353;
struct Z
{
uint v;
Z(const uint _v = 0) : v(_v) { }
};
inline Z operator+(const Z &x1, const Z &x2) { return x1.v + x2.v < Mod ? x1.v + x2.v : x1.v + x2.v - Mod; }
inline Z operator-(const Z &x1, const Z &x2) { return x1.v >= x2.v ? x1.v - x2.v : x1.v + Mod - x2.v; }
inline Z operator*(const Z &x1, const Z &x2) { return static_cast<uint64>(x1.v) * x2.v % Mod; }
inline Z &operator*=(Z &x1, const Z &x2) { x1.v = static_cast<uint64>(x1.v) * x2.v % Mod; return x1; }
Z Power(Z Base, int Exp)
{
Z res = 1;
for (; Exp; Base *= Base, Exp >>= 1)
if (Exp & 1)
res *= Base;
return res;
}
inline uint mf(uint x)
{
return (static_cast<uint64>(x) << 32) / Mod;
}
int size;
uint w[Max_size], w_mf[Max_size];
inline void init(int n)
{
for (size = 2; size < n; size <<= 1)
;
Z pr = Power(g, (Mod - 1) / size);
w[size / 2] = 1, w_mf[size / 2] = mf(w[size / 2]);
for (int i = 1; i < size / 2; ++i)
w[size / 2 + i] = (w[size / 2 + i - 1] * pr).v, w_mf[size / 2 + i] = mf(w[size / 2 + i]);
for (int i = size / 2 - 1; i; --i)
w[i] = w[i << 1], w_mf[i] = w_mf[i << 1];
}
inline void DIF_NTT_core_2(Z _A[], const int L)
{
uint *A = reinterpret_cast<uint *>(_A);
for (int d = L >> 1; d; d >>= 1)
for (int i = 0; i != L; i += d << 1)
for (int j = 0; j != d; ++j)
{
uint x = A[i + j] + A[i + d + j];
if (x >= Mod * 2)
x -= Mod * 2;
uint64 t = A[i + j] + Mod * 2 - A[i + d + j];
uint q = t * w_mf[d + j] >> 32;
uint y = t * w[d + j] - q * Mod;
A[i + j] = x, A[i + d + j] = y;
}
}
inline void DIT_INTT_core_2(Z _A[], const int L)
{
uint *A = reinterpret_cast<uint *>(_A);
for (int d = 1; d != L; d <<= 1)
for (int i = 0; i != L; i += d << 1)
for (int j = 0; j != d; ++j)
{
uint x = A[i + j];
if (x >= Mod * 2)
x -= Mod * 2;
uint64 y = A[i + d + j];
uint q = y * w_mf[d + j] >> 32;
uint t = y * w[d + j] - q * Mod;
A[i + j] = x + t, A[i + d + j] = x + Mod * 2 - t;
}
std::reverse(A + 1, A + L);
if (L == 2)
{
if (A[0] >= Mod * 2)
A[0] -= Mod * 2;
if (A[1] >= Mod * 2)
A[1] -= Mod * 2;
}
int k = __builtin_ctz(L);
for (int i = 0; i != L; ++i)
{
uint64 m = -A[i] & (L - 1);
A[i] = (A[i] + m * Mod) >> k;
}
}
inline void normalize_2(Z _A[], const int L)
{
uint *A = reinterpret_cast<uint *>(_A);
for (int i = 0; i != L; ++i)
if (A[i] >= Mod)
A[i] -= Mod;
}
Z A[Max_size], B[Max_size];
void poly_multiply(uint _A[], int N, uint _B[], int M, uint _C[])
{
memcpy(reinterpret_cast<uint *>(A), _A, (N + 1) * sizeof(uint));
memcpy(reinterpret_cast<uint *>(B), _B, (M + 1) * sizeof(uint));
int L;
for (L = 2; L <= N + M; L <<= 1)
;
init(L);
DIF_NTT_core_2(A, L), DIF_NTT_core_2(B, L);
for (int i = 0; i != L; ++i)
A[i] *= B[i];
DIT_INTT_core_2(A, L), normalize_2(A, L);
memcpy(_C, reinterpret_cast<uint *>(A), (N + M + 1) * sizeof(uint));
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 142.778 ms | 39 MB + 656 KB | Accepted | Score: 100 | 显示更多 |