#include <bits/stdc++.h>
struct IO {
struct precalc {
alignas(64) char digit[40000];
uint64_t log[64][2];
constexpr precalc() : digit{}, log{} {
for(int i = 0; i < 10000; i++) {
digit[i * 4 + 0] = 48 + i / 1000;
digit[i * 4 + 1] = 48 + i / 100 % 10;
digit[i * 4 + 2] = 48 + i / 10 % 10;
digit[i * 4 + 3] = 48 + i % 10;
}
int index = 0;
uint64_t value = 9;
for(int i = 0; i < 64; i++) {
// [2^i-1, 2^(i+1)-2]
log[i][0] = index, log[i][1] = value;
if((2ULL << i) - 2 > value)
index++, value = value <= (UINT64_MAX - 9) / 10 ? value * 10 + 9 : UINT64_MAX;
}
}
inline __attribute((always_inline))
void store(void* out, uint64_t k)const {
memcpy(out, digit + k * 4, 4);
}
};
static const int inSZ = 1 << 17;
char inBuf[inSZ], *in1, *in2;
inline __attribute((always_inline))
int read() {
if(__builtin_expect(in1 > inBuf + inSZ - 32, 0)) {
auto len = in2 - in1;
memcpy(inBuf, in1, len);
in1 = inBuf, in2 = inBuf + len;
in2 += fread(in2, 1, inSZ - len, stdin);
if(in2 != inBuf + inSZ) *in2 = 0;
}
int res = 0;
char c;
while((c = *in1++) < 48);
while(res = res * 10 + c - 48, (c = *in1++) >= 48);
return res;
}
static const int outSZ = 1 << 21;
char outBuf[outSZ], *out;
inline __attribute((always_inline))
void write(uint64_t x) {
if(__builtin_expect(out > outBuf + outSZ - 32, 0))
fwrite(outBuf, 1, out - outBuf, stdout), out = outBuf;
static constexpr auto P = precalc();
uint64_t bsr = __builtin_ia32_bsrdi(x + 1);
const uint64_t B = 10000, B2 = B * B, B3 = B2 * B, B4 = B3 * B;
uint64_t len = P.log[bsr][0] + (P.log[bsr][1] < x);
const uint64_t extra[4] = {1000, 100, 10, 1};
x *= extra[len & 3];
switch(len >> 2) {
case 0: // [0, 9999]
P.store(out, x);
break;
case 1: // [10000, 99999999]
P.store(out + 4, x - x / B * B);
P.store(out, x / B);
break;
case 2:
P.store(out + 8, x - x / B * B);
P.store(out + 4, x / B - x / B2 * B);
P.store(out, x / B2);
break;
default: __builtin_unreachable();
}
out += len + 2, out[-1] = ' ';
}
IO() {
in1 = inBuf;
in2 = in1 + fread(in1, 1, inSZ, stdin);
out = outBuf;
}
~IO() { fwrite(outBuf, 1, out - outBuf, stdout); }
} IO;
using namespace std;
using u32 = uint32_t;
using u64 = uint64_t;
const int N = 1 << 21, P = 998244353;
const __uint128_t P2 = P;
int lim;
u64 w[N / 2], iw[N / 2];
u32 a[N], b[N];
inline u32 add(u32 a, u32 b) {
if(int((a += b) - P) >= 0) a -= P;
return a;
}
inline u32 sub(u32 a, u32 b) {
if(int(a -= b) < 0) a += P;
return a;
}
constexpr u64 Pow(u64 a, int n) {
u64 r = 1;
for(; n; n >>= 1, a = a * a % P)
if(n & 1) r = r * a % P;
return r;
}
constexpr u64 trans(u64 x) {
constexpr u64 k = -(u64)P / P + 1;
constexpr __uint128_t r = ((__uint128_t(-(u64)P % P) << 64) + P - 1) / P;
return x * k + u64(x * r >> 64) + 1;
}
struct PreCalc {
u64 e[23], ie[23];
constexpr PreCalc() : e{}, ie{} {
u64 now = Pow(3, P >> 23), inow = Pow(now, P - 2);
for(int i = 21; i >= 0; i--) {
e[i] = now, ie[i] = inow;
(now *= now) %= P, (inow *= inow) %= P;
}
now = 1, inow = 1;
for(int i = 0; i <= 21; i++) {
u64 x = now * e[i] % P, y = inow * ie[i] % P;
(now *= ie[i]) %= P, (inow *= e[i]) %= P;
e[i] = trans(x), ie[i] = trans(y);
}
}
} pc;
void prework() {
u64 now = 1, inow = 1;
w[0] = iw[0] = trans(1);
for(int i = 1; i < lim >> 1; i++) {
now = now * pc.e[__builtin_ctz(i)] * P2 >> 64;
inow = inow * pc.ie[__builtin_ctz(i)] * P2 >> 64;
w[i] = trans(now), iw[i] = trans(inow);
}
}
void DIF(u32 a[]) {
#define butterfly(cnt, i) \
for(int k = 0; k < cnt; k++) { \
u32 x = a[ks + k], y = a[ks + k + i] * now * P2 >> 64; \
a[ks + k] = add(x, y), a[ks + k + i] = sub(x, y); \
}
#define butterfly2(cnt, i) \
for(int k = 0; k < cnt; k++) { \
u32 x = a[ks + k], y = a[ks + k + i] * now * P2 >> 64; \
a[ks + k] = x + y, a[ks + k + i] = x - y + P; \
}
for(int i = lim >> 1, l = 1; i; i >>= 1, l <<= 1)
if(i == 1) for(int j = 0; j < l; j++) {
u64 now = w[j];
int ks = j * i * 2;
butterfly2(1, 1);
} else if(i == 2) for(int j = 0; j < l; j++) {
u64 now = w[j];
int ks = j * i * 2;
butterfly(1, 2);
ks++;
butterfly2(1, 2);
} else if(i == 4) for(int j = 0; j < l; j++) {
u64 now = w[j];
int ks = j * i * 2;
#pragma GCC unroll 64
butterfly(2, 4);
ks += 2;
#pragma GCC unroll 64
butterfly2(2, 4);
} else for(int j = 0; j < l; j++) {
u64 now = w[j];
int pos = j * i * 2;
for(int ks = pos; ks < pos + (i >> 1); ks += 4)
#pragma GCC unroll 64
butterfly(4, i);
for(int ks = pos + (i >> 1); ks < pos + i; ks += 4)
#pragma GCC unroll 64
butterfly2(4, i);
}
#undef butterfly
#undef butterfly2
}
void IDIF(u32 a[]) {
for(int i = 0; i < lim; i++) {
u32 t = a[i];
if(__builtin_parity(i)) t = P - t;
a[i] = t;
}
#define butterfly(cnt, i) \
for(int k = 0; k < cnt; k++) { \
u32 x = a[ks + k], y = a[ks + k + i]; \
a[ks + k] = sub(x, y), a[ks + k + i] = (x + y) * now * P2 >> 64; \
}
for(int i = 1, l = lim >> 1; l; i <<= 1, l >>= 1)
if(i == 1) for(int j = 0; j < l; j++) {
u64 now = iw[j];
int ks = j * i * 2;
#pragma GCC unroll 1
butterfly(1, 1);
} else if(i == 2) for(int j = 0; j < l; j++) {
u64 now = iw[j];
int ks = j * i * 2;
#pragma GCC unroll 2
butterfly(2, 2);
} else if(i == 4) for(int j = 0; j < l; j++) {
u64 now = iw[j];
int ks = j * i * 2;
#pragma GCC unroll 4
butterfly(4, 4);
} else for(int j = 0; j < l; j++) {
u64 now = iw[j];
int pos = j * i * 2;
for(int ks = pos; ks < pos + i; ks += 8)
#pragma GCC unroll 8
butterfly(8, i);
}
u64 inv = Pow(lim, P - 2);
if(lim < 8) for(int i = 0; i < lim; i++) a[i] = a[i] * inv % P;
else for(int is = 0; is < lim; is += 8)
#pragma GCC unroll 8
for(int i = 0; i < 8; i++) a[is + i] = a[is + i] * inv % P;
#undef butterfly
}
int main() {
int n = IO.read(), m = IO.read();
for(int i = 0; i <= n; i++)
a[i] = IO.read();
for(int i = 0; i <= m; i++)
b[i] = IO.read();
for(lim = 1; lim <= n + m; lim <<= 1);
prework();
DIF(a), DIF(b);
for(int i = 0; i < lim; i++)
a[i] = (u64)a[i] * b[i] % P;
IDIF(a);
for(int i = 0; i <= n + m; i++) IO.write(a[i]);
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Subtask #1 Testcase #1 | 35.52 us | 60 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #2 | 10.213 ms | 7 MB + 20 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #3 | 4.761 ms | 2 MB + 744 KB | Accepted | Score: 100 | 显示更多 |
Subtask #1 Testcase #4 | 4.744 ms | 2 MB + 724 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #5 | 38.35 us | 60 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #6 | 36.39 us | 60 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #7 | 35.66 us | 60 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #8 | 9.956 ms | 6 MB + 504 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #9 | 9.951 ms | 6 MB + 504 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #10 | 9.764 ms | 5 MB + 992 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #11 | 10.203 ms | 7 MB + 180 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #12 | 9.785 ms | 4 MB + 964 KB | Accepted | Score: 0 | 显示更多 |
Subtask #1 Testcase #13 | 35.46 us | 60 KB | Accepted | Score: 0 | 显示更多 |