提交记录 18934


用户 题目 状态 得分 用时 内存 语言 代码长度
platelet 1002i. 【模板题】多项式乘法 Accepted 100 10.213 ms 7348 KB C++ 7.45 KB
提交时间 评测时间
2023-01-30 23:06:14 2023-01-30 23:06:19
#include <bits/stdc++.h>

struct IO {
    struct precalc {
        alignas(64) char digit[40000];
        uint64_t log[64][2];
        constexpr precalc() : digit{}, log{} {
            for(int i = 0; i < 10000; i++) {
                digit[i * 4 + 0] = 48 + i / 1000;
                digit[i * 4 + 1] = 48 + i / 100 % 10;
                digit[i * 4 + 2] = 48 + i / 10 % 10;
                digit[i * 4 + 3] = 48 + i % 10;
            }
            int index = 0;
            uint64_t value = 9;
            for(int i = 0; i < 64; i++) {
                // [2^i-1, 2^(i+1)-2]
                log[i][0] = index, log[i][1] = value;
                if((2ULL << i) - 2 > value)
                    index++, value = value <= (UINT64_MAX - 9) / 10 ? value * 10 + 9 : UINT64_MAX;
            }
        }
        inline __attribute((always_inline))
        void store(void* out, uint64_t k)const {
            memcpy(out, digit + k * 4, 4);
        }
    };
    static const int inSZ = 1 << 17;
    char inBuf[inSZ], *in1, *in2;
    inline __attribute((always_inline))
    int read() {
        if(__builtin_expect(in1 > inBuf + inSZ - 32, 0)) {
            auto len = in2 - in1;
            memcpy(inBuf, in1, len);
            in1 = inBuf, in2 = inBuf + len;
            in2 += fread(in2, 1, inSZ - len, stdin);
            if(in2 != inBuf + inSZ) *in2 = 0;
        }
        int res = 0;
        char c;
        while((c = *in1++) < 48);
        while(res = res * 10 + c - 48, (c = *in1++) >= 48);
        return res;
    }
    static const int outSZ = 1 << 21;
    char outBuf[outSZ], *out;
    inline __attribute((always_inline))
    void write(uint64_t x) {
        if(__builtin_expect(out > outBuf + outSZ - 32, 0))
            fwrite(outBuf, 1, out - outBuf, stdout), out = outBuf;
        static constexpr auto P = precalc();
        uint64_t bsr = __builtin_ia32_bsrdi(x + 1);
        const uint64_t B = 10000, B2 = B * B, B3 = B2 * B, B4 = B3 * B;
        uint64_t len = P.log[bsr][0] + (P.log[bsr][1] < x);
        const uint64_t extra[4] = {1000, 100, 10, 1};
        x *= extra[len & 3];
        switch(len >> 2) {
            case 0: // [0, 9999]
                P.store(out, x);
                break;
            case 1: // [10000, 99999999]
                P.store(out + 4, x - x / B * B);
                P.store(out, x / B);
                break;
            case 2:
                P.store(out + 8, x - x / B * B);
                P.store(out + 4, x / B - x / B2 * B);
                P.store(out, x / B2);
                break;
            default: __builtin_unreachable();
        }
        out += len + 2, out[-1] = ' ';
    }
    IO() {
        in1 = inBuf;
        in2 = in1 + fread(in1, 1, inSZ, stdin);
        out = outBuf;
    }
    ~IO() { fwrite(outBuf, 1, out - outBuf, stdout); }
} IO;

using namespace std;
using u32 = uint32_t;
using u64 = uint64_t;

const int N = 1 << 21, P = 998244353;
const __uint128_t P2 = P;

int lim;
u64 w[N / 2], iw[N / 2];
u32 a[N], b[N];

inline u32 add(u32 a, u32 b) {
    if(int((a += b) - P) >= 0) a -= P;
    return a;
}
inline u32 sub(u32 a, u32 b) {
    if(int(a -= b) < 0) a += P;
    return a;
}
constexpr u64 Pow(u64 a, int n) {
    u64 r = 1;
    for(; n; n >>= 1, a = a * a % P)
    if(n & 1) r = r * a % P;
    return r;
}
constexpr u64 trans(u64 x) {
    constexpr u64 k = -(u64)P / P + 1;
    constexpr __uint128_t r = ((__uint128_t(-(u64)P % P) << 64) + P - 1) / P;
    return x * k + u64(x * r >> 64) + 1;
}
struct PreCalc {
    u64 e[23], ie[23];
    constexpr PreCalc() : e{}, ie{} {
        u64 now = Pow(3, P >> 23), inow = Pow(now, P - 2);
        for(int i = 21; i >= 0; i--) {
            e[i] = now, ie[i] = inow;
            (now *= now) %= P, (inow *= inow) %= P;
        }
        now = 1, inow = 1;
        for(int i = 0; i <= 21; i++) {
            u64 x = now * e[i] % P, y = inow * ie[i] % P;
            (now *= ie[i]) %= P, (inow *= e[i]) %= P;
            e[i] = trans(x), ie[i] = trans(y);
        }
    }
} pc;
void prework() {
    u64 now = 1, inow = 1;
    w[0] = iw[0] = trans(1);
    for(int i = 1; i < lim >> 1; i++) {
        now = now * pc.e[__builtin_ctz(i)] * P2 >> 64;
        inow = inow * pc.ie[__builtin_ctz(i)] * P2 >> 64;
        w[i] = trans(now), iw[i] = trans(inow);
    }
}
void DIF(u32 a[]) {
    #define butterfly(cnt, i) \
        for(int k = 0; k < cnt; k++) { \
            u32 x = a[ks + k], y = a[ks + k + i] * now * P2 >> 64; \
            a[ks + k] = add(x, y), a[ks + k + i] = sub(x, y); \
        }
    #define butterfly2(cnt, i) \
        for(int k = 0; k < cnt; k++) { \
            u32 x = a[ks + k], y = a[ks + k + i] * now * P2 >> 64; \
            a[ks + k] = x + y, a[ks + k + i] = x - y + P; \
        }
    for(int i = lim >> 1, l = 1; i; i >>= 1, l <<= 1)
        if(i == 1) for(int j = 0; j < l; j++) {
            u64 now = w[j];
            int ks = j * i * 2;
            butterfly2(1, 1);
        } else if(i == 2) for(int j = 0; j < l; j++) {
            u64 now = w[j];
            int ks = j * i * 2;
            butterfly(1, 2);
            ks++;
            butterfly2(1, 2);
        } else if(i == 4) for(int j = 0; j < l; j++) {
            u64 now = w[j];
            int ks = j * i * 2;
            #pragma GCC unroll 64
            butterfly(2, 4);
            ks += 2;
            #pragma GCC unroll 64
            butterfly2(2, 4);
        } else for(int j = 0; j < l; j++) {
            u64 now = w[j];
            int pos = j * i * 2;
            for(int ks = pos; ks < pos + (i >> 1); ks += 4)
                #pragma GCC unroll 64
                butterfly(4, i);
            for(int ks = pos + (i >> 1); ks < pos + i; ks += 4)
                #pragma GCC unroll 64
                butterfly2(4, i);
        }
    #undef butterfly
    #undef butterfly2
}
void IDIF(u32 a[]) {
    for(int i = 0; i < lim; i++) {
        u32 t = a[i];
        if(__builtin_parity(i)) t = P - t;
        a[i] = t;
    }
    #define butterfly(cnt, i) \
        for(int k = 0; k < cnt; k++) { \
            u32 x = a[ks + k], y = a[ks + k + i]; \
            a[ks + k] = sub(x, y), a[ks + k + i] = (x + y) * now * P2 >> 64; \
        }
    for(int i = 1, l = lim >> 1; l; i <<= 1, l >>= 1)
        if(i == 1) for(int j = 0; j < l; j++) {
            u64 now = iw[j];
            int ks = j * i * 2;
            #pragma GCC unroll 1
            butterfly(1, 1);
        } else if(i == 2) for(int j = 0; j < l; j++) {
            u64 now = iw[j];
            int ks = j * i * 2;
            #pragma GCC unroll 2
            butterfly(2, 2);
        } else if(i == 4) for(int j = 0; j < l; j++) {
            u64 now = iw[j];
            int ks = j * i * 2;
            #pragma GCC unroll 4
            butterfly(4, 4);
        } else for(int j = 0; j < l; j++) {
            u64 now = iw[j];
            int pos = j * i * 2;
            for(int ks = pos; ks < pos + i; ks += 8)
                #pragma GCC unroll 8
                butterfly(8, i);
        }
    u64 inv = Pow(lim, P - 2);
    if(lim < 8) for(int i = 0; i < lim; i++) a[i] = a[i] * inv % P;
    else for(int is = 0; is < lim; is += 8)
        #pragma GCC unroll 8
        for(int i = 0; i < 8; i++) a[is + i] = a[is + i] * inv % P;
    #undef butterfly
}
int main() {
    int n = IO.read(), m = IO.read();
    for(int i = 0; i <= n; i++)
        a[i] = IO.read();
    for(int i = 0; i <= m; i++)
        b[i] = IO.read();
    for(lim = 1; lim <= n + m; lim <<= 1);
    prework();
    DIF(a), DIF(b);
    for(int i = 0; i < lim; i++)
        a[i] = (u64)a[i] * b[i] % P;
    IDIF(a);
    for(int i = 0; i <= n + m; i++) IO.write(a[i]);
}

CompilationN/AN/ACompile OKScore: N/A

Subtask #1 Testcase #135.52 us60 KBAcceptedScore: 0

Subtask #1 Testcase #210.213 ms7 MB + 20 KBAcceptedScore: 0

Subtask #1 Testcase #34.761 ms2 MB + 744 KBAcceptedScore: 100

Subtask #1 Testcase #44.744 ms2 MB + 724 KBAcceptedScore: 0

Subtask #1 Testcase #538.35 us60 KBAcceptedScore: 0

Subtask #1 Testcase #636.39 us60 KBAcceptedScore: 0

Subtask #1 Testcase #735.66 us60 KBAcceptedScore: 0

Subtask #1 Testcase #89.956 ms6 MB + 504 KBAcceptedScore: 0

Subtask #1 Testcase #99.951 ms6 MB + 504 KBAcceptedScore: 0

Subtask #1 Testcase #109.764 ms5 MB + 992 KBAcceptedScore: 0

Subtask #1 Testcase #1110.203 ms7 MB + 180 KBAcceptedScore: 0

Subtask #1 Testcase #129.785 ms4 MB + 964 KBAcceptedScore: 0

Subtask #1 Testcase #1335.46 us60 KBAcceptedScore: 0


Judge Duck Online | 评测鸭在线
Server Time: 2024-12-05 10:10:12 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠