提交记录 19022


用户 题目 状态 得分 用时 内存 语言 代码长度
platelet 1008. 测测你的二维数点 Accepted 100 510.265 ms 353444 KB C++17 3.21 KB
提交时间 评测时间
2023-02-04 21:44:35 2023-02-04 21:44:37
#pragma GCC optimize("Ofast")
#pragma GCC target("avx2,popcnt,bmi,bmi2")

template<class T> struct vector_int {};
#define vector_int(T) template <> struct vector_int<T> { using type = __attribute((vector_size(32))) T; };
vector_int(char) vector_int(short) vector_int(int) vector_int(long) vector_int(long long)
#undef vector_int

template<class T, int N, class = typename vector_int<T>::type> struct btree_sum {
    using vec = typename vector_int<T>::type;

    static constexpr int B = 64 / sizeof(T), b = __builtin_ctz(B);

    static constexpr int height(int n) {
        return n <= B ? 1 : height(n >> b) + 1;
    }
    static constexpr int offset(int h) {
        int s = 0, n = N;
        while(h--) s += (n + B - 1) & -B, n = (n + B - 1) >> b;
        return s;
    }
    static constexpr int H = height(N);

    struct precalc {
        alignas(64) T mask[B][B];
        int offset_v[H];
        constexpr precalc() :mask{}, offset_v{} {
            for(int i = 0; i < B; i++)
                for(int j = i + 1; j < B; j++) mask[i][j] = -1;
            for(int i = 0; i < H; i++) offset_v[i] = offset(i);
        }
    };
    static constexpr auto p = precalc();
    alignas(64) T a[offset(H)];
    inline T sum(unsigned k) {
        T s = 0;
        #pragma GCC unroll 64
        for(int h = 0; h < H; h++)
            s += a[p.offset_v[h] + (k >> (h * b))];
        return s;
    }
    inline void add(unsigned k) {
        const auto v = 1 + vec{};
        #pragma GCC unroll 64
        for(int h = 0; h < H; h++) {
            auto t = (vec*)&a[p.offset_v[h] + (k >> (h * b) & -B)];
            auto m = (vec*)p.mask[k >> (h * b) & (B - 1)];
            t[0] += v & m[0], t[1] += v & m[1];
        }
    }
};

using u32 = unsigned;
using u64 = unsigned long long;

const int N = 1e7 + 64;

template<int id> void radixSort(int n, u32 a[N][4], u32 b[N][4]) {
    u32 buc[3][256] = {};
    #define GET(i, k) buc[k][i >> (k * 8) & 255]
    for(int i = 0; i < n; i++) {
        GET(a[i][id], 0)++, GET(a[i][id], 1)++, GET(a[i][id], 2)++;
    }
    for(int k = 0; k < 3; k++) {
        u32 s = 0;
        for(int i = 0; i < 256; i++) {
            u32 t = buc[k][i];
            buc[k][i] = s, s += t;
        }
    }
    for(int i = 0; i < n; i++) __builtin_memcpy(b[GET(a[i][id], 0)++], a[i], 16);
    for(int i = 0; i < n; i++) __builtin_memcpy(a[GET(b[i][id], 1)++], b[i], 16);
    for(int i = 0; i < n; i++) __builtin_memcpy(b[GET(a[i][id], 2)++], a[i], 16);
}
void count_2d(int n, const u32* x, const u32* y, u32* out) {
    static btree_sum<int, (N >> 6)> tree;
    static u32 a[N][4], b[N][4];
    static u64 mask[N >> 6];
    for(int i = 0; i < n; i++) a[i][0] = x[i], a[i][1] = y[i], a[i][3] = i;
    radixSort<1>(n, a, b);
    for(int l = 0, r; l < n; l = r) {
        u32 tmp = b[l][1];
        for(r = l; b[r][1] == tmp; r++)
            b[r][1] = l, b[r][2] = r;
    }
    radixSort<0>(n, b, a);
    for(int l = 0, r; l < n; l = r) {
        for(r = l; a[r][0] == a[l][0]; r++) {
            u32 p = a[r][1];
            out[a[r][3]] = tree.sum(p >> 6) + __builtin_popcountll(mask[p >> 6] & (1ULL << (p & 63)) - 1);
        }
        while(l < r) {
            u32 p = a[l++][2];
            tree.add(p >> 6), mask[p >> 6] |= 1ULL << (p & 63);
        }
    }
}

CompilationN/AN/ACompile OKScore: N/A

Testcase #1510.265 ms345 MB + 164 KBAcceptedScore: 100


Judge Duck Online | 评测鸭在线
Server Time: 2024-05-19 19:10:11 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用