提交记录 10292


用户 题目 状态 得分 用时 内存 语言 代码长度
ws_zzy 1001. 测测你的排序 Accepted 100 758.774 ms 781272 KB C++11 4.95 KB
提交时间 评测时间
2019-09-17 11:21:51 2020-08-01 02:06:57
#if defined(__GNUC__) && !defined(__clang__)
#define O2 __attribute__((optimize("-O2")))
#else
#define O2
#endif

#include <stdio.h>
#include <stdlib.h>
#define BUCKET_MOD (0xFFU)
#define BUCKET_SIZE (BUCKET_MOD + 1U)
#ifndef MAX_SIZE
#define MAX_SIZE (100000000U)
#endif

#define SPLIT_LOOP_1(p, key, mod, value, index)     \
*(--(key[value[index] mod].addr)) = value[index]
#define SPLIT_LOOP_2(p, key, mod, value, index)     \
SPLIT_LOOP_1(p, key, mod, value, index + 1);        \
SPLIT_LOOP_1(p, key, mod, value, index)
#define SPLIT_LOOP_4(p, key, mod, value, index)     \
SPLIT_LOOP_2(p, key, mod, value, index + 2);        \
SPLIT_LOOP_2(p, key, mod, value, index)
#define _SPLIT_LOOP_8_(p, key, mod, value)          \
SPLIT_LOOP_4(p, key, mod, value, 4);                \
SPLIT_LOOP_4(p, key, mod, value, 0)

#define ADD_LOOP_1(key, mod, v1)                    \
++key[v1 mod].offset
#define ADD_LOOP_2(key, mod, v1, v2)                \
ADD_LOOP_1(key, mod, v1);                           \
ADD_LOOP_1(key, mod, v2)
#define ADD_LOOP_4(key, mod, v1, v2, v3, v4)        \
ADD_LOOP_2(key, mod, v1, v2);                       \
ADD_LOOP_2(key, mod, v3, v4)
#define ADD_LOOP_8(key, mod, v1, v2, v3, v4, v5, v6, v7, v8)    \
ADD_LOOP_4(key, mod, v1, v2, v3, v4)                \
ADD_LOOP_4(key, mod, v5, v6, v7, v8)

#define SPLIT_END_LOOP(dst, src, end, key, mod, p)  \
for (p = end; p > src; ) {                          \
    p -= 8;                                         \
    _SPLIT_LOOP_8_(dst, key, mod, p);               \
}
#define SPLIT_LOOP(dst, src, size, key, mod, p)     \
SPLIT_END_LOOP(dst, src, src + size, key, mod, p)
#define SPLIT_END_NOLOOP(dst, src, end, key, mod, p)\
for (p = end; p > src; ) {                          \
    register sort_t tmp = *(--p);                   \
    *(--key[tmp mod].addr) = tmp;                   \
}
#define SPLIT_NOLOOP(dst, src, size, key, mod, p)   \
SPLIT_END_NOLOOP(dst, src, src + size, key, mod, p)

#ifndef LOOP
#define SPLIT(dst, src, size, key, mod, p)          \
SPLIT_LOOP(dst, src, size, key, mod, p)
#else
#define SPLIT(dst, src, size, key, mod, p)          \
SPLIT_NOLOOP(dst, src, size, key, mod, p)
#endif

#define CAL_OFFSET_INIT(key, init, begin)           \
key[init].addr = begin + key[init].offset;
#define CAL_OFFSET(key, i)                          \
key[i].addr = key[i - 1].addr + key[i].offset

typedef unsigned sort_t;
typedef union int_p_t {
    unsigned int offset;
    sort_t* addr;
} int_p_t;
// sort_t arr[MAX_SIZE];

#define LSB_SORT(dst, src, dst_end, src_end) do {                                   \
    int_p_t k2[BUCKET_SIZE] = {0}, k3[BUCKET_SIZE] = {0}, k4[BUCKET_SIZE] = {0};    \
    sort_t *p = src;                                                                \
    for (; p < src_end; ) {                                                         \
        register sort_t tmp = *(p++);                                               \
        ADD_LOOP_1(k4, & BUCKET_MOD, tmp);                                          \
        ADD_LOOP_1(k3, >> 8 & BUCKET_MOD, tmp);                                     \
        ADD_LOOP_1(k2, >> 16 & BUCKET_MOD, tmp);                                    \
    }                                                                               \
    CAL_OFFSET_INIT(k4, 0, dst);                                                    \
    CAL_OFFSET_INIT(k3, 0, src);                                                    \
    CAL_OFFSET_INIT(k2, 0, dst);                                                    \
    for (unsigned int i = 1; i < BUCKET_SIZE; ++i) {                                \
        CAL_OFFSET(k4, i);                                                          \
        CAL_OFFSET(k3, i);                                                          \
        CAL_OFFSET(k2, i);                                                          \
    }                                                                               \
    SPLIT_END_NOLOOP(dst, src, src_end, k4, & BUCKET_MOD, p);                       \
    SPLIT_END_NOLOOP(src, dst, dst_end, k3, >> 8 & BUCKET_MOD, p);                  \
    SPLIT_END_NOLOOP(dst, src, src_end, k2, >> 16 & BUCKET_MOD, p);                 \
} while(0)

O2 void sort(sort_t *data, int n) {
    int_p_t k1[BUCKET_SIZE] = {0};
    sort_t *arr = (sort_t*)malloc(sizeof(data[0]) * n);
    sort_t *p = data, *end = data + n;
    for (; p < end; ) {
        register sort_t tmp = p[0], tmp_ = p[1], tmp__ = p[2], tmp___ = p[3];
        ADD_LOOP_4(k1, >> 24, tmp, tmp_, tmp__, tmp___);
        p += 4;
    }
    CAL_OFFSET_INIT(k1, 0, arr);
    for (unsigned int i = 1; i < BUCKET_SIZE; ++i) {
        CAL_OFFSET(k1, i);
    }

    SPLIT(arr, data, n, k1, >> 24, p);
    end = arr + n;
    sort_t* data_end = data + n;
    for (unsigned int i = BUCKET_SIZE; i; ) {
        sort_t* begin = k1[--i].addr;
        sort_t* data_begin = data_end - (end - begin);
        LSB_SORT(data_begin, begin, data_end, end);
        data_end = data_begin;
        end = begin;
    }
    
    free(arr);
}

CompilationN/AN/ACompile OKScore: N/A

Testcase #1758.774 ms762 MB + 984 KBAcceptedScore: 100


Judge Duck Online | 评测鸭在线
Server Time: 2025-01-19 02:40:06 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠