#if defined(__GNUC__) && !defined(__clang__)
#define O2 __attribute__((optimize("-O2")))
#else
#define O2
#endif
#include <stdio.h>
#include <stdlib.h>
#define BUCKET_MOD (0xFFU)
#define BUCKET_SIZE (BUCKET_MOD + 1U)
#ifndef MAX_SIZE
#define MAX_SIZE (1000000U)
#endif
#define LOOP_1(p, key, mod, value, index) \
p[--(key[value[index] mod])] = value[index]
#define LOOP_2(p, key, mod, value, index) \
LOOP_1(p, key, mod, value, index + 1); \
LOOP_1(p, key, mod, value, index)
#define LOOP_4(p, key, mod, value, index) \
LOOP_2(p, key, mod, value, index + 2); \
LOOP_2(p, key, mod, value, index)
#define _LOOP_8_(p, key, mod, value) \
LOOP_4(p, key, mod, value, 4); \
LOOP_4(p, key, mod, value, 0)
#ifdef LOOP
#define SPLIT(dst, src, size, key, mod, p) \
for (p = src + size; p > src; ) { \
p -= 8; \
_LOOP_8_(dst, key, mod, p); \
}
#else
#define SPLIT(dst, src, size, key, mod, p) \
for (p = src + size; p > src; ) { \
register sort_t tmp = *(--p); \
dst[--key[tmp mod]] = tmp; \
}
#endif
typedef unsigned sort_t;
sort_t arr[MAX_SIZE];
O2 void sort(sort_t *data, int n) {
unsigned int i;
unsigned int k1[BUCKET_SIZE] = {0}, k2[BUCKET_SIZE] = {0}, k3[BUCKET_SIZE] = {0}, k4[BUCKET_SIZE] = {0};
// sort_t *arr = (sort_t*)malloc(sizeof(data[0]) * n);
sort_t *p;
for (p = data + n; p > data; ) {
// register sort_t tmp = *(--p);
// ++k4[tmp & BUCKET_MOD];
// ++k3[(tmp >> 8) & BUCKET_MOD];
// ++k2[(tmp >> 16) & BUCKET_MOD];
// ++k1[tmp >> 24];
p -= 4;
register sort_t tmp = p[0], tmp_ = p[1], tmp__ = p[2], tmp___ = p[3];
++k4[tmp & BUCKET_MOD];
++k4[tmp_ & BUCKET_MOD];
++k4[tmp__ & BUCKET_MOD];
++k4[tmp___ & BUCKET_MOD];
++k3[(tmp >> 8) & BUCKET_MOD];
++k3[(tmp_ >> 8) & BUCKET_MOD];
++k3[(tmp__ >> 8) & BUCKET_MOD];
++k3[(tmp___ >> 8) & BUCKET_MOD];
++k2[(tmp >> 16) & BUCKET_MOD];
++k2[(tmp_ >> 16) & BUCKET_MOD];
++k2[(tmp__ >> 16) & BUCKET_MOD];
++k2[(tmp___ >> 16) & BUCKET_MOD];
++k1[tmp >> 24];
++k1[tmp_ >> 24];
++k1[tmp__ >> 24];
++k1[tmp___ >> 24];
}
for (i = 1; i < BUCKET_SIZE; ++i) {
k4[i] += k4[i - 1];
k3[i] += k3[i - 1];
k2[i] += k2[i - 1];
k1[i] += k1[i - 1];
}
SPLIT(arr, data, n, k4, & BUCKET_MOD, p);
SPLIT(data, arr, n, k3, >> 8 & BUCKET_MOD, p);
SPLIT(arr, data, n, k2, >> 16 & BUCKET_MOD, p);
SPLIT(data, arr, n, k1, >> 24, p);
// free(arr);
}
| Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
| Testcase #1 | 783.613 ms | 762 MB + 976 KB | Accepted | Score: 100 | 显示更多 |