/*
 * O2: per-function optimization attribute. On GCC it expands to
 * optimize("-O3") (note: -O3, despite the macro's name); on clang and on
 * every other compiler it expands to nothing.
 */
#if defined(__GNUC__) && !defined(__clang__)
#define O2 __attribute__((optimize("-O3")))
#else
#define O2
#endif
#include <stdio.h>
#include <stdlib.h>

/* One radix digit is 8 bits wide: BUCKET_MOD masks it, BUCKET_SIZE counts
 * the possible digit values (256). */
#define BUCKET_MOD (0xFFU)
#define BUCKET_SIZE (BUCKET_MOD + 1U)
/* Not referenced by sort(); kept for any other code that relies on it. */
#define MAX_SIZE (100000000U)

/*
 * LOOP_1: scatter one element, value[index], into p.
 *
 * `mod` is a raw operator fragment (e.g. `& BUCKET_MOD` or `>> 8 & BUCKET_MOD`)
 * appended to the element to extract the digit that selects the bucket.
 * key[digit] holds the end offset of that bucket and is pre-decremented, so
 * each bucket is filled from its end towards its start.
 * The expansion ends with a ';' because LOOP_8 chains it without separators.
 */
#define LOOP_1(p, key, mod, value, index) \
    (p)[--((key)[(value)[index] mod])] = (value)[index];

/*
 * LOOP_8: scatter value[7] down to value[0], in that (descending) order.
 * Reads exactly eight elements, so the caller must guarantee they exist.
 */
#define LOOP_8(p, key, mod, value) LOOP_1(p, key, mod, value, 7) \
    LOOP_1(p, key, mod, value, 6) \
    LOOP_1(p, key, mod, value, 5) \
    LOOP_1(p, key, mod, value, 4) \
    LOOP_1(p, key, mod, value, 3) \
    LOOP_1(p, key, mod, value, 2) \
    LOOP_1(p, key, mod, value, 1) \
    LOOP_1(p, key, mod, value, 0)

/*
 * SCATTER_PASS: one full distribution pass from src[0..len) into dst.
 *
 * Elements are visited strictly from the last to the first, which together
 * with the pre-decrement in LOOP_1 keeps the pass stable. The bulk is handled
 * eight elements at a time; the second loop handles the len % 8 elements left
 * at the front, so no element outside src[0..len) is ever touched.
 */
#define SCATTER_PASS(dst, src, len, key, mod)   \
    do {                                        \
        sort_t *cur_ = (src) + (len);           \
        while (cur_ - (src) >= 8) {             \
            cur_ -= 8;                          \
            LOOP_8(dst, key, mod, cur_)         \
        }                                       \
        while (cur_ > (src)) {                  \
            --cur_;                             \
            LOOP_1(dst, key, mod, cur_, 0)      \
        }                                       \
    } while (0)

/* Element type being sorted. The four 8-bit passes below cover bits 0..31,
 * and `>> 24` is used unmasked as a bucket index, so values must fit in
 * 32 bits. */
typedef unsigned sort_t;

/* qsort comparator for sort_t, ascending; used only by the fallback path. */
static int sort_cmp(const void *lhs, const void *rhs) {
    const sort_t a = *(const sort_t *)lhs;
    const sort_t b = *(const sort_t *)rhs;
    return (a > b) - (a < b);
}

/*
 * Sort data[0..n) in ascending order, in place.
 *
 * Uses a least-significant-digit radix sort with four 8-bit digits and a
 * temporary buffer of n elements. If that buffer cannot be allocated the
 * function falls back to qsort(), so the array is sorted either way.
 *
 * data: array to sort; may be NULL only when there is nothing to sort.
 * n:    number of elements; values below 2 (including negatives) are a no-op.
 */
O2 void sort(sort_t *data, int n) {
    /* k4 counts the lowest byte, k1 the highest. */
    unsigned int k1[BUCKET_SIZE] = {0};
    unsigned int k2[BUCKET_SIZE] = {0};
    unsigned int k3[BUCKET_SIZE] = {0};
    unsigned int k4[BUCKET_SIZE] = {0};
    sort_t *scratch = NULL;
    size_t count;
    size_t i;

    if (data == NULL || n < 2) {
        return;
    }
    count = (size_t)n;

    /* Guard the size computation against wrap-around before allocating. */
    if (count <= (size_t)-1 / sizeof *scratch) {
        scratch = malloc(count * sizeof *scratch);
    }
    if (scratch == NULL) {
        qsort(data, count, sizeof *data, sort_cmp);
        return;
    }

    /* Histogram all four digits in a single sweep over the input. */
    for (i = 0; i < count; ++i) {
        const sort_t v = data[i];
        ++k4[v & BUCKET_MOD];
        ++k3[(v >> 8) & BUCKET_MOD];
        ++k2[(v >> 16) & BUCKET_MOD];
        ++k1[v >> 24];
    }

    /* Inclusive prefix sums: k[d] becomes the end offset of bucket d. */
    for (i = 1; i < BUCKET_SIZE; ++i) {
        k4[i] += k4[i - 1];
        k3[i] += k3[i - 1];
        k2[i] += k2[i - 1];
        k1[i] += k1[i - 1];
    }

    /* Four passes, lowest byte first, ping-ponging between the two buffers;
     * the even number of passes leaves the result in data. */
    SCATTER_PASS(scratch, data, count, k4, & BUCKET_MOD);
    SCATTER_PASS(data, scratch, count, k3, >> 8 & BUCKET_MOD);
    SCATTER_PASS(scratch, data, count, k2, >> 16 & BUCKET_MOD);
    SCATTER_PASS(data, scratch, count, k1, >> 24);

    free(scratch);
}
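/*
 * Result table pasted together with the source (appears to be the judge's
 * verdict for this submission):
 *
 * | Compilation | N/A | N/A | Compile OK | Score: N/A | Show more |
 * | Testcase #1 | 791.348 ms | 762 MB + 980 KB | Accepted | Score: 100 | Show more |
 */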