#pragma GCC optimize("Ofast,unroll-loops")
typedef unsigned u32;
u32 cnt1[1 << 11], cnt2[1 << 11], cnt3[1 << 11];
void sort(unsigned *a, int n)
{
u32 * const tmp = new u32[n];
u32 * const end= a + n;
for (register u32 *i = a; i != a + n; i++)
++cnt1[*i & 0x7ff],
++cnt2[(*i >> 11) & 0x7ff],
++cnt3[(*i >> 22) & 0x7ff];
for(register u32 *i = cnt1 + 1, *j = cnt2 + 1, *k = cnt3 + 1; i <= cnt1 + 0x7ff; ++i, ++j, ++k)
*i += *(i - 1),
*j += *(j - 1),
*k += *(k - 1);
for(register u32 *i= end - 1; i != a - 1; i-=8)
tmp[--cnt1[*i & 0x7ff]] = *i,
tmp[--cnt1[*(i - 1) & 0x7ff]] = *(i - 1),
tmp[--cnt1[*(i - 2) & 0x7ff]] = *(i - 2),
tmp[--cnt1[*(i - 3) & 0x7ff]] = *(i - 3),
tmp[--cnt1[*(i - 4) & 0x7ff]] = *(i - 4),
tmp[--cnt1[*(i - 5) & 0x7ff]] = *(i - 5),
tmp[--cnt1[*(i - 6) & 0x7ff]] = *(i - 6),
tmp[--cnt1[*(i - 7) & 0x7ff]] = *(i - 7);
for(register u32 *i= tmp + n - 1; i != tmp - 1; i-=8)
a[--cnt2[(*i >> 11) & 0x7ff]] = *i,
a[--cnt2[(*(i - 1) >> 11) & 0x7ff]] = *(i - 1),
a[--cnt2[(*(i - 2) >> 11) & 0x7ff]] = *(i - 2),
a[--cnt2[(*(i - 3) >> 11) & 0x7ff]] = *(i - 3),
a[--cnt2[(*(i - 4) >> 11) & 0x7ff]] = *(i - 4),
a[--cnt2[(*(i - 5) >> 11) & 0x7ff]] = *(i - 5),
a[--cnt2[(*(i - 6) >> 11) & 0x7ff]] = *(i - 6),
a[--cnt2[(*(i - 7) >> 11) & 0x7ff]] = *(i - 7);
for(register u32 *i= end - 1; i != a - 1; i-=8)
tmp[--cnt3[(*i >> 22) & 0x7ff]] = *i,
tmp[--cnt3[(*(i - 1) >> 22) & 0x7ff]] = *(i - 1),
tmp[--cnt3[(*(i - 2) >> 22) & 0x7ff]] = *(i - 2),
tmp[--cnt3[(*(i - 3) >> 22) & 0x7ff]] = *(i - 3),
tmp[--cnt3[(*(i - 4) >> 22) & 0x7ff]] = *(i - 4),
tmp[--cnt3[(*(i - 5) >> 22) & 0x7ff]] = *(i - 5),
tmp[--cnt3[(*(i - 6) >> 22) & 0x7ff]] = *(i - 6),
tmp[--cnt3[(*(i - 7) >> 22) & 0x7ff]] = *(i - 7);
__builtin_memcpy(a, tmp, n * sizeof(u32));
delete[] tmp;
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 1.225 ms | 824 KB | Accepted | Score: 34 | 显示更多 |
Testcase #2 | 1.522 s | 762 MB + 1008 KB | Accepted | Score: 33 | 显示更多 |
Testcase #3 | 3 s | 1525 MB + 940 KB | Time Limit Exceeded | Score: 0 | 显示更多 |