// GCC-specific pragmas: request -Ofast-level optimization with loop unrolling,
// and allow AVX2 code generation, for the definitions that follow.
#pragma GCC optimize("Ofast,unroll-loops")
#pragma GCC target("avx2,tune=native")
// Short alias for the unsigned element type used throughout this file.
typedef unsigned u32;
// Eight file-scope tables of 1 << 8 = 256 entries each, i.e. one slot per
// possible byte value. Having static storage duration, they start out zeroed.
// NOTE(review): the names suggest per-byte occurrence counts (cntN) and running
// output positions (ptrN) for four radix passes - confirm against the code
// that reads and writes them.
u32 cnt1[1 << 8], cnt2[1 << 8], cnt3[1 << 8], cnt4[1 << 8], ptr1[1<<8], ptr2[1<<8], ptr3[1<<8], ptr4[1<<8];
// Sorts the n values a[0..n-1] into ascending order, in place.
//
// Algorithm: least-significant-digit radix sort over the four bytes of each
// 32-bit value. One read of the input builds a 256-bucket histogram for every
// byte position; each histogram is then turned into bucket start offsets, and
// four stable scatter passes move the data back and forth between `a` and a
// scratch buffer of n elements.
//
// Parameters:
//   a - array to sort; may be null only when there is nothing to sort.
//   n - number of elements in a. Values <= 1 (including negative values)
//       leave the array untouched.
//
// The function keeps no state between calls, so it can be called repeatedly
// and works for any n, not only multiples of eight. The scratch buffer is
// obtained with new[]; if that allocation fails, the exception thrown by
// operator new propagates to the caller and `a` is left unmodified.
void sort(unsigned *a, int n)
{
    enum { RADIX_BITS = 8, BUCKETS = 1 << RADIX_BITS, PASSES = 4 };

    // Zero or one element is already sorted; this also rejects negative n
    // before it could reach the allocation below.
    if (!a || n < 2)
        return;

    const unsigned len = (unsigned)n;

    // Per-call histograms, zero-initialised, so no counts leak between calls.
    unsigned offsets[PASSES][BUCKETS] = {};

    // Single read of the input fills the histograms of all four byte positions.
    for (unsigned i = 0; i < len; ++i) {
        const unsigned value = a[i];
        for (int pass = 0; pass < PASSES; ++pass)
            ++offsets[pass][(value >> (pass * RADIX_BITS)) & (BUCKETS - 1)];
    }

    // Exclusive prefix sum: each slot becomes the index at which the first
    // element of that bucket has to be written.
    for (int pass = 0; pass < PASSES; ++pass) {
        unsigned running = 0;
        for (int bucket = 0; bucket < BUCKETS; ++bucket) {
            const unsigned count = offsets[pass][bucket];
            offsets[pass][bucket] = running;
            running += count;
        }
    }

    unsigned *scratch = new unsigned[len];
    unsigned *src = a;
    unsigned *dst = scratch;

    for (int pass = 0; pass < PASSES; ++pass) {
        unsigned *next = offsets[pass];
        const int shift = pass * RADIX_BITS;

        // Walking forward while bumping the bucket cursor keeps equal digits
        // in their previous relative order, which LSD radix sort relies on.
        for (unsigned i = 0; i < len; ++i) {
            const unsigned value = src[i];
            dst[next[(value >> shift) & (BUCKETS - 1)]++] = value;
        }

        unsigned *swap = src;
        src = dst;
        dst = swap;
    }

    // PASSES is even, so the last scatter wrote into `a`; nothing to copy back.
    delete[] scratch;
}
/*
 * Submission results pasted after the code (not part of the program):
 *
 * Compilation | N/A | N/A | Compile OK | Score: N/A | Show more |
 * Testcase #1 | 1.02 ms | 808 KB | Accepted | Score: 34 | Show more |
 * Testcase #2 | 1.114 s | 762 MB + 992 KB | Accepted | Score: 33 | Show more |
 * Testcase #3 | 2.23 s | 1525 MB + 928 KB | Accepted | Score: 33 | Show more |
 */