// Sort arr[0..length-1] into ascending order in place with insertion sort.
//
// arr:    array holding at least `length` elements.
// length: number of elements to sort; values below 2 leave arr untouched.
//
// The work done is proportional to the number of out-of-order pairs, so the
// routine is cheap on input that is already almost sorted.
//
// Declared `static inline`: a plain `inline` definition in C provides no
// external definition, so a call that the compiler chooses not to inline
// would fail at link time.
static inline void insertSort_1(unsigned arr[], int length)
{
    for (int i = 1; i < length; i++) {
        unsigned key = arr[i];
        int j = i - 1;

        // Shift every larger element one slot to the right to open a hole
        // for key.
        while (j >= 0 && arr[j] > key) {
            arr[j + 1] = arr[j];
            j--;
        }
        arr[j + 1] = key;
    }
}
// Sort a[0..n-1] into ascending order in place.
//
// a: array holding at least n elements.
// n: number of elements to sort; values below 2 return immediately.
//
// When the input fits the 10000-element stack scratch buffer it is first
// ordered on bits 4 and up by four stable counting-sort passes of 7 bits
// each (shifts 4, 11, 18, 25), scattering back and forth between `a` and the
// scratch buffer. A closing insertion pass then settles the low 4 bits that
// the radix passes never look at. Larger inputs skip the radix passes and
// are sorted by the insertion pass alone, which is correct but quadratic.
void sort(unsigned *a, int n)
{
    enum {
        CAPACITY   = 10000,             // elements the scratch buffer holds
        DIGIT_BITS = 7,                 // key bits consumed per radix pass
        BUCKETS    = 1 << DIGIT_BITS,
        DIGIT_MASK = BUCKETS - 1,
        LOW_BITS   = 4,                 // bits left to the insertion pass
        PASSES     = 4                  // even, so the data ends up in `a`
    };

    if (!a || n < 2)
        return;

    if (n <= CAPACITY) {
        unsigned scratch[CAPACITY];
        // Counts must start at zero: they are only ever incremented.
        unsigned count[PASSES][BUCKETS] = {{0}};

        // One sweep over the input builds the histogram of every pass.
        for (int k = 0; k < n; ++k) {
            unsigned v = a[k];
            for (int pass = 0; pass < PASSES; ++pass)
                ++count[pass][(v >> (LOW_BITS + pass * DIGIT_BITS)) & DIGIT_MASK];
        }

        unsigned *src = a;
        unsigned *dst = scratch;
        for (int pass = 0; pass < PASSES; ++pass) {
            int shift = LOW_BITS + pass * DIGIT_BITS;
            unsigned *slot[BUCKETS];
            unsigned *next = dst;

            // slot[d] is the write cursor of bucket d: the running sum of the
            // counts of all smaller digits.
            for (int d = 0; d < BUCKETS; ++d) {
                slot[d] = next;
                next += count[pass][d];
            }

            // Stable scatter: equal digits keep their relative order, which
            // is what lets later passes build on earlier ones.
            for (int k = 0; k < n; ++k) {
                unsigned v = src[k];
                *slot[(v >> shift) & DIGIT_MASK]++ = v;
            }

            unsigned *swap = src;
            src = dst;
            dst = swap;
        }
    }

    // Insertion pass: after the radix passes only elements that agree on
    // every bit above the low LOW_BITS can still be out of order.
    for (int k = 1; k < n; ++k) {
        unsigned key = a[k];
        int hole = k;

        while (hole > 0 && a[hole - 1] > key) {
            a[hole] = a[hole - 1];
            --hole;
        }
        a[hole] = key;
    }
}
// Cache is tight this time, so each scan handles only 7 bits.
// Compilation | N/A | N/A | Compile OK | Score: N/A | Show more |
// Testcase #1 | 66.36 us | 88 KB | Accepted | Score: 100 | Show more |