#include <stdlib.h>

#define N 100000000
/* Number of buckets per radix digit: one for every possible byte value. */
#define D 256
/* Largest bucket index; also the bit mask that isolates the low byte. */
#define D1 255

/* Bucket-indexed table of pointers (one slot per byte value). */
unsigned *rs[D];

/* Bucket-indexed counter tables, one table per byte position of a 32-bit key
 * (t0 = least significant byte ... t3 = most significant byte). */
unsigned t0[D];
unsigned t1[D];
unsigned t2[D];
unsigned t3[D];
/* Three-way comparison of two unsigned keys for qsort(); used only when no
 * scratch buffer can be allocated. */
static int sort_compare_keys(const void *lhs, const void *rhs)
{
    const unsigned x = *(const unsigned *)lhs;
    const unsigned y = *(const unsigned *)rhs;

    return (x > y) - (x < y);
}

/**
 * Sort a[0..n-1] into ascending order, in place.
 *
 * Implemented as a least-significant-digit radix sort with 8-bit digits:
 * one counting sweep builds a histogram for every byte position, then one
 * stable scatter pass per byte position ping-pongs the keys between `a` and
 * a heap-allocated scratch buffer of the same size.
 *
 * @param a  Array of keys to sort; may be NULL only when n <= 0.
 * @param n  Number of keys in `a`. Values below 2 leave the array untouched.
 *
 * If the scratch buffer cannot be allocated the function falls back to
 * qsort(), so the array is always sorted on return.
 *
 * Assumes 8-bit bytes and no padding bits in `unsigned`, i.e. that
 * sizeof(unsigned) digits of 8 bits cover the whole key.
 */
void sort(unsigned *a, int n)
{
    enum {
        DIGIT_BITS = 8,
        BUCKETS = 1 << DIGIT_BITS,
        PASSES = (int)sizeof(unsigned)
    };

    if (a == NULL || n < 2) {
        return;
    }

    const size_t count = (size_t)n;

    /* count * sizeof *scratch cannot overflow: `a` itself is an object of
     * exactly that size. */
    unsigned *scratch = malloc(count * sizeof *scratch);
    if (scratch == NULL) {
        /* void return type: no way to report failure, so sort another way. */
        qsort(a, count, sizeof *a, sort_compare_keys);
        return;
    }

    /* Histograms are local so that every call starts from zero counts. */
    size_t histogram[PASSES][BUCKETS] = {{0}};

    /* Single sweep over the input fills the histograms of all digits. */
    for (size_t i = 0; i < count; ++i) {
        const unsigned key = a[i];
        for (int pass = 0; pass < PASSES; ++pass) {
            ++histogram[pass][(key >> (pass * DIGIT_BITS)) & (BUCKETS - 1)];
        }
    }

    unsigned *src = a;
    unsigned *dst = scratch;

    for (int pass = 0; pass < PASSES; ++pass) {
        const int shift = pass * DIGIT_BITS;
        size_t cursor[BUCKETS];
        size_t offset = 0;

        /* Exclusive prefix sum: cursor[b] is where bucket b starts in dst. */
        for (int bucket = 0; bucket < BUCKETS; ++bucket) {
            cursor[bucket] = offset;
            offset += histogram[pass][bucket];
        }

        /* Stable scatter: keys keep their relative order within a bucket,
         * which is what makes the digit-by-digit passes compose. */
        for (size_t i = 0; i < count; ++i) {
            const unsigned key = src[i];
            dst[cursor[(key >> shift) & (BUCKETS - 1)]++] = key;
        }

        unsigned *swap = src;
        src = dst;
        dst = swap;
    }

    /* With an even number of passes the result already sits in `a`; copy
     * back only if an odd pass count left it in the scratch buffer. */
    if (src != a) {
        for (size_t i = 0; i < count; ++i) {
            a[i] = src[i];
        }
    }

    free(scratch);
}
/* | Compilation | N/A | N/A | Compile Error | Score: N/A | Show more | */