// AVX2 helper: compares n * 64 bytes at p and q (64 bytes per iteration)
// and accumulates the per-lane match counts into the 16 uint16 lanes at s.
// p and q are advanced past the compared region. n must be at most 255 so
// the per-byte counters in ymm1/ymm2 cannot overflow.
#define asmv asm volatile
void step(int n, char*& p, char*& q, unsigned short* s) {
    asmv ("vpxor %%ymm1, %%ymm1, %%ymm1;"              // byte counters for lanes 0-31
          "vpxor %%ymm2, %%ymm2, %%ymm2;"              // byte counters for lanes 32-63
          "lb%=: vmovdqu (%0), %%ymm4; vmovdqu (%1), %%ymm5;"
          "vmovdqu 32(%0), %%ymm6; vmovdqu 32(%1), %%ymm7;"
          "vpcmpeqb %%ymm4, %%ymm5, %%ymm4;"           // 0xFF (i.e. -1) where bytes match
          "vpcmpeqb %%ymm6, %%ymm7, %%ymm6;"
          "vpsubb %%ymm4, %%ymm1, %%ymm1;"             // counter -= -1, i.e. +1 per match
          "vpsubb %%ymm6, %%ymm2, %%ymm2;"
          "add $64, %0; add $64, %1; dec %2; jnz lb%=;"
          "vmovdqu (%3), %%ymm0;"                      // load running uint16 totals
          // widen the 32 byte counters of ymm1 to words and fold them into ymm0
          "vpmovzxbw %%xmm1, %%ymm4; vextracti128 $1, %%ymm1, %%xmm5;"
          "vpmovzxbw %%xmm5, %%ymm5; vpaddw %%ymm4, %%ymm5, %%ymm5;"
          "vpaddw %%ymm0, %%ymm5, %%ymm0;"
          // same reduction for ymm2
          "vpmovzxbw %%xmm2, %%ymm6; vextracti128 $1, %%ymm2, %%xmm7;"
          "vpmovzxbw %%xmm7, %%ymm7; vpaddw %%ymm6, %%ymm7, %%ymm7;"
          "vpaddw %%ymm0, %%ymm7, %%ymm0;"
          "vmovdqu %%ymm0, (%3);"                      // store updated totals
          : "+r"(p), "+r"(q), "+r"(n)
          : "r"(s)
          : "memory", "cc", "xmm0", "xmm1", "xmm2",    // declare the clobbered vector
            "xmm4", "xmm5", "xmm6", "xmm7");           // registers and flags explicitly
}
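
// A plain-C++ model of what step() computes; step_scalar is a hypothetical
// helper added here for exposition and differential testing, not part of
// the original submission. Byte i of each 64-byte block lands in word lane
// i % 16, matching the vpmovzxbw/vpaddw reduction above.
void step_scalar(int n, char*& p, char*& q, unsigned short* s) {
    for (int it = 0; it < n; ++it) {
        for (int i = 0; i < 64; ++i)
            s[i % 16] += (p[i] == q[i]);
        p += 64; q += 64;
    }
}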
void solve(int n, int q, char *s1, char *s2, int *q_x, int *q_y, int *q_len, unsigned *ans) {
  // Problem-specific remap of s1: each digit is shifted cyclically
  // ('0' -> '1', '1' -> '2', '2' -> '0') so that the queries reduce to
  // plain byte-equality counting between s1 and s2.
  for (int i = 0; i < n; ++i)
    s1[i] = "120"[s1[i] - '0'];
  for (int k = 0; k < q; ++k) {
    char* p1 = s1 + q_x[k];
    char* p2 = s2 + q_y[k];
    unsigned len = q_len[k];
    // The 16 uint16 lanes step() accumulates into. NB: each lane receives up
    // to 4 counts per 64-byte block, so a single query longer than roughly
    // 1 MiB could wrap the lanes.
    unsigned short retbuf[16] = {};
    // step()'s per-byte counters would overflow after 256 iterations, so
    // long ranges are processed in chunks of at most 255 * 64 bytes.
    while (len >= 256 * 64) {
      step(255, p1, p2, retbuf);
      len -= 255 * 64;
    }
    if (len >= 64)
      step(len / 64, p1, p2, retbuf);  // now len / 64 <= 255: one call suffices
    int ret = 0;
    for (int i = 0; i < 16; ++i) ret += retbuf[i];
    // Scalar tail: step() already advanced p1/p2 past the vectorized part.
    int tail = len % 64;
    for (int i = 0; i < tail; ++i)
      ret += p1[i] == p2[i];
    ans[k] = ret;
}
}
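
// A minimal driver sketch showing the expected calling convention; the
// input values here are made up for illustration. With these strings the
// "120" remap turns s1 into an exact copy of s2, so ans[0] should be 16.
int main() {
    char s1[] = "0120120120120120";            // digits over {'0', '1', '2'}
    char s2[] = "1201201201201201";
    int qx[] = {0}, qy[] = {0}, qlen[] = {16};
    unsigned ans[1];
    solve(16, 1, s1, s2, qx, qy, qlen, ans);
    return ans[0] == 16 ? 0 : 1;               // 0 on the expected answer
}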
Case        | Time       | Memory        | Verdict    | Score
Compilation | N/A        | N/A           | Compile OK | N/A
Testcase #1 | 188.14 us  | 40 KB         | Accepted   | 50
Testcase #2 | 853.773 ms | 5 MB + 176 KB | Accepted   | 50