提交记录 27581


用户 题目 状态 得分 用时 内存 语言 代码长度
user1 wc2017b2. 【WC2017】挑战-任务2 Wrong Answer 0 848.148 ms 5296 KB C++ 2.31 KB
提交时间 评测时间
2024-12-27 20:37:08 2024-12-27 20:37:10
// Assumes ymm not used by other thing
#define asmv asm volatile
void solve(int n, int q, char *s1, char *s2, int *q_x, int *q_y, int *q_len, unsigned *ans) {
	for (int i=0; i<n; ++i)
		s1[i] = "120"[s1[i]-'0'];
	for (int k=0; k<q; ++k) {
		char* p1 = s1 + q_x[k];
		char* p2 = s2 + q_y[k];
		unsigned len = q_len[k];
		asmv ("vpxor %ymm0, %ymm0, %ymm0"); // ymm0 total
		while (len >= 255 * 64) {           // ymm1, ymm2 this_group
			asmv ("vpxor %ymm1, %ymm1, %ymm1;"
			      "vpxor %ymm2, %ymm2, %ymm2;");
			for (int i=0; i<255; i++) {
				asmv ("vmovupd   (%0), %%ymm4; vmovupd   (%1), %%ymm5;"
				      "vmovupd 32(%0), %%ymm6; vmovupd 32(%1), %%ymm7;"
				      "vpcmpeqb %%ymm4, %%ymm5, %%ymm4;"
				      "vpcmpeqb %%ymm6, %%ymm7, %%ymm6;"
				      "vpsubb   %%ymm4, %%ymm1, %%ymm1;"
				      "vpsubb   %%ymm6, %%ymm2, %%ymm2;":: "r"(p1), "r"(p2));
				p1 += 64; p2 += 64;
			}
			asmv ("vpmovzxbw %xmm1, %ymm4; vextracti128 $1, %ymm1, %xmm5;"
			      "vpmovzxbw %xmm5, %ymm5; vpaddw %ymm4, %ymm5, %ymm5;"
			      "vpaddw %ymm0, %ymm5, %ymm0;"
			      "vpmovzxbw %xmm2, %ymm6; vextracti128 $1, %ymm2, %xmm7;"
			      "vpmovzxbw %xmm7, %ymm7; vpaddw %ymm6, %ymm7, %ymm7;"
			      "vpaddw %ymm0, %ymm7, %ymm0;");
			len -= 255 * 64;
		}
		int heregrp = len/64;            // ymm1, ymm2 this_group
			asmv ("vpxor %ymm1, %ymm1, %ymm1;"
			      "vpxor %ymm2, %ymm2, %ymm2;");
			for (int i=0; i<heregrp; i++) {
				asmv ("vmovupd   (%0), %%ymm4; vmovupd   (%1), %%ymm5;"
				      "vmovupd 32(%0), %%ymm6; vmovupd 32(%1), %%ymm7;"
				      "vpcmpeqb %%ymm4, %%ymm5, %%ymm4;"
				      "vpcmpeqb %%ymm6, %%ymm7, %%ymm6;"
				      "vpsubb   %%ymm4, %%ymm1, %%ymm1;"
				      "vpsubb   %%ymm6, %%ymm2, %%ymm2;":: "r"(p1), "r"(p2));
				p1 += 64; p2 += 64;
			}
		unsigned short retbuf[32];
			asmv ("vpmovzxbw %xmm1, %ymm4; vextracti128 $1, %ymm1, %xmm5;"
			      "vpmovzxbw %xmm5, %ymm5; vpaddw %ymm4, %ymm5, %ymm5;"
			      "vpaddw %ymm0, %ymm5, %ymm0;"
			      "vpmovzxbw %xmm2, %ymm6; vextracti128 $1, %ymm2, %xmm7;"
			      "vpmovzxbw %xmm7, %ymm7; vpaddw %ymm6, %ymm7, %ymm7;"
			      "vpaddw %ymm0, %ymm7, %ymm0;");
		asmv ("vmovupd %%ymm0, (%0)":: "r"(retbuf): "memory");
		int ret = 0;
		for (int i=0; i<32; ++i) ret += retbuf[i];
		heregrp %= 64;
		for (int i=0; i<heregrp; ++i) {
			ret += p1[i] == p2[i];
		}
		ans[k] = ret;
	}
}

CompilationN/AN/ACompile OKScore: N/A

Testcase #1161.6 us40 KBWrong AnswerScore: 0

Testcase #2848.148 ms5 MB + 176 KBWrong AnswerScore: 0


Judge Duck Online | 评测鸭在线
Server Time: 2025-04-19 14:57:25 | Loaded in 0 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠