提交记录 27852
提交时间 |
评测时间 |
2025-01-28 02:35:13 |
2025-01-28 02:35:14 |
//written by r1
#include <immintrin.h>
#include <string.h>
// Enable AVX2 code generation (GCC target pragma) for the functions defined below
#pragma GCC target("avx2")
// Count equal bytes across `blocks` consecutive 64-byte blocks of two buffers.
//
//   blocks  number of 64-byte blocks to compare; values <= 0 do nothing
//   p1, p2  in/out read cursors; each is advanced by 64 * blocks bytes
//   sum     accumulator; incremented by the number of offsets i in
//           [0, 64 * blocks) where (*p1)[i] == (*p2)[i]
//
// The caller must guarantee that 64 * blocks bytes are readable from both
// cursors. Loads are unaligned, so no alignment is required.
__attribute__((target("avx2")))
static inline void process_blocks(int blocks, const char** p1, const char** p2, unsigned* sum) {
    const char* a = *p1;
    const char* b = *p2;
    unsigned matches = 0;

    for (int i = 0; i < blocks; ++i) {
        // Each 64-byte block is handled as two 32-byte halves.
        __m256i a_lo = _mm256_loadu_si256((const __m256i*)a);
        __m256i b_lo = _mm256_loadu_si256((const __m256i*)b);
        __m256i a_hi = _mm256_loadu_si256((const __m256i*)(a + 32));
        __m256i b_hi = _mm256_loadu_si256((const __m256i*)(b + 32));

        // cmpeq yields 0xFF in every equal byte lane; movemask packs the top
        // bit of each lane into one bit per byte. The result is a signed int,
        // so cast to unsigned before counting to avoid sign-extension when
        // byte 31 matches.
        unsigned eq_lo = (unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(a_lo, b_lo));
        unsigned eq_hi = (unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(a_hi, b_hi));

        // Count both halves; popcounting them separately avoids building a
        // 64-bit mask that could be truncated by a narrower variable.
        matches += (unsigned)__builtin_popcount(eq_lo) + (unsigned)__builtin_popcount(eq_hi);

        a += 64;
        b += 64;
    }

    *p1 = a;
    *p2 = b;
    *sum += matches;
}
// Answer q range-comparison queries over two digit strings.
//
// s1 is first rewritten IN PLACE over its first n characters with the mapping
// '0' -> '1', '1' -> '2', '2' -> '0'. Then, for each query k, ans[k] receives
// the number of offsets i in [0, q_len[k]) for which
//     s1[q_x[k] + i] == s2[q_y[k] + i]
// (s1 being the rewritten string).
//
// The caller is responsible for every range lying inside its buffer and for
// s1 containing only the characters '0', '1', '2' — nothing is validated here.
void solve(int n, int q, char* s1, char* s2, int* q_x, int* q_y, int* q_len, unsigned* ans) {
    // Rotate every digit of s1 through the lookup string.
    static const char rotated[] = "120";
    int pos = 0;
    while (pos < n) {
        s1[pos] = rotated[s1[pos] - '0'];
        ++pos;
    }

    for (int query = 0; query < q; ++query) {
        const char* lhs = s1 + q_x[query];
        const char* rhs = s2 + q_y[query];
        unsigned remaining = q_len[query];
        unsigned matches = 0;

        // Bulk phase: hand whole 64-byte blocks to the vector helper, at most
        // 255 blocks per call. The cap also keeps `chunk * 64` far below the
        // int limit. The helper advances lhs/rhs itself.
        while (remaining >= 64) {
            unsigned whole = remaining / 64;
            int chunk = (whole > 255) ? 255 : (int)whole;
            process_blocks(chunk, &lhs, &rhs, &matches);
            remaining -= chunk * 64;
        }

        // Tail phase: fewer than 64 bytes left, compare them one by one.
        for (; remaining != 0; --remaining) {
            matches += (*lhs == *rhs);
            ++lhs;
            ++rhs;
        }

        ans[query] = matches;
    }
}
Compilation | N/A | N/A | Compile Error | Score: N/A | 显示更多 |
Judge Duck Online | 评测鸭在线
Server Time: 2025-04-05 08:48:35 | Loaded in 0 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠