提交记录 27853
提交时间 |
评测时间 |
2025-01-28 02:41:17 |
2025-01-28 02:41:20 |
//by r1
#include <immintrin.h>
#include <string.h>
#include <cstdint> // 添加缺失的头文件
#pragma GCC target("avx2")
// 使用标准命名空间的整型类型
using std::uint32_t;
using std::uint64_t;
// Counts byte positions that are equal in two buffers, 64 bytes at a time.
//
// blocks: number of consecutive 64-byte blocks to compare; a value <= 0
//         compares nothing.
// p1, p2: in/out cursors into the two buffers. Each cursor is advanced by
//         64 bytes per processed block, so on return both point just past
//         the compared region. Loads are unaligned.
// sum:    in/out counter, incremented by the number of offsets i in the
//         compared region where (*p1)[i] == (*p2)[i].
//
// The target attribute lets the AVX2 intrinsics compile regardless of the
// flags or pragmas the rest of the translation unit is built with.
__attribute__((target("avx2")))
static inline void process_blocks(int blocks, const char** p1, const char** p2, unsigned* sum) {
    for (int i = 0; i < blocks; ++i) {
        // One 64-byte block is handled as two unaligned 32-byte halves.
        const __m256i a_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(*p1));
        const __m256i b_lo = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(*p2));
        const __m256i a_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(*p1 + 32));
        const __m256i b_hi = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(*p2 + 32));

        // Each equal byte pair becomes 0xFF, every other byte 0x00.
        const __m256i eq_lo = _mm256_cmpeq_epi8(a_lo, b_lo);
        const __m256i eq_hi = _mm256_cmpeq_epi8(a_hi, b_hi);

        // movemask returns a signed int: go through uint32_t first so a set
        // bit 31 is not sign-extended into the upper half of the 64-bit mask.
        const std::uint64_t lo_bits = static_cast<std::uint32_t>(_mm256_movemask_epi8(eq_lo));
        const std::uint64_t hi_bits = static_cast<std::uint32_t>(_mm256_movemask_epi8(eq_hi));

        // The mask must stay 64 bits wide; narrowing it would drop the
        // matches found in the second 32-byte half.
        const std::uint64_t mask = lo_bits | (hi_bits << 32);
        *sum += static_cast<unsigned>(__builtin_popcountll(mask));

        *p1 += 64;
        *p2 += 64;
    }
}
// Answers q range-comparison queries over two character buffers.
//
// n:      number of leading bytes of s1 that are remapped before querying.
// q:      number of queries.
// s1:     first buffer. Its first n bytes are rewritten IN PLACE using the
//         table '0'->'1', '1'->'2', '2'->'0'.
//         NOTE(review): assumes every byte is '0'..'2'; other values index
//         outside the lookup table — confirm against the input format.
// s2:     second buffer; only read here.
// q_x, q_y, q_len: for query k, the start offset into s1, the start offset
//         into s2, and the number of bytes to compare.
// ans:    output array; ans[k] receives the match count for query k, i.e.
//         what process_blocks adds for the full 64-byte blocks plus the
//         scalar count of equal bytes in the remaining tail.
void solve(int n, int q, char* s1, char* s2, int* q_x, int* q_y, int* q_len, unsigned* ans) {
    // Remap s1 once up front so each query reduces to a byte-equality count.
    const char* const remap = "120";
    int pos = 0;
    while (pos < n) {
        s1[pos] = remap[s1[pos] - '0'];
        ++pos;
    }

    for (int query = 0; query < q; ++query) {
        const char* lhs = s1 + q_x[query];
        const char* rhs = s2 + q_y[query];
        unsigned remaining = q_len[query];
        unsigned matches = 0;

        // Vectorised part: hand whole 64-byte blocks to process_blocks, at
        // most 255 per call. NOTE(review): the cap comes from the original
        // "avoid overflow" comment; the counter used here is a 32-bit
        // unsigned, so the cap may be a leftover — confirm before removing.
        while (remaining >= 64) {
            int batch = remaining / 64;
            if (batch > 255) {
                batch = 255;
            }
            process_blocks(batch, &lhs, &rhs, &matches);  // advances lhs and rhs
            remaining -= batch * 64;
        }

        // Scalar tail: fewer than 64 bytes are left.
        const char* const tail_end = lhs + remaining;
        while (lhs != tail_end) {
            matches += (*lhs == *rhs);
            ++lhs;
            ++rhs;
        }

        ans[query] = matches;
    }
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 163.44 us | 40 KB | Wrong Answer | Score: 0 | 显示更多 |
Testcase #2 | 782.669 ms | 5 MB + 176 KB | Wrong Answer | Score: 0 | 显示更多 |
Judge Duck Online | 评测鸭在线
Server Time: 2025-04-05 09:08:43 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠