/*
 * Submission record 27850
 * Submitted at: 2025-01-28 02:29:12
 * Judged at:    2025-01-28 02:29:14
 */
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
/* GCC matches x86 target names case-sensitively: it must be "avx2", not "AVX2". */
#pragma GCC target("avx2")

/**
 * Answer q range-comparison queries over two character strings.
 *
 * For query i, position k (0 <= k < q_len[i]) pairs s1[q_x[i] + k] with
 * s2[q_y[i] + k]; ans[i] receives the number of positions whose difference
 * d = s1[...] - s2[...] is counted as a hit. In the 8-wide vector part a hit
 * is d == -1 or d == 2; in the scalar tail it is (d + 3) % 3 == 2.
 * NOTE(review): the two tests agree whenever |d| <= 2, e.g. when both strings
 * hold only the digits '0'..'2' -- presumably the intended input; confirm
 * against the problem statement.
 *
 * The characters are compared directly: subtracting '0' from both operands
 * would cancel in the difference, so no converted copy of the strings (and
 * therefore no heap allocation) is needed.
 *
 * @param n      length of s1 and s2 (unused; the queries index the strings)
 * @param q      number of queries
 * @param s1     first string
 * @param s2     second string
 * @param q_x    per-query start offset into s1
 * @param q_y    per-query start offset into s2
 * @param q_len  per-query number of positions to compare
 * @param ans    output array of q counts
 */
/* The attribute keeps this function buildable on compilers that ignore
 * `#pragma GCC target`. */
__attribute__((target("avx2")))
void solve(int n, int q, char *s1, char *s2, int *q_x, int *q_y, int *q_len, unsigned *ans) {
    enum { LANES = 8 }; /* 32-bit lanes in one 256-bit register */

    const __m256i minus_one = _mm256_set1_epi32(-1);
    const __m256i two = _mm256_set1_epi32(2);

    (void)n;

    for (int i = 0; i < q; ++i) {
        const char *p1 = s1 + q_x[i];
        const char *p2 = s2 + q_y[i];
        const int len = q_len[i];
        const int limit = len - (len % LANES);
        __m256i acc = _mm256_setzero_si256();
        int k = 0;

        for (; k < limit; k += LANES) {
            /* Load 8 chars from each string and sign-extend them to 32 bits. */
            __m256i va = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i *)(p1 + k)));
            __m256i vb = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i *)(p2 + k)));
            __m256i diff = _mm256_sub_epi32(va, vb);
            __m256i hit = _mm256_or_si256(_mm256_cmpeq_epi32(diff, minus_one),
                                          _mm256_cmpeq_epi32(diff, two));
            /* A matching lane is all-ones (-1), so subtracting adds one. */
            acc = _mm256_sub_epi32(acc, hit);
        }

        /* Horizontal sum of the per-lane counters. */
        int lanes[LANES];
        _mm256_storeu_si256((__m256i *)lanes, acc);
        unsigned sum = 0;
        for (int j = 0; j < LANES; ++j) {
            sum += (unsigned)lanes[j];
        }

        /* Scalar tail for the last len % LANES positions. */
        for (; k < len; ++k) {
            int d = p1[k] - p2[k];
            sum += ((d + 3) % 3) == 2;
        }

        ans[i] = sum;
    }
}
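/*
 * Recorded judge result for this submission:
 * Compilation | N/A | N/A | Compile Error | Score: N/A | Show more |
 * Judge Duck Online (评测鸭在线)
 * Server Time: 2025-04-05 08:36:11 | Loaded in 0 ms | Server Status
 * Personal hobby project, for learning and exchange only | Donate
 */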