提交记录 27815
提交时间 |
评测时间 |
2025-01-27 13:22:51 |
2025-01-27 13:22:53 |
//written by deepseek-R1
#include <algorithm>
#include <cstdlib>
#include <immintrin.h>
void sort(unsigned *a, int n) {
const size_t alignment = 64; // 缓存行对齐
unsigned *tmp = static_cast<unsigned*>(aligned_alloc(alignment, n * sizeof(unsigned)));
unsigned *src = a;
unsigned *dst = tmp;
for (int k = 0; k < 4; ++k) {
const unsigned shift = k * 8;
unsigned count[256] = {0};
// 统计循环:指针遍历+预取
unsigned *p = src;
unsigned *end = src + n;
for (; p < end - 32; p += 32) {
_mm_prefetch(p + 64, _MM_HINT_T0); // 预取未来64元素
for (int i = 0; i < 32; ++i) {
unsigned x = p[i];
count[(x >> shift) & 0xFF]++;
}
}
// 处理剩余元素
for (; p < end; ++p) {
unsigned x = *p;
count[(x >> shift) & 0xFF]++;
}
// 前缀和计算
unsigned sum = 0;
for (int i = 0; i < 256; ++i) {
unsigned temp = count[i];
count[i] = sum;
sum += temp;
}
// 重新排列:批量写入优化
const int BUF_SIZE = 1024;
unsigned buffer[256][BUF_SIZE];
unsigned buf_counts[256] = {0};
p = src;
end = src + n;
for (; p < end; ++p) {
unsigned x = *p;
unsigned radix = (x >> shift) & 0xFF;
buffer[radix][buf_counts[radix]] = x;
if (++buf_counts[radix] == BUF_SIZE) {
unsigned *dst_ptr = dst + count[radix];
for (int j = 0; j < BUF_SIZE; j += 8) {
_mm256_storeu_si256(
reinterpret_cast<__m256i*>(dst_ptr + j),
_mm256_loadu_si256(reinterpret_cast<__m256i*>(&buffer[radix][j]))
);
}
count[radix] += BUF_SIZE;
buf_counts[radix] = 0;
}
}
// 处理剩余缓冲数据
for (int r = 0; r < 256; ++r) {
if (buf_counts[r] > 0) {
unsigned *dst_ptr = dst + count[r];
for (int j = 0; j < buf_counts[r]; j += 8) {
_mm256_storeu_si256(
reinterpret_cast<__m256i*>(dst_ptr + j),
_mm256_loadu_si256(reinterpret_cast<__m256i*>(&buffer[r][j]))
);
}
count[r] += buf_counts[r];
}
}
std::swap(src, dst);
}
if (src == tmp) {
_mm256_storeu_si256(
reinterpret_cast<__m256i*>(a),
_mm256_loadu_si256(reinterpret_cast<__m256i*>(src))
);
std::copy(src, src + n, a);
}
free(tmp);
}
Compilation | N/A | N/A | Compile Error | Score: N/A | 显示更多 |
Judge Duck Online | 评测鸭在线
Server Time: 2025-04-05 14:18:48 | Loaded in 3 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠