提交记录 27819


用户 题目 状态 得分 用时 内存 语言 代码长度
Izumi_Sagiri wc2017b1. 【WC2017】挑战-任务1 Compile Error 0 0 ns 0 KB C++17 2.79 KB
提交时间 评测时间
2025-01-27 15:00:54 2025-01-27 15:00:56
#include <algorithm>
#include <cstring>
#include <omp.h>

void sort(unsigned *a, int n) {
    constexpr int RADIX_BITS = 8;
    constexpr int BUCKETS = 1 << RADIX_BITS;
    constexpr int NUM_PASSES = 32 / RADIX_BITS;

    unsigned* tmp = new unsigned[n];
    unsigned* input = a;
    unsigned* output = tmp;

    int* count_arrays = new int[omp_get_max_threads() * BUCKETS];

    for (int shift = 0; shift < 32; shift += RADIX_BITS) {
        const int tshift = shift;  // Workaround for OpenMP capture
        
        // Phase 1: Parallel histogram
        memset(count_arrays, 0, omp_get_max_threads() * BUCKETS * sizeof(int));
        
        #pragma omp parallel
        {
            int tid = omp_get_thread_num();
            int* hist = count_arrays + tid * BUCKETS;
            
            #pragma omp for nowait
            for (int i = 0; i < n; ++i) {
                hist[(input[i] >> tshift) & (BUCKETS-1)]++;
            }
        }

        // Phase 2: Aggregate histograms
        int global_hist[BUCKETS] = {0};
        for (int t = 0; t < omp_get_max_threads(); ++t) {
            int* hist = count_arrays + t * BUCKETS;
            for (int i = 0; i < BUCKETS; ++i) {
                global_hist[i] += hist[i];
            }
        }

        // Phase 3: Calculate prefix sums
        int prefix[BUCKETS];
        prefix[0] = 0;
        for (int i = 1; i < BUCKETS; ++i) {
            prefix[i] = prefix[i-1] + global_hist[i-1];
        }

        // Phase 4: Calculate write offsets
        int* offsets = new int[omp_get_max_threads() * BUCKETS];
        for (int b = 0; b < BUCKETS; ++b) {
            int accum = prefix[b];
            for (int t = 0; t < omp_get_max_threads(); ++t) {
                int* hist = count_arrays + t * BUCKETS;
                offsets[t * BUCKETS + b] = accum;
                accum += hist[b];
            }
        }

        // Phase 5: Parallel scatter
        #pragma omp parallel
        {
            int tid = omp_get_thread_num();
            int* offset = offsets + tid * BUCKETS;
            int* hist = count_arrays + tid * BUCKETS;
            
            #pragma omp for
            for (int i = 0; i < n; ++i) {
                unsigned val = input[i];
                int b = (val >> tshift) & (BUCKETS-1);
                int pos;
                
                // Find position in thread-local offset
                if (i < (n*(tid+0))/omp_get_num_threads()) continue;
                if (i >= (n*(tid+1))/omp_get_num_threads()) continue;
                
                pos = offset[b];
                offset[b]++;
                output[pos] = val;
            }
        }

        delete[] offsets;
        std::swap(input, output);
    }

    if (input == tmp) {
        std::memcpy(a, tmp, n * sizeof(unsigned));
    }

    delete[] tmp;
    delete[] count_arrays;
}

CompilationN/AN/ACompile ErrorScore: N/A


Judge Duck Online | 评测鸭在线
Server Time: 2025-04-05 13:58:40 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠