提交记录 28107


用户 题目 状态 得分 用时 内存 语言 代码长度
TSKY test. 自定义测试 Runtime Error 0 36.65 us 32 KB C++14 2.37 KB
提交时间 评测时间
2025-05-31 14:08:34 2025-05-31 14:08:36
#include <iostream>
#include <cstring>
#include <chrono>
#include <immintrin.h>

#pragma GCC target("avx")

/**
 * Estimates the CPU clock frequency by timing a long, serially dependent add loop.
 *
 * The loop runs about 5e9 iterations (the bounds are offset by rand() so the trip
 * count is not a compile-time constant). The iteration count divided by the elapsed
 * wall-clock seconds is returned.
 * NOTE(review): treating this as "Hz" assumes the core retires exactly one loop
 * iteration per cycle - confirm on the target hardware.
 *
 * Side effects: prints the accumulator and the elapsed seconds to std::cout.
 *
 * @return Iterations per second, or 0 if the measured elapsed time is not positive.
 */
size_t getFreq()
{
    size_t x = rand();
    const size_t begin = rand();
    const size_t end = rand() + static_cast<size_t>(5e9);
    const auto begin_time = std::chrono::steady_clock::now();
    for (size_t i = begin; i < end; ++i)
    {
        x += i;
        // Empty asm makes x opaque to the optimizer, so the loop can be neither
        // folded into a closed-form sum nor vectorised; each iteration stays a
        // single dependent add, which is what the estimate relies on.
        __asm__ __volatile__("" : "+r"(x));
    }
    const auto end_time = std::chrono::steady_clock::now();
    const size_t loop = end - begin;
    std::cout << x << std::endl;
    const double t = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - begin_time).count();
    std::cout << t << std::endl;
    // A zero (or NaN) duration would make loop / t non-finite, and converting a
    // non-finite double to size_t is undefined behaviour.
    if (!(t > 0.0))
    {
        return 0;
    }
    return static_cast<size_t>(loop / t);
}

// Clock frequency in Hz. test_mem() multiplies elapsed seconds by this value to
// obtain a cycle count and prints it scaled to GHz; main() assigns it before use.
size_t FREQ;

/**
 * AVX load/store kernel: walks `src` in 64-byte chunks and swaps the two 32-byte
 * halves of every chunk in place.
 *
 * Only whole 64-byte chunks are processed; a trailing partial chunk (len % 64 bytes)
 * is left untouched so the kernel never reads or writes past `src + len`.
 *
 * The returned "instruction" count is derived from source line numbers: every source
 * line between the two __LINE__ markers inside the loop counts as one instruction per
 * chunk. The two placeholder comment lines keep that count at 2 per chunk.
 *
 * @param dst  Unused; kept so the signature stays unchanged for callers.
 * @param src  Buffer to process in place. Must be 32-byte aligned because the
 *             aligned AVX load/store intrinsics are used.
 * @param len  Buffer length in bytes.
 * @return     Accumulated instruction count (INSTRUCTION per processed chunk).
 */
__attribute__((target("avx"))) inline size_t my_mem_AVX4(char *dst, char *src, size_t len)
{
    (void)dst;
    constexpr size_t CHUNK = 64;
    // Round the length down to a whole number of chunks to avoid overrunning the buffer.
    char *const end = src + (len - len % CHUNK);
    size_t instruction = 0;
    for (; src < end; src += CHUNK)
    {
        auto x0 = _mm256_load_pd(reinterpret_cast<const double *>(src));
        auto x1 = _mm256_load_pd(reinterpret_cast<const double *>(src + 32));
        constexpr size_t N = __LINE__;
        // placeholder 1: put the instructions under test here, one per source line
        // placeholder 2: every line between the markers is counted as one instruction
        constexpr size_t INSTRUCTION = __LINE__ - N - 1;
        _mm256_store_pd(reinterpret_cast<double *>(src), x1);
        _mm256_store_pd(reinterpret_cast<double *>(src + 32), x0);
        instruction += INSTRUCTION;
    }
    return instruction;
}
/**
 * Runs the in-place AVX kernel over a 1 GiB, 64-byte-aligned buffer and prints
 * throughput figures.
 *
 * The buffer is filled with a rand()-derived byte, then my_mem_AVX4 is called
 * COPY_LEN / len times (8 passes for the single len = MAX_LEN iteration). Elapsed
 * seconds are converted to cycles with the global FREQ, and the function prints
 * IPC, bytes per cycle and MB/s. One buffer byte at a random index is printed at
 * the end so the buffer contents are observably used.
 *
 * If the buffer cannot be allocated, an error is written to std::cerr and the
 * function returns without running the benchmark.
 */
inline void test_mem()
{
    constexpr size_t MAX_LEN = size_t(1) << 30;
    constexpr size_t COPY_LEN = size_t(1) << 33;
    auto src = static_cast<char *>(_mm_malloc(MAX_LEN, 64));
    if (src == nullptr)
    {
        // Without this check the memset below would write through a null pointer.
        std::cerr << "test_mem: failed to allocate " << MAX_LEN << " bytes\n";
        return;
    }
    std::cout << FREQ / 1e9 << "GHz\n";
    // Starts at MAX_LEN and doubles, so the body runs exactly once.
    for (size_t len = MAX_LEN; len <= MAX_LEN; len *= 2)
    {
        size_t loop = COPY_LEN / len;
        size_t instruction = 0;
        memset(src, rand(), len);
        auto t_begin = std::chrono::steady_clock::now();
        for (size_t i = 0; i < loop; ++i)
        {
            // memcpy(dst, src, len);
            instruction += my_mem_AVX4(src, src, len);
        }
        auto t_end = std::chrono::steady_clock::now();
        auto time = std::chrono::duration_cast<std::chrono::duration<double>>(t_end - t_begin).count();
        double cycle = time * FREQ;
        std::cout << "len: " << len << " Byte\t" << instruction / cycle << "IPC\t" << COPY_LEN / cycle << " Byte / cycle\t"
                  << COPY_LEN / 1024 / 1024 / time << "MB / s" << std::endl;
    }
    std::cout << src[rand() % MAX_LEN] << std::endl;
    _mm_free(src);
}

// Entry point. FREQ is pinned to 1 here rather than measured, so the "cycle"
// values computed in test_mem() are numerically equal to elapsed seconds.
int main()
{
    FREQ = 1;
    test_mem();
    return 0;
}

Compilation | N/A | N/A | Compile OK | Score: N/A

Testcase #1 | 36.65 us | 32 KB | Runtime Error | Score: 0


Judge Duck Online | 评测鸭在线
Server Time: 2025-06-03 04:16:10 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠