提交记录 20159


用户 题目 状态 得分 用时 内存 语言 代码长度
TSKY test. 自定义测试 Runtime Error 0 166.821 ms 44 KB C++14 5.15 KB
提交时间 评测时间
2023-09-15 18:35:02 2023-09-15 18:35:04
#include <iostream>
#include <chrono>
#include <immintrin.h>

#pragma GCC target("fma")
//#pragma GCC target("avx512f")

/// Minimal stopwatch that measures one elapsed interval at a time.
///
/// Usage: call start(), run the work, call stop(), then read duration().
/// The interval is stored in nanoseconds; duration() reports it as
/// `nanoseconds / (rate * 1000)`, i.e. microseconds when rate is 1.
class StopWatch
{
private:
    // steady_clock is monotonic, so a wall-clock adjustment made while the
    // stopwatch is running cannot distort (or make negative) the interval.
    using clock_type = std::chrono::steady_clock;

    bool is_stop = false, is_start = false;
    uint64_t tick = 0; // last measured interval, in nanoseconds
    double rate = 1.0; // divisor applied on top of the ns -> us conversion
    clock_type::time_point begin;
    clock_type::time_point end;

public:
    /// Creates an idle stopwatch with rate 1.
    StopWatch() = default;

    /// Creates an idle stopwatch whose reported durations are divided by
    /// `rate_in`. Left implicit so existing conversions keep compiling.
    StopWatch(double rate_in) : rate(rate_in) {}

    /// Discards any previous measurement and begins timing from now.
    void start()
    {
        reset();
        is_start = true;
        is_stop = false;
        begin = clock_type::now();
    }

    /// Records the time elapsed since start(). Does nothing if the stopwatch
    /// was never started (or was reset since).
    void stop()
    {
        if (is_start)
        {
            is_stop = true;
            end = clock_type::now();
            const auto delta_time = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
            tick = static_cast<uint64_t>(delta_time.count());
        }
    }

    /// Returns the stopwatch to its idle state with a zero interval.
    void reset()
    {
        is_start = false;
        is_stop = false;
        tick = 0;
    }

    /// Returns the measured interval as `nanoseconds / (rate * 1000)`.
    ///
    /// If stop() has not been called yet, the interval up to now is sampled
    /// and the stopwatch is left running, so later calls keep returning
    /// growing values. Returns 0 when the stopwatch was never started.
    double duration()
    {
        if (!is_stop)
        {
            stop();
            // Undo the "stopped" mark: this was only an intermediate reading.
            is_stop = false;
        }
        return tick / (rate * 1000.0);
    }
};
// Shared stopwatch used by the benchmark routines below; a rate of 1 means
// duration() reports the elapsed nanoseconds divided by 1000.
StopWatch watch_default(1.0);

/// Measures double-precision FMA throughput with 256-bit vectors.
///
/// Runs `test_cycle` iterations of a loop holding 12 `_mm256_fmadd_pd`
/// operations, each on its own accumulator (4 doubles per vector, counted as
/// 2 flops per lane). The loop is timed with the global `watch_default`.
/// The sum of all accumulator lanes and the timing line are written to
/// std::cout.
///
/// Each step computes a = 2*a + a, so for large `test_cycle` the accumulators
/// overflow and the printed sum is `inf`.
///
/// @param test_cycle number of loop iterations to time.
/// @return flops per microsecond (Mflops), or 0 when no elapsed time was
///         measured.
size_t MFLOPS_AVX2(size_t test_cycle)
{
    __m256d a0 = _mm256_set1_pd(1);
    __m256d a1 = _mm256_set1_pd(2);
    __m256d a2 = _mm256_set1_pd(3);
    __m256d a3 = _mm256_set1_pd(4);
    __m256d a4 = _mm256_set1_pd(5);
    __m256d a5 = _mm256_set1_pd(6);
    __m256d a6 = _mm256_set1_pd(7);
    __m256d a7 = _mm256_set1_pd(8);
    __m256d a8 = _mm256_set1_pd(9);
    __m256d a9 = _mm256_set1_pd(10);
    __m256d a10 = _mm256_set1_pd(11);
    __m256d a11 = _mm256_set1_pd(12);
    const __m256d b = _mm256_set1_pd(2);

    watch_default.start();
    for (size_t i = 0; i < test_cycle; i++)
    {
        // No accumulator reads another one, so the 12 updates per iteration
        // carry no dependency on each other.
        a0 = _mm256_fmadd_pd(b, a0, a0);
        a1 = _mm256_fmadd_pd(b, a1, a1);
        a2 = _mm256_fmadd_pd(b, a2, a2);
        a3 = _mm256_fmadd_pd(b, a3, a3);
        a4 = _mm256_fmadd_pd(b, a4, a4);
        a5 = _mm256_fmadd_pd(b, a5, a5);
        a6 = _mm256_fmadd_pd(b, a6, a6);
        a7 = _mm256_fmadd_pd(b, a7, a7);
        a8 = _mm256_fmadd_pd(b, a8, a8);
        a9 = _mm256_fmadd_pd(b, a9, a9);
        a10 = _mm256_fmadd_pd(b, a10, a10);
        a11 = _mm256_fmadd_pd(b, a11, a11);
    }
    watch_default.stop();

    // Spill every accumulator and print the total so the loop's results are
    // part of the program's observable output.
    double ary[48];
    _mm256_storeu_pd(ary, a0);
    _mm256_storeu_pd(ary + 4, a1);
    _mm256_storeu_pd(ary + 8, a2);
    _mm256_storeu_pd(ary + 12, a3);
    _mm256_storeu_pd(ary + 16, a4);
    _mm256_storeu_pd(ary + 20, a5);
    _mm256_storeu_pd(ary + 24, a6);
    _mm256_storeu_pd(ary + 28, a7);
    _mm256_storeu_pd(ary + 32, a8);
    _mm256_storeu_pd(ary + 36, a9);
    _mm256_storeu_pd(ary + 40, a10);
    _mm256_storeu_pd(ary + 44, a11);
    double sum = 0;
    for (int i = 0; i < 48; i++)
    {
        sum += ary[i];
    }
    std::cout << sum << std::endl;

    // 12 FMAs per iteration * 4 lanes * 2 flops (multiply + add).
    const size_t flops = test_cycle * 12 * 4 * 2;
    const double elapsed_us = watch_default.duration();
    // A very short run (or a coarse clock) can report zero elapsed time.
    // Dividing by the fractional duration and guarding the zero case avoids
    // the integer division by zero that a truncated microsecond count causes.
    const size_t mflops = elapsed_us > 0.0 ? static_cast<size_t>(flops / elapsed_us) : 0;
    std::cout << "time: " << static_cast<size_t>(elapsed_us) / 1000 << "ms\t" << mflops << "Mflops" << std::endl;
    return mflops;
}
/// Measures double-precision FMA throughput with 512-bit vectors.
///
/// Runs `test_cycle` iterations of a loop holding 12 `_mm512_fmadd_pd`
/// operations, each on its own accumulator (8 doubles per vector, counted as
/// 2 flops per lane). The loop is timed with the global `watch_default`.
/// The sum of all accumulator lanes and the timing line are written to
/// std::cout.
///
/// Each step computes a = 2*a + a, so for large `test_cycle` the accumulators
/// overflow and the printed sum is `inf`.
///
/// NOTE(review): the `avx512f` target pragma in this file is commented out;
/// presumably it has to be enabled before this function is actually called.
///
/// @param test_cycle number of loop iterations to time.
/// @return flops per microsecond (Mflops), or 0 when no elapsed time was
///         measured.
inline size_t MFLOPS_AVX512(size_t test_cycle)
{
    __m512d a0 = _mm512_set1_pd(1);
    __m512d a1 = _mm512_set1_pd(2);
    __m512d a2 = _mm512_set1_pd(3);
    __m512d a3 = _mm512_set1_pd(4);
    __m512d a4 = _mm512_set1_pd(5);
    __m512d a5 = _mm512_set1_pd(6);
    __m512d a6 = _mm512_set1_pd(7);
    __m512d a7 = _mm512_set1_pd(8);
    __m512d a8 = _mm512_set1_pd(9);
    __m512d a9 = _mm512_set1_pd(10);
    __m512d a10 = _mm512_set1_pd(11);
    __m512d a11 = _mm512_set1_pd(12);
    const __m512d b = _mm512_set1_pd(2);

    watch_default.start();
    for (size_t i = 0; i < test_cycle; i++)
    {
        // No accumulator reads another one, so the 12 updates per iteration
        // carry no dependency on each other.
        a0 = _mm512_fmadd_pd(b, a0, a0);
        a1 = _mm512_fmadd_pd(b, a1, a1);
        a2 = _mm512_fmadd_pd(b, a2, a2);
        a3 = _mm512_fmadd_pd(b, a3, a3);
        a4 = _mm512_fmadd_pd(b, a4, a4);
        a5 = _mm512_fmadd_pd(b, a5, a5);
        a6 = _mm512_fmadd_pd(b, a6, a6);
        a7 = _mm512_fmadd_pd(b, a7, a7);
        a8 = _mm512_fmadd_pd(b, a8, a8);
        a9 = _mm512_fmadd_pd(b, a9, a9);
        a10 = _mm512_fmadd_pd(b, a10, a10);
        a11 = _mm512_fmadd_pd(b, a11, a11);
    }
    watch_default.stop();

    // Spill every accumulator and print the total so the loop's results are
    // part of the program's observable output.
    double ary[96];
    _mm512_storeu_pd(ary, a0);
    _mm512_storeu_pd(ary + 8, a1);
    _mm512_storeu_pd(ary + 16, a2);
    _mm512_storeu_pd(ary + 24, a3);
    _mm512_storeu_pd(ary + 32, a4);
    _mm512_storeu_pd(ary + 40, a5);
    _mm512_storeu_pd(ary + 48, a6);
    _mm512_storeu_pd(ary + 56, a7);
    _mm512_storeu_pd(ary + 64, a8);
    _mm512_storeu_pd(ary + 72, a9);
    _mm512_storeu_pd(ary + 80, a10);
    _mm512_storeu_pd(ary + 88, a11);
    double sum = 0;
    for (int i = 0; i < 96; i++)
    {
        sum += ary[i];
    }
    std::cout << sum << std::endl;

    // 12 FMAs per iteration * 8 lanes * 2 flops (multiply + add).
    const size_t flops = test_cycle * 12 * 8 * 2;
    const double elapsed_us = watch_default.duration();
    // A very short run (or a coarse clock) can report zero elapsed time.
    // Dividing by the fractional duration and guarding the zero case avoids
    // the integer division by zero that a truncated microsecond count causes.
    const size_t mflops = elapsed_us > 0.0 ? static_cast<size_t>(flops / elapsed_us) : 0;
    std::cout << "time: " << static_cast<size_t>(elapsed_us) / 1000 << "ms\t" << mflops << "Mflops" << std::endl;
    return mflops;
}
// Entry point: runs the AVX2 benchmark; the AVX-512 run is left disabled.
int main()
{
    // 1e8 loop iterations for the AVX2 measurement.
    const size_t avx2_cycles = 100000000;

    std::cout << "AVX2:\n";
    MFLOPS_AVX2(avx2_cycles);

    // Disabled AVX-512 measurement (1e9 loop iterations):
    //std::cout << "AVX512:\n";
    //MFLOPS_AVX512(1e9);
    return 0;
}

Compilation | N/A | N/A | Compile OK | Score: N/A

Testcase #1 | 166.821 ms | 44 KB | Runtime Error | Score: 0


Judge Duck Online | 评测鸭在线
Server Time: 2025-05-09 18:41:52 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠