提交记录 21361


用户 题目 状态 得分 用时 内存 语言 代码长度
MicroSun 1001c. 测测你的排序4 Accepted 100 4.848 s 522304 KB C++ 21.81 KB
提交时间 评测时间
2024-03-02 12:22:39 2024-03-02 12:22:49
#ifndef PDQSORT_H
#define PDQSORT_H

#include <bits/stdc++.h>
using namespace std;

#if __cplusplus >= 201103L
    #include <cstdint>
    #include <type_traits>
    #define PDQSORT_PREFER_MOVE(x) std::move(x)
#else
    #define PDQSORT_PREFER_MOVE(x) (x)
#endif


namespace pdqsort_detail {
    enum {
        // Partitions below this size are sorted using insertion sort.
        insertion_sort_threshold = 24,

        // Partitions above this size use Tukey's ninther to select the pivot.
        ninther_threshold = 87,

        // When we detect an already sorted partition, attempt an insertion sort that allows this
        // amount of element moves before giving up.
        partial_insertion_sort_limit = 8,

        // Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char.
        block_size = 64,

        // Cacheline size, assumes power of two.
        cacheline_size = 64

    };

#if __cplusplus >= 201103L
    template<class T> struct is_default_compare : std::false_type { };
    template<class T> struct is_default_compare<std::less<T>> : std::true_type { };
    template<class T> struct is_default_compare<std::greater<T>> : std::true_type { };
#endif

    // Returns floor(log2(n)), assumes n > 0.
    template<class T>
    inline short log2(T n) {
    	if(n>=536870912)
    		return 29;
    	if(n>=268435456)
    		return 28;
    	if(n>=134217728)
    		return 27;
    	if(n>=61708864)
    		return 26;
    	if(n>=33554432)
    		return 25;
    	if(n>=16777216)
    		return 24;
    	if(n>=8388608)
    		return 24;
        short log = 0;
        while (n >>= 1) ++log;
        return log;
    }

    // Sorts [begin, end) using insertion sort with the given comparison function.
    template<class Iter, class Compare>
    inline void insertion_sort(Iter begin, Iter end, Compare comp) {
        typedef typename std::iterator_traits<Iter>::value_type T;
        if (begin == end) return;

        for (Iter cur = begin + 1; cur != end; ++cur) {
            Iter sift = cur;
            Iter sift_1 = cur - 1;

            // Compare first so we can avoid 2 moves for an element already positioned correctly.
            if (comp(*sift, *sift_1)) {
                T tmp = PDQSORT_PREFER_MOVE(*sift);

                do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
                while (sift != begin && comp(tmp, *--sift_1));

                *sift = PDQSORT_PREFER_MOVE(tmp);
            }
        }
    }

    // Sorts [begin, end) using insertion sort with the given comparison function. Assumes
    // *(begin - 1) is an element smaller than or equal to any element in [begin, end).
    template<class Iter, class Compare>
    inline void unguarded_insertion_sort(Iter begin, Iter end, Compare comp) {
        typedef typename std::iterator_traits<Iter>::value_type T;
        if (begin == end) return;

        for (Iter cur = begin + 1; cur != end; ++cur) {
            Iter sift = cur;
            Iter sift_1 = cur - 1;

            // Compare first so we can avoid 2 moves for an element already positioned correctly.
            if (comp(*sift, *sift_1)) {
                T tmp = PDQSORT_PREFER_MOVE(*sift);

                do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
                while (comp(tmp, *--sift_1));

                *sift = PDQSORT_PREFER_MOVE(tmp);
            }
        }
    }

    // Attempts to use insertion sort on [begin, end). Will return false if more than
    // partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will
    // successfully sort and return true.
    template<class Iter, class Compare>
    inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
        typedef typename std::iterator_traits<Iter>::value_type T;
        if (begin == end) return true;
        
        std::size_t limit = 0;
        for (Iter cur = begin + 1; cur != end; ++cur) {
            Iter sift = cur;
            Iter sift_1 = cur - 1;

            // Compare first so we can avoid 2 moves for an element already positioned correctly.
            if (comp(*sift, *sift_1)) {
                T tmp = PDQSORT_PREFER_MOVE(*sift);

                do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
                while (sift != begin && comp(tmp, *--sift_1));

                *sift = PDQSORT_PREFER_MOVE(tmp);
                limit += cur - sift;
            }
            
            if (limit > partial_insertion_sort_limit) return false;
        }

        return true;
    }

    template<class Iter, class Compare>
    inline void sort2(Iter a, Iter b, Compare comp) {
        if (comp(*b, *a)) std::iter_swap(a, b);
    }

    // Sorts the elements *a, *b and *c using comparison function comp.
    template<class Iter, class Compare>
    inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
        sort2(a, b, comp),
        sort2(b, c, comp),
        sort2(a, b, comp);
    }

    template<class T>
    inline T* align_cacheline(T* p) {
#if defined(UINTPTR_MAX) && __cplusplus >= 201103L
        std::uintptr_t ip = reinterpret_cast<std::uintptr_t>(p);
#else
        std::size_t ip = reinterpret_cast<std::size_t>(p);
#endif
        ip = (ip + cacheline_size - 1) & -cacheline_size;
        return reinterpret_cast<T*>(ip);
    }

    template<class Iter>
    inline void swap_offsets(Iter first, Iter last,
                             unsigned char* offsets_l, unsigned char* offsets_r,
                             size_t num, bool use_swaps) {
        typedef typename std::iterator_traits<Iter>::value_type T;
        if (use_swaps) {
            // This case is needed for the descending distribution, where we need
            // to have proper swapping for pdqsort to remain O(n).
            for (size_t i = 0; i < num; ++i) {
                std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
            }
        } else if (num > 0) {
            Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
            T tmp(PDQSORT_PREFER_MOVE(*l)); *l = PDQSORT_PREFER_MOVE(*r);
            for (size_t i = 1; i < num; ++i) {
                l = first + offsets_l[i], *r = PDQSORT_PREFER_MOVE(*l),
                r = last - offsets_r[i], *l = PDQSORT_PREFER_MOVE(*r);
            }
            *r = PDQSORT_PREFER_MOVE(tmp);
        }
    }

    // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
    // to the pivot are put in the right-hand partition. Returns the position of the pivot after
    // partitioning and whether the passed sequence already was correctly partitioned. Assumes the
    // pivot is a median of at least 3 elements and that [begin, end) is at least
    // insertion_sort_threshold long. Uses branchless partitioning.
    template<class Iter, class Compare>
    inline std::pair<Iter, bool> partition_right_branchless(Iter begin, Iter end, Compare comp) {
        typedef typename std::iterator_traits<Iter>::value_type T;

        // Move pivot into local for speed.
        T pivot(PDQSORT_PREFER_MOVE(*begin));
        Iter first = begin;
        Iter last = end;

        // Find the first element greater than or equal than the pivot (the median of 3 guarantees
        // this exists).
        while (comp(*++first, pivot));

        // Find the first element strictly smaller than the pivot. We have to guard this search if
        // there was no element before *first.
        if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
        else                    while (                !comp(*--last, pivot));

        // If the first pair of elements that should be swapped to partition are the same element,
        // the passed in sequence already was correctly partitioned.
        bool already_partitioned = first >= last;
        if (!already_partitioned) {
            std::iter_swap(first, last);
            ++first;

            // The following branchless partitioning is derived from "BlockQuicksort: How Branch
            // Mispredictions don’t affect Quicksort" by Stefan Edelkamp and Armin Weiss, but
            // heavily micro-optimized.
            unsigned char offsets_l_storage[block_size + cacheline_size];
            unsigned char offsets_r_storage[block_size + cacheline_size];
            unsigned char* offsets_l = align_cacheline(offsets_l_storage);
            unsigned char* offsets_r = align_cacheline(offsets_r_storage);

            Iter offsets_l_base = first;
            Iter offsets_r_base = last;
            size_t num_l, num_r, start_l, start_r;
            num_l = num_r = start_l = start_r = 0;
            
            while (first < last) {
                // Fill up offset blocks with elements that are on the wrong side.
                // First we determine how much elements are considered for each offset block.
                size_t num_unknown = last - first;
                size_t left_split = num_l == 0 ? (num_r == 0 ? num_unknown / 2 : num_unknown) : 0;
                size_t right_split = num_r == 0 ? (num_unknown - left_split) : 0;

                // Fill the offset blocks.
                if (left_split >= block_size) {
                    for (size_t i = 0; i < block_size;) {
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first,
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first,
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first,
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first,
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first,
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first,
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first,
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first;
                    }
                } else {
                    for (size_t i = 0; i < left_split;) {
                        offsets_l[num_l] = i++, num_l += !comp(*first, pivot); ++first;
                    }
                }

                if (right_split >= block_size) {
                    for (size_t i = 0; i < block_size;) {
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot),
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot),
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot),
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot),
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot),
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot),
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot),
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot);
                    }
                } else {
                    for (size_t i = 0; i < right_split;) {
                        offsets_r[num_r] = ++i, num_r += comp(*--last, pivot);
                    }
                }

                // Swap elements and update block sizes and first/last boundaries.
                size_t num = std::min(num_l, num_r);
                swap_offsets(offsets_l_base, offsets_r_base,
                             offsets_l + start_l, offsets_r + start_r,
                             num, num_l == num_r);
                num_l -= num, num_r -= num,
                start_l += num, start_r += num;

                if (num_l == 0) {
                    start_l = 0,
                    offsets_l_base = first;
                }
                
                if (num_r == 0) {
                    start_r = 0,
                    offsets_r_base = last;
                }
            }

            // We have now fully identified [first, last)'s proper position. Swap the last elements.
            if (num_l) {
                offsets_l += start_l;
                while (num_l--) std::iter_swap(offsets_l_base + offsets_l[num_l], --last);
                first = last;
            }
            if (num_r) {
                offsets_r += start_r;
                while (num_r--) std::iter_swap(offsets_r_base - offsets_r[num_r], first), ++first;
                last = first;
            }
        }

        // Put the pivot in the right place.
        Iter pivot_pos = first - 1;
        *begin = PDQSORT_PREFER_MOVE(*pivot_pos),
        *pivot_pos = PDQSORT_PREFER_MOVE(pivot);

        return std::make_pair(pivot_pos, already_partitioned);
    }



    // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
    // to the pivot are put in the right-hand partition. Returns the position of the pivot after
    // partitioning and whether the passed sequence already was correctly partitioned. Assumes the
    // pivot is a median of at least 3 elements and that [begin, end) is at least
    // insertion_sort_threshold long.
    template<class Iter, class Compare>
    inline std::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
        typedef typename std::iterator_traits<Iter>::value_type T;
        
        // Move pivot into local for speed.
        T pivot(PDQSORT_PREFER_MOVE(*begin));

        Iter first = begin;
        Iter last = end;

        while (comp(*++first, pivot));

        if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
        else                    while (                !comp(*--last, pivot));

        bool already_partitioned = first >= last;
        
        while (first < last) {
            std::iter_swap(first, last);
            while (comp(*++first, pivot));
            while (!comp(*--last, pivot));
        }

        Iter pivot_pos = first - 1;
        *begin = PDQSORT_PREFER_MOVE(*pivot_pos),
        *pivot_pos = PDQSORT_PREFER_MOVE(pivot);

        return std::make_pair(pivot_pos, already_partitioned);
    }

    template<class Iter, class Compare>
    inline Iter partition_left(Iter begin, Iter end, Compare comp) {
        typedef typename std::iterator_traits<Iter>::value_type T;

        T pivot(PDQSORT_PREFER_MOVE(*begin));
        Iter first = begin;
        Iter last = end;
        
        while (comp(pivot, *--last));

        if (last + 1 == end) while (first < last && !comp(pivot, *++first));
        else                 while (                !comp(pivot, *++first));

        while (first < last) {
            std::iter_swap(first, last);
            while (comp(pivot, *--last));
            while (!comp(pivot, *++first));
        }

        Iter pivot_pos = last;
        *begin = PDQSORT_PREFER_MOVE(*pivot_pos),
        *pivot_pos = PDQSORT_PREFER_MOVE(pivot);

        return pivot_pos;
    }


    template<class Iter, class Compare, bool Branchless>
    inline void pdqsort_loop(Iter begin, Iter end, Compare comp, short bad_allowed, bool leftmost = true) {
        typedef typename std::iterator_traits<Iter>::difference_type diff_t;

        while (true) {
            diff_t size = end - begin;

            if (size < insertion_sort_threshold) {
                if (leftmost) insertion_sort(begin, end, comp);
                else unguarded_insertion_sort(begin, end, comp);
                return;
            }

            diff_t s2 = size / 2;
            if (size > ninther_threshold) {
                sort3(begin, begin + s2, end - 1, comp),
                sort3(begin + 1, begin + (s2 - 1), end - 2, comp),
                sort3(begin + 2, begin + (s2 + 1), end - 3, comp),
                sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp),
                std::iter_swap(begin, begin + s2);
            } else sort3(begin + s2, begin, end - 1, comp);

            if (!leftmost && !comp(*(begin - 1), *begin)) {
                begin = partition_left(begin, end, comp) + 1;
                continue;
            }

            std::pair<Iter, bool> part_result =
                Branchless ? partition_right_branchless(begin, end, comp)
                           : partition_right(begin, end, comp);
            Iter pivot_pos = part_result.first;
            bool already_partitioned = part_result.second;

            diff_t l_size = pivot_pos - begin;
            diff_t r_size = end - (pivot_pos + 1);
            bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;

            if (highly_unbalanced) {
                if (--bad_allowed == 0) {
                    std::make_heap(begin, end, comp),
                    std::sort_heap(begin, end, comp);
                    return;
                }

                if (l_size >= insertion_sort_threshold) {
                    std::iter_swap(begin,             begin + l_size / 4),
                    std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);

                    if (l_size > ninther_threshold) {
                        std::iter_swap(begin + 1,         begin + (l_size / 4 + 1)),
                        std::iter_swap(begin + 2,         begin + (l_size / 4 + 2)),
                        std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1)),
                        std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
                    }
                }
                
                if (r_size >= insertion_sort_threshold) {
                    std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)),
                    std::iter_swap(end - 1,                   end - r_size / 4);
                    
                    if (r_size > ninther_threshold) {
                        std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)),
                        std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)),
                        std::iter_swap(end - 2,             end - (1 + r_size / 4)),
                        std::iter_swap(end - 3,             end - (2 + r_size / 4));
                    }
                }
            } else {
                if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
                                        && partial_insertion_sort(pivot_pos + 1, end, comp)) return;
            }
                
            pdqsort_loop<Iter, Compare, Branchless>(begin, pivot_pos, comp, bad_allowed, leftmost),
            begin = pivot_pos + 1,
            leftmost = false;
        }
    }
}


template<class Iter, class Compare>
inline void pdqsort(Iter begin, Iter end, Compare comp) {
    if (begin == end) return;

#if __cplusplus >= 201103L
    pdqsort_detail::pdqsort_loop<Iter, Compare,
        pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value &&
        std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
        begin, end, comp, pdqsort_detail::log2(end - begin));
#else
    pdqsort_detail::pdqsort_loop<Iter, Compare, false>(
        begin, end, comp, pdqsort_detail::log2(end - begin));
#endif
}

template<class Iter>
inline void pdqsort(Iter begin, Iter end) {
    typedef typename std::iterator_traits<Iter>::value_type T;
    pdqsort(begin, end, std::less<T>());
}

template<class Iter, class Compare>
inline void pdqsort_branchless(Iter begin, Iter end, Compare comp) {
    if (begin == end) return;
    pdqsort_detail::pdqsort_loop<Iter, Compare, true>(
        begin, end, comp, pdqsort_detail::log2(end - begin));
}

template<class Iter>
inline void pdqsort_branchless(Iter begin, Iter end) {
    typedef typename std::iterator_traits<Iter>::value_type T;
    pdqsort_branchless(begin, end, std::less<T>());
}


#undef PDQSORT_PREFER_MOVE

#endif
namespace io {
	template <typename T>
	inline void read(T& x) {
		T sum = 0;
		short ch = getchar();
		for (; !isdigit(ch); ch=getchar());
		for (; isdigit(ch); ch=getchar()) sum = (sum<<1)+(sum<<3) + ch - '0';
		x = sum ;
	}
	template <typename T>
	inline void write(T x) {
		static short sta[35];
		short top = 0;
		do {
			sta[top++] = x % 10, x /= 10;
		} while (x);
		while (top) putchar(sta[--top] + 48); 
	}
}
namespace IO
{
  FILE *Fin(stdin), *Fout(stdout);
  class qistream
  {
    static const size_t SIZE = 1 << 16, BLOCK = 32;
    FILE * fp;
    char buf[SIZE];
    int p;

  public:
    qistream(FILE * _fp = stdin) : fp(_fp), p(0)
    {
      fread(buf + p, 1, SIZE - p, fp);
    }

    void flush()
    {
      memmove(buf, buf + p, SIZE - p), fread(buf + SIZE - p, 1, p, fp), p = 0;
    }

    qistream & operator>>(char & str)
    {
      str = getch();
      while (isspace(str))
        str = getch();
      return *this;
    }

    template <class T>
    qistream & operator>>(T & x)
    {
      x = 0;
      p + BLOCK >= SIZE ? flush() : void();
      bool flag = false;
      for (; !isdigit(buf[p]); ++p)
        flag = buf[p] == '-';
      for (; isdigit(buf[p]); ++p)
        x = x * 10 + buf[p] - '0';
      x = flag ? -x : x;
      return *this;
    }

    char getch()
    {
      return buf[p++];
    }

    qistream & operator>>(char * str)
    {
      char ch = getch();
      while (ch <= ' ')
        ch = getch();
      for (short i = 0; ch > ' '; ++i, ch = getch())
        str[i] = ch;
      return *this;
    }

  } qcin(Fin);

  class qostream
  {
    static const size_t SIZE = 1 << 16, BLOCK = 32;
    FILE * fp;
    char buf[SIZE];
    int p;

  public:
    qostream(FILE * _fp = stdout) : fp(_fp), p(0)
    {}

    ~qostream()
    {
      fwrite(buf, 1, p, fp);
    }

    void flush()
    {
      fwrite(buf, 1, p, fp), p = 0;
    }

    template <class T>
    qostream & operator<<(T x)
    {
      int len = 0;
      p + BLOCK >= SIZE ? flush() : void();
      x < 0 ? (x = -x, buf[p++] = '-') : 0;
      do
        buf[p + len] = x % 10 + '0', x /= 10, ++len;
      while (x);
      for (short i = 0, j = len - 1; i < j; ++i, --j)
        swap(buf[p + i], buf[p + j]);
      p += len;
      return *this;
    }

    qostream & operator<<(char x)
    {
      putch(x);
      return *this;
    }

    void putch(char ch)
    {
      p + BLOCK >= SIZE ? flush() : void();
      buf[p++] = ch;
    }

    qostream & operator<<(const char * str)
    {
      for (int i = 0; str[i]; ++i)
        putch(str[i]);
      return *this;
    }

  } qcout(Fout);
}
using namespace IO;
constexpr int N = 1e5+2;
int n, a[N];
void sort(unsigned *a, int n){

  pdqsort(a , a + n);
}

CompilationN/AN/ACompile OKScore: N/A

Testcase #14.848 s510 MB + 64 KBAcceptedScore: 100


Judge Duck Online | 评测鸭在线
Server Time: 2024-04-28 17:58:14 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用