// GCC-specific tuning: request -Ofast style optimisation and allow the POPCNT
// instruction to be emitted (query() relies on __builtin_popcount).
#pragma GCC optimize("Ofast")
#pragma GCC target("popcnt")
// Shorthands for GCC attributes used on the declarations below:
// PACKED removes padding between members, ALIGNED places an object on a
// 4096-byte boundary.
#define PACKED __attribute__((packed))
#define ALIGNED __attribute__((aligned(4096)))
// One route as handed to init(): a prefix, its length and the next hop.
// The struct is PACKED and carries its padding explicitly, so the member
// offsets are fixed by this declaration rather than by the compiler.
typedef struct {
// Prefix address; init() passes it through htonl() before inserting it.
unsigned addr;
// Prefix length in bits (ins() distinguishes <= 16, <= 24 and longer).
unsigned char len;
char pad[3]; // Explicit padding between len and nexthop
// Value that query() returns for addresses covered by this prefix.
unsigned nexthop;
} PACKED RoutingTableEntry;
#include <bits/stdc++.h>
using namespace std;
#include <arpa/inet.h>
// Number of leading address bits resolved by the first-level table
// (table_16 has 1 << MAGIC slots); also used to size the hash map.
const int MAGIC = 16;
/*
 * HashMap
 * Maps 32-bit values (the next hops passed in by walk_level2/walk_level3) to
 * small entry ids and back (hashmap_key[id]). Buckets are singly linked
 * chains. Entry id 0 is never allocated: it marks the end of a chain, and
 * because hashmap_key[0] stays zero it also serves as the id of value 0.
 */
// Capacity of hashmap_key / hashmap_next.
// NOTE(review): hashmap_get() does not check this bound when allocating.
const int HASHMAP_SIZE = 1 << MAGIC;
// Number of buckets; hashmap_get() selects a bucket with x % HASHMAP_MOD.
const uint32_t HASHMAP_MOD = HASHMAP_SIZE + 1;
// hashmap_key[id]: the value stored in entry id.
uint32_t hashmap_key[HASHMAP_SIZE] ALIGNED;
// hashmap_next[id]: next entry id in the same bucket chain, 0 at the end.
uint32_t hashmap_next[HASHMAP_SIZE] ALIGNED;
// hashmap_first[h]: most recently added entry id of bucket h, 0 if empty.
uint32_t hashmap_first[HASHMAP_MOD] ALIGNED;
uint32_t mask_pre[256] ALIGNED; // (2u << (index & 31)) - 1, i.e. bits 0..(index & 31) set; filled by init()
// Number of entries allocated so far; equals the id of the newest entry.
int hashmap_size ALIGNED;
/*
 * Return the entry id under which `x` is stored, allocating a new entry when
 * `x` has not been seen before.
 *
 * The bucket chain is scanned until either the key matches or id 0 (the
 * end-of-chain marker) has been examined. Because hashmap_key[0] is never
 * written and therefore stays zero, x == 0 always resolves to id 0.
 *
 * New entries are linked in at the head of their bucket.
 * NOTE(review): nothing stops hashmap_size from reaching HASHMAP_SIZE; the
 * caller is presumably expected to stay below that many distinct values.
 */
inline uint32_t hashmap_get(uint32_t x) {
    uint32_t &bucket_head = hashmap_first[x % HASHMAP_MOD];

    for (uint32_t id = bucket_head;; id = hashmap_next[id]) {
        if (hashmap_key[id] == x) {
            return id;
        }
        if (id == 0) {
            // Reached the terminator without a match.
            break;
        }
    }

    ++hashmap_size;
    hashmap_key[hashmap_size] = x;
    hashmap_next[hashmap_size] = bucket_head;
    bucket_head = hashmap_size;
    return hashmap_size;
}
/*
 * 3-level tree
 * table_16 is indexed by the top MAGIC address bits, a table_24 row by the
 * next 8 bits and a table_32 row by the last 8 bits.
 *
 * A table_16 / table_24 slot holds either a next hop or a reference to a row
 * of the next level. References are the values >= -TABLE_xx_SIZE (unsigned
 * arithmetic, i.e. 0xFFFF0000 and above); the row index is value + TABLE_xx_SIZE.
 * Rows are handed out from the highest index downwards by pre-decrementing
 * table_xx_cnt, which starts at 0 and wraps.
 *
 * NOTE(review): a next hop >= 0xFFFF0000 stored in such a slot would be
 * indistinguishable from a row reference.
 */
const uint32_t TABLE_32_SIZE = 1 << 16; // = 65536 rows; the old note "16384" does not match this value
const uint32_t TABLE_24_SIZE = 1 << 16; // = 65536 rows; the old note "32768" does not match this value
// Wrapping (negative-going) allocation counters for table_32 / table_24 rows.
uint32_t table_32_cnt ALIGNED;
uint32_t table_24_cnt ALIGNED;
// Third- and second-level rows of 256 slots each.
uint32_t table_32[TABLE_32_SIZE][1 << 8] ALIGNED;
uint32_t table_24[TABLE_24_SIZE][1 << 8] ALIGNED;
// First-level table, one slot per value of the top MAGIC address bits.
uint32_t table_16[1 << MAGIC] ALIGNED;
/*
 * Store `val` into a[0] .. a[n-1].
 *
 * a   - first element to overwrite
 * n   - number of elements; zero or a negative count writes nothing
 * val - value to store
 *
 * Callers compute `n` with unsigned arithmetic (e.g. 256 - offset - span), so
 * a malformed prefix can yield a negative count once converted to int. The
 * loop is bounded by `i < n` so such a count is a no-op rather than a runaway
 * write past the table.
 */
static inline void fill(uint32_t *a, int n, uint32_t val) {
    for (int i = 0; i < n; ++i) {
        a[i] = val;
    }
}
/*
 * Insert one prefix into the uncompressed 3-level tree.
 *
 * addr    - prefix address in host byte order (init() applies htonl first)
 * len     - prefix length in bits
 * nexthop - value to store in every slot covered by the prefix
 *
 * len <= MAGIC : the covered range of table_16 is overwritten directly.
 * len <= 24    : the table_16 slot is expanded into a table_24 row if it is
 *                still a plain value, then the covered range of that row is
 *                overwritten.
 * otherwise    : both the table_16 slot and the table_24 slot are expanded as
 *                needed and the covered range of the table_32 row is
 *                overwritten.
 *
 * A freshly expanded row inherits the value previously held by its parent
 * slot in every position the new prefix does not cover.
 *
 * NOTE(review): ranges are overwritten unconditionally, including slots that
 * already reference a deeper row, so a shorter prefix inserted after a longer
 * one replaces it. Callers presumably insert in non-decreasing length order;
 * confirm. Host bits of `addr` beyond `len` are also assumed to be zero.
 */
inline void ins(uint32_t addr, int len, uint32_t nexthop) {
    const uint32_t top = addr >> MAGIC;

    if (len <= MAGIC) {
        fill(table_16 + top, 1u << (MAGIC - len), nexthop);
        return;
    }

    uint32_t &slot16 = table_16[top];
    const bool slot16_is_value = slot16 < -TABLE_24_SIZE;
    const uint32_t low = addr & ((1u << MAGIC) - 1);

    if (len <= 24) {
        const uint32_t first = low >> 8;
        const uint32_t span = 1u << (24 - len);

        if (!slot16_is_value) {
            // Row already exists: only the covered range changes.
            fill(table_24[slot16 + TABLE_24_SIZE] + first, span, nexthop);
            return;
        }

        // Expand the slot: inherited value before and after the new range.
        uint32_t *row = table_24[--table_24_cnt + TABLE_24_SIZE];
        fill(row, first, slot16);
        fill(row + first, span, nexthop);
        fill(row + first + span, 256 - first - span, slot16);
        slot16 = table_24_cnt;
        return;
    }

    // len > 24: make sure a second-level row exists for this /16.
    uint32_t *row24;
    if (slot16_is_value) {
        row24 = table_24[--table_24_cnt + TABLE_24_SIZE];
        fill(row24, 256, slot16);
        slot16 = table_24_cnt;
    } else {
        row24 = table_24[slot16 + TABLE_24_SIZE];
    }

    uint32_t &slot24 = row24[low >> 8];
    const uint32_t first = low & 255u;
    const uint32_t span = 1u << (32 - len);

    if (slot24 < -TABLE_32_SIZE) {
        // Expand the second-level slot into a third-level row.
        uint32_t *row32 = table_32[--table_32_cnt + TABLE_32_SIZE];
        fill(row32, first, slot24);
        fill(row32 + first, span, nexthop);
        fill(row32 + first + span, 256 - first - span, slot24);
        slot24 = table_32_cnt;
    } else {
        fill(table_32[slot24 + TABLE_32_SIZE] + first, span, nexthop);
    }
}
/*
 * Bit sets
 * Compressed form of the tree rows, built by walk_level3()/walk_level2() and
 * read by query(): each 256-slot row becomes a 256-bit change map plus a run
 * of 16-bit pointers, one per stretch of equal consecutive slots.
 */
// Capacity of level3_pointers.
// NOTE(review): walk_level3() does not check this bound.
const int MAX_N_LEVEL3_POINTERS = 1 << 20;
// Per 32 consecutive slots of a row:
//   s   - bit k is set when slot k of this word differs from the slot before it
//   sum - absolute index in the pointer array that applies just before this
//         word starts (for word 0: the index of the row's first pointer)
struct LvPack
{
uint32_t s, sum;
};
// Eight LvPack words per table_32 row.
LvPack level3_bit[TABLE_32_SIZE][(1 << 8) >> 5] ALIGNED;
#define level3_bits(x, y) level3_bit[x][y].s
#define level3_bit_sums(x, y) level3_bit[x][y].sum
// uint32_t level3_offsets[TABLE_32_SIZE]; // disabled: the row's start offset is kept in level3_bit_sums(row, 0)
// Hash-map entry ids (see hashmap_get) for every run of every table_32 row.
uint16_t level3_pointers[MAX_N_LEVEL3_POINTERS] ALIGNED;
// Number of level3_pointers entries in use.
uint32_t n_level3_pointers ALIGNED;
/*
 * Compress one table_32 row into the level-3 bit set and pointer array.
 *
 * The row is scanned left to right. Slot 0 always emits a pointer; every
 * later slot whose value differs from its predecessor sets its bit in the
 * change map and emits another pointer. Pointers are the hash-map ids of the
 * slot values. Before each 32-slot word is scanned, its `sum` is set to the
 * absolute index of the most recently emitted pointer, so that
 * sum + popcount(bits up to and including a slot) addresses that slot's
 * pointer.
 *
 * level3_index - row of table_32 to compress
 */
inline void walk_level3(uint32_t level3_index) {
    const uint32_t *row = table_32[level3_index];
    const uint32_t base = n_level3_pointers;
    uint32_t prev = row[0];
    uint32_t extra = 0;  // pointers emitted after the first one

    level3_pointers[base] = hashmap_get(prev);

    for (int word = 0; word < (1 << 8) / 32; ++word) {
        level3_bit_sums(level3_index, word) = base + extra;

        // Slot 0 of the row has no predecessor and never sets a bit.
        for (int bit = (word == 0) ? 1 : 0; bit < 32; ++bit) {
            const uint32_t value = row[word * 32 + bit];
            if (value == prev) {
                continue;
            }
            level3_bits(level3_index, word) |= 1u << bit;
            prev = value;
            ++extra;
            level3_pointers[base + extra] = hashmap_get(value);
        }
    }

    n_level3_pointers = base + extra + 1;
}
// Capacity of level2_pointers.
// NOTE(review): walk_level2() does not check this bound.
const int MAX_N_LEVEL2_POINTERS = 1 << 21;
// Eight change-map words (bitmap + running pointer index) per table_24 row.
LvPack level2_bit[TABLE_24_SIZE][(1 << 8) >> 5] ALIGNED;
#define level2_bits(x, y) level2_bit[x][y].s
#define level2_bit_sums(x, y) level2_bit[x][y].sum
// uint32_t level2_offsets[TABLE_24_SIZE]; // may overflow uint16 ??? (disabled: the start offset is kept in level2_bit_sums(row, 0))
// One entry per run of equal slots in a table_24 row. walk_level2() stores
// either a table_32 row reference truncated to 16 bits or a hash-map entry id;
// query() sign-extends the entry and treats results >= -TABLE_32_SIZE as row
// references.
// NOTE(review): this encoding only round-trips while hash-map ids stay below
// 32768 and table_32 row indices stay at or above 32768.
int16_t level2_pointers[MAX_N_LEVEL2_POINTERS] ALIGNED;
// Number of level2_pointers entries in use.
uint32_t n_level2_pointers ALIGNED;
/*
 * Compress one table_24 row into the level-2 bit set and pointer array.
 *
 * Works like the level-3 pass: slot 0 emits a pointer, each later slot that
 * differs from its predecessor sets its change bit and emits another one,
 * and every 32-slot word records the absolute index of the latest pointer
 * before the word is scanned.
 *
 * A slot that references a table_32 row (value >= -TABLE_32_SIZE) is stored
 * as is, truncated to 16 bits; any other value is replaced by its hash-map id.
 *
 * level2_index - row of table_24 to compress
 */
inline void walk_level2(uint32_t level2_index) {
    const uint32_t *row = table_24[level2_index];
    const uint32_t base = n_level2_pointers;
    uint32_t prev = row[0];
    uint32_t extra = 0;  // pointers emitted after the first one

    if (prev >= -TABLE_32_SIZE) {
        level2_pointers[base] = prev;
    } else {
        level2_pointers[base] = hashmap_get(prev);
    }

    for (int word = 0; word < (1 << 8) / 32; ++word) {
        level2_bit_sums(level2_index, word) = base + extra;

        // Slot 0 of the row has no predecessor and never sets a bit.
        for (int bit = (word == 0) ? 1 : 0; bit < 32; ++bit) {
            const uint32_t value = row[word * 32 + bit];
            if (value == prev) {
                continue;
            }
            level2_bits(level2_index, word) |= 1u << bit;
            prev = value;
            ++extra;
            if (value >= -TABLE_32_SIZE) {
                level2_pointers[base + extra] = value;
            } else {
                level2_pointers[base + extra] = hashmap_get(value);
            }
        }
    }

    n_level2_pointers = base + extra + 1;
}
extern "C" void init(int n, int q, const RoutingTableEntry *tbl) {
for (int i = 0; i < 256; i++) {
mask_pre[i] = (2u << (i & 31)) - 1;
}
for (int i = 0; i < n; i++) {
ins(htonl(tbl[i].addr), tbl[i].len, tbl[i].nexthop);
}
for (int i = table_32_cnt; i < 0; i++) {
walk_level3(i + TABLE_32_SIZE);
}
for (int i = table_24_cnt; i < 0; i++) {
walk_level2(i + TABLE_24_SIZE);
}
// for (int i = 0; i < (1u << MAGIC)); i++) {
// if (i < htons(i)) {
// swap(table_16[i], table_16[htons(i)]);
// }
// }
}
// Bit-field view of a 32-bit address split into the three table indices:
// l1 (MAGIC bits), l2 (24 - MAGIC bits) and l3 (8 bits).
// NOTE(review): which address bits land in which field depends on the
// compiler's bit-field allocation order, which is implementation-defined.
// The member order presumably targets GCC on a little-endian machine, where
// the first member (l3) takes the lowest bits; confirm before porting.
struct QueryAddr
{
unsigned l3 : 8;
unsigned l2 : 24 - MAGIC;
unsigned l1 : MAGIC;
} PACKED;
extern "C" unsigned query(unsigned addr_in) {
addr_in = htonl(addr_in);
QueryAddr addr; memcpy(&addr, &addr_in, sizeof(addr));
uint32_t tmp = table_16[addr.l1];
if (tmp >= -TABLE_24_SIZE) {
uint32_t level2_index = tmp + TABLE_24_SIZE;
uint32_t off = level2_bit_sums(level2_index, addr.l2 >> 5)
+ __builtin_popcount(level2_bits(level2_index, addr.l2 >> 5) & mask_pre[addr.l2]);
tmp = (int32_t) level2_pointers[off];
if (tmp >= -TABLE_32_SIZE) {
uint32_t level3_index = tmp + TABLE_32_SIZE;
uint32_t off = level3_bit_sums(level3_index, addr.l3 >> 5)
+ __builtin_popcount(level3_bits(level3_index, addr.l3 >> 5) & mask_pre[addr.l3]);
tmp = (uint32_t) level3_pointers[off];
return hashmap_key[tmp];
} else {
return hashmap_key[tmp];
}
} else {
return tmp;
}
}
/*
 * Judge output recorded for this submission
 * (columns: item | time | memory | verdict | score | "Show more" link):
 *
 * Compilation | N/A | N/A | Compile OK | Score: N/A | Show more |
 * Testcase #1 | 42 us | 88 KB | Accepted | Score: 25 | Show more |
 * Testcase #2 | 20.287 ms | 40 MB + 680 KB | Accepted | Score: 25 | Show more |
 * Testcase #3 | 31.351 ms | 40 MB + 680 KB | Accepted | Score: 25 | Show more |
 * Testcase #4 | 41.692 ms | 40 MB + 680 KB | Accepted | Score: 25 | Show more |
 */