# 批改娘 20021. Dynamic Range Sum

，這些點不時會移動和改變權重。

• \$N\$
個點的資訊 (以 SoA (Structure of Array) 的方式儲存，以達到最好的快取使用率)
• 詢問的矩形 \$text{Rect}\$
(正交於兩軸)

```int32_t search_range(Rect rect, int32_t x[], int32_t y[], int32_t w[], int32_t n);
```

### main.c (測試用)

• 一開始，在二維空間 \$[0, R) times [0, R)\$
之間產生 \$N\$
個點的資訊
• 接著，模擬 \$M\$
次點的變化，並且呼叫 `search_range`
• 最後，將所有答案 HASH 輸出一個值
```#include
#include
#include
#include
#include
#include "DRS.h"

static uint32_t seed = 0;
static void p_srand(uint32_t x) { seed = x;}
static uint32_t p_rand() {return seed = (seed*9301 + 49297);}

static void swap(int *x, int *y) {
int tmp = *x;
*x = *y;
*y = tmp;
}
static inline Rect rand_rect(int R) {
Rect r;
r.lx = p_rand()%R;
r.ly = p_rand()%R;
r.rx = p_rand()%R;
r.ry = p_rand()%R;
if (r.lx > r.rx)	swap(&r.lx, &r.rx);
if (r.ly > r.ry)	swap(&r.ly, &r.ry);
return r;
}

static void init(int N, int R, int32_t x[], int32_t y[], int32_t w[]) {
for (int i = 0; i < N; i++) {
x[i] = p_rand()%R;
y[i] = p_rand()%R;
w[i] = p_rand()%R;
}
}

static void tick(int N, int R, int32_t x[], int32_t y[], int32_t w[]) {
for (int i = 0; i < 5; i++)	{
int idx = p_rand()%N;
x[idx] = p_rand()%R;
y[idx] = p_rand()%R;
}
for (int i = 0; i < 5; i++)	{
int idx = p_rand()%N;
w[idx] = p_rand()%R;
}
}

#define MAXN 1048576
int main() {
p_srand(0);
static int32_t x[MAXN], y[MAXN], w[MAXN];
int N = 1000, M = 10000, R = 100;

init(N, R, x, y, w);

int32_t hash = 0;
for (int it = 0; it < M; it++) {
Rect rect = rand_rect(R);
int32_t ret = search_range(rect, x, y, w, N);
hash ^= ret;
tick(N, R, x, y, w);
}
printf("%" PRIi32 "n", hash);
return 0;
}
```

### DRS.h

```#ifndef __DRS_H
#define __DRS_H

#include

typedef struct Rect {
int32_t lx, ly, rx, ry;
} Rect;

int32_t search_range(Rect rect, int32_t x[], int32_t y[],
int32_t w[], int32_t n);

#endif
```

### DRS.c

```#include "DRS.h"

int32_t search_range(Rect rect, int32_t x[], int32_t y[],
int32_t w[], int32_t n) {
int32_t ret = 0;
for (int i = 0; i < n; i++) {
if (rect.lx <= x[i] && x[i] <= rect.rx &&
rect.ly <= y[i] && y[i] <= rect.ry) {
ret += w[i];
}
}
return ret;
}
```

## 測資限制

• \$N le 131072\$
• \$R le 32768\$

## 編譯參數

```all: main

main: main.c
gcc -std=c99 -Ofast -march=native DRS.c -c -o DRS.o
gcc -std=c99 -Ofast -march=native main.c DRS.o -o main
```

## Solution

```#include "DRS.h"

#include
/*
int32_t search_range(Rect rect, int32_t x[], int32_t y[],
int32_t w[], int32_t n) {
int32_t ret = 0;
for (int i = 0; i < n; i++) {
if (rect.lx <= x[i] && x[i] <= rect.rx &&
rect.ly <= y[i] && y[i] <= rect.ry) {
ret += w[i];
}
}
return ret;
}
*/
int32_t search_range(Rect rect, int32_t x[], int32_t y[], int32_t w[], int32_t n) {
__m128i ret = _mm_set_epi32(0, 0, 0, 0);
rect.lx--, rect.ly--;
rect.rx++, rect.ry++;
__m128i lx = _mm_broadcastd_epi32(*((__m128i *) &rect.lx));
__m128i ly = _mm_broadcastd_epi32(*((__m128i *) &rect.ly));
__m128i rx = _mm_broadcastd_epi32(*((__m128i *) &rect.rx));
__m128i ry = _mm_broadcastd_epi32(*((__m128i *) &rect.ry));
__m128i zo = _mm_set_epi32(0, 0, 0, 0);
__m128i ic = _mm_set_epi32(3, 2, 1, 0);

for (int i = 0; i+4 >2)<<2; i < n; i++) {
if (rect.lx <= x[i] && x[i] <= rect.rx &&
rect.ly <= y[i] && y[i] <= rect.ry) {
sum += w[i];
}
}
static int32_t tmp[4] __attribute__ ((aligned (16)));
_mm_store_si128((__m128i*) &tmp[0], ret);
sum += tmp[0] + tmp[1] + tmp[2] + tmp[3];
return sum;
}
```

|