#ifndef __blur_h
#define __blur_h

/* Fixed-size integer aliases used by the pixel accessors below. */
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;

/*
 * Per-depth pixel helpers, one family per colour depth (15, 16, 24, 32):
 *   u_getrN/u_getgN/u_getbN(c)   extract one channel from a packed colour
 *   u_makecolN(r, g, b)          pack three channels into a colour
 *   u_rmaxN/u_gmaxN/u_bmaxN      largest value of each channel
 *   u_readcolN(lineptr, x)       load pixel x of a scanline
 *   u_writecolN(lineptr, x, col) store pixel x of a scanline
 * lineptr must be a byte pointer for the 15/16/32-bit accessors, because the
 * pixel offset is added to it in bytes before the cast.
 * The whole family is skipped when u_getr15 is already defined.
 */
#ifndef u_getr15
/* 15-bit: 5 bits per channel, red in bits 10-14. */
#define u_getr15(c) (((c) >> 10) & 0x1F)
#define u_getg15(c) (((c) >>  5) & 0x1F)
#define u_getb15(c) ((c) & 0x1F)
#define u_makecol15(r, g, b) (((r) << 10) | ((g) << 5) | (b))
#define u_rmax15 31
#define u_gmax15 31
#define u_bmax15 31
#define u_readcol15(lineptr, x) (*((uint16 *) ((lineptr) + ((x)<<1))))
#define u_writecol15(lineptr, x, col) (*((uint16 *) ((lineptr) + ((x)<<1))) = (col))

/* 16-bit: 5 bits red (bits 11-15), 6 bits green, 5 bits blue. */
#define u_getr16(c) (((c) >> 11) & 0x1F)
#define u_getg16(c) (((c) >>  5) & 0x3F)
#define u_getb16(c) ((c) & 0x1F)
#define u_makecol16(r, g, b) (((r) << 11) | ((g) << 5) | (b))
#define u_rmax16 31
#define u_gmax16 63
#define u_bmax16 31
#define u_readcol16(lineptr, x) (*((uint16 *) ((lineptr) + ((x)<<1))))
#define u_writecol16(lineptr, x, col) (*((uint16 *) ((lineptr) + ((x)<<1))) = (col))

/* 24-bit: 8 bits per channel, three bytes per pixel. */
#define u_getr24(c) (((c) >> 16) & 0xFF)
#define u_getg24(c) (((c) >>  8) & 0xFF)
#define u_getb24(c) ((c) & 0xFF)
#define u_makecol24(r, g, b) (((r) << 16) | ((g) << 8) | (b))
#define u_rmax24 255
#define u_gmax24 255
#define u_bmax24 255
/*
 * Loads the three bytes of pixel x, lowest byte first, and returns them as
 * 0x00RRGGBB-style packed value (byte 2 in bits 16-23). Reading byte by byte
 * never touches memory beyond the pixel, so the last pixel of a buffer is
 * safe, and no aligned access is required.
 */
static inline int u_readcol24_bytes(const void *lineptr, int x) {
  const uint8 *p = (const uint8 *) lineptr + x * 3;
  return (int) p[0] | ((int) p[1] << 8) | ((int) p[2] << 16);
}
#define u_readcol24(lineptr, x) u_readcol24_bytes((lineptr), (x))
/*
 * Stores the low 24 bits of col into pixel x, lowest byte first (the same
 * byte order u_readcol24 reads back).
 */
static inline void u_writecol24(uint8 *lineptr, int x, int col) {
  uint8 *p = lineptr + x * 3;
  p[0] = (uint8) (col & 0xFF);
  p[1] = (uint8) ((col >> 8) & 0xFF);
  p[2] = (uint8) ((col >> 16) & 0xFF);
}

/* 32-bit: 8 bits per channel in the low 24 bits, four bytes per pixel. */
#define u_getr32(c) (((c) >> 16) & 0xFF)
#define u_getg32(c) (((c) >>  8) & 0xFF)
#define u_getb32(c) ((c) & 0xFF)
#define u_makecol32(r, g, b) (((r) << 16) | ((g) << 8) | (b))
#define u_rmax32 255
#define u_gmax32 255
#define u_bmax32 255
#define u_readcol32(lineptr, x) (*((uint32 *) ((lineptr) + ((x)<<2))))
#define u_writecol32(lineptr, x, col) (*((uint32 *) ((lineptr) + ((x)<<2))) = (col))
#endif

#define __hblur_code(depth)                                       \
for (y = 0; y < h; y++) {                                         \
  uint8 *srcline = source->line[y];                               \
  uint8 *dstline = dest->line[y];                                 \
  int r = 0, g = 0, b = 0, c;                                     \
  for (x = 0; x < blur; x++) {                                    \
    c = u_readcol##depth##(srcline, x);                           \
    r += u_getr##depth##(c); g += u_getg##depth##(c); b += u_getb##depth##(c);         \
  }                                                               \
  for (x = 0; x < w; x++) {                                       \
    if (x > blur) {                                               \
      c = u_readcol##depth##(srcline, x - blur - 1);                     \
      r -= u_getr##depth##(c); g -= u_getg##depth##(c); b -= u_getb##depth##(c);       \
    }                                                             \
    if (x + blur < w) {                                           \
      c = u_readcol##depth##(srcline, x + blur);                         \
      r += u_getr##depth##(c); g += u_getg##depth##(c); b += u_getb##depth##(c);       \
    }                                                             \
    u_writecol##depth##(dstline, x, u_makecol##depth##(int(r * scale), int(g * scale), int(b * scale)));    \
  }                                                               \
}

void horizontal_blur(BITMAP *source, BITMAP *dest, int blur) {
  int x, y, w = source->w, h = source->h, depth = bitmap_color_depth(source);
  double scale = 1.0 / (blur * 2 + 1);
  if (depth == 32) {
    __hblur_code(32)
  } else if (depth == 24) {
    __hblur_code(24)
  } else if (depth == 16) {
    __hblur_code(16)
  } else if (depth == 15) {
    __hblur_code(15)
  }
}

#define __vblur_code(depth)                                       \
for (x = 0; x < w; x++) {                                         \
  int r = 0, g = 0, b = 0, c;                                     \
  for (y = 0; y < blur; y++) {                                    \
    c = u_readcol##depth##(source->line[y], x);                   \
    r += u_getr##depth##(c); g += u_getg##depth##(c); b += u_getb##depth##(c);         \
  }                                                               \
  for (y = 0; y < h; y++) {                                       \
    if (y > blur) {                                               \
      c = u_readcol##depth##(source->line[y-blur-1], x);                     \
      r -= u_getr##depth##(c); g -= u_getg##depth##(c); b -= u_getb##depth##(c);       \
    }                                                             \
    if (y + blur < h) {                                           \
      c = u_readcol##depth##(source->line[y+blur], x);                         \
      r += u_getr##depth##(c); g += u_getg##depth##(c); b += u_getb##depth##(c);       \
    }                                                             \
    u_writecol##depth##(dest->line[y], x, u_makecol##depth##(int(r * scale), int(g * scale), int(b * scale)));    \
  }                                                               \
}

void vertical_blur(BITMAP *source, BITMAP *dest, int blur) {
  int x, y, w = source->w, h = source->h, depth = bitmap_color_depth(source);
  double scale = 1.0 / (blur * 2 + 1);
  if (depth == 32) {
    __vblur_code(32)
  } else if (depth == 24) {
    __vblur_code(24)
  } else if (depth == 16) {
    __vblur_code(16)
  } else if (depth == 15) {
    __vblur_code(15)
  }
}

/*
 * __sum_code(depth): allocates s with new[] (w * h cells) and fills it with
 * running channel sums of source for one pixel depth.
 * Expands in a scope that provides x, y, w, h, source and an assignable
 * sum_component pointer s; whoever ends up owning s must delete[] it.
 *
 * After the loops, cell (x, y) holds the per-channel sum of every source
 * pixel in the rectangle from (0, 0) to (x, y) inclusive. Each cell is built
 * from its own pixel plus the cell to the left and the cell above, minus the
 * cell diagonally above-left, which those two both include.
 * For the first row prevline simply aliases the current row; it is never
 * read there because every use is guarded by y > 0.
 */
#define __sum_code(depth)                                                                       \
s = new sum_component[w * h];                                                                   \
for (y = 0; y < h; y++) {                                                                       \
  sum_component *cline = &s[y * w];                                                             \
  sum_component *prevline = (y > 0) ? &s[(y - 1) * w] : cline;                                  \
  uint8 *srcline = source->line[y];                                                             \
  for (x = 0; x < w; x++) {                                                                     \
    int color = u_readcol##depth(srcline, x);                                                   \
    int r = u_getr##depth(color);                                                               \
    int g = u_getg##depth(color);                                                               \
    int b = u_getb##depth(color);                                                               \
    if (x > 0) { r += cline[x - 1].r; g += cline[x - 1].g; b += cline[x - 1].b; }               \
    if (y > 0) { r += prevline[x].r; g += prevline[x].g; b += prevline[x].b; }                  \
    if (x > 0 && y > 0) { r -= prevline[x-1].r; g -= prevline[x-1].g; b -= prevline[x-1].b; }   \
    cline[x].r = r; cline[x].g = g; cline[x].b = b;                                             \
  }                                                                                             \
}


/*
 * __blur_code(depth): writes a box-blurred image into dest from the running
 * sum table s, for one pixel depth.
 * Expands in a scope that provides x, y, w, h, boxw, boxh, scale, dest and
 * the table pointer s (w * h cells, cell (x, y) = sum of all pixels from
 * (0, 0) to (x, y) inclusive).
 *
 * The window of pixel (x, y) spans columns x - boxw .. x + boxw and rows
 * y - boxh .. y + boxh, clipped to the image. Its sum is
 *   S(bottom, right) - S(bottom, left - 1) - S(top - 1, right)
 *     + S(top - 1, left - 1)
 * where any term whose row or column index would be -1 is zero. Those terms
 * are skipped rather than clamped to row/column 0, so the first row and
 * column stay inside the window and a zero-size box reproduces the source
 * pixel.
 * The sum is always multiplied by the full-window scale, so pixels whose
 * window is clipped by an image edge come out darker.
 */
#define __blur_code(depth)                                                                      \
for (y = 0; y < h; y++) {                                                                       \
  int ytop = y - boxh - 1;                                                                      \
  int ybot = (y + boxh) < h ? (y + boxh) : (h - 1);                                             \
  const sum_component *above_line = (ytop >= 0) ? &s[ytop * w] : 0;                             \
  const sum_component *below_line = &s[ybot * w];                                               \
  uint8 *dstline = dest->line[y];                                                               \
  for (x = 0; x < w; x++) {                                                                     \
    int xleft = x - boxw - 1;                                                                   \
    int xright = (x + boxw) < w ? (x + boxw) : (w - 1);                                         \
    int r = below_line[xright].r;                                                               \
    int g = below_line[xright].g;                                                               \
    int b = below_line[xright].b;                                                               \
    if (xleft >= 0) {                                                                           \
      r -= below_line[xleft].r; g -= below_line[xleft].g; b -= below_line[xleft].b;             \
    }                                                                                           \
    if (above_line) {                                                                           \
      r -= above_line[xright].r; g -= above_line[xright].g; b -= above_line[xright].b;          \
      if (xleft >= 0) {                                                                         \
        r += above_line[xleft].r; g += above_line[xleft].g; b += above_line[xleft].b;           \
      }                                                                                         \
    }                                                                                           \
    u_writecol##depth(dstline, x, u_makecol##depth((int)(r * scale), (int)(g * scale), (int)(b * scale))); \
  }                                                                                             \
}

/* Scalar type that holds one accumulated colour channel. */
typedef int sum_t;

/*
 * One cell of a running-sum table: the accumulated red, green and blue
 * values stored by __sum_code and read back by __blur_code.
 */
struct sum_component {
  sum_t r;
  sum_t g;
  sum_t b;
};
/*
 * Box-blurs source into dest.
 *
 * source  bitmap to read.
 * dest    bitmap to write; must have the same colour depth as source and be
 *         at least as large, otherwise the call does nothing.
 * boxw    horizontal radius in pixels (negative values are made positive).
 * boxh    vertical radius in pixels (negative values are made positive).
 *
 * With both radii zero the source is copied with blit(); with only one
 * radius non-zero the work is handed to vertical_blur()/horizontal_blur().
 * Otherwise a running-sum table is built, used, and freed again before
 * returning. Only 15, 16, 24 and 32 bit bitmaps are blurred on that path;
 * any other depth leaves dest untouched.
 */
void box_blur(BITMAP *source, BITMAP *dest, int boxw, int boxh) {
  int x, y, w = source->w, h = source->h, depth = bitmap_color_depth(source);
  if (bitmap_color_depth(dest) != depth || dest->w < source->w || dest->h < source->h) { return; }
  if (boxw < 0) { boxw = -boxw; }
  if (boxh < 0) { boxh = -boxh; }
  if (!boxw && !boxh) { blit(source, dest, 0, 0, 0, 0, w, h); return; }
  if (!boxw) { vertical_blur(source, dest, boxh); return; }
  if (!boxh) { horizontal_blur(source, dest, boxw); return; }
  double scale = 1.0 / (boxw * 2 + 1) / (boxh * 2 + 1);
  /* Stays null when no branch below allocates, keeping delete[] well defined. */
  sum_component *s = 0;
  if (depth == 32) {
    __sum_code(32)
    __blur_code(32)
  } else if (depth == 24) {
    __sum_code(24)
    __blur_code(24)
  } else if (depth == 16) {
    __sum_code(16)
    __blur_code(16)
  } else if (depth == 15) {
    __sum_code(15)
    __blur_code(15)
  }
  delete[] s;
}

/*
 * A running-sum table together with the dimensions it was built for.
 * s holds w * h cells, stored row by row.
 */
struct blur_table {
  sum_component *s;
  int w;
  int h;
};

/*
 * Builds the running-sum table for source so that several blurs of the same
 * image can be rendered without recomputing the sums.
 *
 * source  bitmap to read; 15, 16, 24 and 32 bit depths are supported.
 *
 * Returns a newly allocated table that the caller releases with
 * destroy_blur_table(). For any other colour depth no sums can be computed,
 * so the returned table is empty: s is null and w and h are 0.
 */
blur_table *create_blur_table(BITMAP *source) {
  int x, y, w = source->w, h = source->h, depth = bitmap_color_depth(source);
  sum_component *s = 0;
  if (depth == 32) { __sum_code(32) }
  else if (depth == 24) { __sum_code(24) }
  else if (depth == 16) { __sum_code(16) }
  else if (depth == 15) { __sum_code(15) }
  else { w = 0; h = 0; }  /* nothing was allocated: describe an empty table */
  blur_table *result = new blur_table;
  result->s = s;
  result->w = w; result->h = h;
  return result;
}
/*
 * Frees a table returned by create_blur_table(), including its sum array.
 * Passing a null pointer is allowed and does nothing.
 */
void destroy_blur_table(blur_table *table) {
  if (!table) { return; }
  delete[] table->s;
  delete table;
}
/*
 * Renders a box blur into dest from a precomputed running-sum table.
 *
 * table  sums and dimensions of the image to blur.
 * dest   bitmap to write; returns without drawing when it is smaller than
 *        the table. Its colour depth selects the pixel format written;
 *        depths other than 15, 16, 24 and 32 draw nothing.
 * boxw   horizontal radius in pixels (sign is ignored).
 * boxh   vertical radius in pixels (sign is ignored).
 */
void blur_from_table(blur_table *table, BITMAP *dest, int boxw, int boxh) {
  int x, y;
  int w = table->w;
  int h = table->h;
  int depth = bitmap_color_depth(dest);

  if (dest->w < w || dest->h < h) {
    return;
  }

  boxw = (boxw < 0) ? -boxw : boxw;
  boxh = (boxh < 0) ? -boxh : boxh;

  /* Reciprocal of the number of pixels in a full, unclipped window. */
  double scale = 1.0 / (boxw * 2 + 1) / (boxh * 2 + 1);
  sum_component *s = table->s;

  switch (depth) {
    case 32: { __blur_code(32) } break;
    case 24: { __blur_code(24) } break;
    case 16: { __blur_code(16) } break;
    case 15: { __blur_code(15) } break;
    default: break;
  }
}

/*
 * Animates a blur of src onto the screen for a fixed duration.
 *
 * src        image to show.
 * time       duration of the animation, in the units of __timer_func().
 * direction  > 0 runs the blur from strongest to weakest (fade in);
 *            otherwise from weakest to strongest (fade out).
 * range      multiplier for the largest vertical radius, which is
 *            src->h * range; the horizontal radius tops out at 10.
 *
 * The loop redraws as fast as it can until the time has elapsed. Each frame
 * is rendered into a scratch bitmap and then blitted to the screen.
 */
void fade_blur(BITMAP *src, double time, int direction, double range=2) {
  blur_table *table = create_blur_table(src);
  /* blur_from_table() writes pixels in the depth of its destination, so the
     scratch bitmap must share src's depth rather than the global default. */
  BITMAP *temp = create_bitmap_ex(bitmap_color_depth(src), src->w, src->h);
  if (!temp) {
    destroy_blur_table(table);
    return;
  }
  double start_time = __timer_func();
  while (__timer_func() < start_time + time) {
    /* Progress through the animation, 0 at the start and approaching 1. */
    double t = (__timer_func() - start_time) / time;
    if (direction > 0) { t = 1 - t; }
    int blurh = int(t * t * src->h * range);
    int blurw = int(t*10);
    blur_from_table(table, temp, blurw, blurh);
    blit(temp, screen, 0, 0, 0, 0, temp->w, temp->h);
  }
  destroy_blur_table(table);
  destroy_bitmap(temp);
}
/*
 * Fade-out variant of fade_blur(): forwards src, time and range with a
 * direction of -1.
 */
void fadeout_blur(BITMAP *src, double time, double range = 2) {
  const int fade_out_direction = -1;
  fade_blur(src, time, fade_out_direction, range);
}
/*
 * Fade-in variant of fade_blur(): forwards src, time and range with a
 * direction of 1.
 */
void fadein_blur(BITMAP *src, double time, double range = 2) {
  const int fade_in_direction = 1;
  fade_blur(src, time, fade_in_direction, range);
}

#endif
