/***************************************************************************
 *
 * aecolspc.c
 * Colour-space conversion code
 *
 * By Andrei Ellman
 *
 **************************************************************************/



/****************************************************************************
 Includes
 */


#include <math.h>	/* Needed for fmod() */
#include <float.h>	/* Needed for FLT_EPSILON */
#include <allegro.h>


#include "cpmthhlp.h"

#include "aecolspc.h"



/****************************************************************************
 Local Types
 */


/****************************************************************************
 Global Prototypes
 */


/****************************************************************************
 Local (Static) Prototypes
 */


/****************************************************************************
 Local Defines
 */


/****************************************************************************
 Local Macros
 */


/****************************************************************************
 Global Variables (across program)
 */


/****************************************************************************
 Local (static) Global Variables
 */




/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                       Colourspace conversion functions

 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */



// CpColourSpaceFromFunction cpTurboHSV2RGB

// s,v: 00-ff, h: 000-5ff (but h can wrap around). Not unsigned in case we subtract stuff from it.
// TODO: We could optimise by not allowing hue wraparound.
/*
 * Integer HSV -> RGB conversion.
 *
 *   h  Hue. Each sextant of the colour wheel spans 0x100 units, so the
 *      natural range is 0x000-0x5ff. Values outside that range are wrapped
 *      by whole turns of 0x600. Signed so that negative hues can be passed.
 *   s  Saturation, 0x00-0xff.
 *   v  Value, 0x00-0xff.
 *
 * Returns the colour packed as (red<<16) + (green<<8) + blue.
 *
 * NOTE(review): cpMUL8() is declared outside this file (cpmthhlp.h) and is
 * assumed to be an 8-bit fixed-point multiply - confirm against the header.
 */
uint32_t
cpTurboHSV2RGB(int16_t h, uint8_t s, uint8_t v)
{
	uint8_t frac;	/* Low byte of the hue: position inside the sextant */
	uint8_t spread;	/* cpMUL8(v,s): gap between the largest and smallest channel */
	uint8_t ramp;	/* cpMUL8(frac,spread): how far the moving channel has travelled */
	uint8_t low;	/* v - spread: the smallest channel */
	int r, g, b;

	/* No saturation: all three channels equal the value */
	if (s == 0)
	{
		return (v<<16)+(v<<8)+v;
	}

	/* Bring the hue into 0x000-0x5ff. Wrapping is expected to be rare, so
	   repeated add/subtract is used instead of a modulo. */
	while (h < 0x000)
	{
		h += 0x600;
	}
	while (h >= 0x600)
	{
		h -= 0x600;
	}

	frac = h & 0xff;

	/*
	 * In every sextant one channel is v, one is low, and the third either
	 * rises from low (low+ramp) or falls from v (v-ramp).
	 */
	spread = cpMUL8(v,s);
	ramp = cpMUL8(frac,spread);
	low = v-spread;

	switch (h>>8)
	{
		case 1:		/* yellow -> green: red falls */
			r = v-ramp;		g = v;			b = low;
			break;

		case 2:		/* green -> cyan: blue rises */
			r = low;		g = v;			b = low+ramp;
			break;

		case 3:		/* cyan -> blue: green falls */
			r = low;		g = v-ramp;		b = v;
			break;

		case 4:		/* blue -> magenta: red rises */
			r = low+ramp;	g = low;		b = v;
			break;

		case 5:		/* magenta -> red: blue falls */
			r = v;			g = low;		b = v-ramp;
			break;

		default:	/* sextant 0, red -> yellow: green rises.
					   Also catches any other index, which cannot occur once
					   the hue has been wrapped. */
			r = v;			g = low+ramp;	b = low;
			break;
	}

	return (r<<16) + (g<<8) + b;
}




// CpColourSpaceToFunction cpTurboRGB2HSV

// Optimise idea: Could this be optimised if we're only changing one of H S or V AND the original RGB value is known?
// Idea for saturation-only optimise: less saturation: move towards average of the max and min of R, G, B
// Hue optimise: The increasing and decreasing of the colour-sliders as we go round the circle.
// V optimise, just divide all pixels by a given value.

// test: try inputting rgb values and extracting them using these 2 fn's. See how much the round trip distorts them.

// s,v: 00-ff, h: 000-5ff
/*
 * RGB -> integer HSV conversion (floating-point implementation).
 *
 *   r, g, b  Input channels, 0x00-0xff.
 *   hout     Receives the hue, scaled so that one sextant of the colour
 *            wheel is roughly 0x100 units (overall range 0x000-0x5ff).
 *            Greys report a hue of 0.
 *   sout     Receives the saturation, 0x00-0xff.
 *   vout     Receives the value (the largest channel), 0x00-0xff.
 *
 * Not optimised: the original author notes it is only used for pre-calculation.
 */
void
cpTurboRGB2HSV(uint8_t r, uint8_t g, uint8_t b, int16_t *hout, uint8_t *sout, uint8_t *vout)
{
	/* Channels normalised to 0..1 */
	const float red = (float)r / 255.0f;
	const float grn = (float)g / 255.0f;
	const float blu = (float)b / 255.0f;

	float top, bottom, span;
	float hue = 0.0f;
	float sat = 0.0f;

	/* Largest channel */
	top = (grn > blu) ? grn : blu;
	if (red > top)
	{
		top = red;
	}

	/* Smallest channel */
	bottom = (grn < blu) ? grn : blu;
	if (red < bottom)
	{
		bottom = red;
	}

	span = top - bottom;

	/* Saturation is the spread relative to the brightest channel (black stays at 0) */
	if (top != 0.0f)
	{
		sat = span / top;
	}

	/* Hue only exists for saturated colours; it is measured in sextants (0..6) */
	if (sat != 0.0f)
	{
		if (red == top)
		{
			hue = (grn - blu) / span;			/* between magenta and yellow */
		}
		else if (grn == top)
		{
			hue = 2.0f + (blu - red) / span;	/* between yellow and cyan */
		}
		else
		{
			hue = 4.0f + (red - grn) / span;	/* between cyan and magenta */
		}

		if (hue < 0.0f)
		{
			hue += 6.0f;
		}
	}

	/* 255.83333 (256 - 1/6) is the hue scale because the hue runs from 000 to 5FF */
	*hout = (int16_t)((hue*255.83333)+0.5f);
	*sout = (uint8_t)((sat*255.0)+0.5f);
	*vout = (uint8_t)((top*255.0)+0.5f);
}



// CpColourSpaceFromFunction cpTurboHLS2RGB

// l,s: 00-ff, h: 000-5ff (but h can wrap around). Not unsigned in case we subtract stuff from it.
// TODO: We could optimise by not allowing hue wraparound.
/*
 * Integer HLS -> RGB conversion.
 *
 *   h  Hue. Each sextant of the colour wheel spans 0x100 units, so the
 *      natural range is 0x000-0x5ff. Values outside that range are wrapped
 *      by whole turns of 0x600. Signed so that negative hues can be passed.
 *   l  Lightness, 0x00-0xff.
 *   s  Saturation, 0x00-0xff.
 *
 * Returns the colour packed as (red<<16) + (green<<8) + blue.
 *
 * NOTE(review): cpMUL8() is declared outside this file (cpmthhlp.h) and is
 * assumed to be an 8-bit fixed-point multiply - confirm against the header.
 */
uint32_t
cpTurboHLS2RGB(int16_t h, uint8_t l, uint8_t s)
{
	int16_t top, bottom;	/* Largest and smallest of the three channels */
	int frac;				/* Interpolation weight inside the sextant */
	int mid;				/* The channel that lies between bottom and top */
	int r, g, b;

	/* No saturation: greyscale, every channel equals the lightness */
	if (s == 0)
	{
		return (l<<16)+(l<<8)+l;
	}

	/* Bring the hue into 0x000-0x5ff. Wrapping is expected to be rare, so
	   repeated add/subtract is used instead of a modulo. */
	while (h < 0x000)
	{
		h += 0x600;
	}
	while (h >= 0x600)
	{
		h -= 0x600;
	}

	/*
	 * Largest channel. Float equivalent:
	 *     top = (l <= 0.5) ? l + l*s : l + s - l*s
	 * The extra -1 in the upper half is the original author's allowance for
	 * cpMUL8 being slightly inaccurate when it is not table-driven.
	 */
	if (l <= 0x7f)
	{
		top = l+cpMUL8(l,s);
	}
	else
	{
		top = l+s - cpMUL8(l,s) -1;
	}

	/* Lightness is the average of top and bottom, so bottom = 2*l - top */
	bottom = (l<<1) - top;

	/*
	 * In even sextants (0, 2, 4) the middle channel rises with the hue
	 * fraction; in odd sextants (1, 3, 5) it falls, so the weight is
	 * mirrored.
	 */
	frac = h & 0xff;
	if ((h>>8) & 1)
	{
		frac = 0xff - frac;
	}

	mid = bottom + cpMUL8((top-bottom),frac);

	switch (h>>8)
	{
		case 0:		/* red -> yellow */
			r = top;	g = mid;	b = bottom;
			break;

		case 1:		/* yellow -> green */
			r = mid;	g = top;	b = bottom;
			break;

		case 2:		/* green -> cyan */
			r = bottom;	g = top;	b = mid;
			break;

		case 3:		/* cyan -> blue */
			r = bottom;	g = mid;	b = top;
			break;

		case 4:		/* blue -> magenta */
			r = mid;	g = bottom;	b = top;
			break;

		default:	/* sextant 5, magenta -> red */
			r = top;	g = bottom;	b = mid;
			break;
	}

	return (r<<16) + (g<<8) + b;
}



// CpColourSpaceToFunction cpTurboRGB2HLS


// l,s: 00-ff, h: 000-5ff
/*
 * RGB -> integer HLS conversion (floating-point implementation).
 *
 *   r, g, b  Input channels, 0x00-0xff.
 *   hout     Receives the hue, scaled so that one sextant of the colour
 *            wheel is roughly 0x100 units (overall range 0x000-0x5ff).
 *            Greys report a hue of 0.
 *   lout     Receives the lightness (average of the largest and smallest
 *            channel), 0x00-0xff.
 *   sout     Receives the saturation, 0x00-0xff.
 *
 * Not optimised: the original author notes it is only used for pre-calculation.
 */
void
cpTurboRGB2HLS(uint8_t r, uint8_t g, uint8_t b, int16_t *hout, uint8_t *lout, uint8_t *sout)
{
	/* Channels normalised to 0..1 */
	const float red = (float)r / 255.0f;
	const float grn = (float)g / 255.0f;
	const float blu = (float)b / 255.0f;

	float top, bottom, span;
	float light;
	float hue = 0.0f;	/* Stays 0 for greys: they have no hue */
	float sat = 0.0f;	/* Stays 0 for greys: they have no saturation */

	/* Largest channel */
	top = (grn > blu) ? grn : blu;
	if (red > top)
	{
		top = red;
	}

	/* Smallest channel */
	bottom = (grn < blu) ? grn : blu;
	if (red < bottom)
	{
		bottom = red;
	}

	light = (top+bottom)/2.0f;
	span = top - bottom;

	/*
	 * A spread below 1/2048 is treated as "on the grey axis" instead of
	 * testing for exactly 0.0f. The original author's reasoning is in this
	 * Allegro.cc thread:
	 * http://www.allegro.cc/forums/view_thread.php?_id=269409&request=1053865245&
	 */
	if (!(span<1.0f/2048.0f))
	{
		/* Saturation: spread relative to the room available at this lightness */
		if (light<=0.5f)
		{
			sat = span/(top+bottom);
		}
		else
		{
			sat = span/(2.0f-(top+bottom));
		}

		/* Hue in sextants (0..6), measured from the dominant channel */
		if (red == top)
		{
			hue = (grn-blu) / span;			/* between magenta and yellow */
		}
		else if (grn == top)
		{
			hue = 2.0f + (blu-red) / span;	/* between yellow and cyan */
		}
		else
		{
			hue = 4.0f + (red-grn) / span;	/* between cyan and magenta */
		}

		if (hue < 0.0f)
		{
			hue += 6.0f;	/* Keep the hue non-negative */
		}
	}

	/*
	 * 255.83333 (256 - 1/6) is the hue scale because the hue runs from 000 to 5FF.
	 * Open question from the original author: should the multiplier be 256.0f?
	 * The smallest spread is 1/255, and (6 - 1/255)*256 = 1535, whereas with
	 * 255.83333 the largest hue produced is 1534.
	 * Lightness and saturation use 255 because they cover 0..1 inclusive.
	 */
	*hout = (int16_t)((hue*255.83333)+0.5f);
	*lout = (uint8_t)((light*255.0)+0.5f);
	*sout = (uint8_t)((sat*255.0)+0.5f);
}




/* hls_to_rgb() has been taken from code I submitted to the Allegro Developers. If it is ever implemented, then I can get rid of the below function when the next stable release is out */

void
hls_to_rgb_float(float h, float l, float s, int *r, int *g, int *b)
{
   float m1, m2;

   l*=255.0f;

   // ?: Is doing the epsilon thing overkill here???

   if (s < FLT_EPSILON) {   /* greyscale */
      *r = *g = *b = (int)(l+0.5f);
   } else {
      /* Work out min and max of the r,g,b sliders */
      m2 = (l<=127.5f) ? (l+(l*s)) : (l+(s*255.0f) - (l*s));   /* Max RGB value */
      m1 = l+l - m2;	/* Min RGB value */
	  /* l is average of m1 and m2. */

      // ex m2 = (l<=0.5f) ? (l+(l*s)) : (l+s - (l*s));
      // ex m1 = 2.0f * l - m2;


      // m2 = l + (l*s);
      // m1 = l - (l*s);
	  // m2-m1 = l + (l*s) - l + (l*s) = 2*(l*s)


      // m2 = l+s - (l*s);
      // m1 = l-s + (l*s);
	  // m2-m1 = l+s - (l*s) - l+s - (l*s) = 2*s - 2*(l*s) = 2*(s-l*s)



      h = fmod(h, 360.0f) / 60.0f;
      if (h < 0.0f) {
         h += 6.0f;
      }


      m1+=0.5f;
      m2+=0.5f;


      if(h < 3.0f) {
         if(h < 2.0f) {
            if(h < 1.0f) {
               /* 0<=h<1 */
               *r = (int)m2;
               *g = (int)(m1+(m2-m1)*h);
               *b = (int)m1;
			}
			else {
               /* 1<=h<2 */
               *r = (int)(m1+(m2-m1)*(2.0f-h));
               *g = (int)m2;
               *b = (int)m1;
			}
         }
         else {
            /* 2<=h<3 */
            *r = (int)m1;
            *g = (int)m2;
            *b = (int)(m1+(m2-m1)*(h-2.0f));
         }
      }
      else {
         if(h < 5.0f) {
            if(h < 4.0f) {
               /* 3<=h<4 */
               *r = (int)m1;
               *g = (int)(m1+(m2-m1)*(4.0f-h));
               *b = (int)m2;
            }
            else {
               /* 4<=h<5 */
               *r = (int)(m1+(m2-m1)*(h-4.0f));
               *g = (int)m1;
               *b = (int)m2;
            }
         }
         else {
            /* 5<=h<6 */
            *r = (int)m2;
            *g = (int)m1;
            *b = (int)(m1+(m2-m1)*(6.0f-h));
         }
      }
   }


}   /* End of hls_to_rgb */




/* And now, some RGB colour-space functions. A bit overkill, but at least these functions can plug transparently into the code. */

// CpColourSpaceFromFunction cpTurboDecRGB

/*
 * "Decode" an RGB triple into a packed colour, matching the signature of the
 * other colour-space-to-RGB functions in this file.
 *
 *   r  Red. Passed as a 16-bit signed value because the first parameter of
 *      these functions is treated as cyclic by the caller and is therefore
 *      not clamped there; it is clamped to 0x00-0xff here instead.
 *   g  Green, 0x00-0xff.
 *   b  Blue, 0x00-0xff.
 *
 * Returns the colour packed as (red<<16) + (green<<8) + blue.
 */
uint32_t
cpTurboDecRGB(int16_t r, uint8_t g, uint8_t b)
{
	int red = r;

	/* Only 8 bits of red are meaningful: saturate anything outside 0x00-0xff */
	// ?: Should it clamp param 1 to treat R G and B equally?
	if (red < 0x00)
	{
		red = 0x00;
	}
	else if (red > 0xff)
	{
		red = 0xff;
	}

	return ((uint32_t)red << 16) | ((uint32_t)g << 8) | (uint32_t)b;
}



// CpColourSpaceToFunction cpTurboEncRGB



/*
 * "Encode" an RGB triple as an RGB triple: an identity conversion with the
 * same signature as the other RGB-to-colour-space functions in this file.
 *
 *   r, g, b  Input channels, 0x00-0xff.
 *   rout     Receives r. It is 16 bits wide to match the other functions,
 *            but only the low 8 bits are ever used.
 *   gout     Receives g.
 *   bout     Receives b.
 */
void
cpTurboEncRGB(uint8_t r, uint8_t g, uint8_t b, int16_t *rout, uint8_t *gout, uint8_t *bout)
{
	*rout = (int16_t)r;
	*gout = g;
	*bout = b;
}



/*



HSV <-> HLS



+ M=max(R,G,B)
+ m=min(R,G,B)
+ C=(M-m)

+ V = M
+ L = 0.5 * (M+m)


L = 0.5 * (V+m)

2L = V+m
V = 2L-m


+ S(v) = C / V
+ S(l) = C / (1-|2L-1|)

S(v) = C / (2L-m)

S(l) = C / (1-|2(0.5 * (V+m))-1|)
S(l) = C / (1-|(V+m)-1|)


+ C=V*S(v)
(M-m) = V*S(v)

V=M
(M-m) = (V-m)
C = V-m
m=V-C
m = V-V*S(v)
m = V*(1-S(v))

S(l) = C /  (1-|(V+m)-1|)
S(l) = (V*S(v)) /  (1-|(V+V-V*S(v))-1|)
S(l) = (V*S(v)) /  (1-|(2V-V*S(v))-1|)
S(l) = (V*S(v)) /  (1-|(V*(2-S(v)))-1|)
S(l) = S(v) /  (1/V - | 2-S(v) -1/V | )


S(l) = (V*S(v)) /  (1-|(V*(2-S(v)))-1|)
S(l) = (V*S(v)) /  (1-|(V*(2-S(v)))-1|)


L = 0.5 * (V+m)
L = 0.5 * (V+V-C)
L = 0.5 * (V+V-V*S(v))
L = 0.5 * (V*(2-S(v)))
L = 0.5 * (2V-V*S(v))
L = V - 0.5 * V*S(v)


* C=V*S(v)
* S(l) = C / (1-|(2V-C)-1|)
* L = V - 0.5 * C            (or 0.5 * (2V-C) )
-> S(l) = C / (1-|2L-1|)


Alt: (m=V-C)
* m = V-V*S(v)
* S(l) = (V-m) / (1-|(V+m)-1|)
* L = 0.5 * (V+m)
-> S(l) = (V-m) / (1-|2L-1|)


+ C = (1-|2L-1|) * S(l)
(M-m) = (1-|2L-1|) * S(l)

L = 0.5 * (M+m)
(M-m) = (M+m)-2m
(M+m) = (M-m)+2m
L = 0.5 * ((M-m)+2m)
L = 0.5 * (C+2m)
2L = C+2m
2L-C=2m
m = L-(C/2)
C = 2(L-m)

m = L - ( ( (1-|2L-1|) * S(l) ) /2 )
m = L - ( (1-|2L-1|)/2 * S(l) )

S(v) = C / (2L-m)
S(v) = ( (1-|2L-1|) * S(l) )  /  (2L - (L - ( ( (1-|2L-1|) * S(l) ) /2 ) ) )
S(v) = ( (1-|2L-1|) * S(l) )  /  (L + ( ( (1-|2L-1|) * S(l) ) /2 ) )

S(v) = (1-|2L-1|)  /  ((L + ( ( (1-|2L-1|) * S(l) ) /2 ) ) / S(l))
S(v) = (1-|2L-1|)  /  (L/S(l) + ( (1-|2L-1|) * 1) /2 )
S(v) = (1-|2L-1|)  /  (L/S(l) + (1-|2L-1|)/2 )

S(v) = S(l) /  ((L + ( ( (1-|2L-1|) * S(l) ) /2 ) ) / (1-|2L-1|) )
S(v) = S(l) /  (L/(1-|2L-1|) + ( ( 1 * S(l) ) /2 ) )
S(v) = S(l) / (L/(1-|2L-1|) + S(l)/2 )
S(v) = 1 / ((L/(1-|2L-1|))/S(l) + 0.5 )
S(v) = S(l)/L / (1/(1-|2L-1|) + S(l)/2L )

1/S(v) = (L/(1-|2L-1|))/S(l) + 0.5
(1/S(v)) - 0.5 = (L/(1-|2L-1|))/S(l)
((1/S(v)) - 0.5)*S(l) = L/(1-|2L-1|)

S(v) = S(l)/L / (1/(1-|2L-1|) + S(l)/2L )
1/S(v) = (1/(1-|2L-1|) + S(l)/2L ) / (S(l)/L)



V = 2L-m
V = 2L-(L-(C/2))
V = L+(C/2)



C = (1-|2L-1|) * S(l)
m = L-(C/2)

S(v) = C / (2L-(L-(C/2)))
S(v) = C / (L+(C/2))
S(v) = 2C / (2L+C)

S(v) = 1 / (L/C+0.5)

S(v) = C/L / (1+(C/2)/L)



m = L - ( ( (1-|2L-1|) * S(l) ) /2 )
C = 2(L-m)

S(v) = 2(L-m) / (2L-m)
S(v) = (2L-2m) / (2L-m)


* C = (1-|2L-1|) * S(l)
* S(v) = C / (L+(C/2))
* V = L+(C/2)
-> S(v) = C / V

Alt:
* m = L - ( ( (1-|2L-1|) * S(l) ) /2 )
* S(v) = 2(L-m) / (2L-m)         (or (2L-2m) / (2L-m) )
* V = 2L-m
-> S(v) = 2(L-m) / V




*/
