/***************************************************************************
 *
 * cprender.c
 * ChromaPlas rendering-code
 *
 * By Andrei Ellman
 *
 **************************************************************************/



/****************************************************************************
 Includes
 */

#include <stdlib.h>
#include <stdarg.h>	// va_list
#include <allegro.h>


#include "aeglobal.h"

#include "cpglobal.h"

#include "aecolspc.h"

//#include "cpmthhlp.h"	// if we want to directly visualise G_caMultTab

#include "cpsttngs.h"

#include "cpplasma.h"
#include "cpfunpal.h"

#include "cpimcosp.h"

// ?: Needed if blitting to hdc?
//#ifdef ALLEGRO_WINDOWS
//
//#include <winalleg.h>
//#include "cpwin_ss.h"
//
//#endif

#include "cpgfx.h" // cpMapCpColourSpaceToInt

#include "cprender.h"


/****************************************************************************
 Local Types
 */


/****************************************************************************
 Global Prototypes
 */


/****************************************************************************
 Local (Static) Prototypes
 */


/****************************************************************************
 Local Defines
 */

/****************************************************************************
 Local Macros
 */


/*
 * Grey-scale pixel builders.
 *
 * Each macro assigns to `pixel` a packed colour in which the 8-bit value
 * `foo` is replicated into every colour field. Every argument is fully
 * parenthesised so that any expression (e.g. `a|b`) may be passed, but note
 * that `foo` is still evaluated more than once - do not pass expressions
 * with side effects.
 */

/* Used when visualising a pixel from an 8-bit buffer in 32-bit mono gfx (8 bits per field) */
#define cpMONOMAKE32(pixel, foo)	((pixel) = (foo)+((foo)<<8)+((foo)<<16) )

/* Used when visualising a pixel from an 8-bit buffer in 16-bit mono gfx (5-6-5 bit fields) */
#define cpMONOMAKE16(pixel, foo)	((pixel) = ((foo)>>3)+(((foo)>>2)<<5)+(((foo)>>3)<<11) )

/* Used when visualising a pixel from an 8-bit buffer in 15-bit mono gfx (5-5-5 bit fields) */
#define cpMONOMAKE15(pixel, foo)	((pixel) = ((foo)>>3)+(((foo)>>3)<<5)+(((foo)>>3)<<10) )

/* Used when visualising a pixel from an 8-bit buffer in 32-bit mono gfx (with 'bounce').
   Wrapped in do{}while(0) so that it behaves as a single statement (safe inside an un-braced if/else). */
#define cpMONOMAKENWRAP32(pixel, foo)	\
	do	\
	{	\
		(pixel) = (foo);	\
		(pixel) = ((pixel)<128 ? (pixel)<<1 : 511-((pixel)<<1));	/* Bounce the colour (ie. get it to reverse instead of wraparound; no need when doing Hue) */	\
		(pixel) += ((pixel)<<8)+((pixel)<<16);	/* Duplicate B in G and R */	\
	} while(0)



/****************************************************************************
 Global Variables (across program)
 */


/****************************************************************************
 Local (static) Global Variables
 */



/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                          Various ancillary functions.

 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */





/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                               Image and Error display

 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */


/*
 * Last-resort error display.
 *
 * Switches to a GFX_SAFE 640x480 mode, prints the printf-style message built
 * from `format` and the variable arguments at the top-left of the screen,
 * prints Allegro's own allegro_error text on the line below it, and then
 * loops until cpCanExitApp() returns non-zero.
 *
 * format - printf-style format string for the message (remaining arguments
 *          are its parameters). The formatted text is truncated to fit a
 *          512-byte buffer.
 *
 * Returns FALSE if not even the safe graphics mode could be set (nothing is
 * displayed in that case), TRUE once the message has been shown and the
 * wait-loop has ended.
 */
AeBool
cpSafeModeErrorDisplay(AL_CONST char *format, ...)
{
	int nSafeTextErrorColour;
	int nSafeTextErrorBGColour;

	if(set_gfx_mode(GFX_SAFE, 640, 480, 0, 0))	// todo: 640,480 -> 0,0
	{
		TRACE("Double-EEK: Can't even set a safe-mode resolution - what's the world comming to?!\n");
		return FALSE;
	}


	// TODO: if(bitmap_color_depth(screen)==8), then set a palette-entry and another one for the background and nSafeTextColour becomes that palette-entry
	nSafeTextErrorColour = makecol(0xFF, 0x00, 0xFF);
	nSafeTextErrorBGColour = makecol(0x0, 0x0, 0x0);

	{
		/* Code taken from Allegro's textprintf_ex - so that we can pass the variable argument list directly, instead of the arguments themselves. */
		char buf[512];
		va_list ap;

		va_start(ap, format);
		uvszprintf(buf, sizeof(buf), format, ap);
		va_end(ap);

		textout_ex(screen, font, buf, 0, 0, nSafeTextErrorColour, nSafeTextErrorBGColour);
	}

	/* allegro_error is arbitrary text, not a format string: output it verbatim so that any '%' it contains is not interpreted as a conversion specifier. */
	textout_ex(screen, font, allegro_error, 0, 8, nSafeTextErrorColour, nSafeTextErrorBGColour);

	while(!cpCanExitApp())
	{
		// TODO: Make text bounce
		// Best to do with rest(k);drawtextatdifferentpos()
	}


	return TRUE;
}


/*
 * Puts the source picture on the screen straight away, so that the user has
 * something to look at while the pre-calculations are still running.
 *
 * Only acts when bSetGfxMode is set: the screen is cleared to black (in the
 * nDestBPP colour format) and the picture is blitted centred within a
 * nDestWidth x nDestHeight area, with pSourcePicPal selected for the duration
 * of the blit. When bSetGfxMode is not set, nothing is drawn.
 */
void
cpBlitImageToScreenImmediatelyIfNeeded(BITMAP *bmpSourcePic, PALETTE pSourcePicPal, AeBool bSetGfxMode, int nDestWidth, int nDestHeight, int nDestBPP)
{
	const int nPicW = bmpSourcePic->w;
	const int nPicH = bmpSourcePic->h;
	int nLeft, nTop;	/* Top-left corner of the centred image */

	cpGetOffsetsForCenteringImageOnScreen(&nLeft, &nTop, nPicW, nPicH, nDestWidth, nDestHeight);

	if(!bSetGfxMode)
	{
		// TODO: blit to the Windows SS preview window.
		// Note that if blitting straight to desktop, this is not needed.
		return;
	}

	/* We may blit straight to the screen */
	select_palette(pSourcePicPal);	/* Needed when converting from indexed to a higher depth */
	clear_to_color(screen, makecol_depth(nDestBPP, 0x0, 0x0, 0x0));	/* In case the source pic doesn't quite fit. */
	blit(bmpSourcePic, screen, 0, 0, nLeft, nTop, nPicW, nPicH);
	unselect_palette();
}




/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                               Rendering

 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */





/*
 * Draws a 256*256 table of 8-bit values as grey-scale pixels, starting at the
 * top-left corner of a 32-bpp destination (used mainly for debugging purposes).
 *
 * bmpDest - destination bitmap; must be at least 256x256 (not checked here).
 * cpBuf   - the table: 256 rows of 256 bytes each, read sequentially.
 */
static void
_cpRenderTab256256x1to32(BITMAP *bmpDest, uint8_t *cpBuf)
{
	int nRow, nCol;

	acquire_bitmap(bmpDest);
	bmp_select(bmpDest);	/* Video mem not part of normal adr space */

	for(nRow=0; nRow<256; nRow++)
	{
		uint32_t *npOut = (uint32_t *) bmp_write_line(bmpDest, nRow);

		for(nCol=0; nCol<256; nCol++)
		{
			uint32_t nGrey;

			cpMONOMAKE32(nGrey, *cpBuf);	/* Same value in R, G and B */
			bmp_write32(npOut, nGrey);

			npOut++;
			cpBuf++;
		}

		bmp_unwrite_line(bmpDest);
	}

	release_bitmap(bmpDest);
}


/*
 * Draws the plasma-tab as grey-scale pixels, starting at the top-left corner
 * of a 32-bpp destination (used mainly for debugging purposes).
 *
 * bmpDest - destination bitmap.
 * cpBuf   - the table, read sequentially as (G_nPlasmaBoxH*2) rows of
 *           (G_nPlasmaBoxW*2) bytes each.
 */
static void
_cpRenderPlasmaTabx1to32(BITMAP *bmpDest, uint8_t *cpBuf)
{
	int nRow;

	acquire_bitmap(bmpDest);
	bmp_select(bmpDest);	/* Video mem not part of normal adr space */

	for(nRow=0; nRow<G_nPlasmaBoxH*2; nRow++)
	{
		uint32_t *npOut = (uint32_t *) bmp_write_line(bmpDest, nRow);
		int nPixelsLeft;

		for(nPixelsLeft=G_nPlasmaBoxW*2; nPixelsLeft!=0; nPixelsLeft--)
		{
			uint32_t nGrey;

			cpMONOMAKE32(nGrey, *cpBuf);	/* cpMONOMAKENWRAP32 could be used here for the 'bounced' look */
			bmp_write32(npOut, nGrey);

			npOut++;
			cpBuf++;
		}

		bmp_unwrite_line(bmpDest);
	}

	release_bitmap(bmpDest);
}


/*
 * Draws the plasma-buffer at 1:1 scale as grey-scale pixels, centred on a
 * 32-bpp destination (used mainly for debugging purposes).
 *
 * bmpDest - destination bitmap.
 * cpBuf   - the buffer, read sequentially as G_nPlasmaBoxH rows of
 *           G_nPlasmaBoxW bytes each.
 */
static void
_cpRenderPlasmaBufferx1to32(BITMAP *bmpDest, uint8_t *cpBuf)
{
	const int nTop = cpGetYOffsetForCenteringImageOnScreen(G_nPlasmaBoxH, bmpDest->h);
	const int nLeft = cpGetXOffsetForCenteringImageOnScreen(G_nPlasmaBoxW, bmpDest->w);
	int nDestY;

	acquire_bitmap(bmpDest);
	bmp_select(bmpDest);	/* Video mem not part of normal adr space */

	for(nDestY=nTop; nDestY<G_nPlasmaBoxH+nTop; nDestY++)
	{
		uint32_t *npOut = ((uint32_t *) bmp_write_line(bmpDest, nDestY)) + nLeft;
		int nPixelsLeft;

		for(nPixelsLeft=G_nPlasmaBoxW; nPixelsLeft!=0; nPixelsLeft--)
		{
			const uint8_t cSample = *cpBuf;
			uint32_t nGrey;

			cpMONOMAKE32(nGrey, cSample);	/* cpMONOMAKENWRAP32 could be used here for the 'bounced' look */
			bmp_write32(npOut, nGrey);

			npOut++;
			cpBuf++;
		}

		bmp_unwrite_line(bmpDest);
	}

	release_bitmap(bmpDest);
}



/*
 * Writes one row of the plasma-buffer, doubled horizontally, onto row yDest
 * of an 8-bpp destination. The row is centred horizontally as an image
 * (G_nPlasmaBoxW*2) pixels wide.
 *
 * bmpDest - destination bitmap (the caller is expected to have acquired/selected it).
 * cpBuf   - start of the source row (G_nPlasmaBoxW bytes).
 * yDest   - destination row.
 */
static INLINE void
_cpRenderPlasmaBufferLinex2to8(BITMAP *bmpDest, uint8_t *cpBuf, int yDest)
{
	int nPixelsLeft = G_nPlasmaBoxW;
	uint8_t *npOut = (uint8_t *)bmp_write_line(bmpDest, yDest);

	npOut += cpGetXOffsetForCenteringImageOnScreen(G_nPlasmaBoxW<<1, bmpDest->w);

	/* As this is an 8-bit mode, the plasma value is written as-is and the palette is automatically used */
	for(; nPixelsLeft!=0; nPixelsLeft--)
	{
		const uint8_t cIndex = *cpBuf;

		/* Two single-byte writes rather than one 16-bit write: no dependence on endianness. */
		bmp_write8(npOut, cIndex);
		npOut++;
		bmp_write8(npOut, cIndex);
		npOut++;

		cpBuf++;
	}

	bmp_unwrite_line(bmpDest);
}



/*
 * Writes one row of the plasma-buffer, doubled horizontally, onto row yDest
 * of a 15-bpp destination. The row is centred horizontally as an image
 * (G_nPlasmaBoxW*2) pixels wide.
 *
 * If G_bApplyingColourLookupTableDirectlyToPlasmaPixels is set, each plasma
 * value indexes G_pPal and the entry is packed as (r<<10)+(g<<5)+b with no
 * scaling - NOTE(review): this presumably relies on G_pPal holding 5-bit
 * channel values; confirm against whatever fills G_pPal.
 * Otherwise the raw plasma value is shown as a grey level.
 *
 * bmpDest - destination bitmap (the caller is expected to have acquired/selected it).
 * cpBuf   - start of the source row (G_nPlasmaBoxW bytes).
 * yDest   - destination row.
 */
static INLINE void
_cpRenderPlasmaBufferLinex2to15(BITMAP *bmpDest, uint8_t *cpBuf, int yDest)
{
	int nPixelsLeft = G_nPlasmaBoxW;
	uint16_t nPacked;
	uint16_t *npOut = (uint16_t *) bmp_write_line(bmpDest, yDest);

	npOut += cpGetXOffsetForCenteringImageOnScreen(G_nPlasmaBoxW<<1, bmpDest->w);

	if(!G_bApplyingColourLookupTableDirectlyToPlasmaPixels)
	{
		/* Use the raw plasma data */
		for(; nPixelsLeft!=0; nPixelsLeft--, cpBuf++)
		{
			const uint8_t cSample = *cpBuf;

			cpMONOMAKE15(nPacked, cSample);

			bmp_write15(npOut, nPacked);
			npOut++;
			bmp_write15(npOut, nPacked);
			npOut++;
		}
	}
	else
	{
		/* Manually lookup the colour to use in the palette */
		for(; nPixelsLeft!=0; nPixelsLeft--, cpBuf++)
		{
			const uint8_t cSample = *cpBuf;
			RGB *pEntry = &G_pPal[cSample];

			nPacked = (pEntry->r<<10)+(pEntry->g<<5)+(pEntry->b);

			bmp_write15(npOut, nPacked);
			npOut++;
			bmp_write15(npOut, nPacked);
			npOut++;
		}
	}

	bmp_unwrite_line(bmpDest);
}



/*
 * Writes one row of the plasma-buffer, doubled horizontally, onto row yDest
 * of a 16-bpp destination. The row is centred horizontally as an image
 * (G_nPlasmaBoxW*2) pixels wide.
 *
 * If G_bApplyingColourLookupTableDirectlyToPlasmaPixels is set, each plasma
 * value indexes G_pPal and the entry is packed as (r<<11)+(g<<5)+b with no
 * scaling - NOTE(review): this presumably relies on G_pPal holding 5/6/5-bit
 * channel values; confirm against whatever fills G_pPal.
 * Otherwise the raw plasma value is shown as a grey level.
 *
 * bmpDest - destination bitmap (the caller is expected to have acquired/selected it).
 * cpBuf   - start of the source row (G_nPlasmaBoxW bytes).
 * yDest   - destination row.
 */
static INLINE void
_cpRenderPlasmaBufferLinex2to16(BITMAP *bmpDest, uint8_t *cpBuf, int yDest)
{
	int nPixelsLeft = G_nPlasmaBoxW;
	uint16_t nPacked;
	uint16_t *npOut = (uint16_t *) bmp_write_line(bmpDest, yDest);

	npOut += cpGetXOffsetForCenteringImageOnScreen(G_nPlasmaBoxW<<1, bmpDest->w);

	if(!G_bApplyingColourLookupTableDirectlyToPlasmaPixels)
	{
		/* Use the raw plasma data */
		for(; nPixelsLeft!=0; nPixelsLeft--, cpBuf++)
		{
			const uint8_t cSample = *cpBuf;

			cpMONOMAKE16(nPacked, cSample);

			bmp_write16(npOut, nPacked);
			npOut++;
			bmp_write16(npOut, nPacked);
			npOut++;
		}
	}
	else
	{
		/* Manually lookup the colour to use in the palette */
		for(; nPixelsLeft!=0; nPixelsLeft--, cpBuf++)
		{
			const uint8_t cSample = *cpBuf;
			RGB *pEntry = &G_pPal[cSample];

			nPacked = (pEntry->r<<11)+(pEntry->g<<5)+(pEntry->b);

			bmp_write16(npOut, nPacked);
			npOut++;
			bmp_write16(npOut, nPacked);
			npOut++;
		}
	}

	bmp_unwrite_line(bmpDest);
}



/*
 * Writes one row of the plasma-buffer, doubled horizontally, onto row yDest
 * of a 24-bpp destination. The row is centred horizontally as an image
 * (G_nPlasmaBoxW*2) pixels wide. Pixels are written one byte at a time
 * (3 bytes per pixel), so no packed pixel value is built.
 *
 * If G_bApplyingColourLookupTableDirectlyToPlasmaPixels is set, each plasma
 * value indexes G_pPal; the entry's bytes are written in B,G,R order when
 * _rgb_b_shift_24 is 0 and in R,G,B order otherwise.
 * Otherwise the raw plasma value is written to all three bytes (grey).
 *
 * bmpDest - destination bitmap (the caller is expected to have acquired/selected it).
 * cpBuf   - start of the source row (G_nPlasmaBoxW bytes).
 * yDest   - destination row.
 */
static INLINE void
_cpRenderPlasmaBufferLinex2to24(BITMAP *bmpDest, uint8_t *cpBuf, int yDest)
{
	int nPixelsLeft = G_nPlasmaBoxW;
	uint8_t *npOut = (uint8_t *) bmp_write_line(bmpDest, yDest);

	npOut += (cpGetXOffsetForCenteringImageOnScreen(G_nPlasmaBoxW<<1, bmpDest->w))*3;

	if(!G_bApplyingColourLookupTableDirectlyToPlasmaPixels)
	{
		/* Use the raw plasma data */
		for(; nPixelsLeft!=0; nPixelsLeft--, cpBuf++)
		{
			const uint8_t cSample = *cpBuf;
			int nByte;

			/* 2 pixels * 3 identical bytes; byte order is irrelevant for grey */
			for(nByte=0; nByte<6; nByte++)
			{
				bmp_write8(npOut, cSample);
				npOut++;
			}
		}
	}
	else
	{
		/* Manually lookup the colour to use in the palette */
		for(; nPixelsLeft!=0; nPixelsLeft--, cpBuf++)
		{
			const uint8_t cSample = *cpBuf;
			RGB *pEntry = &G_pPal[cSample];
			int nCopy;

			// TODO: Do this test outside the loops.
			// TODO: Confirm that this is correct on big-endian machines.
			// ?: Does _rgb_b_shift_24 == 0 always imply g-shift 8 and r-shift 16 (and the reverse otherwise)?
			const int bBlueFirst = (_rgb_b_shift_24==0);
			const unsigned char cFirst = bBlueFirst ? pEntry->b : pEntry->r;
			const unsigned char cLast  = bBlueFirst ? pEntry->r : pEntry->b;

			/* Write the same pixel twice */
			for(nCopy=0; nCopy<2; nCopy++)
			{
				bmp_write8(npOut, cFirst);
				npOut++;
				bmp_write8(npOut, pEntry->g);
				npOut++;
				bmp_write8(npOut, cLast);
				npOut++;
			}
		}
	}

	bmp_unwrite_line(bmpDest);
}



/*
 * Writes one row of the plasma-buffer, doubled horizontally, onto row yDest
 * of a 32-bpp destination. The row is centred horizontally as an image
 * (G_nPlasmaBoxW*2) pixels wide.
 *
 * If G_bApplyingColourLookupTableDirectlyToPlasmaPixels is set, each plasma
 * value indexes G_pPal and the entry is packed as (r<<16)+(g<<8)+b.
 * Otherwise the raw plasma value is shown as a grey level.
 *
 * bmpDest - destination bitmap (the caller is expected to have acquired/selected it).
 * cpBuf   - start of the source row (G_nPlasmaBoxW bytes).
 * yDest   - destination row.
 */
static INLINE void
_cpRenderPlasmaBufferLinex2to32(BITMAP *bmpDest, uint8_t *cpBuf, int yDest)
{
	int nPixelsLeft = G_nPlasmaBoxW;
	uint32_t nPacked;
	uint32_t *npOut = (uint32_t *) bmp_write_line(bmpDest, yDest);

	npOut += cpGetXOffsetForCenteringImageOnScreen(G_nPlasmaBoxW<<1, bmpDest->w);

	if(!G_bApplyingColourLookupTableDirectlyToPlasmaPixels)
	{
		/* Use the raw plasma data */
		for(; nPixelsLeft!=0; nPixelsLeft--, cpBuf++)
		{
			const uint8_t cSample = *cpBuf;

			cpMONOMAKE32(nPacked, cSample);	/* cpMONOMAKENWRAP32 could be used here for the 'bounced' look */
			// Could optimize further by having a blue-only plasma.

			bmp_write32(npOut, nPacked);
			npOut++;
			bmp_write32(npOut, nPacked);
			npOut++;
		}
	}
	else
	{
		/* Manually lookup the colour to use in the palette */
		for(; nPixelsLeft!=0; nPixelsLeft--, cpBuf++)
		{
			const uint8_t cSample = *cpBuf;
			RGB *pEntry = &G_pPal[cSample];

			nPacked = (pEntry->r<<16)+(pEntry->g<<8)+(pEntry->b);

			bmp_write32(npOut, nPacked);
			npOut++;
			bmp_write32(npOut, nPacked);
			npOut++;
		}
	}

	bmp_unwrite_line(bmpDest);
}



/*
 * Renders the plasma-buffer magnified by 2 in both directions, centred
 * vertically on the destination (each line-renderer centres horizontally).
 *
 * Every source row of G_nPlasmaBoxW bytes is drawn onto two consecutive
 * destination rows by the line-renderer that matches the destination's
 * colour depth (8, 15, 16, 24 or 32). Any other depth trips an ASSERT and
 * draws nothing for that row.
 *
 * bmpDest - destination bitmap.
 * cpBuf   - plasma buffer: G_nPlasmaBoxH rows of G_nPlasmaBoxW bytes.
 */
static void
_cpRenderPlasmaBufferx2(BITMAP *bmpDest, uint8_t *cpBuf)
{
	int nRowsLeft = G_nPlasmaBoxH;
	int nDestRow = cpGetYOffsetForCenteringImageOnScreen(G_nPlasmaBoxH<<1, bmpDest->h);

	acquire_bitmap(bmpDest);
	bmp_select(bmpDest);	/* Just in case bmpDest==screen Video mem not part of normal adr space */

	for(; nRowsLeft!=0; nRowsLeft--)
	{
		const int nDepth = bitmap_color_depth(bmpDest);

		if(nDepth==8)
		{
			_cpRenderPlasmaBufferLinex2to8(bmpDest, cpBuf, nDestRow);
			_cpRenderPlasmaBufferLinex2to8(bmpDest, cpBuf, nDestRow+1);
			nDestRow += 2;
		}
		else if(nDepth==15)
		{
			_cpRenderPlasmaBufferLinex2to15(bmpDest, cpBuf, nDestRow);
			_cpRenderPlasmaBufferLinex2to15(bmpDest, cpBuf, nDestRow+1);
			nDestRow += 2;
		}
		else if(nDepth==16)
		{
			_cpRenderPlasmaBufferLinex2to16(bmpDest, cpBuf, nDestRow);
			_cpRenderPlasmaBufferLinex2to16(bmpDest, cpBuf, nDestRow+1);
			nDestRow += 2;
		}
		else if(nDepth==24)
		{
			_cpRenderPlasmaBufferLinex2to24(bmpDest, cpBuf, nDestRow);
			_cpRenderPlasmaBufferLinex2to24(bmpDest, cpBuf, nDestRow+1);
			nDestRow += 2;
		}
		else if(nDepth==32)
		{
			_cpRenderPlasmaBufferLinex2to32(bmpDest, cpBuf, nDestRow);
			_cpRenderPlasmaBufferLinex2to32(bmpDest, cpBuf, nDestRow+1);
			nDestRow += 2;
		}
		else
		{
			/* Unsupported colour depth */
			ASSERT(FALSE);
		}

		cpBuf += G_nPlasmaBoxW;	/* Move onto next line */
	}

	release_bitmap(bmpDest);
}







/*
Idea: Instead of applying plasma to an existing param, it should completely replace the existing param.
*/

/*
Note: averages are not a good idea as they bugger up the contrast in the value.
*/

/*
// Un-weighted average
//c8Stim2 = (c8Stim2>>1) + ((*cpPlasBufPos)>>1);	// Average
*/


/*
// Weighted average

#define CPSATWEIGHTIMG 1.0
#define CPSATWEIGHTPLAS 2.0

nPixelSat = (
	nPixelSat*CPSATWEIGHTIMG + (((float)(*((char *)cpPlasBufPos)))*(1.0/256.0))*CPSATWEIGHTPLAS
	)
	/ (CPSATWEIGHTIMG + CPSATWEIGHTPLAS) ; // Sat is average of plasma and pic

// Idea: how could I do weights in such a way as to involve hardly any *'s.
// Solution: An intensity-table - sort of like an 8-bit / table
// Could even apply it to the plasma while it's being pre-calc'd.
// If applying to the plasma, would have to apply after I did the bounce-thingy.

*/


/*
 * Adds one plasma sample to an 8-bit stimulus, clamping the result to 0..255.
 *
 * cp8Stim      - in/out: the stimulus value, updated in place.
 * cpPlasBufPos - the plasma byte to apply. It is reinterpreted as a signed
 *                value, so it contributes an offset of -128..+127.
 *                Rationale: the mid-value plasma-pixel should be the neutral
 *                position (0). This of course assumes we are dealing with a
 *                normalised plasma.
 *
 * Of the 3 conceivable modes for adding the plasma (+0..+255, -128..+127,
 * -255..0), only -128..+127 is implemented here.
 */
static INLINE void
_cpApplyPlasmaTo8Stimulus(uint8_t *cp8Stim, uint8_t *cpPlasBufPos)
{
	/* Read explicitly as 'signed char': plain 'char' has implementation-defined signedness and would yield 0..255 on platforms where it is unsigned. */
	long nTemp = *cp8Stim + *((signed char *)cpPlasBufPos);

	// Idea: with this clamping, could make it so that in ASM, we test for overflow. If plasmabuf has -ve value, set to 00, else set to FF

	if(nTemp>0xFF)
	{
		nTemp = 0xFF;
	}
	else if(nTemp<0x00)
	{
		nTemp = 0x00;
	}

	/*
	// Alternative, branch-free version of the clamp above (which version to use depends on how the CPU is slowed down by conditional branching)
	nTemp &= (~nTemp) >> 31;	// Clamp values <0
	nTemp -= 0xFF;				// Clamp values >255
	nTemp &= nTemp >> 31;
	nTemp += 0xFF;
	*/

	*cp8Stim = (uint8_t)nTemp;
}



// We could do a special case where we take the raw plasma-output and use that. There would be no intensity.
// We would use the plasma-code that does not molest the plasma with intensity, bounce, etc.
// If we did that, would we need to read cpPlasBufPos as a char or uint8_t? I don't think it matters.

// This is the only thing where we don't have to clamp (or for that matter, 'bounce') the plasma-intensity, coz hue wraps around. But we can if we want to.

/*
 * Adds one plasma sample, scaled by 6, to a 16-bit stimulus. No clamping is
 * done.
 *
 * np16Stim     - in/out: the stimulus value, updated in place. It is signed
 *                so that results below zero are easy to detect.
 * cpPlasBufPos - the plasma byte to apply. It is reinterpreted as a signed
 *                value (-128..+127), so the stimulus changes by -768..+762.
 *                Rationale: the mid-value plasma-pixel should be the neutral
 *                position (0). Note that if the parameter is cyclic and is
 *                not multiplied by intensity, this will work equally fine on
 *                normalised and un-normalised plasmas.
 *
 * NOTE(review): the factor 6 presumably stretches the 8-bit plasma range over
 * a hue range of 0..0x5FF, and out-of-range results are presumably wrapped by
 * the colour-space conversion function - confirm against that code.
 */
static INLINE void
_cpApplyPlasmaTo16Stimulus(int16_t *np16Stim, uint8_t *cpPlasBufPos)
{
	/* Read explicitly as 'signed char': plain 'char' has implementation-defined signedness and would yield 0..255 on platforms where it is unsigned. */
	const int nPlasmaOffset = *((signed char *)cpPlasBufPos);

	*np16Stim += nPlasmaOffset*6;
	// Depending on how evil multiplies are, (nPlasmaOffset<<2) + (nPlasmaOffset<<1) could be used instead.

	// Instead of *ing by 6, just make the plasma-value range to 5ff. No *'ing by 6, and more accuracy preserved.
	// Or alternatively, squash the hue-range to fit 0..ff

	// If unsigned 16-bit values were used throughout instead, a result that wrapped below zero (>0x7FFF) would need 0x600 adding to put it back in range.
}




// HSV -> ColourSpaceEncoded

static void
_cpRenderImage(CpImgColSp *icspColSpc, BITMAP *bmpDest)
{
	// Warning, if using MODE X, we may have to use double-buffering.

	int x,y;	/* Loop counters */
	uint32_t nPixelColour;	// uint16_t for 15,16 bits (24-bits = ???) ? A: Only if fpColourSpaceFunction returns an appropriately-sized return-value.
	int16_t n16Stim1;
	uint8_t c8Stim2, c8Stim3;
//	uint32_t nTmpHSV;

	uint32_t *npImgColSpChannelsBufPos = icspColSpc->naImgColSpChannels;

	uint8_t *cpPlasBufPos = cpGetPlasmaBuffer();



	/* Work out size of what we're dealing with */
	// TODO: Pass in X and Y.
	int nW = icspColSpc->nW;
	int nH = icspColSpc->nH;

	int nXOffset, nYOffset;	/* Used to centre the image */


	CpColourSpaceFromFunction *fpColourSpaceFunction;

	AeBool *baColourSpaceFlags = G_cps.baaColourSpaceFlags[cpMapCpColourSpaceToInt(G_cps.csColourSpaceToUse)];




	/* Which colourspace are we going to be using? */
	switch(G_cps.csColourSpaceToUse)
	{
		case cpCOLOURSPACE_RGB:	fpColourSpaceFunction = cpTurboDecRGB;	break;
		case cpCOLOURSPACE_HSV:	fpColourSpaceFunction = cpTurboHSV2RGB;	break;
		case cpCOLOURSPACE_HLS:	fpColourSpaceFunction = cpTurboHLS2RGB;	break;

		default:
		{
			TRACE("Invalid colour-space.\n");
			fpColourSpaceFunction = NULL;
			ASSERT(FALSE);
		}
	}



	cpGetOffsetsForCenteringImageOnScreen(&nXOffset, &nYOffset, nW, nH, bmpDest->w, bmpDest->h);


	acquire_bitmap(bmpDest);

	bmp_select(bmpDest);	/* Just in case bmpDest==screen Video mem not part of normal adr space */


	switch(bitmap_color_depth(bmpDest))
	{
		case 32:
		{
			for(y=0;y<nH;y++)
			{
				//uint32_t *nSrcBmpLineAdr = (uint32_t *)g_bmpSourcePic->line[y+nYOffset];
				uint32_t *npScreenLineAdr = (uint32_t *) bmp_write_line(bmpDest, y+nYOffset);
				npScreenLineAdr += nXOffset;

				x=nW;

				while(x--)
				{
					/* Store copies of the current stimuli so we can work on them */
					n16Stim1 = (*npImgColSpChannelsBufPos)>>16;
					c8Stim2 = (*npImgColSpChannelsBufPos)>>8;
					c8Stim3 = *npImgColSpChannelsBufPos++;
					// OPTIMIZATION idea: If we know the endian-ness, we can do away with these temp stimulation values. ?: Is the compiler smart enough to do this for us?

					//nTmpHSV = *npImgColSpChannelsBufPos++;



					if(baColourSpaceFlags[0])
					{
						_cpApplyPlasmaTo16Stimulus(&n16Stim1, cpPlasBufPos);
					}

					if(baColourSpaceFlags[1])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim2, cpPlasBufPos);
					}

					if(baColourSpaceFlags[2])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim3, cpPlasBufPos);
					}

					// Idea: option to invert the effect (0-1 -> 1-0).

					// IDEA: could use a 256*256 lookup table for applying two 8-bit values together. that way, we could implement effects such as bouncing.

					// Even a multiply table for 0-256 representing 0.0-1.0 Hue = 0.0-6.0



					nPixelColour = fpColourSpaceFunction(n16Stim1, c8Stim2, c8Stim3);
					//cpTurboHSV2RGB2(nTmpHSV);


					/*
					//This effect is for applying a plasma quickly on individual RGB channel (TODO: make sure overflow isn't carried to next colour-channel (clamp) or bounce the plasma values)
					nPixelColour =
						((*cpPlasBufPos)<<0)
						+ *nSrcBmpLineAdr++
						;
					*/



					/* ADVANCE pos in plasma buffer if nesc. */
					if(((uint32_t)npScreenLineAdr)&4)
					{
						cpPlasBufPos++;
					}
					/*
					cpPlasBufPos += (((uint32_t)npScreenLineAdr)&4)>>2;
					*/


					/* We're done, draw the pixel */
					bmp_write32(npScreenLineAdr++, nPixelColour);

				}

				bmp_unwrite_line(bmpDest);

				/* REPEAT line of plasma buffer if nesc. */
				if(!(y&1))
				{
					cpPlasBufPos -= G_nPlasmaBoxW;	/* 1 line of the plasma buffer is 2 lines of the screen so repeat this line if y is an even # */
				}
			}

		}
		break;

		case 24:
		{
			for(y=0;y<nH;y++)
			{
				//uint8_t *nSrcBmpLineAdr = (uint8_t *)g_bmpSourcePic->line[y+nYOffset];
				uint8_t *npScreenLineAdr = (uint8_t *) bmp_write_line(bmpDest, y+nYOffset);	// Warning: different * type when BPP!=32
				npScreenLineAdr += nXOffset*3;

				x=nW>>1;	/* Do two pixels in one go so we don't have to do a %3 on npScreenLineAdr */
				// ?: What if x-size not a * of 2?

				while(x--)
				{
					/* Store copies of the current stimuli so we can work on them */
					n16Stim1 = (*npImgColSpChannelsBufPos)>>16;
					c8Stim2 = (*npImgColSpChannelsBufPos)>>8;
					c8Stim3 = *npImgColSpChannelsBufPos++;
					// OPTIMIZATION idea: If we know the endian-ness, we can do away with these temp stimulation values. ?: Is the compiler smart enough to do this for us?

					//nTmpHSV = *npImgColSpChannelsBufPos++;



					if(baColourSpaceFlags[0])
					{
						_cpApplyPlasmaTo16Stimulus(&n16Stim1, cpPlasBufPos);
					}

					if(baColourSpaceFlags[1])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim2, cpPlasBufPos);
					}

					if(baColourSpaceFlags[2])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim3, cpPlasBufPos);
					}

					// Idea: option to invert the effect (0-1 -> 1-0).

					// IDEA: could use a 256*256 lookup table for applying two 8-bit values together. that way, we could implement effects such as bouncing.

					// Even a multiply table for 0-256 representing 0.0-1.0 Hue = 0.0-6.0



					nPixelColour = fpColourSpaceFunction(n16Stim1, c8Stim2, c8Stim3);
					//cpTurboHSV2RGB2(nTmpHSV);

					/* We're done with this one. draw the pixel */


					// TODO: Do this if() outside the loops.
					// TODO: Confirm that this is correct on big-endian machines. Also, do I need to do owt for 32-bit colour on big-endian machines?
					if(_rgb_b_shift_24==0)
					{
						// BGR
						// ?: If _rgb_b_shift_24 = 0, does that always mean _rgb_g_shift_24 = 8 and _rgb_r_shift_24 = 16?
						bmp_write8(npScreenLineAdr++, nPixelColour & 0xFF);	// B
						bmp_write8(npScreenLineAdr++, (nPixelColour>>8) & 0xFF);	// G
						bmp_write8(npScreenLineAdr++, nPixelColour>>16);	// R
					}
					else
					{
						//RGB
						// ?: Does this always mean that If _rgb_b_shift_24 = 16, _rgb_g_shift_24 = 16 and _rgb_r_shift_24 = 0?
						bmp_write8(npScreenLineAdr++, nPixelColour>>16);	// R
						bmp_write8(npScreenLineAdr++, (nPixelColour>>8) & 0xFF);	// G
						bmp_write8(npScreenLineAdr++, nPixelColour & 0xFF);	// B
					}



					/* Do a second pixel so we don't have to do a %3 on npScreenLineAdr */
					// ?: What if x-size not a * of 2?

					/* Store copies of the current stimuli so we can work on them */
					n16Stim1 = (*npImgColSpChannelsBufPos)>>16;
					c8Stim2 = (*npImgColSpChannelsBufPos)>>8;
					c8Stim3 = *npImgColSpChannelsBufPos++;
					// OPTIMIZATION idea: If we know the endian-ness, we can do away with these temp stimulation values. ?: Is the compiler smart enough to do this for us?

					//nTmpHSV = *npImgColSpChannelsBufPos++;



					if(baColourSpaceFlags[0])
					{
						_cpApplyPlasmaTo16Stimulus(&n16Stim1, cpPlasBufPos);
					}

					if(baColourSpaceFlags[1])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim2, cpPlasBufPos);
					}

					if(baColourSpaceFlags[2])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim3, cpPlasBufPos);
					}

					// Idea: option to invert the effect (0-1 -> 1-0).

					// IDEA: could use a 256*256 lookup table for applying two 8-bit values together. that way, we could implement effects such as bouncing.

					// Even a multiply table for 0-256 representing 0.0-1.0 Hue = 0.0-6.0



					nPixelColour = fpColourSpaceFunction(n16Stim1, c8Stim2, c8Stim3);
					//cpTurboHSV2RGB2(nTmpHSV);

					/* We're done with this one. draw the pixel */


					// TODO: Do this if() outside the loops.
					// TODO: Confirm that this is correct on big-endian machines. Also, do I need to do owt for 32-bit colour on big-endian machines?
					if(_rgb_b_shift_24==0)
					{
						// BGR
						// ?: If _rgb_b_shift_24 = 0, does that always mean _rgb_g_shift_24 = 8 and _rgb_r_shift_24 = 16?
						bmp_write8(npScreenLineAdr++, nPixelColour & 0xFF);	// B
						bmp_write8(npScreenLineAdr++, (nPixelColour>>8) & 0xFF);	// G
						bmp_write8(npScreenLineAdr++, nPixelColour>>16);	// R
					}
					else
					{
						//RGB
						// ?: Does this always mean that If _rgb_b_shift_24 = 16, _rgb_g_shift_24 = 16 and _rgb_r_shift_24 = 0?
						bmp_write8(npScreenLineAdr++, nPixelColour>>16);	// R
						bmp_write8(npScreenLineAdr++, (nPixelColour>>8) & 0xFF);	// G
						bmp_write8(npScreenLineAdr++, nPixelColour & 0xFF);	// B
					}



					/* We've drawn two pixels so ADVANCE pos in plasma buffer */
					cpPlasBufPos++;

				}

				bmp_unwrite_line(bmpDest);

				/* REPEAT line of plasma buffer if nesc. */
				if(!(y&1))
				{
					cpPlasBufPos -= G_nPlasmaBoxW;	/* 1 line of the plasma buffer is 2 lines of the screen so repeat this line if y is an even # */
				}
			}

		}
		break;

		case 16:
		{
			for(y=0;y<nH;y++)
			{
				//uint16_t *nSrcBmpLineAdr = (uint16_t *)g_bmpSourcePic->line[y+nYOffset];
				uint16_t *npScreenLineAdr = (uint16_t *) bmp_write_line(bmpDest, y+nYOffset);	// Warning: different * type when BPP!=32
				npScreenLineAdr += nXOffset;

				x=nW;

				while(x--)
				{
					/* Store copies of the current stimuli so we can work on them */
					n16Stim1 = (*npImgColSpChannelsBufPos)>>16;
					c8Stim2 = (*npImgColSpChannelsBufPos)>>8;
					c8Stim3 = *npImgColSpChannelsBufPos++;
					// OPTIMIZATION idea: If we know the endian-ness, we can do away with these temp stimulation values. ?: Is the compiler smart enough to do this for us?

					//nTmpHSV = *npImgColSpChannelsBufPos++;



					if(baColourSpaceFlags[0])
					{
						_cpApplyPlasmaTo16Stimulus(&n16Stim1, cpPlasBufPos);
					}

					if(baColourSpaceFlags[1])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim2, cpPlasBufPos);
					}

					if(baColourSpaceFlags[2])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim3, cpPlasBufPos);
					}

					// Idea: option to invert the effect (0-1 -> 1-0).

					// IDEA: could use a 256*256 lookup table for applying two 8-bit values together. that way, we could implement effects such as bouncing.

					// Even a multiply table for 0-256 representing 0.0-1.0 Hue = 0.0-6.0



					nPixelColour = fpColourSpaceFunction(n16Stim1, c8Stim2, c8Stim3);
					//cpTurboHSV2RGB2(nTmpHSV);




					/* ADVANCE pos in plasma buffer if nesc. */
					if(((uint32_t)npScreenLineAdr)&2)
					{
						cpPlasBufPos++;
					}
					/*
					cpPlasBufPos += (((uint32_t)npScreenLineAdr)&2)>>1;
					*/


					/* We're done, draw the pixel */

					// If fpColourSpaceFunction() returned the colour in 16-bit format, we wouldn't need this step.
					nPixelColour = ((nPixelColour & 0xF80000) >> 8) | ((nPixelColour & 0xFC00) >> 5) | ((nPixelColour & 0xF8) >> 3);
					//nPixelColour = makecol16(getr_depth(32, nPixelColour), getg_depth(32, nPixelColour), getb_depth(32, nPixelColour)); ? A: No coz fpColourSpaceFunction() uses shifts to get the R G and B in position.

					bmp_write16(npScreenLineAdr++, nPixelColour);

				}

				bmp_unwrite_line(bmpDest);

				/* REPEAT line of plasma buffer if nesc. */
				if(!(y&1))
				{
					cpPlasBufPos -= G_nPlasmaBoxW;	/* 1 line of the plasma buffer is 2 lines of the screen so repeat this line if y is an even # */
				}
			}

		}
		break;

		case 15:
		{
			for(y=0;y<nH;y++)
			{
				//uint16_t *nSrcBmpLineAdr = (uint16_t *)g_bmpSourcePic->line[y+nYOffset];
				uint16_t *npScreenLineAdr = (uint16_t *) bmp_write_line(bmpDest, y+nYOffset);	// Warning: different * type when BPP!=32
				npScreenLineAdr += nXOffset;

				x=nW;

				while(x--)
				{
					/* Store copies of the current stimuli so we can work on them */
					n16Stim1 = (*npImgColSpChannelsBufPos)>>16;
					c8Stim2 = (*npImgColSpChannelsBufPos)>>8;
					c8Stim3 = *npImgColSpChannelsBufPos++;
					// OPTIMIZATION idea: If we know the endian-ness, we can do away with these temp stimulation values. ?: Is the compiler smart enough to do this for us?

					//nTmpHSV = *npImgColSpChannelsBufPos++;



					if(baColourSpaceFlags[0])
					{
						_cpApplyPlasmaTo16Stimulus(&n16Stim1, cpPlasBufPos);
					}

					if(baColourSpaceFlags[1])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim2, cpPlasBufPos);
					}

					if(baColourSpaceFlags[2])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim3, cpPlasBufPos);
					}

					// Idea: option to invert the effect (0-1 -> 1-0).

					// IDEA: could use a 256*256 lookup table for applying two 8-bit values together. that way, we could implement effects such as bouncing.

					// Even a multiply table for 0-256 representing 0.0-1.0 Hue = 0.0-6.0



					nPixelColour = fpColourSpaceFunction(n16Stim1, c8Stim2, c8Stim3);
					//cpTurboHSV2RGB2(nTmpHSV);




					/* ADVANCE pos in plasma buffer if nesc. */
					if(((uint32_t)npScreenLineAdr)&2)
					{
						cpPlasBufPos++;
					}
					/*
					cpPlasBufPos += (((uint32_t)npScreenLineAdr)&2)>>1;
					*/


					/* We're done, draw the pixel */

					// If fpColourSpaceFunction() returned the colour in 15-bit format, we wouldn't need this step.
					nPixelColour = ((nPixelColour & 0xF80000) >> 9) | ((nPixelColour & 0xF800) >> 6) | ((nPixelColour & 0xF8) >> 3);
					//nPixelColour = makecol15(getr_depth(32, nPixelColour), getg_depth(32, nPixelColour), getb_depth(32, nPixelColour)); ? A: No coz fpColourSpaceFunction() uses shifts to get the R G and B in position.

					bmp_write15(npScreenLineAdr++, nPixelColour);

				}

				bmp_unwrite_line(bmpDest);

				/* REPEAT line of plasma buffer if nesc. */
				if(!(y&1))
				{
					cpPlasBufPos -= G_nPlasmaBoxW;	/* 1 line of the plasma buffer is 2 lines of the screen so repeat this line if y is an even # */
				}
			}

		}
		break;

		case 8:
		{
			for(y=0;y<nH;y++)
			{
				//uint8_t *nSrcBmpLineAdr = (uint8_t *)g_bmpSourcePic->line[y+nYOffset];
				uint8_t *npScreenLineAdr = (uint8_t *) bmp_write_line(bmpDest, y+nYOffset);
				npScreenLineAdr += nXOffset;

				x=nW;

				while(x--)
				{
					/* Store copies of the current stimuli so we can work on them */
					n16Stim1 = (*npImgColSpChannelsBufPos)>>16;
					c8Stim2 = (*npImgColSpChannelsBufPos)>>8;
					c8Stim3 = *npImgColSpChannelsBufPos++;
					// OPTIMIZATION idea: If we know the endian-ness, we can do away with these temp stimulation values. ?: Is the compiler smart enough to do this for us?

					//nTmpHSV = *npImgColSpChannelsBufPos++;



					if(baColourSpaceFlags[0])
					{
						_cpApplyPlasmaTo16Stimulus(&n16Stim1, cpPlasBufPos);
					}

					if(baColourSpaceFlags[1])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim2, cpPlasBufPos);
					}

					if(baColourSpaceFlags[2])
					{
						_cpApplyPlasmaTo8Stimulus(&c8Stim3, cpPlasBufPos);
					}

					// Idea: option to invert the effect (0-1 -> 1-0).

					// IDEA: could use a 256*256 lookup table for applying two 8-bit values together. that way, we could implement effects such as bouncing.

					// Even a multiply table for 0-256 representing 0.0-1.0 Hue = 0.0-6.0



					nPixelColour = fpColourSpaceFunction(n16Stim1, c8Stim2, c8Stim3);
					//cpTurboHSV2RGB2(nTmpHSV);




					/* ADVANCE pos in plasma buffer if nesc. */
					if(((uint32_t)npScreenLineAdr)&1)
					{
						cpPlasBufPos++;
					}
					/*
					cpPlasBufPos += ((uint8_t)npScreenLineAdr)&1;
					*/


					/* We're done, draw the pixel */
					ASSERT(rgb_map);	/* Make sure we've set this up. */
					// TODO: instead of makecol8(), use rgb_map->data[r>>3][g>>3][b>>3]
					// Or if using a 3-3-2 palette, use shifts instead of makecol8
					// BEWARE: Allegro's generate_332_palette() generates 'magic pink' for colour 0 and sets colour 254 to be black. Solution: Make the palette ourselves.
					// ?: Should we always use the 3-3-2 (8-8-4 levels) palette? A: If using an alternative (eg. 6-level RGB, 6-7-6 levels RGB, 6-8-5, greyscale).
					// And of course when doing the preview-window, would want to use the same palette as the rest of the desktop.
					// TODO: Optimise by writing an entire uint32_t at a time. But beware if area.x is not a multiple of 4.
					bmp_write8(npScreenLineAdr++, makecol8((nPixelColour>>16), ((nPixelColour>>8)&0x000000ff), (nPixelColour&0x000000ff)));

				}

				bmp_unwrite_line(bmpDest);

				/* REPEAT line of plasma buffer if nesc. */
				if(!(y&1))
				{
					cpPlasBufPos -= G_nPlasmaBoxW;	/* 1 line of the plasma buffer is 2 lines of the screen so repeat this line if y is an even # */
				}
			}

		}
		break;

		default:
		{
			ASSERT(FALSE);
		}


	}	// END switch(bitmap_color_depth(bmpDest))

	release_bitmap(bmpDest);

}



/*
 * cpRenderScreen
 *
 * Top-level render entry-point: draws one frame onto bmpDest.
 *
 * When cpImgColSpIsUsed() reports that the global image colour-space
 * (G_icsImgColSpc) is in use, the frame is drawn by _cpRenderImage().
 * Otherwise the plasma buffer itself is drawn, magnified 2x2.
 *
 * bmpDest: the bitmap to draw onto.
 */
void
cpRenderScreen(BITMAP *bmpDest)
{
	/* Image colour-space in use: hand the whole job to the image renderer. */
	if(cpImgColSpIsUsed(&G_icsImgColSpc))
	{
		_cpRenderImage(&G_icsImgColSpc, bmpDest);
		return;
	}

	/*
	 * No image colour-space in use, so show the plasma on its own.
	 *
	 * Debugging alternatives (swap one in for the call below):
	 */
	// Visualise G_caMultTab:
	//_cpRenderTab256256x1to32(bmpDest, &G_caMultTab[0]);
	// Visualise a plasma tab:
	//_cpRenderPlasmaTabx1to32(bmpDest, cpGetPlasmaTab(1));
	// Visualise the plasma at a 1:1 scale:
	//_cpRenderPlasmaBufferx1to32(bmpDest, cpGetPlasmaBuffer());

	/* Normal case: draw the plasma magnified 2x2. */
	_cpRenderPlasmaBufferx2(bmpDest, cpGetPlasmaBuffer());
}
