
#include "stdafx.h"
#include <windows.h>
#include "TestObj.h"

// MFC run-time class support for CMemBlock (derived from CObject).
// NOTE(review): presumably paired with DECLARE_DYNCREATE(CMemBlock) in TestObj.h - confirm.
IMPLEMENT_DYNCREATE(CMemBlock, CObject)

/////////////////////////////////////////////////////////////////////////////
// Builds an empty block: zero size, no raw allocation and no aligned pointer.
CMemBlock::CMemBlock()
	: m_dwSize(0)
	, m_ptrRaw(NULL)
	, m_ptr(NULL)
{
}

/////////////////////////////////////////////////////////////////////////////
// Destructor: gives the raw allocation (if any) back to the C heap.
//
// m_ptrRaw is obtained from malloc() in Allocate(DWORD), so it has to be
// released with free(). Using operator delete on malloc'ed memory is a
// mismatched deallocation, and deleting through a void pointer is undefined.
// m_ptr only points into the same allocation, so it is never freed on its own.
CMemBlock::~CMemBlock()
{
	if(m_ptrRaw){
		free(m_ptrRaw);
		m_ptrRaw = NULL;
		m_ptr = NULL;
	}
}

/////////////////////////////////////////////////////////////////////////////
void CMemBlock::Allocate(float** ppFloat, DWORD dwSize){
	*ppFloat = (float*)Allocate(dwSize*sizeof(float));
}
void CMemBlock::Allocate(short** ppShort, DWORD dwSize){
	*ppShort = (short*)Allocate(dwSize*sizeof(short));
}
void CMemBlock::Allocate(int** ppInt, DWORD dwSize){
	*ppInt = (int*)Allocate(dwSize*sizeof(int));
}
// Typed convenience overload: requests room for dwSize unsigned chars from the
// byte-oriented Allocate(DWORD) and stores the resulting pointer in *ppUChar
// (NULL when the underlying allocation failed).
void CMemBlock::Allocate(UCHAR** ppUChar, DWORD dwSize)
{
	LPVOID pBlock = Allocate(dwSize * sizeof(UCHAR));
	*ppUChar = static_cast<PUCHAR>(pBlock);
}
void CMemBlock::Allocate(double** ppDouble, DWORD dwSize){
	*ppDouble = (double*)Allocate(dwSize*sizeof(double));
}

/////////////////////////////////////////////////////////////////////////////
// Allocates a buffer of dwByteSize bytes whose start address is a multiple
// of 32 and returns it (also kept in m_ptr).
//
// Any buffer previously owned by this object is released first; its contents
// are not preserved. The raw block is over-allocated by 32 bytes so that the
// returned pointer can be rounded up to the next 32-byte boundary while still
// leaving dwByteSize usable bytes behind it.
//
// Parameters:
//   dwByteSize - number of usable bytes requested.
// Returns:
//   The aligned pointer, or NULL when the request cannot be satisfied (in
//   that case m_ptr is NULL and m_dwSize is 0).
LPVOID CMemBlock::Allocate(DWORD dwByteSize)
{
	const DWORD dwAlign = 32;	// must be a power of two for the mask below

	// The raw block always comes from malloc(), so it has to go back through
	// free() - operator delete on it would be a mismatched deallocation.
	if(m_ptrRaw){
		free(m_ptrRaw);
		m_ptrRaw = NULL;
	}
	m_ptr = NULL;
	m_dwSize = 0;

	// dwByteSize + dwAlign must not wrap around, otherwise a far too small
	// block would be allocated and later overrun by the caller.
	if(dwByteSize > (DWORD)(-1) - dwAlign)
		return NULL;

	m_ptrRaw = malloc(dwByteSize + dwAlign);
	if(!m_ptrRaw)
		return NULL;

	// Round the raw address up to the next multiple of dwAlign:
	//   aligned = (raw + (align - 1)) & ~(align - 1)
	// e.g. with align = 8: raw ...0001 -> +0111 = ...1000 -> & ...1000 = ...1000
	const size_t mask = (size_t)dwAlign - 1;
	const size_t rawAddr = reinterpret_cast<size_t>(m_ptrRaw);
	const size_t alignedAddr = (rawAddr + mask) & ~mask;

	m_ptr = reinterpret_cast<LPVOID>(alignedAddr);
	m_dwSize = dwByteSize;
	return m_ptr;
}

/////////////////////////////////////////////////////////////////////////////
// Clears the whole raw allocation to zero: the m_dwSize payload bytes plus
// the 32 extra bytes that Allocate(DWORD) adds for alignment.
// Does nothing when no buffer is currently allocated.
void CMemBlock::ZeroBuffer()
{
	if(m_ptrRaw == NULL)
		return;

	ZeroMemory(m_ptrRaw, m_dwSize + 32);
}


/////////////////////////////////////////////////////////////////////////////
// CTestObject
/////////////////////////////////////////////////////////////////////////////
// MFC run-time class support for CTestObject (derived from CObject).
// NOTE(review): presumably paired with DECLARE_DYNCREATE(CTestObject) in TestObj.h - confirm.
IMPLEMENT_DYNCREATE(CTestObject, CObject)

// Default state: MMX marked as not present, cold-cache mode off, no output
// box attached, test type 0 and no result flags set on any of the three
// result records (C, Pentium, MMX).
CTestObject::CTestObject()
	: m_bMMXPresent(FALSE)
	, m_bColdCache(FALSE)
	, m_pOutBox(NULL)
	, m_TestType(0)
{
	m_tC.dwFlags = 0;
	m_tPent.dwFlags = 0;
	m_tMMX.dwFlags = 0;
}

// Destructor. This class releases nothing explicitly here.
CTestObject::~CTestObject()
{
	// Intentionally empty.
}

// Times the C, Pentium and (when m_bMMXPresent is set) MMX variants of the
// test by calling RunC(), RunPent() and RunMMX(), and stores each measurement
// in m_tC.time, m_tPent.time and m_tMMX.time as endLo - startLo.
//
// Before each timed call the cache is prepared according to m_bColdCache:
//   - set:     ChillCache() is called.
//   - not set: the variant's non-NULL buffers are passed to HeatCache() and
//              the routine is run once untimed before the timed run.
//
// NOTE(review): TIMERDECL / TIMERSTART / TIMERSTOP are macros defined outside
// this file; presumably they declare and fill startLo / endLo - confirm.
void CTestObject::RunTest()
{
	TIMERDECL
	_asm finit  // Make sure floating point stack is clean

	// Run C version
	if(m_bColdCache){
		ChillCache();
	} else {
		//Get any "C" buffers into cache
		if(m_MBC1.m_ptr != NULL)
			HeatCache(m_MBC1.m_ptr, m_MBC1.m_dwSize);
		if(m_MBC2.m_ptr != NULL)
			HeatCache(m_MBC2.m_ptr, m_MBC2.m_dwSize);
		if(m_MBC3.m_ptr != NULL)
			HeatCache(m_MBC3.m_ptr, m_MBC3.m_dwSize);
		// Untimed warm-up run; only the call below is measured
		RunC();
	}
	TIMERSTART
	RunC();
	TIMERSTOP
	m_tC.time = endLo-startLo;

	// Run Pentium version
	if(m_bColdCache){
		ChillCache();
	} else {
		// Get any "P" buffers into cache
		if(m_MBP1.m_ptr != NULL)
			HeatCache(m_MBP1.m_ptr, m_MBP1.m_dwSize);
		if(m_MBP2.m_ptr != NULL)
			HeatCache(m_MBP2.m_ptr, m_MBP2.m_dwSize);
		if(m_MBP3.m_ptr != NULL)
			HeatCache(m_MBP3.m_ptr, m_MBP3.m_dwSize);
		// Untimed warm-up run; only the call below is measured
		RunPent();
	}

	TIMERSTART
	RunPent();	
	TIMERSTOP
	m_tPent.time = endLo-startLo;

	// Run Pentium MMX version
	// Skipped entirely (m_tMMX.time left untouched) when MMX is not present
	if(m_bMMXPresent){

		if(m_bColdCache){
			ChillCache();
		} else {
			// Get any "M" buffers into cache
			if(m_MBM1.m_ptr != NULL)
				HeatCache(m_MBM1.m_ptr, m_MBM1.m_dwSize);
			if(m_MBM2.m_ptr != NULL)
				HeatCache(m_MBM2.m_ptr, m_MBM2.m_dwSize);
			if(m_MBM3.m_ptr != NULL)
				HeatCache(m_MBM3.m_ptr, m_MBM3.m_dwSize);
			// Untimed warm-up run; only the call below is measured
			RunMMX();
		}

		TIMERSTART
		RunMMX();	
		TIMERSTOP
		m_tMMX.time = endLo-startLo;
	}
}

/////////////////////////////////////////////////////////////////////////////
float CTestObject::CompareBuffers(
	  float* pA, float* pB, DWORD dwCount)
{
	float diffTotal = 0.0f;
	float diffInc = 0.0f;

	for(DWORD i=0;i<dwCount;i++){
		diffInc = pA[i] - pB[i];
		if(diffInc < 0) 
			diffInc = -diffInc;
		diffTotal += diffInc;
		}
	return diffTotal;
}

float CTestObject::CompareBuffers(
	  short* pA, short* pB, DWORD dwCount)
{
	float diffTotal = 0.0f;
	float diffInc = 0.0f;

	for(DWORD i=0;i<dwCount;i++){
		diffInc = (float)(pA[i] - pB[i]);
		if(diffInc < 0) 
			diffInc = -diffInc;
		diffTotal += diffInc;
		}
	return diffTotal;
}

// Returns the sum of the absolute element-wise differences between the first
// dwCount ints of pA and pB, accumulated as float. The result is 0 when
// dwCount is 0.
//
// The subtraction is carried out in double: every int is exactly
// representable there, so widely separated values (e.g. INT_MAX and INT_MIN)
// no longer overflow the signed int subtraction, which is undefined
// behaviour. For inputs whose difference fits in an int the result is the
// same as before, because the exact difference is rounded to float once.
float CTestObject::CompareBuffers(
	  int* pA, int* pB, DWORD dwCount)
{
	float diffTotal = 0.0f;
	float diffInc = 0.0f;

	for(DWORD i=0;i<dwCount;i++){
		diffInc = (float)((double)pA[i] - (double)pB[i]);
		if(diffInc < 0)
			diffInc = -diffInc;
		diffTotal += diffInc;
	}
	return diffTotal;
}

float CTestObject::CompareBuffers(
	  double* pA, double* pB, DWORD dwCount)
{
	float diffTotal = 0.0f;
	float diffInc = 0.0f;

	for(DWORD i=0;i<dwCount;i++){
		diffInc = (float)(pA[i] - pB[i]);
		if(diffInc < 0) 
			diffInc = -diffInc;
		diffTotal += diffInc;
		}
	return diffTotal;
}

// Returns the sum of the absolute element-wise differences between the first
// dwCount bytes of pA and pB, accumulated as float. The result is 0 when
// dwCount is 0.
float CTestObject::CompareBuffers(
	  PUCHAR pA, PUCHAR pB, DWORD dwCount)
{
	float sumAbs = 0.0f;

	for(DWORD remaining = dwCount; remaining != 0; --remaining, ++pA, ++pB){
		// Subtract as signed ints so that a smaller first byte yields a negative value
		const float delta = (float)((int)(*pA) - (int)(*pB));
		sumAbs += (delta < 0) ? -delta : delta;
	}
	return sumAbs;
}

/////////////////////////////////////////////////////////////////////////////
void CTestObject::ZeroAllBuffers()
{
	m_MBC1.ZeroBuffer();
	m_MBC2.ZeroBuffer();
	m_MBC3.ZeroBuffer();
	m_MBP1.ZeroBuffer();
	m_MBP2.ZeroBuffer();
	m_MBP3.ZeroBuffer();
	m_MBM1.ZeroBuffer();
	m_MBM2.ZeroBuffer();
	m_MBM3.ZeroBuffer();
}

/////////////////////////////////////////////////////////////////////////////
// Formats one result line for the test and appends it to the output box.
//
// The line starts with sTestName followed by "; ". After that, in this order
// and only for the fields whose flag is set:
//   C time, Pentium time, Pentium diff, MMX time, MMX diff.
// Times are printed as raw values when bShowCycles is non-zero, otherwise
// divided by m_dwCount with two decimals. When m_bMMXPresent is not set, the
// MMX time is printed as "-np-" and the MMX diff is omitted.
// Does nothing when no output box is attached (m_pOutBox is NULL).
void CTestObject::ShowResults(LPCTSTR sTestName, BOOL bShowCycles)
{
	if(m_pOutBox == NULL)
		return;

	CString sLine;
	CString sPart;

	sLine = sTestName;
	sLine += "; ";

	// "C" timing
	if( m_tC.dwFlags & RESULT_TIME ){
		if(bShowCycles)
			sPart.Format(" C(%d) ", m_tC.time);
		else
			sPart.Format(" C(%0.2f) ", (float)m_tC.time/m_dwCount);
		sLine += sPart;
	}

	// Pentium timing
	if( m_tPent.dwFlags & RESULT_TIME ){
		if(bShowCycles)
			sPart.Format(" P(%d) ", m_tPent.time);
		else
			sPart.Format(" P(%0.2f) ", (float)m_tPent.time/m_dwCount);
		sLine += sPart;
	}

	// Pentium difference - same format in both display modes
	if( m_tPent.dwFlags & RESULT_DIFF ){
		sPart.Format(" :(%d) ", (int)m_tPent.diff);
		sLine += sPart;
	}

	// MMX timing, or a "not present" marker
	if( m_tMMX.dwFlags & RESULT_TIME ){
		if(!m_bMMXPresent)
			sPart.Format(" M(-np-) ");
		else if(bShowCycles)
			sPart.Format(" M(%d) ", m_tMMX.time);
		else
			sPart.Format(" M(%0.2f) ", (float)m_tMMX.time/m_dwCount);
		sLine += sPart;
	}

	// MMX difference - only when MMX is present
	if( (m_tMMX.dwFlags & RESULT_DIFF) && m_bMMXPresent ){
		sPart.Format(" :(%d) ", (int)m_tMMX.diff);
		sLine += sPart;
	}

	// Hand the finished line to the output box
	m_pOutBox->AddString((LPCTSTR)sLine);
}

/////////////////////////////////////////////////////////////////////////////
// Reads through a freshly allocated 16 KB scratch buffer, one dword every
// 32 bytes, walking from the end of the buffer down to offset 0, and then
// releases the buffer again.
// NOTE(review): the intent appears to be to push the test buffers out of the
// data cache before a "cold" timing run - confirm that 16 KB is enough for
// the target CPU.
//
// Fixes over the previous version:
//   - the first load was at offset junkSize, i.e. past the end of the
//     allocation; the walk now starts at the last in-bounds 32-byte step;
//   - a failed malloc() is no longer dereferenced;
//   - the buffer comes from malloc(), so it is released with free(), not delete.
void CTestObject::ChillCache()
{
	int junkSize = 16*1024;
	char *pJunk = (char*)malloc(junkSize);
	if(pJunk == NULL)
		return;		// nothing to read through

	// Highest offset at which a dword load still lies inside the buffer
	// while keeping the 32-byte stride down to offset 0.
	int lastOffset = junkSize - 32;

	_asm
	{
		mov ecx, lastOffset
		mov esi, pJunk
	MainLoop:
		mov eax, [esi+ecx]	// touch this 32-byte step
		sub ecx, 32			// move down 32 bytes
		jge MainLoop		// continue through offset 0
	}

	free(pJunk);
}

/////////////////////////////////////////////////////////////////////////////
// Reads one byte out of every 32 bytes of pBuf, starting at the last byte
// (offset count-1) and stepping down, then reads the first byte.
// NOTE(review): the intent appears to be to pull the buffer into the data
// cache before a "warm" timing run.
//
// Parameters:
//   pBuf  - start of the buffer to read through; NULL is ignored.
//   count - size of the buffer in bytes; values <= 0 are ignored.
//
// Fix over the previous version: the first load was a dword at offset count,
// i.e. entirely past the end of the buffer. All loads are now single bytes
// inside [pBuf, pBuf + count).
void CTestObject::HeatCache(LPVOID pBuf, int count)
{
	if(pBuf == NULL || count <= 0)
		return;

	int lastOffset = count - 1;

	_asm
	{
		mov ecx, lastOffset
		mov esi, pBuf
	MainLoop:
		mov al, byte ptr [esi+ecx]	// touch this 32-byte step
		sub ecx, 32					// move down 32 bytes
		jge MainLoop				// continue while the offset is >= 0
		mov al, byte ptr [esi]		// make sure the very first byte is touched too
	}
}



