/****************************************************************
This file is part of the SkinScript library, and is provided
for educational purposes only.

The SkinScript library is Copyright 2000 Steve Hanov.
YOU MAY NOT INCORPORATE THE SKINSCRIPT LIBRARY INTO OTHER WORKS
IN WHOLE OR IN PART WITHOUT EXPRESS WRITTEN CONSENT FROM THE
COPYRIGHT HOLDER.

For more information, contact the author at smhanov@uwaterloo.ca
*****************************************************************/
//*******************************************************************
//CTagParser
//By Steve Hanov (smhanov@uwaterloo.ca)
//March 1999
//
// Provides limited state-based parsing of HTML-style files. (tags only)
// Note: Technically, this is merely a "Scanner"
//******************************************************************

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <cctype>
#include <algorithm>
#include <new>
#include "TagParser.h"

// Expands to the `case` labels for space, CR (13), LF (10) and TAB (9).
// Used inside a switch over a character as `CASE_WHITESPACE:` -- the final
// colon is supplied at the point of use.
#define CASE_WHITESPACE			case ' ': case 13: case 10: case 9

// Expression form of the same test: true when (a) is space, LF, CR or TAB.
#define IS_WHITE_SPACE(a)	(((a)==' ') || ((a)==10) || ((a)==13) || ((a)==9))

// Removes leading and trailing whitespace (space, TAB, CR, LF) from str in
// place. A string that is empty or consists only of whitespace becomes empty.
void TrimString(std::string& str)
{
    // std::string is always char-based, so the delimiter set must be plain
    // char as well (a _TCHAR literal would not compile in a _UNICODE build).
    static const char* const delim = " \t\r\n";

    const std::string::size_type first = str.find_first_not_of( delim );
    if ( first == std::string::npos ) {
        // Nothing but whitespace (or nothing at all).
        str.clear();
        return;
    }

    // A non-whitespace character exists, so this search cannot fail.
    const std::string::size_type last = str.find_last_not_of( delim );

    // Cut the tail first so that `first` is still a valid offset afterwards.
    str.erase( last + 1 );
    str.erase( 0, first );
}

// Converts every character of str to lower case, in place.
//
// The string is modified through its own non-const interface rather than by
// casting away the constness of c_str(), and std::tolower replaces the
// compiler-specific _strlwr.
void LowerString( std::string& str )
{
    for ( std::string::size_type i = 0; i < str.size(); ++i ) {
        // std::tolower requires a value representable as unsigned char.
        const unsigned char ch = static_cast<unsigned char>( str[i] );
        str[i] = static_cast<char>( std::tolower( ch ) );
    }
}

// Removes leading and trailing whitespace (space, TAB, CR, LF) from the
// NUL-terminated string pszString, in place.
//
// Returns pszString. A NULL pointer is returned unchanged. A string made up
// only of whitespace becomes the empty string.
char* TrimString(char* pszString)
{
	static const char* const kWhitespace = " \t\r\n";

	if(pszString == NULL)
		return pszString;

	// Number of leading whitespace characters.
	const size_t nFirst = strspn(pszString, kWhitespace);

	// nEnd is one past the last character to keep. The scan must look at
	// pszString[nEnd - 1]: pszString[strlen] is the terminator, which is
	// never whitespace and would stop the scan immediately.
	size_t nEnd = strlen(pszString);
	while((nEnd > nFirst) && strchr(kWhitespace, pszString[nEnd - 1]) != NULL)
		nEnd--;

	const size_t nLength = nEnd - nFirst;
	memmove(pszString, pszString + nFirst, nLength);
	pszString[nLength] = 0;
	return pszString;
}

// Default constructor. Performs no explicit initialisation of its own.
CTag::CTag()
{
}

// Destructor. Delegates to Clear(), which deletes every tag held in
// ChildList.
CTag::~CTag()
{
	Clear();
}

void CTag::Clear()
{
	name.clear();
    attributes.clear();

    std::list<CTag*>::iterator iter = ChildList.begin();
    for( ; iter != ChildList.end(); ++iter ) {
        delete *iter;
    }

    ChildList.clear();

	text = "";
}

// Constructs an idle parser: no pending tag, no current tag, no DTD table,
// and the character scanner in the plain-text state.
CTagParser::CTagParser()
{
	_pPotentialTag = NULL;
	_pCurrentTag = NULL;
	iDTD_TableSize = 0;
	pDTD_Table = NULL;

	// Scanner state. ParseCharacter() switches on ParseState and
	// EndBeginTag() copies the attribute buffers, so none of these may be
	// left indeterminate.
	ParseState = PARSING_TEXT;
	bFoundPrintableChar = false;
	bTagAttributeValueInQuotes = false;
	cQuote = 0;
	_bStoresText = false;
	szTagAttributeName[0] = 0;
	szTagAttributeValue[0] = 0;
}

// Destructor. Lets Cleanup() deal with a tag that is still pending, then
// releases the private copy of the DTD table.
CTagParser::~CTagParser()
{
	Cleanup();

	// delete[] on a null pointer does nothing, so no test is needed.
	delete[] pDTD_Table;
	iDTD_TableSize = 0;
}

// Installs a private copy of the caller's parent/child relationship table.
//
//   pDocTypeDefTable  first of iNumEntries entries to copy (byte-wise, via
//                     memcpy); may be NULL only when iNumEntries is 0.
//   iNumEntries       number of entries; 0 installs an empty table.
//
// Returns true when the new table has been installed. Returns false, leaving
// the previously installed table untouched, when the source pointer is NULL
// for a non-empty table or when the copy cannot be allocated.
bool CTagParser::SetTagRelationships(DTD_TABLE_ENTRY* pDocTypeDefTable, unsigned int iNumEntries)
{
	if((iNumEntries != 0) && (pDocTypeDefTable == NULL))
		return false;

	// Build the replacement before touching the current table. That keeps
	// the parser valid if allocation fails, and stays correct even if the
	// caller passes in the table that is currently installed.
	// std::nothrow makes allocation failure show up as NULL.
	DTD_TABLE_ENTRY* pNewTable = new (std::nothrow) DTD_TABLE_ENTRY[iNumEntries];
	if(pNewTable == NULL)
		return false;

	if(iNumEntries != 0)
		memcpy(pNewTable, pDocTypeDefTable, iNumEntries * sizeof(DTD_TABLE_ENTRY));

	delete[] pDTD_Table;
	pDTD_Table = pNewTable;
	iDTD_TableSize = iNumEntries;

	return true;
}

// Returns true when the DTD table has an entry whose szParent equals
// pszParent and whose szChild equals pszPotentialChild (both compared
// case-sensitively with strcmp).
bool CTagParser::CanTagHaveChild(char* pszParent, char* pszPotentialChild)
{
	// With no table installed no tag can have children, which yields a
	// flat tag list rather than a tag tree.
	if(iDTD_TableSize == 0)
		return false;

	for(unsigned int nEntry = 0; nEntry < iDTD_TableSize; ++nEntry)
	{
		const bool bParentMatches =
			(strcmp(pszParent, pDTD_Table[nEntry].szParent) == 0);

		if(bParentMatches &&
			(strcmp(pszPotentialChild, pDTD_Table[nEntry].szChild) == 0))
			return true;
	}

	return false;
}

// Finds the first DTD table entry whose szParent equals pszTagName
// (case-sensitive) and reports whether its uFlags has
// DTD_FLAG_CONTAINS_TEXT set. Returns false when no entry matches, which
// includes the case of an empty or missing table.
bool CTagParser::TagStoresText(const char* pszTagName)
{
	for(unsigned int nEntry = 0; nEntry < iDTD_TableSize; ++nEntry)
	{
		if(strcmp(pszTagName, pDTD_Table[nEntry].szParent) != 0)
			continue;

		// Only the first matching entry is consulted.
		return (pDTD_Table[nEntry].uFlags & DTD_FLAG_CONTAINS_TEXT) != 0;
	}

	return false;
}

// Returns true when pszTagName appears as the szParent of any DTD table
// entry. The comparison ignores case (_stricmp). Returns false when the
// table is empty.
bool CTagParser::IsTagContainer(const char* pszTagName)
{
	for(unsigned int nEntry = 0; nEntry < iDTD_TableSize; ++nEntry)
	{
		if(_stricmp(pszTagName, pDTD_Table[nEntry].szParent) == 0)
			return true;
	}

	return false;
}

// Parses the file szFileName (opened in text mode) one character at a time.
//
// Returns false when the file cannot be opened, true otherwise. A read that
// comes back short ends the parse, whether it was caused by end-of-file or
// by a read error.
bool CTagParser::ParseFile(char *szFileName)
{
	FILE* pInput = fopen(szFileName, "r");
	if(!pInput)
		return false;

	Prepare();

	char Chunk[1024];
	for(;;)
	{
		const size_t nRead = fread(Chunk, 1, sizeof(Chunk), pInput);

		const char* const pEnd = Chunk + nRead;
		for(const char* pCur = Chunk; pCur != pEnd; ++pCur)
			ParseCharacter(*pCur);

		// A short read means there is nothing left to fetch.
		if(nRead < sizeof(Chunk))
			break;
	}

	fclose(pInput);
	Cleanup();

	return true;
}

bool 
CTagParser::ParseText( const _TCHAR* text )
{
    Prepare();
    unsigned len = strlen( text );
    for(size_t i = 0; i < len; i++)
        ParseCharacter(text[i]);

    Cleanup();
    return true;
}


void CTagParser::Prepare()
{
	Cleanup();

	_RootTag.Clear();
	_pCurrentTag = &_RootTag;
	_bStoresText = false;
}

// Finishes off a pending tag, if there is one: it is passed through
// ProcessTag(), and whatever _pPotentialTag points to afterwards is freed.
void CTagParser::Cleanup()
{
	if(_pPotentialTag == NULL)
		return;

	// ProcessTag() may replace _pPotentialTag (ProcessBeginTag and
	// ProcessEndTag both assign a new CTag to it), so the member is read
	// again for the delete rather than cached.
	ProcessTag();

	delete _pPotentialTag;
	_pPotentialTag = NULL;
}


// Feeds one character to the scanner. '<' always opens a new tag and '>'
// always closes the current one, whatever the state; every other character
// goes to the handler for the current ParseState. Always returns true.
bool CTagParser::ParseCharacter(char cChar)
{
	if(cChar == '<')
	{
		BeginNewTag();
		return true;
	}

	if(cChar == '>')
	{
		EndBeginTag();
		return true;
	}

	// Ordinary character: dispatch on the current state.
	switch(ParseState)
	{
		case PARSING_TAG_NAME:
			ParseTagName(cChar);
			break;

		case PARSING_TAG_ATTRIBUTE_NAME:
			ParseTagAttributeName(cChar);
			break;

		case PARSING_TAG_ATTRIBUTE_NAME_OR_VALUE:
			ParseTagAttributeNameOrValue(cChar);
			break;

		case PARSING_TAG_ATTRIBUTE_VALUE:
			ParseTagAttributeValue(cChar);
			break;

		case PARSING_COMMENT:
		case PARSING_TEXT:
			ParseText(cChar);
			break;
	}

	// Line counting is disabled:
//	if(cChar == 13)
//		lLineCount++;

	return true;
}

// Starts collecting a new tag. A tag that was still pending is abandoned
// and freed.
void CTagParser::BeginNewTag()
{
	// Deleting a null pointer does nothing, so no test is required.
	delete _pPotentialTag;
	_pPotentialTag = new CTag;

	BeginParseState(PARSING_TAG_NAME);
}

void CTagParser::EndBeginTag()
{
	switch(ParseState)
	{
		case PARSING_TAG_NAME:
			ProcessTag();
			break;

		case PARSING_TAG_ATTRIBUTE_NAME:
		case PARSING_TAG_ATTRIBUTE_NAME_OR_VALUE:
		case PARSING_TAG_ATTRIBUTE_VALUE:

            _pPotentialTag->attributes[szTagAttributeName] =
                szTagAttributeValue;
			szTagAttributeName[0] = 0;
			szTagAttributeValue[0] = 0;
			
			ProcessTag();
			break;
	
	
		case PARSING_TEXT:

			//Not parsing a tag. just add it to document as text.
//			ParseText('>');
			break;
	}

	BeginParseState(PARSING_TEXT);

	return;
}

void CTagParser::ParseTagName(char cChar)
{
	int nCurTagNameLength = _pPotentialTag->name.length();
	if(nCurTagNameLength > MAX_TAG_NAME_LENGTH)
	{
		//abort this tag. Tag name too long. 
        _pPotentialTag->name.clear();
		BeginParseState(PARSING_TEXT);
		
		return;
	}

	switch(cChar)
	{
		CASE_WHITESPACE:
			
			if(bFoundPrintableChar)
			{
                LowerString( _pPotentialTag->name );
				TrimString(_pPotentialTag->name);
				//Whitespace char. Go to next state.
				if( _pPotentialTag->name == "!--" )
					BeginParseState(PARSING_COMMENT);
				else
					BeginParseState(PARSING_TAG_ATTRIBUTE_NAME);
			}
			break;
			
		default:	
            _pPotentialTag->name += cChar;
			bFoundPrintableChar = true;
	}

	return;
}

void CTagParser::ParseTagAttributeName(char cChar)
{
	int nTagAttributeNameLength = strlen(szTagAttributeName);
	if(nTagAttributeNameLength > MAX_TAG_ATTRIBUTE_NAME_LENGTH)
	{
		//too long. abort this attribute.
		szTagAttributeName[0] = 0;
		return;
	}

	switch(cChar)
	{
		case '=':

			if(bFoundPrintableChar)
			{
				//Whitespace char. Go to next state.
				TrimString(szTagAttributeName);
				_strlwr(szTagAttributeName);
				BeginParseState(PARSING_TAG_ATTRIBUTE_VALUE);
			}
			else
			{
				//ERROR! No tag name!
				//Abort the tag.
				BeginParseState(PARSING_TEXT);
			}
			break;
			
		default:	
			szTagAttributeName[nTagAttributeNameLength] = cChar;
			szTagAttributeName[nTagAttributeNameLength+1] = 0;
			bFoundPrintableChar = true;
	}
}

// Handles one character while an attribute value is being collected.
//
//   whitespace   inside quotes: part of the value. Outside quotes: ignored
//                before the value starts, otherwise it ends the value, the
//                attribute is stored and the scanner goes back to the
//                attribute-name state.
//   ' or "       opens a quoted section, or closes it when it is the same
//                character that opened it. The *other* quote character
//                inside a quoted section is ordinary text ("it's").
//   other        appended to the value.
//
// A value that has reached the length limit discards the whole attribute.
void CTagParser::ParseTagAttributeValue(char cChar)
{
	int nTagAttributeValueLength = strlen(szTagAttributeValue);

	// Appending writes two bytes: the character and a new terminator. The
	// buffer is declared outside this file, so check against its real size
	// as well as the nominal limit.
	// NOTE(review): assumes szTagAttributeValue is a char array member --
	// confirm against TagParser.h.
	if((nTagAttributeValueLength > MAX_TAG_ATTRIBUTE_VALUE_LENGTH) ||
		(static_cast<size_t>(nTagAttributeValueLength) + 2 > sizeof(szTagAttributeValue)))
	{
		//too long. abort entire attribute.
		szTagAttributeValue[0] = 0;
		szTagAttributeName[0] = 0;
		BeginParseState(PARSING_TAG_ATTRIBUTE_NAME);

		return;
	}

	bool bAppendChar = false;

	switch(cChar)
	{
		CASE_WHITESPACE:

			if(bFoundPrintableChar)
			{
				if(bTagAttributeValueInQuotes)
				{
					bAppendChar = true;
				}
				else
				{
					//End of an unquoted value. Store it and go to next state.
					if(szTagAttributeName[0] != 0)
					{
						_pPotentialTag->attributes[szTagAttributeName] =
							szTagAttributeValue;
					}
					BeginParseState(PARSING_TAG_ATTRIBUTE_NAME);
				}
			}
			break;

		case '\'':
		case '\"':
			if(!bTagAttributeValueInQuotes)
			{
				bTagAttributeValueInQuotes = true;
				cQuote = cChar;
			}
			else if(cQuote == cChar)
			{
				bTagAttributeValueInQuotes = false;
			}
			else
			{
				// A different quote character inside the quotes is
				// content, not a delimiter.
				bAppendChar = true;
			}
			bFoundPrintableChar = true;
			break;

		default:
			bAppendChar = true;
			bFoundPrintableChar = true;
	}

	if(bAppendChar)
	{
		szTagAttributeValue[nTagAttributeValueLength] = cChar;
		szTagAttributeValue[nTagAttributeValueLength+1] = 0;
	}
}

// Handler for the PARSING_TAG_ATTRIBUTE_NAME_OR_VALUE state.
//
// Currently a no-op: the whole body is commented out, so any character
// dispatched here is discarded.
void CTagParser::ParseTagAttributeNameOrValue(char cChar)
{
//	ASSERT(ParseState = PARSING_TAG_ATTRIBUTE_NAME_OR_VALUE);

	//Intended behaviour of the disabled code below: look for the next
	//printable character. If it is an equal sign, the next token is the
	//value of the attribute just named. If it is anything else, the
	//attribute just named has no value and the character starts the next
	//attribute name.

/*	switch(cChar)
	{
		CASE_WHITESPACE:
			//do nothing.
			break;			
			
		case '=':
			bTagAttributeValueInQuotes = false;
			ParseState = PARSING_TAG_ATTRIBUTE_VALUE;
			break;

		default:	
			_pPotentialTag->TagAttributeNames.AddTail(szTagAttributeName);
			_pPotentialTag->TagAttributeValues.AddTail("");
			szTagAttributeName[0] = 0;
			szTagAttributeValue[0] = 0;
			
			BeginParseState(PARSING_TAG_ATTRIBUTE_NAME);
			ParseTagAttributeName(cChar);
	}*/
}

// Handles one character of text outside any tag. The character is appended
// to the current tag's text when _bStoresText is set, and dropped otherwise.
void CTagParser::ParseText(char cChar)
{
	if(!_bStoresText)
		return;

	assert(_pCurrentTag);
	_pCurrentTag->text += cChar;
}

// Dispatches the completed pending tag: a name starting with '/' is treated
// as an end tag, anything else as a begin tag. The name is lower-cased
// first.
//
// A pending tag with no name is ignored. ProcessBeginTag and ProcessEndTag
// both leave a fresh, empty CTag in _pPotentialTag, and Cleanup() passes
// that placeholder through here; without this check it would be appended to
// the tree as a nameless child at the end of every parse. It also keeps
// name[0] from being evaluated on an empty string.
void CTagParser::ProcessTag()
{
	if(_pPotentialTag->name.empty())
		return;

	LowerString(_pPotentialTag->name);

	if(_pPotentialTag->name[0] == '/')
		ProcessEndTag();
	else
		ProcessBeginTag();
}

// Switches the scanner to NewParseState. The "printable character seen" and
// "inside quotes" flags are always cleared; in addition the buffer that the
// new state is about to fill is emptied:
//   PARSING_TAG_NAME             -> pending tag's name
//   PARSING_TAG_ATTRIBUTE_NAME   -> szTagAttributeName
//   PARSING_TAG_ATTRIBUTE_VALUE  -> szTagAttributeValue
// The remaining states need no further preparation.
void CTagParser::BeginParseState(PARSESTATE NewParseState)
{
	ParseState = NewParseState;
	bFoundPrintableChar = false;
	bTagAttributeValueInQuotes = false;

	if(NewParseState == PARSING_TAG_NAME)
	{
		_pPotentialTag->name.clear();
	}
	else if(NewParseState == PARSING_TAG_ATTRIBUTE_NAME)
	{
		szTagAttributeName[0] = 0;
	}
	else if(NewParseState == PARSING_TAG_ATTRIBUTE_VALUE)
	{
		szTagAttributeValue[0] = 0;
	}
}

/****************************************************************************

   void CTagParser::ProcessStartTag()

	Called after a valid tag and all attribute names and values have been
	read. Takes care of:
	
		1. Closing open block elements, if necessary.
		2. Opening a new block element, if necessary.
		3. Adding inline elements, if necessary.

	- Changes to appropriate read state.

 ***************************************************************************/
void CTagParser::ProcessBeginTag()
{
	bool bTagIsContainer = false;

	bTagIsContainer = IsTagContainer(_pPotentialTag->name.c_str());

	_pCurrentTag->ChildList.push_back(_pPotentialTag);
	
	if(bTagIsContainer)
	{
		_TagStack.push_back(_pCurrentTag);
		_pCurrentTag = _pPotentialTag;
	}

	_bStoresText = TagStoresText(_pCurrentTag->name.c_str());

	_pPotentialTag = new CTag;

	/*TAGNAME TagName =	GetTagFromString(strTagName);

	if(TagName == eTag_undefined)
	{
		//unknown tag encountered.
		ReportError(CString("Unknown Tag \"") + strTagName + '\"');
		return;
	}

	TagStack.AddTail(TagName);
	CurrentTagName = TagName;
	
	switch(TagName)
	{
		case eTag_html:
			break;

		case eTag_body:
			HandleBeginTag_body();
			break;

		case eTag_p:
			HandleBeginTag_p();
			break;

		default:
			ASSERT(FALSE);
	}*/

}

void CTagParser::ProcessEndTag()
{
	if ( _pCurrentTag->name == _pPotentialTag->name.substr( 1 ) )
	{
		if(!_TagStack.empty())
		{
			_pCurrentTag = _TagStack.back();
            _TagStack.pop_back();
		}
	}
	
	delete _pPotentialTag;
	_pPotentialTag = new CTag;

	/*TAGNAME TagName = GetTagFromString(strTagName.Right(strTagName.GetLength() - 1));

	if(TagName == eTag_undefined)
	{
		//unknown tag encountered.
		ReportError(CString("Unknown Tag \"") + strTagName + '\"');
		return;
	}*/
}
