//
// NAME: CIDLib_StreamParser.Cpp
//
// DESCRIPTION:
//
//  This module implements the TStreamParser class. The stream parser is a
//  means of doing complex syntax based parsing of any input source that
//  has a stream wrapper around it.
//
//
// AUTHOR: Dean Roddey
//
// CREATE DATE: 06/20/93
//
// COPYRIGHT: 1992..1997, 'CIDCorp
//
// CAVEATS/GOTCHAS:
//


// -----------------------------------------------------------------------------
//  Facility specific includes
// -----------------------------------------------------------------------------
#include    "CIDLib_.Hpp"


// -----------------------------------------------------------------------------
//  Do our standard RTTI macros
// -----------------------------------------------------------------------------
// NOTE(review): RTTIData is defined outside this file; presumably it emits
// the RTTI data for TStreamParser with TObject as its parent -- confirm.
RTTIData(TStreamParser,TObject)


// -----------------------------------------------------------------------------
//  Local constant data
//
//  __c4MaxTokens
//      This is the maximum number of predefined tokens.
//
//  __c4MaxUnget
//      This is the maximum size of the character unget stack.
// -----------------------------------------------------------------------------
// Number of entries in the __pac4Tokens array allocated by the constructor
const tCIDLib::TCard4       __c4MaxTokens  = 64;
// Number of characters in the __pchCharStack array allocated by the constructor
const tCIDLib::TCard4       __c4MaxUnget   = 4096;


// ----------------------------------------------------------------------------
//   CLASS: TStreamParser
//  PREFIX: prsr
// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------
//  TStreamParser: Constructors and Destructors
// ----------------------------------------------------------------------------

//
//  Sets up a parser that reads from the passed text stream. The line counter
//  starts at 1, the token table and the unget stack start out empty, and
//  both of their backing arrays are allocated here. eAdopt is stored and
//  consulted by the destructor to decide whether the stream gets deleted.
//
TStreamParser::TStreamParser(           TTextStream* const  pstrmToReadFrom
                                , const tCIDLib::EAdoptOpts eAdopt) :

    __c4CurLine(1)
    , __c4TokenIndex(0)
    , __eAdopt(eAdopt)
    , __i4StackIndex(-1)
    , __pac4Tokens(new tCIDLib::TCard4[__c4MaxTokens])
    , __pchCharStack(new tCIDLib::Tch[__c4MaxUnget])
    , __pstrmSrc(pstrmToReadFrom)
{
}

//
//  Releases the token table and the unget stack, and deletes the source
//  stream if the adoption option stored at construction says we own it.
//
TStreamParser::~TStreamParser()
{
    //
    //  Both of these were allocated with new[] in the constructor, so they
    //  must be released with the array form of delete.
    //
    delete [] __pac4Tokens;
    delete [] __pchCharStack;

    // Delete the stream if we adopted it
    if (__eAdopt)
        delete __pstrmSrc;
}


// ----------------------------------------------------------------------------
//  TStreamParser: Public, non-virtual methods
// ----------------------------------------------------------------------------

//
//  Adds a 1 or 2 character syntax token to the token table. Each token is
//  stored as a single TCard4: the first character goes in the low bits and,
//  for a two character token, the second character is shifted up by 16 bits.
//  This is the same packing that __eTokenMatch() builds to do its compare.
//
//  An error is logged if the table is already full. In debug builds an
//  error is also logged if the token is not 1 or 2 characters long.
//
tCIDLib::TVoid TStreamParser::AddSyntaxToken(const TString& strBuf)
{
    // Complain if there is no free slot left in the table
    if (__c4TokenIndex >= __c4MaxTokens)
    {
        facCIDLib.LogErr
        (
            __FILE__
            , __LINE__
            , kCIDErrs::errcPrsr_MaxTokens
            , tCIDLib::ESev_ProcessFatal
            , tCIDLib::EClass_BadParms
            , TCardinal(__c4MaxTokens)
        );
    }

    const tCIDLib::TCard4 c4TokenLen = strBuf.c4Length();

    #if CID_DEBUG_ON
    if ((c4TokenLen == 0) || (c4TokenLen > 2))
    {
        facCIDLib.LogErr
        (
            __FILE__
            , __LINE__
            , kCIDErrs::errcPrsr_TokenLength
            , tCIDLib::ESev_ProcessFatal
            , tCIDLib::EClass_BadParms
            , strBuf
        );
    }
    #endif

    // Pack the character(s) into one value, second char in the upper half
    tCIDLib::TCard4 c4Packed = 0;
    if (c4TokenLen == 2)
    {
        c4Packed = strBuf[1];
        c4Packed <<= 16;
    }
    c4Packed += strBuf[0];

    // Store it in the next free slot and move on to the following slot
    __pac4Tokens[__c4TokenIndex] = c4Packed;
    __c4TokenIndex++;
}


//
//  Throws away the rest of the current line. Characters are discarded up to
//  the first CR or LF, then the whole run of consecutive CR/LF characters is
//  discarded as well (so any blank lines that follow are also skipped). The
//  first character after that run is pushed back so that it is the next one
//  read. If the input ends at any point, this just returns.
//
tCIDLib::TVoid TStreamParser::FlushLine()
{
    tCIDLib::Tch chCur = __chGetNext();

    // Phase 1: discard everything up to the first end of line character
    while (chCur && (chCur != kCIDLib::chLF) && (chCur != kCIDLib::chCR))
        chCur = __chGetNext();

    // Phase 2: discard the run of end of line characters itself
    while (chCur && ((chCur == kCIDLib::chLF) || (chCur == kCIDLib::chCR)))
        chCur = __chGetNext();

    // If we stopped on a real character, it belongs to the next line
    if (chCur)
        __UnGet(chCur);
}


//
//  Reads the next token from the input into strTarget.
//
//  strTarget is always cleared first. Leading characters that occur in
//  strSeparators are skipped. If the input runs out while skipping, the
//  target is left empty. Otherwise characters are appended to the target
//  until one of these happens:
//
//      - A separator is seen. It is pushed back and the token ends.
//      - A 1 or 2 character syntax token (see AddSyntaxToken) is seen. If
//        nothing has been collected yet, the syntax token itself becomes
//        the returned token. Otherwise it is pushed back and the text
//        collected so far is returned.
//      - The input runs out.
//
//  strNoSyntax is handed unchanged to __eTokenMatch(), which treats any
//  character found in it as not being a syntax character.
//
tCIDLib::TVoid
TStreamParser::GetNextToken(const   TString&    strSeparators
                            ,       TString&    strTarget
                            , const TString&    strNoSyntax)
{
    // Make sure to start off the target empty
    strTarget.Clear();

    //
    //  First we need to read until we get a non-separator character. Then
    //  we unget that character and fall into the next loop.
    //
    //  c4Dummy only exists to satisfy the second parameter of
    //  bFirstOccurrence(); the value put there is never looked at. c4Ind
    //  counts the characters appended to the target so far. c4TargetSz is
    //  sampled once, right after the Clear() above, and is what the
    //  overflow check below compares against.
    //
    tCIDLib::TCard4 c4Dummy;
    tCIDLib::TCard4 c4Ind       = 0;
    tCIDLib::Tch    chNext      = __chGetNext();
    tCIDLib::TCard4 c4TargetSz  = strTarget.c4BufChars();

    while (chNext)
    {
        if (!strSeparators.bFirstOccurrence(chNext, c4Dummy))
        {
            __UnGet(chNext);
            break;
        }
        chNext = __chGetNext();
    }

    // A zero here means the input ran out while skipping separators
    if (!chNext)
        return;

    //
    //  Ok, start copying characters to the target until we come to a
    //  separator or syntax character. __chGetNext() will return 0 if it
    //  cannot get another character.
    //
    chNext = __chGetNext();
    while (chNext)
    {
        // First see if this char, on its own, is a single char syntax token
        eTokenMatches eMatch = __eTokenMatch(chNext, 0, strNoSyntax);

        if (eMatch == eMatch_Full)
        {
            //
            //  If current index is 0, then this is the token itself.
            //  Otherwise, it just means we unget the character for next
            //  time and break out with current token text.
            //
            if (c4Ind == 0)
            {
                strTarget.Append(chNext);
                c4Ind++;
                break;
            }
             else
            {
                __UnGet(chNext);
                break;
            }
        }
         else if (eMatch == eMatch_First)
        {
            //
            //  It starts at least one 2 char token, so we have to look at
            //  the following char to decide. chSecond will be 0 if the
            //  input ran out, in which case the test below cannot be a
            //  full match since the same test with 0 was just done above.
            //
            tCIDLib::Tch chSecond = __chGetNext();

            if (__eTokenMatch(chNext, chSecond, strNoSyntax) == eMatch_Full)
            {
                // It is a 2 char token, same rules as above
                if (c4Ind == 0)
                {
                    strTarget.Append(chNext);
                    c4Ind++;
                    strTarget.Append(chSecond);
                    c4Ind++;
                }
                 else
                {
                    //
                    //  Unget the first and second char. The unget stack is
                    //  last in first out, so the second char is pushed
                    //  first in order for them to be re-read in order.
                    //
                    __UnGet(chSecond);
                    __UnGet(chNext);
                }
                break;
            }
             else
            {
                // Unget the second char
                __UnGet(chSecond);
            }
        }

        //
        //  Getting here means chNext is not (the start of) a syntax token,
        //  so it is either ordinary token text or a separator.
        //
        if (!strSeparators.bFirstOccurrence(chNext, c4Dummy))
        {
            // Not in the list so copy over
            strTarget.Append(chNext);
            c4Ind++;

            //
            //  NOTE(review): whether LogErr() returns at this severity is
            //  not visible from this file. If it does return, the loop just
            //  carries on appending -- confirm the intended behavior.
            //
            if (c4Ind >= c4TargetSz)
            {
                facCIDLib.LogErr
                (
                    __FILE__
                    , __LINE__
                    , kCIDErrs::errcPrsr_Overflow
                    , tCIDLib::ESev_APIFailed
                    , tCIDLib::EClass_CantDo
                    , TCardinal(c4TargetSz)
                );
            }
        }
         else
        {
            // We hit a separator so unget and break
            __UnGet(chNext);
            break;
        }

        chNext = __chGetNext();
    }
}


//
//  Copies the rest of the current line into strTarget, which is cleared
//  first. Reading stops at the first CR or LF, which is pushed back rather
//  than consumed, or when the input runs out.
//
tCIDLib::TVoid TStreamParser::GetLineRemainder(TString& strTarget)
{
    strTarget.Clear();

    tCIDLib::Tch chCur = __chGetNext();
    while (chCur)
    {
        // The end of line char is not part of the text, so put it back
        if ((chCur == kCIDLib::chLF) || (chCur == kCIDLib::chCR))
        {
            __UnGet(chCur);
            break;
        }

        strTarget.Append(chCur);
        chCur = __chGetNext();
    }
}


//
//  Pushes the text of a token back onto the input. The characters are
//  pushed last to first, so that they are read back in their original
//  order. An empty string does nothing.
//
tCIDLib::TVoid TStreamParser::UnGetToken(const TString& strTokenText)
{
    const tCIDLib::TCard4 c4Count = strTokenText.c4Length();
    if (c4Count)
    {
        const tCIDLib::Tch* const pszChars = strTokenText.pszData();

        // c4Left is one past the index of the next character to push
        for (tCIDLib::TCard4 c4Left = c4Count; c4Left; c4Left--)
            __UnGet(pszChars[c4Left - 1]);
    }
}

//
//  Raw string version of UnGetToken(). The characters are pushed back last
//  to first, so that they are read back in their original order. A zero
//  length string does nothing.
//
tCIDLib::TVoid
TStreamParser::UnGetToken(const tCIDLib::Tch* const pszTokenText)
{
    const tCIDLib::TCard4 c4Count = TRawStr::c4StrLen(pszTokenText);

    // c4Left is one past the index of the next character to push
    for (tCIDLib::TCard4 c4Left = c4Count; c4Left; c4Left--)
        __UnGet(pszTokenText[c4Left - 1]);
}


// ----------------------------------------------------------------------------
//  TStreamParser: Private, non-virtual methods
// ----------------------------------------------------------------------------

//
// FUNCTION/METHOD NAME: __chGetNext
//
// DESCRIPTION:
//
//  This method will return the next character of input. If any characters
//  have been pushed back onto the unget stack, then the most recently pushed
//  one is returned. Otherwise, the next character is read from the source
//  stream. When the end of the stream is hit, then a 0 is returned.
// ---------------------------------------
//   INPUT: None
//
//  OUTPUT: None
//
//  RETURN: The next char or 0 if no more available.
//
tCIDLib::Tch TStreamParser::__chGetNext()
{
    //
    //  Pushed back characters take priority over the stream. The line
    //  count is not touched for these.
    //
    if (__i4StackIndex != -1)
        return __pchCharStack[__i4StackIndex--];

    //
    //  Nothing was pushed back, so go to the stream. An end of stream
    //  error is reported to the caller as a nul character. Any other
    //  error is passed on unchanged.
    //
    tCIDLib::Tch chFromStrm = kCIDLib::chNull;
    try
    {
        chFromStrm = __pstrmSrc->chGet();
    }

    catch(const TError& errToCatch)
    {
        if (!errToCatch.bCheckError(facCIDLib, kCIDErrs::errcStrm_EndOfStream))
            throw;

        return kCIDLib::chNull;
    }

    // A line feed fresh from the stream means we moved to the next line
    if (chFromStrm == kCIDLib::chLF)
        __c4CurLine++;

    return chFromStrm;
}


//
// FUNCTION/METHOD NAME: __eTokenMatch
//
// DESCRIPTION:
//
//  This method will look for the passed token text in the token list. A
//  token is either 1 or 2 characters. If the chSecond parm is 0, then a
//  search is made for a single character token that matches the first char.
// ---------------------------------------
//   INPUT: chFirst is the first char, and is required
//          chSecond is the second char, and can be 0.
//          strNoSyntax is an optional list of chars to force out of the
//              normal list of syntax chars. It can be NUL_TString.
//
//  OUTPUT: None
//
//  RETURN: eMatch_Full if there was a full match, eMatch_First if there
//              was no single character token that matched chFirst, but it
//              did match the first character of at least one token, and
//              eMatch_None if no match at all.
//
TStreamParser::eTokenMatches
TStreamParser::__eTokenMatch(   const   tCIDLib::Tch    chFirst
                                , const tCIDLib::Tch    chSecond
                                , const TString&        strNoSyntax)
{
    //
    //  If a list of excluded chars was provided, then neither of the chars
    //  can be part of a syntax token if it is in that list.
    //
    //  NOTE(review): this tests the address of a reference to detect the
    //  'no list' case. Current compilers may assume a reference is never
    //  null and drop the test -- confirm how callers pass 'no list'.
    //
    if (&strNoSyntax)
    {
        tCIDLib::TCard4 c4Unused;

        if (strNoSyntax.bFirstOccurrence(chFirst, c4Unused)
        ||  (chSecond && strNoSyntax.bFirstOccurrence(chSecond, c4Unused)))
        {
            return eMatch_None;
        }
    }

    // Pack the chars the same way that AddSyntaxToken() stores them
    const tCIDLib::TCard4 c4Packed = tCIDLib::TCard4
    (
        (tCIDLib::TCard4(chSecond) << 16) | chFirst
    );

    //
    //  Run through the defined tokens. An exact match wins immediately.
    //  Along the way, remember whether any token had chFirst as its first
    //  char (the low 16 bits of the packed value.)
    //
    tCIDLib::TBoolean bSawFirst = kCIDLib::False;
    for (tCIDLib::TCard4 c4Slot = 0; c4Slot < __c4TokenIndex; c4Slot++)
    {
        const tCIDLib::TCard4 c4Cur = __pac4Tokens[c4Slot];

        if (c4Cur == c4Packed)
            return eMatch_Full;

        if (tCIDLib::Tch(c4Cur & 0xFFFF) == chFirst)
            bSawFirst = kCIDLib::True;
    }

    return bSawFirst ? eMatch_First : eMatch_None;
}


//
// FUNCTION/METHOD NAME: __UnGet
//
// DESCRIPTION:
//
//  This method will push the passed character onto the unget stack, so that
//  it becomes the next character returned by __chGetNext().
// ---------------------------------------
//   INPUT: chPush is the char to push back
//
//  OUTPUT: None
//
//  RETURN: None
//
tCIDLib::TVoid TStreamParser::__UnGet(const tCIDLib::Tch chPush)
{
    //
    //  __i4StackIndex is the index of the current top of stack, or -1 when
    //  the stack is empty. The push below stores at index+1, so the stack
    //  is full once index+1 reaches __c4MaxUnget. The -1 case is tested
    //  first so that the unsigned conversion is only done on an index that
    //  is not negative.
    //
    if ((__i4StackIndex != -1)
    &&  (tCIDLib::TCard4(__i4StackIndex) + 1 >= __c4MaxUnget))
    {
        // The unget stack has no room for another character
        facCIDLib.LogErr
        (
            __FILE__
            , __LINE__
            , kCIDErrs::errcPrsr_UngetFull
            , tCIDLib::ESev_ProcessFatal
            , tCIDLib::EClass_Internal
        );

        //
        //  NOTE(review): presumably LogErr() does not return at this
        //  severity -- confirm. Returning here guarantees that we never
        //  store past the end of the stack either way.
        //
        return;
    }

    __i4StackIndex++;
    __pchCharStack[__i4StackIndex] = chPush;
}
