/*
 * These functions compile a regular expression into an NFA and recognize
 * a pattern.
 *
 * Regular expressions are often used to describe a pattern that might
 * match several different or similar words.  An example of a regular
 * expression subset is the wild card characters '*' and '?' used to list
 * files in a directory.
 *
 * Might as well read the books and papers written by those who first wrote
 * the various [a-z]?greps.  Ken Thompson was the author of grep.  In one
 * weekend, Al Aho wrote egrep and fgrep.  Incidentally, when Dr. Aho was
 * the guest speaker at the Distinguished Lecture Series at Georgia Tech,
 * he personally autographed my copy of his book  _Compilers:  Principles,
 * Techniques, and Tools_, aka the "Dragon Book."
 *
 * See:
 *
 *   Ken Thompson, "Regular Expression Search Algorithm."  _Communications
 *      of the ACM_, 11 (No. 6): 419-422, 1968.
 *
 *   Alfred V. Aho, Ravi Sethi, and Jeffrey D. Ullman, _Compilers: Principles,
 *      Techniques, and Tools_, Addison-Wesley, Reading, Mass., 1986,
 *      Chapter 3, "Lexical Analysis", pp 83-158.  ISBN 0-201-10088-6.
 *
 *   Robert Sedgewick, _Algorithms in C_, Addison-Wesley, Reading, Mass.,
 *      1990, Chapter 20, "Pattern Matching", pp. 293-303, and Chapter 21,
 *      "Parsing", pp. 305-317.  ISBN 0-201-51425-7.
 *
 *   Andrew Hume, "A Tale of Two Greps."  _Software--Practice and Experience_,
 *      18 (No. 11): 1063-1072, 1988.
 *
 *
 *                         Regular Expressions in TDE
 *
 * The core of the regular expression search algorithm in TDE is based on
 * the nondeterministic finite-state machine in Dr. Segdewick's book.
 * Additional parsing, operator precedence, and nfa construction and
 * simulation info is from Dr. Aho's book.  The nfa node naming convention
 * and additional nfa construction guidelines are from Ken Thompson's paper.
 * I'm much too stupid to compile a regular expression into assembly language
 * as suggested in Ken Thompson's paper, but I did get the idea to do the
 * next best thing and added C library functions as NNODE terminal types.
 *
 * The local global NFA is builded with two arrays and a deque.  The
 * first array, next1, in the NFA is used to hold the path for lambda
 * transitions.  The second array, next2, is used to hold alternate paths.
 * Next1 can hold all types of nodes, but it gets priority for lambda
 * transitions.  The deque is a circular array where future states are queued
 * in the head and present states are pushed in the tail.
 *
 * Searching for regular expressions in TDE is not very fast.  The worst
 * case search, .*x, where x is any word or letter, is probably the slowest
 * of any implementation with a regular expression search function.  However,
 * TDE can handle a large regular expression subset.
 *
 * In version 3.1, I added BOW and EOW (beginning of word and end of word.)
 *
 * In version 3.2, the macro letters were moved to letters.h per Byrial Jensen.
 * We also use the bj_isxxx( ) functions to test whether a letter falls into
 * a predefined macro class.
 *
 * New editor name:  TDE, the Thomson-Davis Editor.
 * Author:           Frank Davis
 * Date:             June 5, 1991, version 1.0
 * Date:             July 29, 1991, version 1.1
 * Date:             October 5, 1991, version 1.2
 * Date:             January 20, 1992, version 1.3
 * Date:             February 17, 1992, version 1.4
 * Date:             April 1, 1992, version 1.5
 * Date:             June 5, 1992, version 2.0
 * Date:             October 31, 1992, version 2.1
 * Date:             April 1, 1993, version 2.2
 * Date:             June 5, 1993, version 3.0
 * Date:             August 29, 1993, version 3.1
 * Date:             November 13, 1993, version 3.2
 * Date:             June 5, 1994, version 4.0
 *
 * This code is released into the public domain, Frank Davis.
 * You may use and distribute it freely.
 */

#include "tdestr.h"
#include "common.h"
#include "tdefunc.h"
#include "define.h"

#ifndef min
 #define min(a,b)       (((a) < (b)) ? (a) : (b))
#endif

/*
 * types of nodes in the NFA.
 *
 * let's use the node naming convention in Ken Thompson's reg ex paper.
 */
#define CNODE           0
#define NNODE           1

#define SCAN            -1

/*
 * types of terminals in NNODEs.
 *
 * starting with simple ASCII terminals, it's easy to build in other types of
 *  terminals, e.g. wild chars, BOL, EOL, and character classes.  actually,
 *  we can easily build ANSI C ctype library function NNODEs.
 */
#define STRAIGHT_ASCII  0
#define IGNORE_ASCII    1
#define WILD            2
#define BOL             3
#define EOL             4
#define CLASS           5
#define NOTCLASS        6
#define WHITESPACE      7
#define ALPHA           8
#define UPPER           9
#define LOWER           10
#define ALPHANUM        11
#define DECIMAL         12
#define HEX             13
#define BOW             14
#define EOW             15


/*
 * types of terminals in CNODEs.
 */
#define START           0
#define ACCEPT          1
#define OR_NODE         2
#define JUXTA           3
#define CLOSURE         4
#define ZERO_OR_ONE     5


int  lookahead;
int  regx_rc;
int  reg_len;
int  parser_state;
char class_bits[32];
int  bit[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
int  c1;
int  c2;
int  ttype;
int  regx_error_line;

NFA_TYPE  *nfa_pointer;

int  nfa_status;
int  search_len;
int  search_col;
text_ptr search_string;

int  queued_states[REGX_SIZE+2];
int  deque[REGX_SIZE*2];
int  dq_head;
int  dq_tail;
int  stacked_node_count;
int  reset_count;


/*
 * Name:    find_regx
 * Purpose: To set up and find a regular expression.
 * Date:    June 5, 1993
 * Passed:  window:  pointer to current window
 * Notes:   Keep the regular expression until changed.
 */
int  find_regx( TDE_WIN *window )
{
int  direction;
int  new_string;
long found_line;
long bin_offset;
line_list_ptr ll;
register TDE_WIN *win;  /* put window pointer in a register */
int  rc;
int  old_rcol;
int  rcol;
char answer[MAX_COLS+2];
char temp[MAX_COLS+2];

   switch (g_status.command) {
      case FindRegX :
         new_string = TRUE;
         direction  = FORWARD;
         break;
      case RepeatFindRegX :
         new_string =  regx.search_defined != OK ? TRUE : FALSE;
         direction  = FORWARD;
         break;
      case RepeatFindRegXBackward :
         new_string =  regx.search_defined != OK ? TRUE : FALSE;
         direction  = BACKWARD;
         break;
      default :
         new_string = 0;
         direction  = 0;
         assert( FALSE );
         break;
   }

   win = window;
   entab_linebuff( );
   if (un_copy_line( win->ll, win, TRUE ) == ERROR)
      return( ERROR );

   regx_error_line = win->bottom_line;

   /*
    * get search text, using previous as default
    */
   rc = OK;
   if (new_string == TRUE) {
      *answer = '\0';
      if (regx.search_defined == OK) {

         assert( strlen( (char *)regx.pattern ) < (size_t)g_display.ncols );

         strcpy( answer, (char *)regx.pattern );
      }

      /*
       * string to find:
       */
      if (get_name( reg1, win->bottom_line, answer,
                 g_display.message_color ) != OK  ||  *answer == '\0')
         return( ERROR );
      regx.search_defined = OK;

      assert( strlen( answer ) < (size_t)g_display.ncols );

      strcpy( (char *)regx.pattern, answer );
      rc = build_nfa( );
      if (rc == ERROR)
         regx.search_defined = ERROR;
   }

   if (regx.search_defined == OK) {
      old_rcol = win->rcol;
      if (mode.inflate_tabs)
         win->rcol = entab_adjust_rcol( win->ll->line, win->ll->len, win->rcol);
      update_line( win );
      show_search_message( SEARCHING, g_display.diag_color );
      bin_offset = win->bin_offset;
      if (direction == FORWARD) {
         if ((ll = forward_regx_search( win, &found_line, &rcol )) != NULL) {
            if (g_status.wrapped && g_status.macro_executing)
               rc = ask_wrap_replace( win, &new_string );
            if (rc == OK)
               find_adjust( win, ll, found_line, rcol );
            else
               win->bin_offset = bin_offset;
         }
      } else {
         if ((ll = backward_regx_search( win, &found_line, &rcol )) != NULL) {
            if (g_status.wrapped && g_status.macro_executing)
               rc = ask_wrap_replace( win, &new_string );
            if (rc == OK)
               find_adjust( win, ll, found_line, rcol );
            else
               win->bin_offset = bin_offset;
         }
      }
      if (g_status.wrapped)
         show_search_message( WRAPPED, g_display.diag_color );
      else {
         if (nfa_status == OK)
            show_search_message( CLR_SEARCH, g_display.mode_color );
         else
            g_status.wrapped = TRUE;
      }
      if (ll == NULL) {
         /*
          * string not found
          */
         if (mode.inflate_tabs)
            win->rcol = old_rcol;
         combine_strings( temp, find5a, (char *)regx.pattern, find5b );
         error( WARNING, win->bottom_line, temp );
         rc = ERROR;
      }
      show_curl_line( win );
      make_ruler( win );
      show_ruler( win );
   } else {
      /*
       * find pattern not defined
       */
      error( WARNING, win->bottom_line, find6 );
      rc = ERROR;
   }
   return( rc );
}


/*
 * Name:    forward_regx_search
 * Purpose: search forward for regular expression
 * Date:    June 5, 1993
 * Passed:  window:  pointer to current window
 *          rline:   pointer to real line counter
 *          rcol:    pointer to real column variable
 * Returns: position in file if found or NULL if not found
 * Notes:   Start searching from cursor position to end of file.  If we hit
 *           end of file without a match, start searching from the beginning
 *           of file to cursor position.  (do wrapped searches)
 */
line_list_ptr forward_regx_search( TDE_WIN *window, long *rline, int *rcol )
{
register int len;
int  i;
int  end;
register TDE_WIN *win;  /* put window pointer in a register */
line_list_ptr ll;

   /*
    * if cursor is beyond end of line then start at end of line
    */
   win  = window;
   *rline = win->rline;
   i = win->rcol + 1;
   ll = win->ll;
   len = ll->len;
   if (i > len  &&  len != EOF) {
      ll = ll->next;
      ++*rline;
      i = 0;
   }
   if (i < 0)
      i = 0;

   *rcol = i;
   ll = regx_search_forward( ll, rline, rcol );

   if (ll == NULL) {

      end = 0;
      if (win->ll->next != NULL) {
         end = win->ll->next->len;
         win->ll->next->len = EOF;
      }

      /*
       * haven't found pattern yet - now search from beginning of file
       */
      g_status.wrapped = TRUE;

      *rcol = 0;
      *rline = 1L;
      ll = regx_search_forward( win->file_info->line_list, rline, rcol );

      if (ll == win->ll  &&  *rcol >= win->rcol)
         ll = NULL;

      if (win->ll->next != NULL)
         win->ll->next->len = end;
   }
   flush_keyboard( );

   if (ll != NULL)
      bin_offset_adjust( win, *rline );
   return( ll );
}


/*
 * Name:    regx_search_forward
 * Purpose: search forward for pattern using nfa
 * Date:    June 5, 1993
 * Passed:  ll:     pointer to current node in linked list
 *          rline:  pointer to real line counter
 *          col:    column in ll->line to begin search
 * Returns: position in file if found or NULL if not found
 * Notes:   run the nfa machine on each character in each line
 */
line_list_ptr regx_search_forward( line_list_ptr ll, long *rline, int  *col )
{
   if (ll->len == EOF)
      return( NULL );
   else {
      switch (g_status.command) {
         case DefineRegXGrep  :
         case RepeatGrep :
            nfa_pointer = &sas_nfa;
            stacked_node_count = sas_regx.node_count;
            break;
         case FindRegX  :
         case RepeatFindRegX :
            nfa_pointer = &nfa;
            stacked_node_count = regx.node_count;
            break;
         default :
            assert( FALSE );
            break;
      }
      nfa_status = OK;
      search_string = ll->line;
      search_len = ll->len;
      search_col = *col;
      reset_count = stacked_node_count * sizeof(int);
      for (; !g_status.control_break;) {
         for (; search_col <= search_len; search_col++) {
            if (nfa_match( ) != ERROR) {
               *col = search_col;
               return( ll );
            }
         }
         ++*rline;
         ll = ll->next;
         search_string = ll->line;
         if (ll->len == EOF)
            return( NULL );
         search_len = ll->len;
         search_col = 0;
      }
      return( NULL );
   }
}


/*
 * Name:    backward_regx_search
 * Purpose: search backward for regular expression
 * Date:    June 5, 1993
 * Passed:  window:  pointer to current window
 *          rline:   pointer to real line counter
 *          rcol:    pointer to real column variable
 * Returns: position in file if found or NULL if not found
 * Notes:   Start searching from cursor position to end of file.  If we hit
 *           end of file without a match, start searching from the beginning
 *           of file to cursor position.  (do wrapped searches)
 */
line_list_ptr backward_regx_search( TDE_WIN *window, long *rline, int *rcol )
{
int  i;
int  len;
int  end;
register TDE_WIN *win;  /* put window pointer in a register */
line_list_ptr ll;

   win  = window;
   *rline = win->rline;

   /*
    * see if cursor is on EOF line.  if so, move search start to previous node.
    */
   if (win->ll->len != EOF) {
      ll = win->ll;
      i  = win->rcol - 1;
      len = ll->len;
      if (i >= len)
         i = len - 1;
   } else {
      ll = win->ll->prev;
      --*rline;
      i = 0;
      if (ll != NULL)
         i = ll->len - 1;
   }
   *rcol = i;
   ll = regx_search_backward( ll, rline, rcol );

   if (ll == NULL  &&  win->rline <= win->file_info->length) {

      end = 0;
      if (win->ll->prev != NULL) {
         end = win->ll->prev->len;
         win->ll->prev->len = EOF;
      }

      /*
       * haven't found pattern yet - now search from end of file
       */
      g_status.wrapped = TRUE;
      ll = win->file_info->line_list_end;
      if (ll->prev != NULL)
         *rcol = ll->prev->len;
      else
        *rcol = 0;
      *rline = win->file_info->length;
      ll = regx_search_backward( ll->prev, rline, rcol );

      if (ll == win->ll  &&  *rcol <= win->rcol)
         ll = NULL;

      if (win->ll->prev != NULL)
         win->ll->prev->len = end;
   }
   flush_keyboard( );

   if (ll != NULL)
      bin_offset_adjust( win, *rline );
   return( ll );
}


/*
 * Name:    regx_search_backward
 * Purpose: search backward for pattern using regular expression
 * Date:    June 5, 1993
 * Passed:  ll:     pointer to current node in linked list
 *          rline:  pointer to real line counter
 *          col:    column in ll->line to begin search
 * Returns: position in file if found or NULL if not found
 * Notes:   run the nfa machine on each character in each line
 */
line_list_ptr regx_search_backward( line_list_ptr ll, long *rline, int *col )
{
   if (ll == NULL)
      return( NULL );
   if (ll->len == EOF)
      return( NULL );
   else {
      nfa_pointer = &nfa;
      stacked_node_count = regx.node_count;

      search_string = ll->line;
      search_len = ll->len;
      search_col = *col;
      reset_count = stacked_node_count * sizeof(int);
      while (!g_status.control_break) {
         for (; search_col >= 0; search_col--) {
            if (nfa_match( ) != ERROR) {
               *col = search_col;
               return( ll );
            }
         }
         --*rline;
         ll = ll->prev;
         if (ll == NULL)
            return( NULL );
         if (ll->len == EOF)
            return( NULL );
         search_string = ll->line;
         search_col = search_len = ll->len;
      }
      return( NULL );
   }
}


/******************************  NFA Recognizer  *********************/


/*
 * Name:    nfa_match
 * Purpose: try to recognize a pattern
 * Date:    June 5, 1993
 * Passed:  none, but modifies local global nfa recognizer.
 * Returns: length of recognized pattern if found or ERROR if not found.
 */
int  nfa_match( void )
{
register int c;
int  state;
int  j;
int  n1;
int  rc;

   j = search_col;
   c  =  j < search_len  ?  search_string[j]  :  EOF;
   state = nfa_pointer->next1[0];
   dq_head = 0;
   dq_tail = 0;
   memset( queued_states, 0, reset_count );
   put_dq( SCAN );

   while (state) {
      if (state == SCAN) {
         memset( queued_states, 0, reset_count );
         j++;
         put_dq( SCAN );
         c  =  j < search_len  ?  search_string[j]  :  EOF;
      } else if (nfa_pointer->node_type[state] == NNODE) {
         n1 = nfa_pointer->next1[state];
         rc = OK;
         switch (nfa_pointer->term_type[state]) {
            case STRAIGHT_ASCII :
               if (nfa_pointer->c[state] == c)
                  rc = put_dq( n1 );
               break;
            case IGNORE_ASCII   :
               if (nfa_pointer->c[state] == tolower( c ))
                  rc = put_dq( n1 );
               break;
            case WILD           :
               if (j < search_len)
                  rc = put_dq( n1 );
               break;
            case BOL            :
               if (j == 0) {
                  rc = put_dq( n1 );
                  if (deque[dq_tail] == SCAN)
                     --j;
               }
               break;
            case EOL            :
               if (j == search_len) {
                  rc = put_dq( n1 );
                  if (deque[dq_tail] == SCAN)
                     --j;
               }
               break;
            case CLASS          :
               if (c != EOF  &&  nfa_pointer->class[state][c/8] & bit[c%8])
                  rc = put_dq( n1 );
               break;
            case NOTCLASS       :
               if (c != EOF  &&  !(nfa_pointer->class[state][c/8] & bit[c%8]))
                  rc = put_dq( n1 );
               break;
            case WHITESPACE     :
               if (c == ' '  ||  c == '\t')
                  rc = put_dq( n1 );
               break;
            case ALPHA          :
               if (c != EOF  &&  bj_isalpha( c ))
                  rc = put_dq( n1 );
               break;
            case UPPER          :
               if (c != EOF  &&  bj_isupper( c ))
                  rc = put_dq( n1 );
               break;
            case LOWER          :
               if (c != EOF  &&  bj_islower( c ))
                  rc = put_dq( n1 );
               break;
            case ALPHANUM       :
               if (c != EOF  &&  bj_isalnum( c ))
                  rc = put_dq( n1 );
               break;
            case DECIMAL        :
               if (c != EOF  &&  bj_isdigit( c ))
                  rc = put_dq( n1 );
               break;
            case HEX            :
               if (c != EOF  &&  bj_isxdigit( c ))
                  rc = put_dq( n1 );
               break;
            case BOW            :
               if (c != EOF) {
                  if (!myiswhitespc( c )) {
                     if (j == 0) {
                        rc = put_dq( n1 );
                        if (deque[dq_tail] == SCAN)
                           --j;
                     } else if (j > 0) {
                        if (myiswhitespc( search_string[j-1] )) {
                           rc = put_dq( n1 );
                           if (deque[dq_tail] == SCAN)
                              --j;
                        }
                     }
                  }
               }
               break;
            case EOW            :
               if (c == EOF) {
                  if (search_len > 0) {
                     if (!myiswhitespc( search_string[search_len-1] )) {
                        rc = put_dq( n1 );
                        if (deque[dq_tail] == SCAN)
                           --j;
                     }
                  }
               } else {
                  if (myiswhitespc( c )  &&  j > 0) {
                     if (!myiswhitespc( search_string[j-1] )) {
                        rc = put_dq( n1 );
                        if (deque[dq_tail] == SCAN)
                           --j;
                     }
                  }
               }
               break;
            default             :
               assert( FALSE );
               break;
         }
         if (rc == ERROR)
            return( 0 );
      } else {
         assert( nfa_pointer->node_type[state] == CNODE );

         n1 = nfa_pointer->next1[state];
         if (push_dq( n1 ) == ERROR)
            return( 0 );

         if (n1 != nfa_pointer->next2[state])
            if (push_dq( nfa_pointer->next2[state] ) == ERROR)
               return( 0 );
      }
      if (dequeempty( )  ||  j > search_len)
         return( ERROR );
      state = pop_dq( );
   }
   return( j - search_col );
}


/*
 * Name:    put_dq
 * Purpose: queue future states
 * Date:    June 5, 1993
 * Passed:  v:  state to queue
 * Returns: none, but modifies local global deque variables.  future
 *           states are queued in the head.
 * Notes:   do not add any duplicate states to the head of the deque.
 */
int  put_dq( int v )
{
   if (queued_states[v+1] == 0) {
      ++queued_states[v+1];
      deque[dq_head] = v;
      dq_head--;
      if (dq_head < 0)
         dq_head = REGX_SIZE - 1;
      if (dq_tail == dq_head) {
         nfa_status = ERROR;
         show_search_message( NFA_GAVE_UP, g_display.diag_color );
         return( ERROR );
      }
   }
   return( OK );
}


/*
 * Name:    push_dq
 * Purpose: push state on deque
 * Date:    June 5, 1993
 * Passed:  v:  state to push
 * Returns: whether stack is OK or if stack overflowed - ERROR.  present
 *           states are stacked.
 */
int  push_dq( int v )
{
   ++dq_tail;
   if (dq_tail == dq_head) {
      nfa_status = ERROR;
      show_search_message( NFA_GAVE_UP, g_display.diag_color );
      return( ERROR );
   } else {
      if (dq_tail > REGX_SIZE - 1)
         dq_tail = 0;
      deque[dq_tail] = v;
      return( OK );
   }
}


/*
 * Name:    pop_dq
 * Purpose: pop next state on dq
 * Date:    June 5, 1993
 * Passed:  none, but looks at local global nfa recognizer
 * Returns: state on top of deque and decrements stack pointer
 */
int  pop_dq( void )
{
register int v;

   v = deque[dq_tail];
   dq_tail--;
   if (dq_tail < 0)
      dq_tail = REGX_SIZE - 1;
   return( v );
}


/*
 * Name:    dequeempty
 * Purpose: any room on dq?
 * Date:    June 5, 1993
 * Passed:  none, but looks at local global nfa recognizer
 * Returns: boolean, TRUE if empty.
 * Notes:   the deque is a circular array where future states are
 *           queued in the head and present states are pushed in the tail.
 */
int  dequeempty( void )
{
   if (dq_tail > dq_head)
      return( dq_tail - dq_head == 1 );
   else
      return( dq_tail + REGX_SIZE - dq_head == 1 );
}


/***************************  NFA Compiler  **************************/


/*
 * Name:    build_nfa
 * Purpose: initialize nfa and call the compiler
 * Date:    June 5, 1993
 * Passed:  none, but looks at local global pattern.
 * Returns: whether or not an ERROR occured
 * Notes:   sets up the initial variables for the compiler.  builds
 *          initial and acceptor states for the nfa after compiler finishes.
 */
int  build_nfa( void )
{
   regx_rc = OK;

   init_nfa( );
   lookahead = 0;
   reg_len   = strlen( (char *)regx.pattern );
   parser_state = 1;

   nfa.next1[0] = expression( );
   if (regx_rc == OK) {
      emit_cnode( 0, START, nfa.next1[0], nfa.next1[0] );
      emit_cnode( parser_state, ACCEPT, 0, 0 );
      regx.node_count = parser_state + 2;
   }

   if (g_status.command == DefineRegXGrep) {
      memcpy( &sas_nfa, &nfa, sizeof(NFA_TYPE) );
      memcpy( &sas_regx, &regx, sizeof(REGX_INFO) );
   }
   return( regx_rc );
}


/*
 * Name:    expression
 * Purpose: break reg ex into terms and expressions
 * Date:    June 5, 1993
 * Passed:  none, but looks at local global pattern.
 * Returns: none
 * Notes:   because recursion can go fairly deep, almost all variables
 *          were made local global.  no need to save most of this stuff
 *          on the stack.
 */
int  expression( void )
{
int t1;
int t2;
int r;

   t1 = term( );
   r = t1;
   if (regx.pattern[lookahead] == '|') {
      lookahead++;
      parser_state++;
      r = t2 = parser_state;
      parser_state++;
      emit_cnode( t2, OR_NODE, expression( ), t1 );
      emit_cnode( t2-1, JUXTA, parser_state, parser_state );

      /*
       * the OR_NODE seems to need a path from the previous node
       */
      if (nfa.node_type[t1] == CNODE)
         t1 = min( nfa.next1[t1], nfa.next2[t1] );
      nfa.next1[t1-1] = t2;
      if (nfa.node_type[t1-1] == NNODE)
         nfa.next2[t1-1] = t2;
   }
   return( r );
}


/*
 * Name:    term
 * Purpose: break reg ex into factors and terms
 * Date:    June 5, 1993
 * Passed:  none, but looks at local global pattern.
 * Returns: none
 */
int  term( void )
{
int r;

   r = factor( );
   if (regx.pattern[lookahead] == '('  ||  letter( regx.pattern[lookahead] ))
      term( );
   else if (Kleene_star( regx.pattern[lookahead] ))
      regx_error( reg11 );
   return( r );
}


/*
 * Name:    factor
 * Purpose: add NNODEs and CNODEs to local global nfa
 * Date:    June 5, 1993
 * Passed:  none
 * Returns: none, but modifies local global nfa.
 * Notes:   this function does most of the compiling.  it recognizes all
 *          NNODEs, predefined macros, escape characters, and operators.
 */
int  factor( void )
{
int t1;
int t2;
int r;
int c;
int paren;

   t2 = t1 = parser_state;
   paren = FALSE;
   c = regx.pattern[lookahead];
   if (c == '(') {
      lookahead++;
      t2 = expression( );
      if (regx.pattern[lookahead] == ')') {
         lookahead++;
         paren = TRUE;
      } else

         /*
          * unmatched open parens
          */
         regx_error( reg2 );
   } else if (letter( c )) {
      paren = FALSE;
      switch (c) {
         case ']' :

            /*
             * unmatched close bracket
             */
            regx_error( reg9 );
            break;
         case '.' :
            ttype = WILD;
            break;
         case ',' :
            ttype = WHITESPACE;
            break;
         case '^' :
            ttype = BOL;
            break;
         case '$' :
            ttype = EOL;
            break;
         case '<' :
            ttype = BOW;
            break;
         case '>' :
            ttype = EOW;
            break;
         case '\\' :
            ++lookahead;
            ttype =  mode.search_case == IGNORE ? IGNORE_ASCII : STRAIGHT_ASCII;
            if (lookahead != '\0') {
               if (regx.pattern[lookahead] != ':')
                  c = escape_char( regx.pattern[lookahead] );

               /*
                * predefined unix-like macros.
                */
               else {
                  c = regx.pattern[lookahead+1];
                  if (c != '\0') {
                     switch (c) {
                        case REG_ALPHANUM :
                           ++lookahead;
                           ttype = ALPHANUM;
                           break;
                        case REG_WHITESPACE :
                           ++lookahead;
                           ttype = WHITESPACE;
                           break;
                        case REG_ALPHA :
                           ++lookahead;
                           ttype = ALPHA;
                           break;
                        case REG_DECIMAL :
                           ++lookahead;
                           ttype = DECIMAL;
                           break;
                        case REG_HEX :
                           ++lookahead;
                           ttype = HEX;
                           break;
                        case REG_LOWER :
                           ++lookahead;
                           ttype = LOWER;
                           break;
                        case REG_UPPER :
                           ++lookahead;
                           ttype = UPPER;
                           break;
                        default :
                           c = escape_char( regx.pattern[lookahead] );
                           break;
                     }
                  } else
                     c = escape_char( regx.pattern[lookahead] );
               }
            } else
               regx_error( reg4 );
            break;
         case '[' :
            memset( class_bits, 0, sizeof(char) * 32 );
            ++lookahead;
            if (lookahead != '\0') {
               c = regx.pattern[lookahead];
               if (c == '^') {
                  ++lookahead;
                  ttype = NOTCLASS;
               } else
                  ttype = CLASS;

               c1 = regx.pattern[lookahead];
               do {
                  class_bits[c1/8]  |=  bit[c1%8];
                  if (c1 != '\0')
                     ++lookahead;
                  if (regx.pattern[lookahead] == '-') {
                     ++lookahead;
                     c2 = regx.pattern[lookahead];
                     if (c2 != '\0') {

                        /*
                         * just in case the hi for the range is given first,
                         *  switch c1 and c2,  e.g. [9-0].
                         */
                        if (c2 < c1) {
                           c  = c2;
                           c2 = c1;
                           c1 = c;
                        }

                        for (c=c1; c <= c2; c++)
                           class_bits[c/8] |= bit[c%8];

                        if (regx.pattern[lookahead] != '\0')
                           ++lookahead;
                     } else
                        regx_error( reg10 );
                  }
                  c1 = regx.pattern[lookahead];
               } while (c1  != '\0'  &&  c1 != ']');

               if (c1 == '\0')
                  regx_error( reg5 );
            } else
               regx_error( reg6 );
            break;
         default :
            if (mode.search_case == IGNORE) {
               c = tolower( c );
               ttype = IGNORE_ASCII;
            } else
               ttype = STRAIGHT_ASCII;
      }
      emit_nnode( parser_state, ttype, c, parser_state+1, parser_state+1 );
      if (ttype == CLASS  ||  ttype == NOTCLASS) {
         nfa.class[parser_state] = calloc( 32, sizeof(char) );
         if (nfa.class[parser_state] != NULL)
            memcpy( nfa.class[parser_state], class_bits, sizeof( char )*32 );
         else
            regx_error( reg7 );
      }
      t2 = parser_state;
      lookahead++;
      parser_state++;
   } else if (c == '\0')
      return( 0 );
   else {
      if (c == '*'  ||  c == '+'  ||  c == '?')
         regx_error( reg8 );
      else if (c  ==  ')')
         regx_error( reg3 );
      else
         regx_error( reg2 );
   }

   c = regx.pattern[lookahead];
   switch (c) {
      case '*' :
         emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
         r = parser_state;
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = parser_state;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = parser_state;
         lookahead++;
         parser_state++;
         paren = FALSE;
         break;
      case '+' :
         if (paren == TRUE) {
            emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
            parser_state++;
         }

         emit_cnode( parser_state, JUXTA, t2, t2 );
         r = parser_state;
         parser_state++;

         if (paren == FALSE) {
            nfa.next1[t2] = parser_state;
            if (nfa.node_type[t2] == NNODE)
               nfa.next2[t2] = parser_state;
         }

         emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = r;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = r;
         parser_state++;
         lookahead++;
         paren = FALSE;
         break;
      case '?' :
         emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
         parser_state++;
         r = parser_state;
         emit_cnode( parser_state, ZERO_OR_ONE, parser_state+1, t2 );
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = parser_state;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = parser_state;
         parser_state++;
         lookahead++;
         paren = FALSE;
         break;
      default  :
         r = t2;
         break;
   }

   /*
    * close parens seem to need a JUXTA node to gather all reg ex's
    *  to a common point.
    */
   if (paren) {
      emit_cnode( parser_state, JUXTA, parser_state+1, parser_state+1 );
      parser_state++;
   }
   return( r );
}


/*
 * Name:    escape_char
 * Purpose: recognize escape and C escape sequences
 * Date:    June 5, 1993
 * Passed:  let:  letter to escape
 * Returns: escaped letter
 */
int  escape_char( int let )
{
   switch (let) {
      case '0' :
         let = 0x00;
         break;
      case 'a' :
         let = 0x07;
         break;
      case 'b' :
         let = 0x08;
         break;
      case 'n' :
         let = 0x0a;
         break;
      case 'r' :
         let = 0x0d;
         break;
      case 't' :
         let = 0x09;
         break;
      default  :
         break;
   }
   return( let );
}


/*
 * Name:    emit_cnode
 * Purpose: add a null node to our pattern matching machine
 * Date:    June 5, 1993
 * Passed:  index:  current node in nfa
 *          ttype:  terminal type - CLOSURE, OR, JUXTA, etc...
 *          n1:     pointer to next state, path for lambda transitions
 *          n2:     pointer to other next state, usually a NNODE
 * Returns: none, but modifies local global nfa.
 */
void emit_cnode( int index, int ttype, int n1, int n2 )
{
   assert( index >= 0);
   assert( index < REGX_SIZE );

   nfa.node_type[index] = CNODE;
   nfa.term_type[index] = ttype;
   nfa.c[index] = 0;
   nfa.next1[index] = n1;
   nfa.next2[index] = n2;
}


/*
 * Name:    emit_nnode
 * Purpose: add a to our pattern matching machine
 * Date:    June 5, 1993
 * Passed:  index:  current node in nfa
 *          ttype:  terminal type - EOL, ASCII, etc...
 *          c:      letter this node recognizes
 *          n1:     pointer to next state
 *          n2:     pointer to other next state, which can be same as n1
 * Returns: none, but modifies local global nfa.
 */
void emit_nnode( int index, int ttype, int c, int n1, int n2 )
{
   assert( index >= 0);
   assert( index < REGX_SIZE );

   nfa.node_type[index] = NNODE;
   nfa.term_type[index] = ttype;
   nfa.c[index] = c;
   nfa.next1[index] = n1;
   nfa.next2[index] = n2;
}


/*
 * Name:    init_nfa
 * Purpose: set local global nfa to NULL state
 * Date:    June 5, 1993
 * Passed:  none
 */
void init_nfa( void )
{
int i;

   for (i=0; i < REGX_SIZE; i++) {
      nfa.node_type[i] = NNODE;
      nfa.term_type[i] = 0;
      nfa.c[i] = 0;
      nfa.next1[i] = 0;
      nfa.next2[i] = 0;
      if (nfa.class[i] != NULL)
         free( nfa.class[i] );
      nfa.class[i] = NULL;
   }
}


/*
 * Name:    regx_error
 * Purpose: display reg ex error message and set reg ex error code
 * Date:    June 5, 1993
 * Passed:  line:  line to display error
 * Returns: none, but sets reg ex return code to error.
 */
void regx_error( char *line )
{
   error( WARNING, regx_error_line, line );
   regx_rc = ERROR;
}


/*
 * Name:    separator
 * Purpose: determine if character is a reg ex separator
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: whether or not 'let' is a separator
 */
int  separator( int let )
{
   return( let == 0  ||  let == ')'  ||  let == '|' );
}


/*
 * Name:    Kleene_star
 * Purpose: determine if character is a reg ex operator
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: whether or not 'let' is a letter
 */
int  Kleene_star( int let )
{
   return( let == '*'  ||  let == '+'  ||  let == '?' );
}


/*
 * Name:    letter
 * Purpose: determine if character is a recognized reg ex character
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: whether or not 'let' is a letter.
 */
int  letter( int let )
{
   return( !separator( let )  &&  !Kleene_star( let ) );
}