/*     Regular expression library source code
       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       vv. 1.01            M.I.Barlow 3-10-97
 */
/*{{{  includes */

#include <ctype.h>
#include <limits.h>
#include <string.h>
#ifdef __TURBOC__
#include <alloc.h>
#else
#include <malloc.h>
#endif
#include "rxplib.h"

/*}}}   */
/*{{{  defines */

/*{{{  booleans */

/* Truth values; also used as the settings of the DEBUG_* switches. */
#define TRUE       1
#define FALSE      0

/*}}}   */
/*{{{  debugging etc. */

/* Compile-time debug switches. Each one is tested with #if, so a FALSE
   switch removes the corresponding code from the build entirely.        */
#define DEBUG_RXPARSE  FALSE  /* dump structs during parsing (cryptic)  */
#define DEBUG_RXSETS   FALSE  /* dump a bitmap of sets                  */
#define DEBUG_RXTREE   FALSE  /* dump schematic tree after parsing      */
#define DEBUG_CORRUPT  FALSE  /* explain Corrupt return from a match    */
#define DEBUG_DESTROY  FALSE  /* explain Corrupt return from a destroy  */
#define DEBUG_RESULT   FALSE  /* dump match result structure            */
#define DEBUG_FREE     FALSE  /* report locus of "double free" errors   */
			      /* allowing us to find them in this code. */

#define PRINTF printf
			/* Output routine for all debug dumps; redefine it
			   to redirect debug data in windowing
			   environments. If all the above are FALSE
			   it is never called.
			   NOTE(review): no <stdio.h> include is visible in
			   this file - confirm one is in scope before
			   enabling any DEBUG_* switch.
			*/

#define MALLOC malloc
#define FREE free
			/* Allocator hooks used for every token and set
			   bitmap; change them here to use another heap. */

/*}}}   */
/*{{{  tree node tokens */

/* Values stored in RxpToken.tok. Several of them double as the pattern
   character that introduces the construct (ANY, CCL, CLOSURE, OR_SYM,
   PAREN), so the parser compares pattern text against them directly.   */
#define ROOT      'R'   /* head node of a finished pattern; carries min_chars */
#define ANY       '.'   /* any single character                               */
#define LITCHAR   'L'   /* literal character, exact comparison                */
#define LITCASE   'I'   /* literal character, compared after case folding     */
#define CCL       '['   /* character set (bitmap)                             */
#define NCCL      '!'   /* negated character set (bitmap)                     */
#define CLOSURE   '*'   /* repetition; linked in front of its subject         */
#define OR_SYM    '|'   /* head of one alternative; or_patn is the next one   */
#define PAREN     '('   /* group that is not captured                         */
#define SUBST_IN  'S'   /* back-reference, exact comparison                   */
#define SUBST_CI  'C'   /* back-reference, case-insensitive comparison        */
/* CAPTURE1..CAPTURE9 must stay consecutive: the parser computes
   CAPTURE1 + n and the matcher computes tok - (CAPTURE1 - 1).          */
#define CAPTURE1  '1'
#define CAPTURE2  '2'
#define CAPTURE3  '3'
#define CAPTURE4  '4'
#define CAPTURE5  '5'
#define CAPTURE6  '6'
#define CAPTURE7  '7'
#define CAPTURE8  '8'
#define CAPTURE9  '9'
/* CASEOFF/CASEON are the characters following ESCAPE that switch case
   sensitivity. CASEOFF is also used as a temporary tok value inside the
   parser for a token that is freed instead of being linked.            */
#define CASEOFF   '<'
#define CASEON    '>'
#define ANCHOR    '^'   /* start-of-line anchor node                          */
#define TAIL_ANC  '$'   /* end-of-line anchor node                            */

/* tok value given to a freshly allocated token by init_token. */
#define UNINITIALISED (-1)

/*}}}   */
/*{{{  other characters */

/* Pattern characters that are not themselves node tokens. Note that
   NEGATE and CARET have the same value as ANCHOR, and DOLLAR the same
   value as TAIL_ANC; which meaning applies depends on where the
   character is met.                                                   */
#define NEGATE    '^'   /* first character inside a set: negate the set    */
#define CCLEND    ']'   /* closes a set                                     */
#define PLUS      '+'   /* closure: one or more                             */
#define QUERY     '?'   /* closure: zero or one                             */
#define ESCAPE    '\\'
#define RANGE     '{'   /* after ESCAPE: opens a repeat count               */
#define RANEND    '}'   /* after ESCAPE: closes a repeat count              */
#define COMMA     ','   /* separates min and max                            */
#define PAREND    ')'   /* closes a group                                   */
#define CARET     '^'   /* at the start of a top-level pattern: anchor      */
#define DOLLAR    '$'   /* at the end of an alternative: tail anchor        */

/*}}}   */
/*{{{  replacement modifiers */

/* Modifier characters for replacement masks. Their consumer is not in
   the part of the file reviewed here; the meanings are presumably the
   ones the names suggest - TODO confirm against the replacement code.  */
#define BLANKOUT '_'
#define TOUPPER  '+'
#define TOLOWER  '-'
#define INITCAP  '='

/*}}}   */
/*{{{  replacement justification */

/* Characters recognised after ESCAPE at the start of a replacement mask
   (when ALLOW_justify is enabled) to select a justification format.    */
#define JUSTIFY_LEFT    'L'
#define JUSTIFY_RIGHT   'R'
#define JUSTIFY_CENTRE  'C'

/*}}}   */

/*}}}   */
/*{{{  typedefs */

/*
 * One node of a compiled pattern.
 *
 * Nodes are chained through 'next'. Which member of 'data' is meaningful
 * depends on 'tok'; members not listed for a token are unused.
 */
typedef struct RxpToken {
    int tok;                        /* node kind, or UNINITIALISED          */
    union {
        char lchar;                 /* LITCHAR, LITCASE: the character      */
        char *bitmap;               /* CCL, NCCL: 32-byte membership bitmap */
        unsigned char limit[2];     /* CLOSURE: [0] = min, [1] = max
                                       (a max of 0 means "no upper limit")  */
        int subst_no;               /* SUBST_IN, SUBST_CI: capture number   */
        int min_chars;              /* ROOT: fewest characters a match uses */
        struct RxpToken *or_patn;   /* OR_SYM: next alternative;
                                       PAREN, CAPTUREn: the sub-pattern     */
    } data;
    struct RxpToken *next;          /* following node, or null              */
} RxpToken;

/*}}}   */
/*{{{  constants */

/*
 * Template for an empty match result: first member -1, every pointer in
 * the two pointer arrays null. Only the first element of each array is
 * written out; C initialises the remaining elements of a partially
 * initialised aggregate to null as well.
 */
static const RxpMatch nomatch = { -1, { (char *)0 }, { (char *)0 } };

/*}}}   */
/*{{{  globals */

/* Number of switchable features, and the mask with every feature bit set. */
#define N_FLAGS    (12)
#define ENABLE_ALL (0xFFF)

/* One entry per feature; non-zero means enabled. Everything starts
   enabled. rxp_configure maps bit i of its masks onto feature_flags[i]. */
static int feature_flags[N_FLAGS] = {1,1,1,1,1,1,1,1,1,1,1,1};

/* in the following, order is vitally important! */
/* (the array index of each flag is its bit number in the masks taken
   and returned by rxp_configure)                                       */

#define ALLOW_C_escapes      (feature_flags[0])
#define ALLOW_groups         (feature_flags[1])

#define ALLOW_wild_chars     (feature_flags[2])
#define ALLOW_sets           (feature_flags[3])
#define ALLOW_alternatives   (feature_flags[4])
#define ALLOW_closures       (feature_flags[5])
#define ALLOW_anchors        (feature_flags[6])
#define ALLOW_case_insens    (feature_flags[7])

#define ALLOW_modifiers      (feature_flags[8])
#define ALLOW_slices         (feature_flags[9])
#define ALLOW_justify        (feature_flags[10])
#define ALLOW_rep_alts       (feature_flags[11])

/*}}}   */
/*{{{  prototypes */

/* Forward declaration: part_match calls alt_match for groups, and
   alt_match reaches part_match again through hit_match.               */
static RxpError
alt_match( char       *lin,
	   RxpToken   *pat,
	   const char *start,
	   char      **eptr,
	   int        *which,
	   RxpMatch   *result );

/*}}}   */
/*{{{  functions */

/*{{{  debugging functions */

#if DEBUG_RXPARSE
  /*{{{  dump_rxp */
  
  /*
   * dump_rxp - print one token as "{address=tok,payload,next}".
   *
   * The payload printed depends on the token kind; a tok value that is
   * none of the known kinds prints "!!!ERROR!!!".
   * Pointers are cast to void * because that is the only pointer type
   * the %p conversion is defined for.
   */
  static void dump_rxp(RxpToken *tptr)
  {
      PRINTF("{%p=%c,",(void *)tptr,tptr->tok);
      switch (tptr->tok)
      {
	  case ANY:
	  case ANCHOR:
	  case TAIL_ANC:
	  case ROOT:
	      /* no payload worth showing */
	      break;
	  case LITCHAR:
	  case LITCASE:
	      PRINTF("\'%c\'",tptr->data.lchar);
	      break;
	  case CCL:
	  case NCCL:
	      PRINTF("%p",(void *)tptr->data.bitmap);
	      break;
	  case OR_SYM:
	  case PAREN:
	  case CAPTURE1:
	  case CAPTURE2:
	  case CAPTURE3:
	  case CAPTURE4:
	  case CAPTURE5:
	  case CAPTURE6:
	  case CAPTURE7:
	  case CAPTURE8:
	  case CAPTURE9:
	      PRINTF("%p",(void *)tptr->data.or_patn);
	      break;
	  case SUBST_IN:
	  case SUBST_CI:
	      PRINTF("-%d-",tptr->data.subst_no);
	      break;
	  case CLOSURE:
	      PRINTF("[%d,%d]",tptr->data.limit[0],tptr->data.limit[1]);
	      break;
	  default:
	      PRINTF("!!!ERROR!!!");
      }
      PRINTF(",%p}",(void *)tptr->next);
  }
  
  /*}}}   */
  /*{{{  dump_rxp_list */
  
  /*
   * dump_rxp_list - print a token chain between "<<" and ">>".
   *
   * At most 32 tokens are printed; if the chain is cut short by that
   * limit the line ends in "..." instead of ">>".
   */
  static void dump_rxp_list(RxpToken *tptr)
  {
      int budget;

      PRINTF("<<");
      for (budget = 32; (tptr != (RxpToken *)0) && (budget != 0); --budget)
      {
          dump_rxp(tptr);
          tptr = tptr->next;
      }
      if (budget == 0)
          PRINTF("...\n");
      else
          PRINTF(">>\n");
  }
  
  /*}}}   */
#endif

#if DEBUG_RXTREE
  /*{{{  dump_node */
  
  /*
   * dump_node - print a token in the compact "{tok,x}" form used by the
   * tree dump.
   *
   * Returns 1 when the token owns a sub-pattern (OR_SYM, PAREN or one
   * of CAPTURE1..CAPTURE9) and 0 for every other token.
   */
  static int dump_node(RxpToken *tptr)
  {
      const int kind = tptr->tok;

      PRINTF("{%c,",kind);

      /* branching nodes first: they are the only ones that return 1 */
      if ((kind == OR_SYM) || (kind == PAREN) ||
          ((kind >= CAPTURE1) && (kind <= CAPTURE9)))
      {
          PRINTF("/}");
          return (1);
      }

      if ((kind == ROOT) || (kind == ANY) ||
          (kind == ANCHOR) || (kind == TAIL_ANC))
          PRINTF(" }");
      else if ((kind == LITCHAR) || (kind == LITCASE))
          PRINTF("%c}",tptr->data.lchar);
      else if ((kind == CCL) || (kind == NCCL))
          PRINTF("]}");
      else if ((kind == SUBST_IN) || (kind == SUBST_CI))
          PRINTF("%d}",tptr->data.subst_no);
      else if (kind == CLOSURE)
          PRINTF(".}");
      else
          PRINTF("!}");

      return (0);
  }
  
  /*}}}   */
  /*{{{  dump_tree */
  /*
   * dump_tree - print a schematic of the token chain starting at tptr,
   * then recurse into every sub-pattern hanging off that chain.
   *
   * tptr   - first token of the chain (may be null: only "{nul}" is
   *          printed after the lead-in).
   * indent - number of node columns to leave in front of this chain.
   *
   * flag[] is static, so it is shared by all recursion levels: entry i
   * records whether the node drawn in column i owns a sub-pattern, and
   * a '|' connector is drawn in that column on deeper lines.
   * NOTE(review): flag[] has 256 entries and the column index is not
   * range-checked here.
   */
  static void dump_tree(RxpToken *tptr, int indent)
  {
      static char flag[256];
      RxpToken *tp = tptr;
      int i, branches = 0;
  
      /* connector line leading down to this chain */
      for (i = 0; i < indent; ++i)
	  if (flag[i])
	      PRINTF("  |  ");
	  else
	      PRINTF("     ");
      PRINTF("  |\n");
      /* same lead-in again, in front of the nodes themselves */
      for (i = 0; i < indent; ++i)
	  if (flag[i])
	      PRINTF("  |  ");
	  else
	      PRINTF("     ");
  
      /* print the chain, counting the nodes that own a sub-pattern */
      i = indent;
      while (tp)
      {
	  int br = dump_node(tp);
	  branches += br;
	  flag[i] = br;
	  tp = tp->next;
	  i++;
      }
      PRINTF("{nul}\n");
  
      /* visit the sub-patterns from the last branching node to the first */
      for (i = branches; i > 0; --i)
      {
	  RxpToken *last = tptr;
	  int j = -1;
	  int k = 0;
  
	  /* walk to the i-th branching node; j ends as its position in
	     the chain and last as the node itself                      */
	  tp = tptr;
	  while (k < i)
	  {
	      switch (tp->tok)
	      {
		  case OR_SYM:
		  case PAREN:
		  case CAPTURE1:
		  case CAPTURE2:
		  case CAPTURE3:
		  case CAPTURE4:
		  case CAPTURE5:
		  case CAPTURE6:
		  case CAPTURE7:
		  case CAPTURE8:
		  case CAPTURE9:
		    k++;
		    /* deliberate fall through to the break */
		  default:
		    break;
	      }
	      last = tp;
	      tp = tp->next;
	      j++;
	  }
	  dump_tree(last->data.or_patn,indent + j);
      }
  }
  /*}}}   */
#endif

#if DEBUG_FREE
  /*{{{  check for double 'free's */
  
  /*
   * do_free - checked replacement for FREE, used when DEBUG_FREE is on.
   *
   * locus - source line of the call, quoted in the diagnostic.
   * ptr   - address of the pointer to release.
   *
   * A null pointer is reported as a de-allocation error and nothing is
   * freed. Otherwise the memory is freed and the caller's pointer is
   * nulled, so that a second release of the same pointer gets reported.
   */
  static void do_free(int locus, void **ptr)
  {
      if (*ptr == (void *)0)
      {
          PRINTF("RXPLIB: memory de-allocation error at line %d\n",locus);
          return;
      }
      FREE(*ptr);
      *ptr = (void *)0;
  }
  /* ptr must be an lvalue: its address is handed to do_free */
  #define PHREE(loc,ptr)  do_free(loc,(void **)(&(ptr)))
  
  /*}}}   */
#else
  /*{{{  use 'free' normally */
  
  /* DEBUG_FREE off: the locus argument is ignored and ptr is simply freed */
  #define PHREE(loc,ptr)  FREE(ptr)
  
  /*}}}   */
#endif

/*}}}   */

/*{{{  rxp_configure */

/*
 * rxp_configure - switch library features on and off.
 *
 * enable_mask  - bits of the features to enable.
 * disable_mask - bits of the features to disable.
 *
 * Bit i of either mask refers to feature_flags[i]. Disabling is applied
 * before enabling, so a bit present in both masks ends up enabled. Bits
 * outside ENABLE_ALL are ignored.
 *
 * Returns the resulting feature mask. Calling it with both masks zero
 * therefore just reports the current settings.
 */
int
rxp_configure( int enable_mask,
               int disable_mask )
{
    int mask = 0;
    int bit;

    /* collect the current settings into a bit mask */
    for (bit = 0; bit < N_FLAGS; ++bit)
    {
        if (feature_flags[bit])
            mask |= (1 << bit);
    }

    mask = ((mask & ~disable_mask) | enable_mask) & ENABLE_ALL;

    /* and spread the new mask back over the flags */
    for (bit = 0; bit < N_FLAGS; ++bit)
        feature_flags[bit] = mask & (1 << bit);

    return (mask);
}

/*}}}   */

/*{{{  destroy_pat */

/*
 * destroy_pat - free a token chain and everything hanging off it.
 *
 * head - first token of the chain; null is accepted and frees nothing.
 *
 * Set tokens release their bitmap, branching tokens (OR_SYM, PAREN,
 * CAPTURE1..CAPTURE9) release their sub-pattern recursively, then the
 * token itself is freed.
 *
 * Returns 0 on success. Returns 1 as soon as a token of unknown kind is
 * met, here or in a sub-pattern; that token and the rest of its chain
 * are then left allocated.
 */
static int
destroy_pat(RxpToken *head)
{
    while (head != (RxpToken *)0)
    {
        RxpToken *doomed = head;
        const int kind = doomed->tok;

        if ((kind == CCL) || (kind == NCCL))
        {
            PHREE(__LINE__,doomed->data.bitmap);
        }
        else if ((kind == OR_SYM) || (kind == PAREN) ||
                 ((kind >= CAPTURE1) && (kind <= CAPTURE9)))
        {
            if (destroy_pat(doomed->data.or_patn) != 0)
            {
                #if DEBUG_DESTROY
                  PRINTF("Branch token was: '%c'\n",doomed->tok);
                #endif
                return (1);
            }
        }
        else if ((kind != CLOSURE)  && (kind != SUBST_IN) &&
                 (kind != SUBST_CI) && (kind != LITCHAR)  &&
                 (kind != LITCASE)  && (kind != ANY)      &&
                 (kind != ANCHOR)   && (kind != TAIL_ANC) &&
                 (kind != ROOT))
        {
            /* not a token this library creates */
            #if DEBUG_DESTROY
              PRINTF("Saw anomalous token: '%c'\n",doomed->tok);
            #endif
            return (1);
        }

        /* step on before the node goes away */
        head = doomed->next;
        PHREE(__LINE__,doomed);
    }

    return (0);
}

/*}}}   */
/*{{{  rxp_destroy_pattern */

/*
 * rxp_destroy_pattern - release a compiled pattern.
 *
 * patn - address of the pattern handle.
 *
 * Returns RxpNullArg if patn itself is null (previously this was
 *                    dereferenced unchecked),
 *         RxpOK      if the handle is null (nothing to do) or the
 *                    pattern was freed,
 *         RxpCorrupt if the handle does not point at a ROOT token (in
 *                    which case nothing is freed and the handle is left
 *                    alone) or if an unknown token was met while
 *                    freeing.
 * Whenever freeing was attempted the handle is reset to null.
 */
RxpError
rxp_destroy_pattern(RxpPatn *patn)
{
    RxpToken *root;
    RxpError  retval;

    if (!patn)
	return (RxpNullArg);

    root = (RxpToken *)(*patn);
    if (!root)
	return (RxpOK);
    if (root->tok != ROOT)
	return (RxpCorrupt);

    retval = destroy_pat(root) ? RxpCorrupt : RxpOK;
    *patn = (RxpPatn)0;
    return (retval);
}

/*}}}   */

/*{{{  c_escape */

/*
 * c_escape - translate the letter of a C-style escape into the control
 * character it stands for.
 *
 * in - the character following the backslash: b, t, r or n.
 *
 * Returns the control character, or '\0' for any other letter (callers
 * are expected to pass only the four letters above).
 */
static char
c_escape ( char in )
{
    if (in == 'b')
        return ('\b');
    if (in == 't')
        return ('\t');
    if (in == 'r')
        return ('\r');
    if (in == 'n')
        return ('\n');
    return ('\0'); /* should never happen */
}

/*}}}   */
/*{{{  hex_val */

/*
 * hex_val - numeric value of one hexadecimal digit.
 *
 * in - a character already known to be a hex digit (0-9, a-f, A-F).
 *
 * Returns 0..15. The result for any other character is meaningless.
 * The letter arithmetic assumes a character set in which the lower-case
 * letters sort after 'F' and a-f / A-F are contiguous.
 */
static int
hex_val ( char in )
{
    /* <ctype.h> functions are only defined for unsigned char values */
    if (isdigit((unsigned char)in))
	return (in - '0');
    if (in > 'F')
	return (in - ('a' - 10));
    else
	return (in - ('A' - 10));
}

/*}}}   */
/*{{{  scan_hex */

/*
 * scan_hex - read a two-digit hexadecimal number.
 *
 * in - points at the first of the two digits.
 *
 * Returns the value 0..255, or -1 if either of the two characters is
 * not a hex digit. The second character is only examined when the
 * first one is a hex digit, so the terminator of a short string is
 * never stepped over.
 */
static int
scan_hex ( char *in )
{
    int hex = 0;

    /* <ctype.h> functions are only defined for unsigned char values */
    if (!isxdigit((unsigned char)in[0]))
	return (-1);
    hex = hex_val(in[0]) << 4;
    if (!isxdigit((unsigned char)in[1]))
	return (-1);
    hex |= hex_val(in[1]);
    return (hex);
}

/*}}}   */
/*{{{  init_token */

/*
 * init_token - allocate a blank token.
 *
 * ntok - receives the new token, or null if allocation failed.
 *
 * The token is created with tok = UNINITIALISED and next = null; its
 * data union is left unset.
 *
 * Returns RxpOK or RxpAllocFail.
 */
static RxpError
init_token(RxpToken **ntok)
{
    RxpToken *fresh = (RxpToken *)MALLOC(sizeof(RxpToken));

    *ntok = fresh;
    if (fresh == (RxpToken *)0)
        return (RxpAllocFail);

    fresh->next = (RxpToken *)0;
    fresh->tok = UNINITIALISED;
    return (RxpOK);
}

/*}}}   */
/*{{{  set_bit */

/*
 * set_bit - mark character code c as a member of a set.
 *
 * c   - character code; only the low eight bits are used.
 * map - 32-byte bitmap, one bit per character code.
 *
 * Callers pass plain char values. Where char is signed, a character
 * with the top bit set arrives here as a huge unsigned number, which
 * used to index far outside the map; masking brings it back to the
 * 0..255 code of the character.
 */
static void
set_bit(unsigned c, char *map)
{
    c &= 0xFFu;
    map[c >> 3] = (char)((unsigned char)map[c >> 3] | (1u << (c & 0x07u)));
}

/*}}}   */
/*{{{  create_pat */

/*
 * abandon_pat - common failure exit of create_pat.
 *
 * locus - source line of the failure (only used by the DEBUG_FREE build).
 * arg   - create_pat's position argument; receives pars so that the
 *         caller can see where parsing stopped.
 * pars  - position in the pattern text at which the error was found.
 * ntok  - token under construction that has not been linked in yet, or
 *         null if there is none; it is freed here (a set bitmap it may
 *         own must be freed by the caller first).
 * head  - the partial pattern built so far; it is destroyed here.
 * err   - the error to report.
 *
 * Returns err, or RxpCorrupt if destroying the partial pattern failed.
 */
static RxpError
abandon_pat( int        locus,
             char     **arg,
             char      *pars,
             RxpToken  *ntok,
             RxpToken  *head,
             RxpError   err )
{
    *arg = pars;
    if (ntok)
    {
        PHREE(locus,ntok);
    }
    return (destroy_pat(head) ? RxpCorrupt : err);
}

/*
 * create_pat - compile one alternative of a pattern into a token chain.
 *
 * arg        - in: where to start in the pattern text. Out: on success
 *              the position of the delimiter that ended parsing; on
 *              failure (other than failing to allocate the very first
 *              token) the position at which the error was detected.
 * pat        - receives the chain, headed by an OR_SYM token, or null
 *              on failure.
 * toplevel   - non-zero for the outermost pattern (enables the leading
 *              anchor; every top-level alternative numbers its captures
 *              from 1).
 * delim      - character that ends this (sub-)pattern: '\0' at top
 *              level, PAREND inside a group.
 * capture_no - number of captures handed out so far; updated.
 * min_chars  - running count of the fewest characters a match of this
 *              pattern can consume; updated.
 *
 * A '|' ends the current alternative: the remainder is compiled by a
 * recursive call and hung on the head token's or_patn. A '(' compiles
 * the group recursively and stores it in a CAPTUREn (first nine groups)
 * or PAREN token. Closures (* + ? \{m,n\}) are linked in FRONT of the
 * token they repeat.
 *
 * Of the feature flags only ALLOW_anchors, ALLOW_case_insens and
 * ALLOW_C_escapes are consulted here.
 *
 * Returns RxpOK, RxpAllocFail, RxpBadSet, RxpBadEscape, RxpBadHex,
 * RxpBadRange, RxpBigRange, RxpBadCapture, RxpBadClosure, RxpBadGroup,
 * or RxpCorrupt if cleaning up after an error failed.
 *
 * Fixes relative to the previous version:
 *  - the '|' branch now reports the end position through *arg, so a
 *    group containing alternatives, e.g. "(a|b)c", is continued after
 *    its ')' instead of being re-parsed from inside the group;
 *  - last_min starts at 0 (it was read uninitialised when a pattern
 *    began with a closure);
 *  - the head token is no longer leaked if the anchor cannot be
 *    allocated;
 *  - a set ending in "-" at the end of the text ("[a-") no longer
 *    steps past the terminator;
 *  - a reversed range ("[z-a]") includes its low end;
 *  - characters are handled as unsigned char for <ctype.h> and for the
 *    set bitmap;
 *  - repeat counts cannot overflow while being read;
 *  - on failure *arg is left at the place of the error.
 */
static RxpError
create_pat( char      **arg,
            RxpToken  **pat,
            const int   toplevel,
            const char  delim,
            int        *capture_no,
            int        *min_chars   )
{
    /*{{{  declare */

    RxpToken *head, *tail, *ntok;
    RxpToken *prev = (RxpToken *)0;   /* token in front of tail */
    RxpError err;
    int case_sensitive = TRUE;
    int last_min = 0;                 /* min_chars share of the last token */
    char *pars = *arg;

    /*}}}   */

    /*{{{  initialise */

    *pat = (RxpToken *)0;
    if ((err = init_token(&head)) != RxpOK)
        return (err);
    head->tok = OR_SYM;
    head->data.or_patn = (RxpToken *)0;
    tail = head;

    /*}}}   */
    if (toplevel && ALLOW_anchors && (*pars == CARET))
    {
        /*{{{  plant an anchor */

        RxpToken *tmp;

        if ((err = init_token(&tmp)) != RxpOK)
            return (abandon_pat(__LINE__,arg,pars,(RxpToken *)0,head,err));
        tmp->tok = ANCHOR;
        head->next = tmp;
        tail = tmp;
        pars++;

        /*}}}   */
    }
    while (*pars && (*pars != delim))
    {
        /*{{{  initialise a new token structure */

        if ((err = init_token(&ntok)) != RxpOK)
            return (abandon_pat(__LINE__,arg,pars,(RxpToken *)0,head,err));

        /*}}}   */
        /*{{{  determine which token it should be */

        if (*pars == ANY)
        {
            /*{{{  just tag token */

            ntok->tok = ANY;
            last_min = 1;
            (*min_chars)++;

            /*}}}   */
        }
        else if (*pars == CCL)
        {
            /*{{{  detect negated form, allocate and fill the bitmap */

            char *map;
            char *start;
            unsigned c;

            if (*(pars + 1) == NEGATE)
            {
                ntok->tok = NCCL;
                pars += 2;
            }
            else
            {
                ntok->tok = CCL;
                pars++;
            }
            map = (char *)MALLOC(32);
            ntok->data.bitmap = map;
            if (map == (char *)0)
                return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpAllocFail));

            for (c = 0; c < 32; c++)
                map[c] = '\0';

            /* a ']' directly after the opening is a member, not the end */
            start = pars;
            if (*pars == CCLEND)
            {
                set_bit(CCLEND,map);
                pars++;
            }
            while (*pars && (*pars != CCLEND))
            {
                const unsigned char ch = (unsigned char)*pars;

                if (ch != '-')
                {
                    /* single member; add the other case if folding */
                    set_bit(ch,map);
                    if (!case_sensitive)
                    {
                        if (islower(ch))
                            set_bit(ch & 0xdf,map);
                        else if (isupper(ch))
                            set_bit(ch | 0x20,map);
                    }
                }
                else if ((pars == start) || (*(pars + 1) == CCLEND))
                {
                    /* '-' first or last in the set stands for itself */
                    set_bit('-',map);
                }
                else if (*(pars + 1) == '\0')
                {
                    /* text ends inside a range: unterminated set */
                    pars++;
                    break;
                }
                else
                {
                    /* range: both ends and everything between them */
                    unsigned lo = (unsigned char)*(pars - 1);
                    unsigned hi = (unsigned char)*(pars + 1);

                    if (hi < lo)
                    {
                        unsigned swap = lo;
                        lo = hi;
                        hi = swap;
                    }
                    for (c = lo; c <= hi; c++)
                        set_bit(c,map);
                    pars++;
                }
                pars++;
            }

            if (!*pars)
            {
                PHREE(__LINE__,ntok->data.bitmap);
                return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadSet));
            }

            #if DEBUG_RXSETS
              /*{{{  display it */
              PRINTF("Set: ");
              for (c = 0; c < 32; c++)
              {
                  PRINTF("%.2X",(unsigned char)map[c]);
              }
              PRINTF(" = {\n");
              for (c = 0; c < 256; c++)
              {
                  if ((map[c >> 3] & (1 << (c & 0x07))) != 0)
                     PRINTF("%c",(int)c);
              }
              PRINTF("\n}\n");
              /*}}}   */
            #endif

            last_min = 1;
            (*min_chars)++;

            /*}}}   */
        }
        else if (*pars == CLOSURE)
        {
            /*{{{  zero or more: the subject no longer counts */

            ntok->tok = CLOSURE;
            ntok->data.limit[0] = 0;
            ntok->data.limit[1] = 0;
            *min_chars -= last_min;
            last_min = 0;

            /*}}}   */
        }
        else if (*pars == PLUS)
        {
            /*{{{  one or more */

            ntok->tok = CLOSURE;
            ntok->data.limit[0] = 1;
            ntok->data.limit[1] = 0;

            /*}}}   */
        }
        else if (*pars == QUERY)
        {
            /*{{{  zero or one: the subject no longer counts */

            ntok->tok = CLOSURE;
            ntok->data.limit[0] = 0;
            ntok->data.limit[1] = 1;
            *min_chars -= last_min;
            last_min = 0;

            /*}}}   */
        }
        else if (*pars == OR_SYM)
        {
            /*{{{  start a new tree branch */

            int more_capts = 0;
            int new_min = 0;

            pars++;
            PHREE(__LINE__,ntok);

            #if DEBUG_RXPARSE
              PRINTF("------- new branch --------\n");
            #endif

            err = create_pat(&pars,
                             &(head->data.or_patn),
                             toplevel,
                             delim,
                             toplevel ? &more_capts : capture_no,
                             &new_min                             );
            /* the pattern needs only as many characters as its
               shortest alternative                              */
            if (new_min < *min_chars)
                *min_chars = new_min;
            if (err)
                return (abandon_pat(__LINE__,arg,pars,(RxpToken *)0,head,err));

            /* the branch parsed everything up to the delimiter */
            *arg = pars;
            *pat = head;
            return (err);

            /*}}}   */
        }
        else if (*pars == PAREN)
        {
            /*{{{  spawn a sub-tree */

            #if DEBUG_RXPARSE
              PRINTF("-------- sub tree ---------\n");
            #endif

            pars++;
            if (*capture_no < 9)
            {
                ntok->tok = CAPTURE1 + *capture_no;
                (*capture_no)++;
            }
            else
                ntok->tok = PAREN;

            /* on success pars is left on the closing parenthesis */
            last_min = 0;
            err = create_pat( &pars,
                              &(ntok->data.or_patn),
                              FALSE,
                              PAREND,
                              capture_no,
                              &last_min      );
            if (err)
                return (abandon_pat(__LINE__,arg,pars,ntok,head,err));
            *min_chars += last_min;

            #if DEBUG_RXPARSE
              PRINTF("------ end sub tree -------\n");
            #endif

            /*}}}   */
        }
        else if (*pars == DOLLAR)
        {
            /*{{{  it may be a tail anchor */

            if (ALLOW_anchors && ((*(pars + 1) == '\0')  ||
                                  (*(pars + 1) == OR_SYM)))
            {
                ntok->tok = TAIL_ANC;
            }
            else
            {
                last_min = 1;
                (*min_chars)++;
                ntok->tok = LITCHAR;
                ntok->data.lchar = DOLLAR;
            }

            /*}}}   */
        }
        else if (*pars == ESCAPE)
        {
            /*{{{  check if it's a special one */

            const char esc = *(pars + 1);

            if (!esc)
            {
                /* a backslash with nothing after it */
                return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadEscape));
            }
            else if (ALLOW_case_insens &&
                     ((esc == CASEOFF) || (esc == CASEON)))
            {
                /*{{{  switchover case-sensitivity */

                /* CASEOFF marks the token as "do not link" below */
                pars++;
                case_sensitive = (*pars == CASEON);
                ntok->tok = CASEOFF;

                /*}}}   */
            }
            else if (ALLOW_C_escapes && strchr("btrnxX",esc))
            {
                if ((esc & 0xDF) == 'X')
                {
                    /*{{{  it's a hex one */

                    int hex;

                    pars++;
                    hex = scan_hex(pars + 1);
                    if (hex < 0)
                        return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadHex));
                    pars += 2;
                    ntok->tok = LITCHAR;
                    ntok->data.lchar = (char)hex;
                    last_min = 1;
                    (*min_chars)++;

                    /*}}}   */
                }
                else
                {
                    /*{{{  it's a control character */

                    pars++;
                    ntok->tok = LITCHAR;
                    ntok->data.lchar = c_escape(*pars);
                    last_min = 1;
                    (*min_chars)++;

                    /*}}}   */
                }
            }
            else if (esc == RANGE)
            {
                /*{{{  scan it for its values: \{m\}  \{m,\}  \{m,n\} */

                int min = 0, max = 0;

                ntok->tok = CLOSURE;
                pars += 2;
                if (!isdigit((unsigned char)*pars))
                    return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadRange));
                while (isdigit((unsigned char)*pars))
                {
                    /* stop accumulating once over the limit */
                    if (min <= 255)
                        min = (10 * min) + (*pars - '0');
                    pars++;
                }
                if (min > 255)
                    return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBigRange));

                if ((*pars == ESCAPE) && (*(pars + 1) == RANEND))
                {
                    max = min;
                    pars++;
                }
                else if (*pars == COMMA)
                {
                    pars++;
                    if ((*pars == ESCAPE) && (*(pars + 1) == RANEND))
                    {
                        /* no upper limit given: max stays 0 */
                        pars++;
                    }
                    else if (!isdigit((unsigned char)*pars))
                    {
                        return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadRange));
                    }
                    else
                    {
                        while (isdigit((unsigned char)*pars))
                        {
                            if (max <= 255)
                                max = (10 * max) + (*pars - '0');
                            pars++;
                        }
                        if (max > 255)
                            return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBigRange));
                        if (max < min)
                            return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadRange));
                        if ((*pars == ESCAPE) && (*(pars + 1) == RANEND))
                            pars++;
                        else
                            return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadRange));
                    }
                }
                else
                {
                    return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadRange));
                }
                ntok->data.limit[0] = min;
                ntok->data.limit[1] = max;
                /* the subject was counted once already */
                *min_chars += last_min * (min - 1);
                last_min = 0;

                /*}}}   */
            }
            else if (isdigit((unsigned char)esc))
            {
                /*{{{  it's a capture substitution */

                pars++;
                ntok->tok = case_sensitive ? SUBST_IN : SUBST_CI;
                ntok->data.subst_no = *pars - '0';
                if (ntok->data.subst_no == 0)
                    return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadCapture));

                /*}}}   */
            }
            else
            {
                /*{{{  it's a literal */

                pars++;
                if (case_sensitive)
                {
                    ntok->tok = LITCHAR;
                    ntok->data.lchar = *pars;
                }
                else
                {
                    const unsigned char uc = (unsigned char)*pars;

                    ntok->tok = LITCASE;
                    ntok->data.lchar = islower(uc) ? (char)(uc & 0xdf) : *pars;
                }
                last_min = 1;
                (*min_chars)++;

                /*}}}   */
            }

            /*}}}   */
        }
        else
        {
            /*{{{  it's a literal */

            if (case_sensitive)
            {
                ntok->tok = LITCHAR;
                ntok->data.lchar = *pars;
            }
            else
            {
                const unsigned char uc = (unsigned char)*pars;

                ntok->tok = LITCASE;
                ntok->data.lchar = islower(uc) ? (char)(uc & 0xdf) : *pars;
            }
            last_min = 1;
            (*min_chars)++;

            /*}}}   */
        }

        /*}}}   */
        /*{{{  link in this token */

        if (ntok->tok == CASEOFF)
        {
            /* case switches only change parser state */
            PHREE(__LINE__,ntok);
        }
        else if (ntok->tok != CLOSURE)
        {
            tail->next = ntok;
            prev = tail;
            tail = ntok;
        }
        else if ((prev == (RxpToken *)0) || (prev->tok == CLOSURE))
        {
            /* nothing to repeat, or a closure of a closure */
            return (abandon_pat(__LINE__,arg,pars,ntok,head,RxpBadClosure));
        }
        else
        {
            /* slot the closure in between prev and its subject (tail) */
            prev->next = ntok;
            ntok->next = tail;
            prev = ntok;
        }

        /*}}}   */

        #if DEBUG_RXPARSE
          dump_rxp_list(head);
          PRINTF("%c=>(%p,%p,%p)\n",*pars,(void *)head,(void *)prev,(void *)tail);
        #endif

        pars++;
    }

    /* a group must be ended by its delimiter, not by the end of text */
    if (!(*pars) && !toplevel)
        return (abandon_pat(__LINE__,arg,pars,(RxpToken *)0,head,RxpBadGroup));

    *pat = head;
    *arg = pars;
    return (RxpOK);
}

/*}}}   */
/*{{{  rxp_create_pattern */

/*
 * rxp_create_pattern - compile a regular expression.
 *
 * text        - the expression, NUL-terminated.
 * patn        - receives the compiled pattern (a ROOT token that heads
 *               the tree and records the minimum match length), or
 *               null on any failure.
 * error_locus - set to null on success. It is initialised to text and
 *               passed to create_pat as its position argument, so on a
 *               parse failure it holds whatever position create_pat
 *               leaves there.
 *
 * Returns RxpNullArg if any argument is null,
 *         RxpFail    if text is empty,
 *         RxpOK      on success,
 *         otherwise the error reported by create_pat or init_token
 *         (RxpCorrupt if the tree could not be torn down after the ROOT
 *         allocation failed).
 *
 * Fix: when allocating the ROOT token failed and the tree was then
 * freed cleanly, the old code overwrote the allocation error with the
 * (zero) result of destroy_pat and so reported success with a null
 * pattern. The allocation error is now returned.
 */
RxpError
rxp_create_pattern( char       *text,
		    RxpPatn    *patn,
		    char      **error_locus )
{
    /*{{{  declare */

    RxpToken *trunk;
    RxpToken *root;
    RxpError retval;
    int capture_no = 0;
    int min_chars = 0;

    /*}}}   */

    /*{{{  check arguments */

    if (!text || !patn || !error_locus)
	return (RxpNullArg);

    /*}}}   */
    *error_locus = text;
    *patn = (RxpPatn)0;           /* stays null unless everything works */
    if (*text == '\0')
	return (RxpFail);

    retval = create_pat( error_locus,
			 &trunk,
			 TRUE,
			 '\0',
			 &capture_no,
			 &min_chars   );
    if (retval)
	return (retval);

    retval = init_token(&root);
    if (retval)
    {
	/* keep the allocation error unless the clean-up fails too */
	if (destroy_pat(trunk))
	    retval = RxpCorrupt;
	return (retval);
    }

    root->tok = ROOT;
    root->data.min_chars = min_chars;
    root->next = trunk;
    *patn = (RxpPatn)root;
    *error_locus = (char *)0;

    #if DEBUG_RXTREE
      PRINTF("PARSE TREE: <min. chars. for a match = %d>\n",
	     root->data.min_chars);
      dump_tree(root,0);
    #endif

    return (RxpOK);
}

/*}}}   */

/*{{{  part_match */

/*
 * part_match - try to match ONE pattern token at the current position.
 *
 * linp   - address of the current position in the line; advanced past
 *          the matched text on success.
 * pat    - the token to match (never a CLOSURE; hit_match deals with
 *          those).
 * retval - receives RxpOK, or an error code when something other than
 *          an ordinary mismatch happened.
 * result - capture table; read by back-references, written by capture
 *          groups (a capture is recorded only the first time its group
 *          matches).
 *
 * Returns 0         for an ordinary mismatch (*retval == RxpOK),
 *         n + 1     when n characters were matched (so 1 for a
 *                   zero-width match such as an empty back-reference or
 *                   a tail anchor at end of line),
 *         -1        on error (*retval holds the code); note that this
 *                   is non-zero, callers must check *retval.
 *
 * At end of line only a TAIL_ANC token matches; every other token
 * fails there.
 * NOTE(review): that includes groups and back-references that could
 * match the empty string, so "a(b*)" does not match "a". Whether that
 * is intended could not be determined from this file - confirm.
 *
 * Fixes: set lookups index the bitmap with the unsigned character code
 * (a signed char with the top bit set used to index outside the map),
 * <ctype.h> functions are given unsigned char values, and the length
 * of a capture is only computed once the capture is known to exist.
 */
static int
part_match ( char      **linp,
	     RxpToken   *pat,
	     RxpError   *retval,
	     RxpMatch   *result  )
{
    int advance = -1;

    *retval = RxpOK;
    if (**linp)
    {
	switch (pat->tok)
	{
	    case LITCHAR:
		/*{{{  match literally */

		if (**linp == pat->data.lchar)
		    advance = 1;
		break;

		/*}}}   */
	    case LITCASE:
		/*{{{  match literal case-insensitively */

		{
		   /* the pattern holds the character folded the same way */
		   char ch = **linp;

		   if (islower((unsigned char)ch))
		       ch = (char)(ch & 0xdf);
		   if (ch == pat->data.lchar)
		       advance = 1;
		}
		break;

		/*}}}   */
	    case ANY:
		/*{{{  match anything except \0 */

		if (**linp)
		    advance = 1;
		break;

		/*}}}   */
	    case CCL:
		/*{{{  match a set */

		{
		    const unsigned i = (unsigned char)**linp;

		    if ((*((pat->data.bitmap) + (i >> 3)) & (1 << (i & 0x07))) != 0)
			advance = 1;
		}
		break;

		/*}}}   */
	    case NCCL:
		/*{{{  match set's complement */

		{
		    const unsigned i = (unsigned char)**linp;

		    if ((*((pat->data.bitmap) + (i >> 3)) & (1 << (i & 0x07))) == 0)
			advance = 1;
		}
		break;

		/*}}}   */
	    case CAPTURE1:
	    case CAPTURE2:
	    case CAPTURE3:
	    case CAPTURE4:
	    case CAPTURE5:
	    case CAPTURE6:
	    case CAPTURE7:
	    case CAPTURE8:
	    case CAPTURE9:
	    case PAREN:
		/*{{{  match the sub-expression and perhaps capture it */

		{
		    char *end, *start = *linp;
		    int dummy = -1;     /* negative: not a top-level match */
		    RxpError rval = alt_match( start,
					       pat->data.or_patn,
					       (char *)0,
					       &end,
					       &dummy,
					       result            );
		    if (rval == RxpOK)
		    {
			/* end is the LAST matched character */
			advance = (int)(end - start + 1);
			if (pat->tok != PAREN)
			{
			    const int capture = pat->tok - (CAPTURE1 - 1);

			    if (!result->fptr[capture])
			    {
				result->fptr[capture] = start;
				result->bptr[capture] = end + 1;
			    }
			}
		    }
		    else if (rval != RxpFail)
		    {
		       *retval = rval;
		       return (-1);
		    }

		}
		break;

		/*}}}   */
	    case SUBST_IN:
	    case SUBST_CI:
		/*{{{  match a stored string */

		{
		    const int sub = pat->data.subst_no;
		    char *fp = result->fptr[sub];
		    char *bp = result->bptr[sub];
		    char *cp = *linp;

		    if (fp)
		    {
			const int len = (int)(bp - fp);
			int scan = 1;

			while (scan && (fp < bp))
			{
			    if ( (*fp == *cp)                      ||
				 ((pat->tok == SUBST_CI)        &&
				  isalpha((unsigned char)*cp)   &&
				  ((*fp & 0xDF) == (*cp & 0xDF)))     )
			    {
				fp++;
				cp++;
			    }
			    else
				scan = 0;
			}
			if (scan)
			    advance = len;
		    }
		    else
			advance = 0;    /* nothing captured: matches empty */
		}
		break;

		/*}}}   */
	    case TAIL_ANC:
		/*{{{  fail, we're not at end-of-line */

		break;

		/*}}}   */
	    default:
		/*{{{  this should never happen */

		#if DEBUG_CORRUPT
		    PRINTF("part_match: unknown token '%c' @ %p\n",(char)pat->tok,(void *)pat);
		#endif
		*retval = RxpCorrupt;
		return (-1);

		/*}}}   */
	}
    }
    else if (pat->tok == TAIL_ANC)
	return(1);
    if (advance > 0)
	*linp += advance;
    return (++advance);
}

/*}}}   */
/*{{{  hit_match */

/*
 * hit_match - match a token chain against the text starting at lin.
 *
 * lin    - where the match has to start.
 * pat    - first token of the chain; a null chain never matches.
 * eptr   - on RxpOK receives the LAST character matched (for an empty
 *          match that is the character before lin); on any other
 *          return it is set to null.
 * result - capture table, passed through to part_match.
 *
 * A CLOSURE token repeats the token that follows it. The subject is
 * first matched as often as possible (up to the closure's maximum,
 * where 0 means unlimited), then the rest of the chain is tried; on
 * failure one repetition is given back and the rest is tried again.
 * NOTE(review): giving back a repetition steps back ONE character, so
 * backtracking is only exact for single-character subjects; for a
 * repeated group the repetition count can drift from the position.
 *
 * Returns RxpOK, RxpFail, or an error code from part_match / RxpCorrupt
 * for a closure without a subject.
 *
 * Fixes:
 *  - the greedy scan now stops at the closure's maximum. Before, it
 *    ran on, and the first continuation found at a count above the
 *    maximum made the whole match fail instead of backtracking
 *    further ("x.\{2\}y" did not match "xabyy");
 *  - a subject that matches without consuming anything (e.g. the
 *    group in "(a*)*") ends the scan instead of looping forever; since
 *    an empty match can be repeated at will, it satisfies any minimum;
 *  - *eptr is null on every non-OK return.
 */
static RxpError
hit_match( char      *lin,
	   RxpToken  *pat,
	   char     **eptr,
	   RxpMatch  *result )
{
    RxpError rval;
    char *strstart = lin;

    /* pessimistic default; only a successful return overwrites it */
    *eptr = (char *)0;
    if (pat == (RxpToken *)0)
	return (RxpFail);

    while (pat)
    {
	if (pat->tok == CLOSURE)
	{
	    /*{{{  seek copies */

	    const int min = pat->data.limit[0];
	    const int max = pat->data.limit[1];
	    int count = 0;
	    char *bocl = lin;           /* beginning of the closure */

	    pat = pat->next;            /* the subject */
	    if (!pat)
	    {
		#if DEBUG_CORRUPT
		  PRINTF("hit_match: closure with no subject.");
		#endif
		return (RxpCorrupt);
	    }

	    /*{{{  seek and count matches */

	    while (*lin && (!max || (count < max)))
	    {
		char *before = lin;

		if (!part_match(&lin,pat,&rval,result))
		    break;
		if (rval != RxpOK)
		    return (rval);
		count++;
		if (lin == before)
		{
		    /* zero-width match: repeating it gets nowhere */
		    if (count < min)
			count = min;
		    break;
		}
	    }

	    /*}}}   */
	    pat = pat->next;            /* what follows the subject */
	    if (pat != (RxpToken *)0)
	    {
		/*{{{  scan backwards for next element */

		while (bocl <= lin)
		{
		    rval = hit_match(lin,pat,eptr,result);
		    if (rval == RxpOK)
		    {
			if ((count < min) || (max && (count > max)))
			{
			    *eptr = (char *)0;
			    return (RxpFail);
			}
			return (RxpOK);
		    }
		    if (rval != RxpFail)
			return (rval);
		    /* give one repetition back and try again */
		    --count;
		    --lin;
		}
		return (RxpFail);

		/*}}}   */
	    }

	    /* the closure was the last thing in the chain */
	    if ((count < min) || (max && (count > max)))
		return (RxpFail);

	    /*}}}   */
	}
	else if (part_match(&lin,pat,&rval,result))
	{
	    /*{{{  try for more if OK */

	    if (rval != RxpOK)
		return (rval);
	    pat = pat->next;

	    /*}}}   */
	}
	else
	{
	    return (RxpFail);
	}
    }

    /* report the last character matched, not the one after it */
    --lin;
    if (strstart > (lin + 1))
	*eptr = strstart;
    else
	*eptr = lin;
    return (RxpOK);
}

/*}}}   */
/*{{{  alt_match */

/*
 * alt_match - match a list of alternatives at one position.
 *
 * lin    - position in the text at which the match has to start.
 * pat    - OR_SYM token heading the first alternative; its or_patn
 *          links to the next alternative.
 * start  - start of the line for anchored alternatives, or null if the
 *          text does not begin at a line start (anchored alternatives
 *          then never match).
 * eptr   - on RxpOK receives the last character matched; null on
 *          RxpFail and RxpCorrupt.
 * which  - if not negative on entry, it is incremented for every
 *          alternative skipped, and the capture table is reset before
 *          each further alternative is tried. Callers matching a group
 *          pass a negative value, which leaves both untouched.
 * result - capture table.
 *
 * Returns RxpOK for the first alternative that matches, RxpFail if
 * none does, RxpCorrupt if a list entry is missing or is not an OR_SYM
 * token, or any error reported by hit_match.
 *
 * Fixes:
 *  - an alternative with no tokens (as produced by "()" or a trailing
 *    '|') no longer causes a null pointer dereference; it simply does
 *    not match;
 *  - an anchored alternative that cannot match because lin is not the
 *    line start is skipped. Before, it ended the whole search, so
 *    "^a|b" never found the 'b' in "xb".
 * The alternatives are now walked in a loop rather than by recursion.
 */
static RxpError
alt_match( char       *lin,
	   RxpToken   *pat,
	   const char *start,
	   char      **eptr,
	   int        *which,
	   RxpMatch   *result )
{
    RxpToken *head = pat;
    RxpError rval;

    for (;;)
    {
	RxpToken *body;

	if (!head || (head->tok != OR_SYM))
	{
	    /*{{{  this is not a valid alternative */

	    *eptr = (char *)0;
	    #if DEBUG_CORRUPT
		if (head)
		    PRINTF("alt_match: saw '%c'; not OR_SYM @ %p\n",(char)head->tok,(void *)head);
	    #endif
	    return (RxpCorrupt);

	    /*}}}   */
	}

	body = head->next;
	rval = RxpFail;
	if (body && (body->tok == ANCHOR))
	{
	    /*{{{  only worth trying at the start of the line */

	    if (lin == start)
		rval = hit_match(lin,body->next,eptr,result);

	    /*}}}   */
	}
	else
	    rval = hit_match(lin,body,eptr,result);

	if (rval != RxpFail)
	    return (rval);
	if (!head->data.or_patn)
	    break;

	/*{{{  try the next alternative */

	if (*which >= 0)
	{
	    (*which)++;
	    memcpy(result,&nomatch,sizeof(RxpMatch));
	}
	head = head->data.or_patn;

	/*}}}   */
    }

    *eptr = (char *)0;
    return (RxpFail);
}

/*}}}   */
/*{{{  rxp_find_match */

/*
 * rxp_find_match - search text for the first match of a compiled
 * pattern.
 *
 * patn   - pattern handle; must point at a ROOT token.
 * text   - NUL-terminated text to search.
 * sol    - non-zero if text begins at the start of a line, which lets
 *          anchored alternatives match at text[0].
 * result - receives the match. It is cleared before every attempt; on
 *          RxpOK 'which' holds the number of the top-level alternative
 *          that matched, fptr[0] the start of the match and bptr[0]
 *          the character after its end.
 *
 * Start positions are tried from left to right, stopping once fewer
 * characters remain than the pattern's recorded minimum. The end of the
 * text itself is never tried as a start position.
 *
 * Returns RxpNullArg if an argument is null, RxpCorrupt if patn is not
 * a ROOT token, RxpOK on a match, RxpFail if there is none, or the
 * first error reported by the matcher.
 */
RxpError
rxp_find_match( RxpPatn    patn,
                char      *text,
                const int  sol,
                RxpMatch  *result   )
{
    /*{{{  declare */

    RxpToken *root;
    RxpToken *trunk;
    RxpError retval = RxpFail;
    const char *line_start;
    char *txt;
    char *limit;
    int min_chars;
    int len;

    /*}}}   */

    /*{{{  check arguments */

    if (!text || !patn || !result)
        return (RxpNullArg);

    /*}}}   */
    /*{{{  start anew with a blank result */

    memcpy(result,&nomatch,sizeof(RxpMatch));

    /*}}}   */
    root = (RxpToken *)patn;
    if (root->tok != ROOT)
        return (RxpCorrupt);

    /* text shorter than the shortest possible match: give up at once */
    min_chars = root->data.min_chars;
    len = strlen(text);
    if (len < min_chars)
        return (RxpFail);

    limit = text + (len - min_chars + 1);
    trunk = root->next;
    line_start = sol ? text : (char *)0;

    for (txt = text; *txt && (txt < limit) && (retval == RxpFail); )
    {
        int which = 0;
        char *last;

        /*{{{  start anew with a blank result */

        memcpy(result,&nomatch,sizeof(RxpMatch));

        /*}}}   */
        retval = alt_match( txt,
                            trunk,
                            line_start,
                            &last,
                            &which,
                            result  );

        if (retval != RxpOK)
        {
            txt++;
            continue;
        }
        result->which   = which;
        result->fptr[0] = txt;
        result->bptr[0] = last + 1;
    }

    #if DEBUG_RESULT
      {
          int i;
          PRINTF("Toplevel alternative: %d\n",result->which);
          PRINTF("Captured strings:\n");
          for (i = 0; i < 10; ++i)
              if (result->fptr[i])
              {
                  /* terminate the capture in place just for printing */
                  char tmp = *(result->bptr[i]);

                  *(result->bptr[i]) = '\0';
                  PRINTF("\\%d =>%s<=\n",i,result->fptr[i]);
                  *(result->bptr[i]) = tmp;
              }
              else
                  PRINTF("\\%d\n",i);
      }
    #endif

    return (retval);
}

/*}}}   */

/*{{{  ucase */

/*
 * ucase - upper-case a letter by clearing bit 0x20 of its code.
 *
 * in - any character.
 *
 * Returns in with bit 0x20 cleared if isalpha() accepts it, otherwise
 * in unchanged. The bit trick assumes a character set in which the
 * two cases of a letter differ only in that bit.
 */
static char
ucase (char in)
{
    /* <ctype.h> functions are only defined for unsigned char values */
    if (isalpha((unsigned char)in))
	return ((char)(in & 0xDF));
    else
	return (in);
}

/*}}}   */
/*{{{  lcase */

/*
 * lcase - lower-case a letter by setting bit 0x20 of its code.
 *
 * in - any character.
 *
 * Returns in with bit 0x20 set if isalpha() accepts it, otherwise in
 * unchanged. The bit trick assumes a character set in which the two
 * cases of a letter differ only in that bit.
 */
static char
lcase (char in)
{
    /* <ctype.h> functions are only defined for unsigned char values */
    if (isalpha((unsigned char)in))
	return ((char)(in | 0x20));
    else
	return (in);
}

/*}}}   */
/*{{{  scan_number */

/*
 * scan_number - read an unsigned decimal number.
 *
 * cp - where to start reading.
 * no - receives the value; 0 if there is no digit at cp.
 *
 * Returns the position of the first character that is not a digit
 * (cp itself if there is no digit).
 *
 * A number too large for an int is reported as INT_MAX instead of
 * overflowing, and digits are classified as unsigned char values as
 * <ctype.h> requires.
 */
static char *
scan_number(char *cp, int *no)
{
    int value = 0;

    while (isdigit((unsigned char)*cp))
    {
	const int digit = *cp - '0';

	/* would 10 * value + digit exceed INT_MAX? */
	if (value > (INT_MAX - digit) / 10)
	    value = INT_MAX;
	else
	    value = (10 * value) + digit;
	cp++;
    }
    *no = value;
    return (cp);
}

/*}}}   */
/*{{{  scan_slice */

/*
 * scan_slice - parse an optional slice specification.
 *
 * A slice is introduced by PAREN and closed by PAREND, with COMMA
 * separating the two bounds.  Writing those three characters as
 * "(", ")" and "," for readability (only PAREN's value is visible near
 * this code - confirm the other two), the accepted forms are:
 *
 *     (n)     *min = *max = n
 *     (n,)    *min = n, *max left as the caller initialised it
 *     (n,m)   *min = n, *max = m; the two are swapped if m < n
 *
 * A missing first number reads as 0, so "(,m)" gives *min = 0.
 *
 * cp       text to scan.
 * min,max  receive the bounds as described above.  *min may already
 *          have been overwritten when the scan fails part-way through.
 * returns  `cp` unchanged when the text does not start with PAREN (no
 *          slice, bounds untouched); a pointer just past PAREND for a
 *          well-formed slice; a null pointer for an empty or malformed
 *          slice.
 *
 * The character tested with isdigit() is converted to unsigned char
 * first: handing a negative plain char to a <ctype.h> classifier is
 * undefined behaviour.
 */
static char *
scan_slice(char *cp, int *min, int *max)
{
    /* no slice here: consume nothing */
    if (*cp != PAREN)
	return (cp);

    cp++;
    /* an empty pair of brackets is an error */
    if (*cp == PAREND)
	return ((char *)0);

    cp = scan_number(cp,min);

    /* single bound: the slice is exactly one position wide */
    if (*cp == PAREND)
    {
	*max = *min;
	return (cp + 1);
    }

    if (*cp != COMMA)
	return ((char *)0);
    cp++;

    /* open-ended slice: keep the caller's upper bound */
    if (*cp == PAREND)
	return (cp + 1);

    if (!isdigit((unsigned char)*cp))
	return ((char *)0);

    cp = scan_number(cp,max);
    if (*cp != PAREND)
	return ((char *)0);

    /* tolerate bounds given in reverse order */
    if (*max < *min)
    {
	int tmp = *max;

	*max = *min;
	*min = tmp;
    }
    return (cp + 1);
}

/*}}}   */
/*{{{  rxp_create_replacement */

/*
 * rxp_create_replacement - expand a replacement mask using the result
 * of a match.
 *
 * mask     replacement template.
 * data     match result; fptr[n] / bptr[n] delimit captured string n
 *          (n = 0 is used here as the whole match), `which` selects the
 *          mask alternative.
 * result   receives a MALLOC'd, NUL-terminated string on RxpOK.
 *          Presumably the caller releases it with FREE - confirm
 *          against the public header.
 *
 * Mask syntax as implemented below (each feature is gated by the
 * corresponding ALLOW_* switch):
 *
 *   - ALLOW_rep_alts: the mask is a list of alternatives separated by
 *     OR_SYM and alternative number data.which is used.  If the mask
 *     holds fewer alternatives, the last one found is used.
 *   - ALLOW_justify: an alternative may start with ESCAPE followed by
 *     JUSTIFY_LEFT, JUSTIFY_CENTRE or JUSTIFY_RIGHT.  If the expansion
 *     is shorter than the whole match it is padded with spaces to that
 *     length (trailing, both sides, or leading respectively).
 *   - ESCAPE + delimiter yields a literal OR_SYM.
 *   - ALLOW_C_escapes: ESCAPE + one of "btrn" is translated by
 *     c_escape(); ESCAPE + 'x'/'X' + two characters is translated by
 *     scan_hex().
 *   - ESCAPE [modifier] [slice] digit inserts captured string `digit`.
 *     The modifier (BLANKOUT, TOUPPER, TOLOWER, INITCAP) needs
 *     ALLOW_modifiers, the slice (see scan_slice) needs ALLOW_slices.
 *     A slice (min,max) drops the first `min` characters and keeps
 *     nothing beyond index `max`.
 *   - ESCAPE + any other character yields that character.
 *
 * returns  RxpOK          success
 *          RxpNullArg     `mask` or `result` is null
 *          RxpBadEscape   the mask ends straight after an ESCAPE
 *          RxpBadRange    malformed slice
 *          RxpBadCapture  a group other than 0 used without ALLOW_groups
 *          RxpBadHex      bad or truncated hex escape
 *          RxpAllocFail   out of memory
 *
 * *result is left untouched on every error except a RxpBadHex found
 * during the copy pass, where it is set to null after the buffer has
 * been released.
 *
 * The work is done in two passes over the chosen alternative: the first
 * measures the expansion, the second writes it.  Both passes must step
 * through the mask identically or the buffer size will be wrong.
 */
RxpError
rxp_create_replacement( char            *mask,
			const RxpMatch   data,
			char           **result  )
{
    /*{{{  declare */

    char *cp, *pp, *bp = mask;
    int len = 0;
    int which = 0;
    int format = 0;
    int padding = -1;
    int base_len;
    int delim = ALLOW_rep_alts ? OR_SYM : 0;
    int i;

    /*}}}   */

    /*{{{  check arguments */

    if (!mask || !result)
	return (RxpNullArg);

    /*}}}   */
    /*{{{  find the length */

    cp = mask;
    bp = cp;
    base_len = data.bptr[0] - data.fptr[0];
    /*{{{  locate to the correct alternative */

    if (ALLOW_rep_alts && (data.which >= 0))
    {
	while (*cp && (which < data.which))
	{
	    while (*cp && (*cp != OR_SYM))
	    {
		/* The character after an ESCAPE always belongs to the
		   escape, so step over the pair.  This keeps the
		   locator in agreement with the parser below, for
		   which ESCAPE ESCAPE is a literal ESCAPE and does not
		   protect a following OR_SYM. */
		if ((*cp == ESCAPE) && *(cp + 1))
		   cp += 2;
		else
		   cp++;
	    }
	    if (*cp == OR_SYM)
	    {
	       cp++;
	       which++;
	       bp = cp;
	    }
	}
	/* bp is the start of the last alternative reached */
	cp = bp;
    }

    /*}}}   */
    /*{{{  recognise format specifiers */

    if (ALLOW_justify && (*cp == ESCAPE))
    {
	if (!*(cp + 1))
	{
	    return (RxpBadEscape);
	}
	else if (*(cp + 1) == JUSTIFY_LEFT)
	{
	    format = 1;
	    cp += 2;
	    bp = cp;
	}
	else if (*(cp + 1) == JUSTIFY_CENTRE)
	{
	    format = 2;
	    cp += 2;
	    bp = cp;
	}
	else if (*(cp + 1) == JUSTIFY_RIGHT)
	{
	    format = -1;
	    cp += 2;
	    bp = cp;
	}
    }

    /*}}}   */
    while (*cp && (*cp != delim))
    {
	/*{{{  use extended substitutions as appropriate */

	if (*cp == ESCAPE)
	{
	    cp++;
	    if (!*cp)
	    {
		return (RxpBadEscape);
	    }
	    else if (*cp == delim)
	    {
		len++;
		cp++;
	    }
	    else if (ALLOW_C_escapes && strchr("btrnxX",*cp))
	    {
		len++;
		if ((*cp & 0xDF) == 'X')
		{
		    /* both characters of the hex pair must exist before
		       we step over them, or we run off the end of the
		       mask */
		    if (!*(cp + 1) || !*(cp + 2))
			return (RxpBadHex);
		    cp += 3;
		}
		else
		    cp++;
	    }
	    else
	    {
		int max = 10000;
		int min = 0;
		char *sav = cp;

		/*{{{  skip over any valid modifiers and slices */

		if (ALLOW_modifiers)
		{
		    switch (*cp)
		    {
			case BLANKOUT:
			case TOUPPER:
			case TOLOWER:
			case INITCAP:
			    cp++;
			default:
			    break;
		    }
		}

		if (ALLOW_slices)
		{
		    cp = scan_slice(cp,&min,&max);
		    if (!cp)
			return (RxpBadRange);
		}

		/*}}}   */

		if (isdigit((unsigned char)*cp))
		{
		    if (!ALLOW_groups && !(*cp == '0'))
		    {
			return (RxpBadCapture);
		    }
		    else
		    {
			int grp = *cp - '0';
			int count = data.fptr[grp] ? data.bptr[grp] - data.fptr[grp] : 0;
			int trunc = count - max - 1;

			/* keep nothing beyond index max ... */
			if (trunc > 0)
			    count -= trunc;
			/* ... and drop the first min characters */
			if (min > count)
			    count = 0;
			else
			    count -= min;
			len += count;
			cp++;
		    }
		}
		else
		{
		    /* not a capture after all: the escaped character
		       stands for itself */
		    cp = sav + 1;
		    len++;
		}
	    }
	}
	else
	{
	    len++;
	    cp++;
	}

	/*}}}   */
    }
    /*{{{  implement any format specification */

    if (format && (len < base_len))
    {
	padding = base_len - len;
	len = base_len;
    }

    /*}}}   */

    /*}}}   */
    if ((*result = (char *)MALLOC(len + 1)) == (char *)0)
	return (RxpAllocFail);
    /*{{{  copy the replacement */

    cp = bp;
    pp = *result;
    /*{{{  add any necessary leading space */

    if ((format == -1) && (padding > 0))
	for (i = 0; i < padding; ++i)
	{
	    *pp = ' ';
	    pp++;
	}
    if ((format == 2) && (padding > 0))
	for (i = 0; i < (padding >> 1); ++i)
	{
	    *pp = ' ';
	    pp++;
	}

    /*}}}   */
    while (*cp && (*cp != delim))
    {
	/*{{{  use extended substitutions as appropriate */

	if (*cp == ESCAPE)
	{
	    cp++;
	    if (*cp == delim)
	    {
		*pp = OR_SYM;
		pp++;
		cp++;
	    }
	    else if (ALLOW_C_escapes && strchr("btrnxX",*cp))
	    {
		if ((*cp & 0xDF) == 'X')
		{
		    int hex = scan_hex(cp + 1);

		    if (hex >= 0)
		    {
			*pp = hex;
			pp++;
			cp += 3;
		    }
		    else
		    {
			PHREE(__LINE__,*result);
			/* do not hand a dangling pointer back */
			*result = (char *)0;
			return (RxpBadHex);
		    }
		}
		else
		{
		    *pp = c_escape(*cp);
		    pp++;
		    cp++;
		}
	    }
	    else
	    {
		int min = 0;
		int max = 10000;
		int modifier = FALSE;
		char *sav = cp;

		/*{{{  read any initial modifier and / or slice */

		if (ALLOW_modifiers)
		{
		    switch (*cp)
		    {
			case BLANKOUT:
			case TOUPPER:
			case TOLOWER:
			case INITCAP:
			    modifier = *cp;
			    cp++;
			default:
			    break;
		    }
		}
		/* cannot fail here: the length pass has already
		   rejected any malformed slice */
		if (ALLOW_slices)
		    cp = scan_slice(cp,&min,&max);

		/*}}}   */
		if (isdigit((unsigned char)*cp))
		{
		    int grp = *cp - '0';
		    char *gp = data.fptr[grp];
		    int count = gp ? data.bptr[grp] - gp : 0;
		    int trunc = count - max - 1;

		    if (trunc > 0)
			count -= trunc;
		    /* an unset group, or a slice starting beyond the
		       end, contributes nothing */
		    if (!gp || (min > count))
			gp = (char *)0;
		    else
		    {
			gp += min;
			count -= min;
		    }
		    if (gp)
		    {
			for (i = 0; i < count; ++i)
			{
			    /*{{{  copy string, modifying as reqd. */

			    switch (modifier)
			    {
				case INITCAP:
				    /* first character up, the rest down */
				    *pp = ucase(*gp);
				    modifier = TOLOWER;
				    break;
				case TOUPPER:
				    *pp = ucase(*gp);
				    break;
				case TOLOWER:
				    *pp = lcase(*gp);
				    break;
				case BLANKOUT:
				    *pp = ' ';
				    break;
				default:
				    *pp = *gp;
				    break;
			    }
			    gp++;
			    pp++;

			    /*}}}   */
			}
		    }
		    cp++;
		}
		else
		{
		    /* not a capture after all: the escaped character
		       stands for itself */
		    cp = sav;
		    *pp = *cp;
		    pp++;
		    cp++;
		}
	    }
	}
	else
	{
	    *pp = *cp;
	    pp++;
	    cp++;
	}

	/*}}}   */
    }
    /*{{{  add any necessary trailing space */

    if ((format == 1) && (padding > 0))
	for (i = 0; i < padding; ++i)
	{
	    *pp = ' ';
	    pp++;
	}
    if ((format == 2) && (padding > 0))
	for (i = 0; i < (padding - (padding >> 1)); ++i)
	{
	    *pp = ' ';
	    pp++;
	}

    /*}}}   */
    *pp = '\0';

    /*}}}   */
    return (RxpOK);
}

/*}}}   */

/*}}}   */
