{$F+,O+,R+,B-,S+,I+,V-}

{ A Borland Pascal 7.0 Unit for reading and using
  FoxPro's .CDX compact compound index files.

  This unit was written by Trevor Magnusson and Peter Heweston,
  from HYDSYS Pty Ltd in Canberra, Australia.

  You are free to use, modify and distribute this code.
  However, if you distribute it to other programmers,
  we request that you acknowlege the original authors.

  Most of the data structures have been written from descriptions in
  "FoxPro 2 Programming Guide" by Michael Antonovich,
  from the Lance A. Leventhal MICROTREND Series.
  The discussions in this book of compound compact indexes were
  far from complete and contained a few minor inaccuracies.

  Using the unit is quite easy.  We assume that you have code to read
  .DBF files.  Declare a variable of type TdbCDX, and call dbOpenCDX
  before doing anything else.  You must call dbFindCDX before any calls
  to dbNextCDX.  Calling dbFindCDX with a find string of '' and soft=true
  will return the first key entry.  dbNextCDX will return subsequent
  entries, until there are no more.  Please call dbCloseCDX when finished,
  to release system memory and close the CDX file handle.

  As indexes can contain references to deleted records, we suggest that
  an actual application would provide an extra layer around our
  dbFindCDX and dbNextCDX routines.  Such a layer would execute a loop
  that read in a record from the actual .DBF file, and verify that the
  record was not deleted.  If it was, the loop would call the dbNextCDX
  function and repeat.

  Here is a skeleton program that demonstrates the unit.
  You will have to modify the .CDX file name, tag name, and
  index expression to match your own file.

  program CDXDemo;
  uses CDX;
  var dbcdx : tdbCDX;
      recno : longint;
  begin
    dbOpenCDX (dbCDX,'c:\dbf\accounts','tag1','CLIENTCODE');
    if dbFindCDX (dbCDX,'',true,recno) then begin
      writeln ('record ',recno:5,' key [',dbcdx^.keystr,']');
      while dbNextCDX (dbCDX,recno) do
        writeln ('record ',recno:5,' key [',dbcdx^.keystr,']');
    end;
    dbCloseCDX (dbCDX);
  end.

  The files we used to debug and test this unit were generated using
  Clipper 5.01 and the SuccessWare SIx Driver library.  We have not
  accessed any files generated by FoxPro.

  Comments and suggestions would be appreciated.
  You can reach Trevor Magnusson and Peter Heweston
  on CIS 100032,503.
  }

Unit cdx;
Interface
Uses crt,dos;

type
     {index header}
     TCDXHeader = record
       RootPtr    : LongInt;                {byte offset to root node}
       FreePtr    : LongInt;                {byte offset to next free block}
       Reserve1   : array [1..4] of Byte;
       KeyLength  : Word;                   {length of key}
       IndexOpts  : Byte;
       {bit field :   1 = unique
                      8 = FOR clause
                     32 = compact index
                     64 = compound index
                    128 = internal tag index, WE THINK! }

       Reserve2   : Byte;
       Reserve3   : array [1..486] of Byte;
       AscDesc    : Word;                   {0 = ascending; 1=descending}
       Reserve4   : Word;
       ForExpLen  : Word;                   {length of FOR clause}
       Reserve5   : Word;
       KeyExpLen  : Word;                   {length of index expression}
     end;

     {the block following the header with the key and for expression}
     TKeyForBlk = array [1..512] of char;

     { In .CDX files, there are two types of node : interior and exterior.
       Interior nodes contain keyed pointers to other nodes,
       exterior nodes point to actual data items in the indexed database.

       Furthermore, there may be many indexes, (using different keys) in a
       single .CDX file.  To manage this, there is a 'master' index of
       'tags' at the beginning of the file.  Different indexes are identified
       by their 'tag' - a 10 char label.  The index at the top of the file,
       is keyed on these tags, and the key entries point to the index header
       for each particular tag.  Although this master (or tag) index points
       to data within the .CDX file, it is structured as an exterior node.

       Interior nodes are quite simple, very similar to .IDX file nodes.
       The ony difference is that instead of the 4-byte file offset pointer
       following the key data immediately, there is an extra four bytes,
       and the offset pointer follows that.  We are not sure what the
       intervening four bytes are actually for, but they must be skipped!

       Exterior nodes are more complex, because they are compressed.
       The key data is split, with a run of small records from the start of
       the buffer, containing record number, duplicate count, and trailing
       byte count.  Starting at the end of the buffer, and working backwards,
       is the compressed key data.  All trailing bytes (spaces) are removed.
       The number of trailing bytes removed is stored in the associated
       record at the start of the buffer.  If any of the first characters
       in the key are the same as for the previous key, they are omitted, and
       the number is stored in the associated record at the start of the
       buffer.  So the key data starts at each end of the buffer and works
       toward the center.

       The records at the start (recno, dups, trails) are stored in as few
       bytes as possible.  This means we have to pull off values as variable
       length bit fields.  This is tricky. }

     {information common to both node types - at the start of both of them}
     TNodeComm  = record
       NodeType   : Word;
       { 0 : Index node     - interior
         1 : Root node      - interior
         2 : Leaf node      - exterior
         3 : Root-Leaf node - exterior }
       NumKeys    : Word;    { number of keys in this node }
       PriorPtr   : LongInt; { byte offset of previous node at this level }
       NextPtr    : LongInt; { byte offset of next node at this level }
     end;

     { The fact that nodes of a particular level are 'threaded' together
       means that to do serial indexed access, we do not need to bother
       with 'tree-walking', we can simply skim along at the bottom level.}

     {key data for interior nodes}
     TIntKData  = array [1..500] of Byte;

     {shortened key data for exterior nodes}
     TExtKData  = array [1..488] of Byte;

     {extra data for external nodes}
     TExtExtra  = record
       FreeSpace  : Word;    {free space in this key}
       RecNumMask : LongInt; {bit mask for record number}
       DupCntMask : Byte;    {bit mask for duplicate byte count}
       TrlCntMask : Byte;    {bit mask for trailing bytes count}
       RecNumBits : Byte;    {num of bits used for record number}
       DupCntBits : Byte;    {num of bits used for duplicate count}
       TrlCntBits : Byte;    {num of bits used for trail count}
       ShortBytes : Byte;    {bytes needed to store recno+dups+trail}
     end;

     {variant record for nodes - interior / exterior}
     TNode      = record
       Common     : TNodeComm;
       case integer of
         1 : (IntData : TIntKData);
         2 : (Extra   : TExtExtra;
              ExtData : TExtKData);
     end;

     {our record for handling CDX files}
     TDBCDXDat = record
       cdxFName   : string;     {name of cdx file}
       cdxfp      : file;       {file handle}
       cdxTag     : string;     {tag name to load when opening}
       cdxExp     : string;     {index expression, must be same as in file}
       cdxHeader  : TCDXHeader; {header - used for both tag index and actual}
       cdxNode    : TNode;      {node record, for both interior and exterior}

       KeyStr     : string;     {last constructed key}
       KeyCtr     : integer;    {last constructed key number}
       KeyPos     : integer;    {position in buffer of last key data}
       KeyRNo     : LongInt;    {record number for last key item}
     end;

     {a pointer to the structure, to conserve static memory}
     TDBCDX = ^TDBCDXDat;


{FUNCTIONS AND PROCEDURES:}

procedure dbOpenCDX (var cdx   : TdbCDX;   {our record}
                         xname : string;   {name of .CDX file}
                         xtag  : string;   {tag name}
                         xxpr  : string);  {index expression:
                                            USED FOR VERIFICATION ONLY}
{Opens and initialises a CDX index file}


function dbFindCDX (var CDX   : TdbCDX;    {our record}
                        kexp  : string;    {string to find}
                        soft  : boolean;   {allow partial find}
                    var recno : LongInt) : boolean;
{Tries to find a key string in an index.  Returns false if could not find }
{ If soft is true, a partial find will succeed. }
{ An empty key expression, with soft set true, will return first index}


function dbNextCDX (var cdx : TdbCDX; var recno : LongInt) : boolean;
{advances one along the index, if possible.  If no chain, go to first }


procedure dbCloseCDX (var cdx : TdbCDX);
{Closes a CDX index file, dispose all dynamic data}



Implementation



{ some general purpose routines }

procedure Abort (mess : string);
{terminates abnormally, with a message }
begin
  writeln;
  writeln (mess);
  writeln ('Program aborting...');
  halt (255);
end; {abort}

procedure Assrt (cond : boolean; mess : string);
{checks a condition, and if it is not true, aborts with a message}
begin
  if not cond then abort (mess);
end; {assrt}

procedure RTrim (var s : string);
{trims any trailing blanks from a string}
begin
  while (length (s) > 0) and (s [length (s)] = ' ') do delete (s,length(s),1);
end; {rtrim}

procedure CapStr (var s : string);
{capitalise a string}
var i:integer;
begin
  for i := 1 to length (s) do s [i] := UpCase (s [i]);
end; {numstr}

function NumStr (n : real; len,dec : integer) : string;
{returns the string representation of a number}
var s : string;
begin
  str (n:len:dec,s);
  NumStr := s;
end; {numstr}




procedure CrackCode  (var buf;
                          len    : integer;
                          rnbits : Byte;
                          dubits : Byte;
                          trbits : Byte;
                          rnmask : LongInt;
                          dumask : Byte;
                          trmask : Byte;
                      var recnum : LongInt;
                      var dups   : Byte;
                      var trail  : Byte);
{decodes the recnum, duplicates and trailing byte from bit-packed record}
type tbarr = array [1..6] of Byte;
     taptr = ^tbarr;

var  ap : taptr;
     l  : LongInt;
     bs : integer;
begin
  {point a manageable array at the amorphous untyped variable presented}
  ap := @buf;

  {here is where the black magic is invoked...}

  {trailing bytes}
  trail := (ap^ [len] shr (8-trbits)) AND trmask;

  {duplicate bytes}
  bs := 16 - trbits - dubits;
  dups := ((ap^ [len] shl (8 - bs)) + (ap^ [len-1] shr bs)) AND dumask;

  {record number}
  Move (buf,l,len);
  recnum := l AND rnmask;
end; {crackcode}



procedure InitExtNodeScan (var cdx : TdbCDX);
{Initialises a running scan of compacted key data}
begin
  with cdx^ do begin
    KeyPos := 489;  {end of key data - work backwards to reconstruct the keys}
    KeyStr := '';   {initaial key value is nil}
    KeyCtr := 1;    {first key in index node}
  end;
end; {initextnodescan}

procedure NextExtNodeKey (var cdx : TdbCDX; var NoMore : boolean);
{Pulls of next key value in a running scan of compacted key data}
var spo : integer;
    rno : LongInt;
    dup : Byte;
    tra : Byte;
    k   : integer;

begin
  with cdx^,cdxnode do begin

    NoMore := KeyCtr > common.numkeys;
    if NOT NoMore then begin

     {crack the bit-compressed record of key control data}
     spo := (KeyCtr-1)*extra.shortbytes+1;
     with extra do
       CrackCode (ExtData [spo], shortbytes,
                  RecNumBits, DupCntBits, TrlCntBits,
                  RecNumMask, DupCntMask, TrlCntMask,
                  KeyRNo,     dup,        tra);

     {reconstruct the actual key string}
     KeyPos := KeyPos - cdxHeader.KeyLength + dup + tra;
     KeyStr := copy (KeyStr,1,dup);
     for k := KeyPos to KeyPos + cdxHeader.keylength - dup - tra - 1 do
       KeyStr := KeyStr + chr (extdata [k]);
     for k := 1 to tra do KeyStr := KeyStr + ' ';

     {advance key counter for next time}
     KeyCtr := KeyCtr + 1;
    end;
  end;
end; {nextextnodekey}


function ExtNodeFind (var cdx    : TdbCDX;
                          fstr   : string;
                          soft   : boolean;
                      var hit    : boolean;
                      var fin    : boolean;
                      var ovr    : boolean) : LongInt;
{Attempts to find a value in an exterior node of keys}
var tst : string;
begin
  rtrim  (fstr);
  capstr (fstr);
  ExtNodeFind := -1;

  with cdx^,cdxNode do begin
    InitExtNodeScan (cdx);
    hit := false;
    fin := false;
    ovr := false;
    repeat
      NextExtNodeKey (cdx,fin);
      if Not fin then begin

        tst := KeyStr;
        RTrim  (tst);
        CapStr (tst);

        if (tst = fstr) OR ((pos (fstr,tst) = 1) and soft) then begin
          {a direct hit, or failing that, a partial hit, if allowed}
          hit := true;
          ExtNodeFind := KeyRNo;
        end else
        if (tst > fstr) or (Length (fstr) = 0) then begin
          { we have overrun the value we want,
            in the case of a nil string, this is the first value in the node}
          ovr := true;
          hit := soft;
          ExtNodeFind := KeyRNo;
        end;
      end; {if not fin}

    until hit or fin or ovr;
  end;
end; {extnodefind}


function IntNodeFind (var Node:TNode; KeyLen:integer; fstr:string) : LongInt;
{Attempts to find a value (or next past it) in an interior node of keys}
var kct : integer;
    fnd : boolean;
    fin : boolean;
    key : string;
    kvl : LongInt;
    spo : integer;
    i   : integer;
begin
  IntNodeFind := -1;
  with Node do begin
    kct := 1;
    fnd := false;
    fin := false;
    key := '';
    for i := 1 to KeyLen do key := key + ' ';
    repeat
      spo := (kct - 1) * (KeyLen + 8{4}) + 1;
      Move (IntData [spo], key [1], KeyLen);

      {in interior nodes, we do not worry abut direct hits}
      fnd := key >= fstr;

      if fnd then begin
        spo := spo + KeyLen;
        kvl := 0;

        {skip four intervenind bytes, take the next four to be the pointer}
        for i := spo+4 to spo+7 do kvl := kvl * 256 + IntData [i];

        IntNodeFind := kvl;
      end else begin
        kct := kct + 1;
        fin := kct > Common.NumKeys;
      end;
    until fin or fnd;
  end;
end; {intnodefind}


procedure dbOpenCDX (var cdx   : TdbCDX;
                         xname : string;
                         xtag  : string;
                         xxpr  : string);
{ Opens and initialises a CDX index file}
var ers : string;
    ern : integer;
    buf : TKeyForBlk;
    spo : integer;
    i   : integer;
    kxp : string;
    hit,fin,ovr : boolean;


  procedure treat (var s : string);
  {get rid of any spaces and capitalise a string}
  var j : integer;
  begin
    while pos (' ',s) <> 0 do delete (s,pos (' ',s),1);
    for j := 1 to length (s) do s [j] := upcase (s [j]);
  end;

begin
  New (cdx);
  with cdx^ do begin

    {keep tag}
    cdxTag := xtag;

    {open file, read header}
    CapStr (xname);
    if pos ('.CDX',xname) = 0 then xname := xname {}+ '.CDX';
    cdxFName := xname;

    Assign (cdxfp,cdxfname);
    {$I-} Reset (cdxfp,512); {$I+}
    Assrt (ioresult = 0,'Could not open CDX file ['+cdxfname+']');

    BlockRead (cdxfp, cdxheader, 1);

    { Lazy assertion #1:
      We think that the 128-bit means a header to an index of index tags }
    assrt (((cdxheader.indexopts AND  64) = 64) and
           ((cdxheader.indexopts AND  32) = 32) and
           ((cdxheader.indexopts AND 128) = 128),
           'CDX file ['+xname+'] - unexpected Tag Header index options '+
           NumStr (cdxheader.indexopts,3,0));

    {read the root node of index tags}
    Seek (cdxfp, cdxheader.RootPtr div 512);
    BlockRead (cdxfp, cdxNode, 1);

    { Lazy assertion #2:
      We think it highly unlikely that there will be any
      compound interior nodes in the tag index - we only handle
      type 3 (root-leaf) nodes for the tag index. }
    Assrt (cdxNode.Common.NodeType = 3,
           'CDX file ['+xname+'] - unexpeced tag-root-node type '+
           NumStr (cdxNode.Common.NodeType,3,0));

    {find the tag in the tag index, so that we can find OUR header}
    cdxHeader.RootPtr := ExtNodeFind (cdx,xtag,false,hit,fin,ovr);
    assrt (cdxHeader.RootPtr > 0,
          'CDX file ['+xname+'] - could not find tag ['+xtag+']');

    {read the header of the index we really want}
    Seek (cdxfp, cdxheader.RootPtr div 512);
    BlockRead (cdxfp, cdxheader, 1);

    {assrt the type of index is as we expect}
    Assrt (((cdxheader.indexopts AND  64) = 64) and
           ((cdxheader.indexopts AND  32) = 32) and
           ((cdxheader.indexopts AND 128) = 0),
           'CDX file ['+xname+'] - unexpected index options '+
           NumStr (cdxheader.indexopts,3,0));

    {read in the following block, with FOR clause and index key expression}
    BlockRead (cdxfp, buf, 1);
    spo := 1;
    if cdxheader.ForExpLen <> 1 then spo := cdxheader.ForExpLen + 1;
    kxp := '';
    for i := spo to spo + cdxheader.KeyExpLen - 2 do kxp := kxp + buf [i];

    Treat (kxp);
    Treat (xxpr);
    if (kxp<>xxpr)
      then Abort (
           'CDX file ['+xname+'] tag ['+xtag+'] index mismatch:'+#13+#10+
           'Expected ['+xxpr+']'+#13+#10+
           'Found    ['+kxp+']');
  end;
end; {dnopencdx}


function dbFindCDX (var CDX   : TdbCDX;
                        kexp  : string;
                        soft  : boolean;
                    var recno : LongInt) : boolean;
{ Tries to find a key string in an index.  Returns false if could not find }
var overshot : boolean;
    keyptr   : LongInt;
    hit,fin,ovr : boolean;
begin
  with cdx^ do begin
    dbFindCDX := false;
    recno := -1;

    { read the root node }
    Seek (cdxfp, cdxheader.RootPtr div 512);
    BlockRead (cdxfp, cdxnode, 1);

    { travel down any interior nodes until we get to an exterior one }
    overshot := false;
    while (cdxNode.Common.NodeType in [0,1]) and (NOT overshot) do begin
      keyptr := IntNodeFind (cdxNode,cdxHeader.KeyLength,kexp);
      overshot := keyptr = -1;

      {read in the next node}
      if not overshot then begin
        Seek (cdxfp, keyptr div 512);
        BlockRead (cdxfp, cdxnode, 1);
      end;
    end;

    if NOT overshot then begin

      { assert we actually have an exterior node }
      if NOT (cdxNode.Common.NodeType in [2,3])
        then Abort (
             'CDX file ['+cdxfname+'] tag ['+cdxtag+'] node type error:'+
              NumStr (cdxNode.Common.NodeType,5,0));

      recno := ExtNodeFind (cdx,kexp,soft,hit,fin,ovr);
      dbFindCDX := hit;

    end;
  end;
end; {dbfindcdx}


function dbNextCDX (var cdx : TdbCDX; var recno : LongInt) : boolean;
{ advances one along the index, if possible. }
var nomore : boolean;
begin
  dbNextCDX := false;
  recno := -1;
  with cdx^ do begin
    NextExtNodeKey (cdx,NoMore);
    if NoMore then begin
      if cdxNode.common.NextPtr <> -1 then begin

        {go to the next node}
        Seek (cdxfp, cdxNode.common.NextPtr div 512);
        BlockRead (cdxfp, cdxnode, 1);

        {lazy assrtion: we assume it is a leaf node!}
        if NOT (cdxNode.Common.NodeType in [2,3])
          then Abort (
               'CDX file ['+cdxfname+'] tag ['+cdxtag+'] NextNode type error:'+
                NumStr (cdxNode.Common.NodeType,5,0));

        InitExtNodeScan (cdx);
        NextExtNodeKey (cdx,NoMore);
      end;
    end;

    if Not NoMore then begin
      dbNextCDX := true;
      recno := KeyRNo;
    end;
  end;
end; {dbnexcdx}


procedure dbCloseCDX (var cdx : TdbCDX);
{closes and disposes}
begin
  Close (cdx^.cdxfp);
  Dispose (cdx);
end; {dbclosecdx}


End. {unit cdx}
