/*
 *  waislook.c
 *
 *  WAIS search driver.  Based on John Franks' WAISGN program.
 *
 *  (C) Copyright 1994 The University Court of the University of Edinburgh
 *  (C) Copyright 1994 John Franks
 *
 *  Author:  Chris Adie <C.J.Adie@ed.ac.uk> 
 *
 */

/******************************************************************************/
/* INCLUDE FILES                                                              */
/******************************************************************************/

#include <stdio.h>
#include <io.h>
#include <ctype.h>
#include <string.h>
#include <math.h>
#include "cutil.h"   
#include "irext.h"
#include "irsearch.h"
#include "docid.h"
#include "irtfiles.h"
#include "waisgn.h"
#include "version.h"

/******************************************************************************/
/* CONSTANT DEFINITIONS                                                       */
/******************************************************************************/

/* This section deliberately left blank */

/******************************************************************************/
/* MACRO FUNCTION DEFINITIONS                                                 */
/******************************************************************************/


/******************************************************************************/
/* TYPE DEFINITIONS                                                           */
/******************************************************************************/

/* This section deliberately left blank */

/******************************************************************************/
/* GLOBAL VARIABLES AND FUNCTIONS IMPORTED                                    */
/******************************************************************************/

/* The following are defined in irsearch.c */

/*
 * Runs a search for `words` against `db`.  This file calls it with doc_id 0
 * and words_used NULL, and treats a return value of `true` as success.
 */
extern boolean search_for_words(char *words,database *db,long doc_id,char *words_used);

/*
 * Fetches the next hit into *besthit.  As used in this file, a return value
 * of 0 means a hit was stored and any other value means there are no more.
 */
extern long next_best_hit(hit *besthit,database *db);

/*
 * Maps a file name to a single type character; this file uses the result as
 * the gopher item type of a matched document.
 */
extern char FileTypeFromTable(char *FileName);

/******************************************************************************/
/* GLOBAL VARIABLES EXPORTED                                                  */
/******************************************************************************/

/* char *log_file_name = NULL; */

/******************************************************************************/
/* VARIABLES PRIVATE TO THIS FILE                                             */
/******************************************************************************/

/* Host name written into generated links and gopher lines; set by -h. */
static char     host[MIDLEN] = "";
/* Database path without extension; set by -d, defaults to "index". */
static char     dbname[MIDLEN] = "index";
/*
 * GN item type string.  Its third character selects the directory, range
 * or 0h output variants in main().  Nothing in the code visible in this file
 * assigns to it, so it keeps the value "7w".
 */
static char     gntype[SMALLLEN] = "7w";
/* The search terms, joined with single spaces from the command line. */
static char     words[MAXLEN] = "";
/* Title used in the HTML output; set by -t. */
static char     gntitle[MAXLEN] = "";
/* Port written into generated links and gopher lines; set by -p. */
static char     port[SMALLLEN] = "";
/* Optional path prefix inserted between host:port and the file; set by -q. */
static char     virtpath[MAXLEN] = "";

/* Stream that receives the -debug diagnostics. */
static FILE *dfp = stderr;

/* Not referenced by any code visible in this file. */
static FILE *logfile = NULL;

/* Output format: plain text, HTML (-http) or gopher menu lines (-gopher). */
static enum {interactive, http, gopher} mode = interactive;

/* Output variants derived from gntype[2] in main(); debug is set by -debug. */
static int    usedir = FALSE;
static int    userange = FALSE;
static int    is0h = FALSE;
static int    debug = FALSE;

/*
 * Weight of the first positively weighted hit of the current search; doline()
 * divides each hit's weight by it to produce the displayed score.
 */
static double MaxRawScore = 0.0;

/******************************************************************************/
/* FUNCTIONS PRIVATE TO THIS FILE                                             */
/******************************************************************************/

/*
 *  Convert a DOS-style path, in place, into a form usable inside a URL.
 *
 *  Every backslash becomes a forward slash, and each of the characters
 *      , ; " ' & = ( ) { } % and space
 *  is replaced by "%XX", where XX is the character code in upper-case hex.
 *
 *  path   NUL-terminated string, rewritten in place.  Each escaped character
 *         makes the string two bytes longer, so the caller's buffer must be
 *         able to hold the expanded result (at worst three times the
 *         original length, plus the terminator).
 *
 *  The expansion is performed back to front inside the caller's buffer, so
 *  no fixed-size scratch buffer is involved and long paths cannot overrun one.
 */
static void EscapeURL(char *path) {
static const char reserved[] = ",;\"'&=(){}% ";
static const char hexdigit[] = "0123456789ABCDEF";
char   *src;
char   *dst;
size_t extra = 0;
unsigned int code;

    /* First pass: fix the separators and count how much the string grows. */
    for (src = path; *src; src++) {
        if (*src == '\\') {
            *src = '/';
        } else if (strchr(reserved, *src) != NULL) {
            extra += 2;
        }
    }
    if (extra == 0) {
        return;
    }

    /*
     * Second pass: src sits on the terminator.  Copy towards the front,
     * starting at the new end; dst never falls below src, so no character
     * is overwritten before it has been read.
     */
    dst = src + extra;
    *dst = '\0';
    while (src > path) {
        --src;
        if (strchr(reserved, *src) != NULL) {
            code = (unsigned char) *src;
            *--dst = hexdigit[code & 0x0F];
            *--dst = hexdigit[(code >> 4) & 0x0F];
            *--dst = '%';
        } else {
            *--dst = *src;
        }
    }
}


/*
 *  For a particular match, produce output according to the selected protocol.
 *
 *  match   the hit to report.  Its filename, headline, type, weight and
 *          start/end character offsets are read; it is not modified.
 *
 *  One entry is written to stdout:
 *    http         an <li> element linking to http://host:port[/virtpath]/file
 *                 followed by the score and size;
 *    gopher       a menu line: type character, headline, selector, host, port;
 *    interactive  the file name and the headline.
 *
 *  The score is the hit weight divided by MaxRawScore and multiplied by 1000
 *  (0 while MaxRawScore is not positive).  All white space in the headline is
 *  turned into plain spaces so that it cannot break a line of output.
 *
 *  NOTE(review): the headline is written into the HTML without HTML escaping.
 */
static void doline(hit * match) {
int score;
int size;
char    *cp,
        type1,
        typebuf[MIDLEN],
        relpathbuf[3 * MAXLEN],     /* room for EscapeURL's worst-case growth */
        name[MAXLEN];

    /* Bounded copies: the hit's strings are not under this file's control. */
    strncpy( relpathbuf, match->filename, MAXLEN - 1);
    relpathbuf[MAXLEN - 1] = '\0';

    strncpy( name, match->headline, sizeof(name) - 1);
    name[sizeof(name) - 1] = '\0';
    for (cp = name; *cp; cp++) {
        /* The cast keeps isspace() defined for characters above 0x7F. */
        if ( isspace((unsigned char) *cp))
            *cp = ' ';
    }

    if (MaxRawScore>0.0) {
        score = (int)((match->weight / MaxRawScore) * 1000.0);
    } else {
        score = 0;
    }

    size = (int)(match->end_character - match->start_character);

    if ( userange) {
        type1 = '0';
        /*
         * The original format ended in "%range"; "%r" is not a valid
         * conversion, so the intended literal text is written instead.
         */
        sprintf( typebuf, "R%ld-%ld-range",
            (long) match->start_character, (long) match->end_character);
    }
    else if ( *(match->type) == 'D') {  /* DVI type */
        type1 = '9';
        strcpy( typebuf, "9");
    }
    else if ( is0h) {  /* 0h type */
        type1 = '0';
        strcpy( typebuf, "0h");
    }
    else if ( usedir) {  /* Return directory containing file */
        type1 = '1';
        strcpy( typebuf, "1");
    }
    else {
        type1 = '0';
        strcpy( typebuf, "0");
        if (mode==gopher) {
            /* Let the file name decide the gopher item type. */
            type1 = FileTypeFromTable(relpathbuf);
            typebuf[0] = type1;
            typebuf[1] = '\0';
        }
    }

    if (mode==http) {
        EscapeURL(relpathbuf);
        printf("<li> <a href=\"http://%s:%s", host, port);
        if (virtpath[0]!='\0') {
            printf("/%s",virtpath);
        }
        printf("/%s\">%s</a><BR>(Score=%d, Size=%d)\n", relpathbuf, name, score, size);
    } else
    if (mode==gopher) {
        printf("%c%s\t%s\\%s\t%s\t%s\r\n",type1, name, typebuf, relpathbuf, host, port);
    } else {
        printf("%s %s\n",relpathbuf,name);
    }
}



/*
 *  Report an error to the client in the form required by the current
 *  output mode: an HTML page for http, a type 3 (error) item followed by the
 *  end-of-menu line for gopher, and a plain text line otherwise.
 *
 *  msg     text describing the error.
 *
 *  Always returns 0.
 */
int senderr(char *msg) {
    switch (mode) {
    case http:
        printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", msg);
        printf( "<BODY><HR><H2>%s</H2>\n", msg );
        fputs( "Sorry, an error has occurred in", stdout);
        fputs( " the WAIS index search.\n<HR></BODY>\n", stdout);
        break;

    case gopher:
        printf( "3Server error: %s\t\terror.host\t0\r\n.\r\n", msg);
        break;

    default:
        printf("An error has occurred in the search: %s\n",msg);
        break;
    }

    return 0;
}


/*
 *  Write the opening part of the HTML results page to stdout: the title and
 *  heading (both from gntitle), a sentence quoting the search terms, and the
 *  opening <UL> of the result list.
 *
 *  NOTE(review): gntitle and words are written without HTML escaping.
 */
static void httpintro(void) {
    printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", gntitle);
    printf( "<BODY><HR><H2>%s</H2>\n", gntitle);
    printf( "The following items were returned as matches\n");
    printf( "for <B>`%s'</B> by the WAIS index search.\n", words);
    printf( "They are ordered with the best matches first.\n");
    /* This line has no conversions, so it takes no arguments. */
    printf( "<P>\n<UL>\n");
}


/*
 *  Write the HTML page shown when the search produced no matches: title and
 *  heading from gntitle, an apology quoting the search terms, an <ISINDEX>
 *  prompt for another search, and a link to the database catalog.
 *
 *  CatalogURL   URL placed in the href of the catalog link.
 */
static void toobad(char *CatalogURL) {
    /* Page header */
    printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", gntitle);
    printf( "<BODY><HR><H2>%s</H2>\n", gntitle);

    /* Explanation and a prompt for a new search */
    printf( "Sorry, no matches for <B>`%s'</B> were returned\n", words);
    fputs( "by the WAIS index search.  You may try again with\n", stdout);
    fputs( "different search terms.\n <ISINDEX> \n", stdout);

    /* Pointer to the catalog */
    printf( "<p>The <a href=\"%s\">catalog of this WAIS database</a>\n",CatalogURL);
    fputs( "may be helpful.</BODY>\n", stdout);
}

/*
 * This function is based very loosely on a function of the same
 * name in Don Gilbert's Go_Ask_WAIS utility.  I am very grateful
 * for the help in dealing with WAIS that his routine has provided
 * and for his kind permission to use it here.  Any errors are
 * mine and not Don's.  JMF
 *
 * Opens the database named by dbname, searches it for SearchWords and
 * writes the results to stdout in the format selected by mode.
 *
 *   SearchWords   the search terms.
 *   maxhits       upper limit on the number of hits; values <= 0 select 256.
 *
 * If the dictionary file is missing, the database cannot be opened, or the
 * search itself fails, an error is reported through senderr() and the
 * program exits with status 2.  When the search succeeds but yields no hits,
 * a "no match" response is written instead of a result list.
 *
 * MaxRawScore is reset here and set to the weight of the first hit with a
 * positive weight, which doline() uses to scale the scores.
 */
static void AskWais(char *SearchWords,int maxhits) {
database        *db;
long            i;
query_parameter_type    parameters;
boolean         searchResult;
hit             theHit;
char            CatalogURL[MAXLEN];
char            *p;
size_t          room;

    if ( debug)
        fprintf( dfp, "Opening data base %s\n", dbname);

    /*
     * Check that the dictionary file exists before trying to open the
     * database.  CatalogURL is borrowed as scratch space for the file name;
     * both appends are limited to the space that is left.
     */
    CatalogURL[0] = '\0';
    strncat(CatalogURL, dbname, sizeof(CatalogURL) - 1);
    strncat(CatalogURL, dictionary_ext,
        sizeof(CatalogURL) - strlen(CatalogURL) - 1);
    /* Messages carry no newline: senderr() supplies the line endings. */
    if (_access(CatalogURL, 0) == -1) {
        senderr( "The database does not exist");
        exit( 2);
    }
    if ( (db = openDatabase(dbname, false, true)) == (database *) NULL) {
        senderr( "Failed to open database");
        exit( 2);
    }

    parameters.max_hit_retrieved = ((maxhits > 0) ? maxhits : 256);
    set_query_parameter(SET_MAX_RETRIEVED_MASK, &parameters);
    searchResult = search_for_words(SearchWords, db, 0, NULL);

    /* Initialise the maximum raw hit weight */
    MaxRawScore = 0.0;

    if (searchResult != true) {
        senderr( "The database search failed.");
        exit( 2);
    }

    finished_search_word(db);
    if ( debug)
        fprintf( dfp, "Dbase search successful\n");

    if (next_best_hit(&theHit, db) != 0) {
        /* Not a single hit: say so in the appropriate format. */
        if (mode==http) {
            /*
             * Generate the (relative) URL of the catalog: the last component
             * of dbname plus ".cat".  The unescaped name is limited to a
             * third of the buffer because EscapeURL can triple its length.
             */
            p = strrchr(dbname,'\\');
            if (p==NULL) p = dbname; else p++;
            room = (sizeof(CatalogURL) - 1) / 3;
            CatalogURL[0] = '\0';
            if (room > 4) {
                strncat(CatalogURL, p, room - 4);
                strcat(CatalogURL, ".cat");
            }
            EscapeURL(CatalogURL);
            toobad(CatalogURL);
        } else
        if (mode==gopher) {
            printf( ".\r\n");
        } else {
            printf("No match found\n");
        }
        finished_best_hit(db);
        closeDatabase(db);
        return;
    }

    /* theHit is only known to be filled in once a hit has been fetched. */
    if ( debug)
        fprintf( dfp, "Headline = %s\n", theHit.headline);

    if (mode==http) {
        httpintro();
    }

    /* i is the number of the hit about to be processed, counting from 1. */
    i = 1;
    do {
        if (theHit.weight > 0) {
            if (MaxRawScore<=0.0) {
                MaxRawScore = theHit.weight;
            }
            doline(&theHit);
        }
        i++;
    } while ( i <= parameters.max_hit_retrieved  &&
        (next_best_hit(&theHit, db) == 0));

    if (mode==http) {
        printf( "</ul>\n<P>You may repeat your search with\n");
        printf( "a new search term. <P> <ISINDEX></BODY>\n");
    }

    finished_best_hit(db);
    closeDatabase(db);
    return;
}


/******************************************************************************/
/* GLOBAL FUNCTIONS EXPORTED                                                  */
/******************************************************************************/

/*
 *  Copy an option value into a fixed-size buffer, truncating it if necessary
 *  and always leaving the result NUL-terminated.
 */
static void StoreOption(char *dest, size_t destsize, const char *value) {
    strncpy(dest, value, destsize - 1);
    dest[destsize - 1] = '\0';
}

/*
 *  Return the value that follows option argv[*index] and advance *index to
 *  it.  If the option is the last argument, report that and exit (with
 *  status 0, as for an unknown option).
 */
static char *OptionValue(int argc, char *argv[], int *index) {
    if (*index + 1 >= argc) {
        printf("Option %s requires a value\n", argv[*index]);
        exit(0);
    }
    (*index)++;
    return argv[*index];
}

/*
 *  The main program.
 *
 *  Parses the command line, then runs the search and prints the results.
 *
 *    -d dbname    database to search; any extension is stripped
 *    -h host      host name for generated links
 *    -p port      port for generated links
 *    -t title     title for the HTML output
 *    -q virtpath  path prefix for generated links
 *    -http        produce HTML output
 *    -gopher      produce gopher menu output
 *    -debug       write diagnostics to dfp
 *    -v           print the version (WIN32 builds only)
 *
 *  The first argument that is not an option starts the search terms; it and
 *  everything after it are joined with single spaces.
 *
 *  Returns 0 once the search has been carried out.  With no arguments the
 *  usage text is printed and the program exits with status 0.
 */
int main(int argc,char *argv[]) {
char *cp;
int i;

    if (argc<=1) {
        /* No arguments */
        printf("Usage: %s [-d dbname] [-h host] [-p port] [-debug]\n",argv[0]);
        printf("       [-http|-gopher] [-t title] [-q virtpath] [-v] search words ...\n");
        exit(0);
    }

    /* Collect the arguments */
    for (i=1;i<argc;i++) {

        if (strcmp(argv[i],"-debug")==0) {
            debug = TRUE;
        } else
        if (strcmp(argv[i],"-h")==0) {
            StoreOption(host, sizeof(host), OptionValue(argc, argv, &i));
        } else
        if (strcmp(argv[i],"-p")==0) {
            StoreOption(port, sizeof(port), OptionValue(argc, argv, &i));
        } else
        if (strcmp(argv[i],"-d")==0) {
            StoreOption(dbname, sizeof(dbname), OptionValue(argc, argv, &i));
            /*
             * Strip off trailing extension if any.  Only the last path
             * component is examined, so a dot in a directory name is kept.
             */
            cp = strrchr(dbname,'\\');
            if (cp==NULL) {
                cp = dbname;
            }
            cp = strrchr(cp,'.');
            if (cp!=NULL) {
                *cp = '\0';
            }
        } else
        if (strcmp(argv[i],"-t")==0) {
            StoreOption(gntitle, sizeof(gntitle), OptionValue(argc, argv, &i));
        } else
        if (strcmp(argv[i],"-v")==0) {
#ifdef WIN32
            printf("%s\n",VERWIN32);
            if (argc == 2)
              exit(0);
#endif
        } else
        if (strcmp(argv[i],"-http")==0) {
            mode = http;
        } else
        if (strcmp(argv[i],"-gopher")==0) {
            mode = gopher;
        } else
        if (strcmp(argv[i],"-q")==0) {
            StoreOption(virtpath, sizeof(virtpath), OptionValue(argc, argv, &i));
        } else
        if (argv[i][0]=='-') {
            printf("Unknown option %s\n",argv[i]);
            exit(0);
        } else {
            /*
             * Remaining arguments are the words to search for.  Each append
             * is limited to the space still free in words, so an over-long
             * query is truncated rather than written past the buffer.
             */
            *words =  '\0';
            while( i < argc) {
                strncat(words, argv[i], sizeof(words) - strlen(words) - 1);
                if ( i < argc - 1 ) {
                    /* Single space between words */
                    strncat(words, " ", sizeof(words) - strlen(words) - 1);
                }
                i++;
            }
        }

    }

    if ( debug) {
        fprintf( dfp, "Database: %s\n", dbname);
        if (*host) fprintf( dfp, "Host: %s\n", host);
        if (*port) fprintf( dfp, "Port: %s\n", port);
        fprintf( dfp, "Type: %s\n", gntype);
        if (*gntitle) fprintf( dfp, "Title: %s\n", gntitle);
        switch (mode) {
            case interactive: cp = "interactive"; break;
            case http: cp = "http"; break;
            case gopher: cp = "gopher"; break;
            default: cp = "unknown";
        }
        fprintf( dfp, "Protocol: %s\n", cp);
        fprintf( dfp, "Search term: %s\n", words);
    }

    /*
     * The third character of gntype selects an output variant:
     *   'd'  return the selector of the directory containing the matched
     *        file instead of the file itself,
     *   'r'  return a range selector,
     *   'h'  return the item as type 0h.
     * With plain "7w" (the value gntype is initialised to) there is no third
     * character and none of the variants is selected.
     */

    switch ( gntype[2]) {
    case 'd':
        usedir = TRUE;
        break;
    case 'r':
        userange = TRUE;
        break;
    case 'h':
        is0h = TRUE;
        break;
    default:
        break;
    }

    AskWais( words, MAXHITS_RETURNED);

    return 0;
}



