/*
** Copyright (C) 1995, Enterprise Integration Technologies Corp.    
** All Rights Reserved.
** Kevin Hughes, kevinh@eit.com 
** 3/11/94
 *
 * Copyright 1995-1998 by Miles O'Neal, Austin, TX, USA.
 * 
 * All Rights Reserved, except as noted herein.
 * 
 * This software may be redistributed in any fashion you like, with only
 * the following limitations.
 * 
 *    1) You must credit the authors in the source code and accompanying
 *       documentation.
 * 
 *    2) You may not use any of the names of the authors or their employers
 *       in any associated advertising without explicit permission from the
 *       parties you wish to name.  Finding and contacting them is up to you.
 * 
 *    3) This copyright must be provided along with the documentation or
 *       code.
 * 
 *    4) The accompanying disclaimer must be provided along with the
 *       documentation or code.
 * 
 *    5) You must also follow the terms of the enclosed LICENSE-EIT.
 * 
 * If you can actually use this to make money, more power to you.  Just
 * realize that the swish author is dedicated to keeping good, robust,
 * useful code freely available to the public, and that philosophy applies
 * to this package as well.
 * 
 * 
 * WARRANTY & DISCLAIMER
 * 
 * This software is presented as is, with no warranties expressed
 * or implied, including implied warranties of merchantability and
 * fitness.  In no event shall the authors, their institutions, or
 * any subsequent distributors be liable for any special, direct,
 * indirect or consequential damages whatsoever resulting from loss
 * of use, data or profits, whether in an action of contract,
 * negligence or other tortious action, arising out of or in connection
 * with the use or performance of this software. In other words, if
 * you don't like it, don't use it!
 * 
 * Bugs and feature requests, complaints, and so forth, brought to the
 * swish author's attention, will be considered.  Any modifications will
 * be made at the sole discretion of the author.
 * 
 * 
 * AUTHOR
 * 
 * Miles O'Neal
 * meo@rru.com
 * 
 * [Non-obscene suggestions for improvement to this copyright and disclaimer
 * are always welcome.  The intent is to keep control, simply so that nobody
 * else takes control.  This document should be concise and user-friendly.]
 * 
*/

#include "swish.h"
#include "check.h"
#include "hash.h"
#include "string.h"

/* Check if a file with a particular suffix should be indexed
** according to the settings in the configuration file.
**
** filename: name of the file under consideration.
** rulelist: list of accepted suffixes.  For each entry only the text
**           after the last '.' of its line is used, or the whole line
**           when it contains no '.'.
**
** Returns 1 when rulelist is empty (nothing is restricted) or when the
** text after the last '.' of filename matches one of the rules.
** Returns 0 when filename has no '.' or when no rule matches.
**
** The suffixes are compared in place.  Copying them into fixed
** MAXSUFFIXLEN buffers with strcpy() could write past the end of the
** buffer for a long file or rule suffix.
*/

int isoksuffix(filename, rulelist)
    char *filename;
    struct swline *rulelist;
{
    char *dot, *filesuffix, *rulesuffix;
    size_t filesuffixlen;
    struct swline *rule;

    if (rulelist == NULL)
        return 1;
    dot = strrchr(filename, '.');
    if (dot == NULL)
        return 0;

    filesuffix = dot + 1;
    filesuffixlen = strlen(filesuffix);

    for (rule = rulelist; rule != NULL; rule = rule->next) {
        dot = strrchr(rule->line, '.');
        rulesuffix = (dot == NULL) ? rule->line : dot + 1;

        /* A match needs equal lengths AND lstrstr() finding the file
        ** suffix inside the rule suffix, i.e. the two are equal as far
        ** as lstrstr() is concerned.  NOTE(review): lstrstr() is defined
        ** elsewhere; presumably a case-insensitive strstr() -- confirm.
        */
        if (strlen(rulesuffix) == filesuffixlen &&
          lstrstr(rulesuffix, filesuffix))
            return 1;
    }
    return 0;
}

/* Decide whether a title is acceptable according to the
** configuration file settings.
**
** Walks the global titconlist and returns 0 as soon as lstrstr()
** finds one entry's line inside title.  Returns 1 when no entry
** is found in the title (including when the list is empty).
*/

int isoktitle(title)
    char *title;
{
    struct swline *entry;

    for (entry = titconlist; entry != NULL; entry = entry->next)
        if (lstrstr(title, entry->line))
            return 0;
    return 1;
}

/* Should a word be indexed? Consults the stopword hash list
** and checks if the word is of a reasonable length...
** If you have any good rules that can work with most languages,
** please let me know...
**
** Returns 0 (do not index) when any of the following holds:
**   - the word is empty, isstopword() reports it, or its length is
**     below minwordlimit or above maxwordlimit;
**   - a character immediately repeats its predecessor more than
**     ignoresame times in a row;
**   - more than ignorerown digits, ignorerowv vowels or ignorerowc
**     consonants appear in a row;
**   - ignoreallv is set and the word has vowels but no consonants;
**   - ignoreallc is set and the word has consonants but no vowels;
**   - ignorealln is set and the word has digits but neither vowels
**     nor consonants.
** Returns 1 otherwise.
**
** A "vowel" is whatever isvowel() accepts; a "consonant" is any
** character that is not a digit, not a vowel and not punctuation.
*/

int isokword(word)
      char *word;
{
    int i, same, hasnumber, hasvowel, hascons,
        numberrow, vowelrow, consrow;
    char lastchar;
    unsigned char ch;
    size_t len;

    len = strlen(word);
    if (len == 0 || isstopword(word) ||
      len < minwordlimit || len > maxwordlimit)
        return 0;

    /* Start value for the repeat check: the first character only
    ** counts as a repeat if it happens to be ':' itself.
    */
    lastchar = ':';
    same = 0;
    hasnumber = hasvowel = hascons = 0;
    numberrow = vowelrow = consrow = 0;
    for (i = 0; word[i] != '\0'; i++) {
        if (word[i] == lastchar) {
            same++;
            if (same > ignoresame)
                return 0;
        } else {
            same = 0;
        }

        /* The <ctype.h> functions require a value representable as
        ** unsigned char (or EOF); a plain char may be negative for
        ** bytes >= 0x80, which is undefined behaviour.
        */
        ch = (unsigned char) word[i];
        if (isdigit(ch)) {
            hasnumber++;
            if (++numberrow > ignorerown)
                return 0;
            vowelrow = consrow = 0;
        } else if (isvowel(word[i])) {
            hasvowel++;
            if (++vowelrow > ignorerowv)
                return 0;
            numberrow = consrow = 0;
        } else if (!ispunct(ch)) {
            hascons++;
            if (++consrow > ignorerowc)
                return 0;
            numberrow = vowelrow = 0;
        }
        /* Punctuation belongs to no class and leaves the run
        ** counters untouched.
        */
        lastchar = word[i];
    }

    if (ignoreallv)
        if (hasvowel && !hascons)
            return 0;
    if (ignoreallc)
        if (hascons && !hasvowel)
            return 0;
    if (ignorealln)
        if (hasnumber && !hasvowel && !hascons)
            return 0;

    return 1;
}

/* Does a word have valid characters?
**
** Returns 1 when the first character satisfies isbeginchar(), the
** last character satisfies isendchar() and every character satisfies
** iswordchar(); returns 0 otherwise.
**
** An empty word is rejected up front: it has no first or last
** character, and indexing word[strlen(word) - 1] for it would read
** outside the string.
*/

int hasokchars(word)
    char *word;
{
    int i;
    size_t len;

    len = strlen(word);
    if (len == 0)
        return 0;
    if (! isbeginchar(word[0]))
        return 0;
    if (! isendchar(word[len - 1]))
        return 0;
    for (i = 0; word[i] != '\0'; i++)
        if (! iswordchar(word[i]))
            return 0;
    return 1;
}

/* Is a letter a vowel?
**
** Returns 1 for the lowercase letters 'a', 'e', 'i', 'o' and 'u',
** and 0 for every other character (uppercase vowels included).
*/

int isvowel(c)
    char c;
{
    switch (c) {
    case 'a':
    case 'e':
    case 'i':
    case 'o':
    case 'u':
        return 1;
    default:
        return 0;
    }
}

/* Case-insensitively test whether text begins with prefix.
** prefix must already be lowercase.  Stops at the end of text, so it
** never reads past the terminating '\0'.
*/

static int hasprefixnocase(text, prefix)
    char *text;
    char *prefix;
{
    for (; *prefix != '\0'; text++, prefix++)
        /* Cast: tolower() needs a value representable as unsigned char. */
        if (tolower((unsigned char) *text) != *prefix)
            return 0;
    return 1;
}

/* This checks if a filename looks like an HTML file.
**
** Returns 1 when the text after the last '.' in filename begins,
** ignoring case, with "htm" or "shtml".  That covers "htm", "HTM",
** "html", "HTML", "shtml" and "SHTML", and also any longer suffix
** with one of those prefixes (e.g. "htmx").
** Returns 0 when filename has no '.', when nothing follows the last
** '.', or when the suffix has neither prefix.
**
** The suffix is examined in place, so its length is not limited by a
** fixed MAXSUFFIXLEN buffer.
*/

int ishtml(filename)
    char *filename;
{
    char *dot;

    dot = strrchr(filename, '.');
    if (dot == NULL)
        return 0;

    return hasprefixnocase(dot + 1, "htm") ||
      hasprefixnocase(dot + 1, "shtml");
}
