#!/usr/bin/perl -Tw

use CGI::Carp qw/fatalsToBrowser/;

# An Expression object representing the boolean expression.  It stays an
# empty string unless the request selects boolean mode (see below).
$boolean_query = '';

# the text error message for any syntax error
# NOTE(review): nothing in this file assigns it; presumably the parser pulled
# in from boolean.pl sets it when a boolean query cannot be parsed -- confirm.
$syntaxerror = undef;

require "./search_config.pl";		# configuration
require "./Expression.pm";		# Expression class
require "./boolean.pl";			# Boolean functions

# try to determine the script path if not set in the file
$scriptpath = $scriptpath || $ENV{'SCRIPT_NAME'} || die "Cannot determine script path. Please set it in search_config.pl";


# If you change the charset, you must also modify sub HTMLEscape below
# See http://www.cert.org/tech_tips/malicious_code_mitigation.html
print "Content-type: text/html; charset=ISO-8859-1\n\n";


# NOTE(review): not defined in this file; presumably fills %form from the
# request parameters -- confirm.
&decode_query_string;

# default query mode: a missing or unrecognised 'words' parameter means 'any'
$form{'words'} = 'any'
	unless defined $form{'words'}
		&& ($form{'words'} eq 'all'
			|| $form{'words'} eq 'phrase'
			|| $form{'words'} eq 'boolean');

$number_of_matches = 0;

# Keep an HTML-escaped copy of the query for display, so we don't have to
# un-escape it later.  A request without a 'keywords' parameter leaves it
# undefined instead of running substitutions on undef.
$user_keywords = $form{'keywords'};
&HTMLEscape(\$user_keywords) if defined $user_keywords;

# escape '\Q', '\E'
# NOTE(review): the matches in SearchInFile interpolate the keywords as
# \Q$var\E, which quotes the variable's content as a whole, so this rewriting
# looks unnecessary (and alters such keywords) for the non-boolean modes.
# It may still be needed by the boolean parser -- confirm before removing.
$form{'keywords'} =~s/\\([QE])/\\E\\\\$1\\Q/g if defined $form{'keywords'};


if ($form{'words'} eq "boolean")
    {
    Expression->caseSensitive($form{'case'});
    Expression->howNear($how_near);

    $boolean_query = &parse(&encode($form{'keywords'}));
    }

# display hits from & to
$from = $form{'from'};
$to   = $form{'to'};

# Default values.  \A...\z (rather than ^...$) rejects a value with a
# trailing newline, and the defined() test covers absent parameters.
$from = 0                  unless defined $from && $from =~/\A\d+\z/;
$to   = $hits_per_page - 1 unless defined $to   && $to   =~/\A\d+\z/;

# do not bother searching if there was a syntax error
if (!$syntaxerror)
    {
    # Open the index file.  The three-argument form with an explicit read
    # mode keeps characters in the configured name from being taken as an
    # open mode, and $! tells the administrator why the open failed.
    open(my $index_fh, '<', $index_file)
        || die "Cannot open $index_file: $!";

    # One record per line; fields are separated by '|'.
    while (my $record = <$index_fh>)
        {
        # drop the line terminator so the last field does not carry it
        chomp $record;

        # search each record
        &SearchInFile(split('\|', $record));
        }

    close $index_fh;
    }

# URL - encoded version of the keywords, copied verbatim from the request so
# it can be dropped straight into the paging links
$URLkeywords = $2
	if defined $ENV{'QUERY_STRING'}
		&& $ENV{'QUERY_STRING'}=~/(^|&)keywords=(.*?)(&|$)/;
$URLkeywords = '' unless defined $URLkeywords;

# The value comes straight from the client, and a raw quote character is
# legal in a query string.  Percent-encode quotes so that a link placed
# inside a quoted HTML attribute cannot be closed early; the decoded query is
# unchanged.
$URLkeywords =~s/"/%22/g;
$URLkeywords =~s/'/%27/g;
&HTMLEscape(\$URLkeywords);

# 'Next' link 
if (($from + $hits_per_page) < $number_of_matches)
    {
    $next_from = $from + $hits_per_page;
    $next_to = $next_from + $hits_per_page - 1;
    }
else
    {
    # already on the last page: stay where we are
    $next_from = $from;
    $next_to = $number_of_matches - 1;
    }

$prev_from = $from - $hits_per_page;


$next_url ="$scriptpath?keywords=$URLkeywords&words=$form{'words'}&from=$next_from&to=$next_to";

####### 'Previous' link #########
$prev_from = 0 if $prev_from < 0;
$prev_to = $prev_from + $hits_per_page - 1;
$prev_url = "$scriptpath?keywords=$URLkeywords&words=$form{'words'}&from=$prev_from&to=$prev_to";


####### 'From' and 'To' values displayed ###########
# both are 1-based; 'To' is capped at the number of matches
$display_from = $from + 1;
if ($to <= ($number_of_matches - 1))
    {
    $display_to = $to + 1;
    }
else
    {
    $display_to = $number_of_matches;
    }


####### '1', '2', ... 'n' page links ###############
# Number of pages = ceiling(matches / hits per page).  The earlier
# "matches / hits + 1" bound produced an extra, empty page whenever the match
# count was an exact multiple of the page size.  At least one page link is
# still generated when there are no matches, and never more than 20.
my $last_page = int(($number_of_matches + $hits_per_page - 1) / $hits_per_page);
$last_page = 1  if $last_page < 1;
$last_page = 20 if $last_page > 20;

for ($pageno = 1; $pageno <= $last_page; $pageno++)
    {
    # page select template
    $temp = $page_select_format;

    # the number of the pages
    $temp=~s/<!--NUMBER-->/$pageno/g;

    # zero-based range of hits shown on this page, capped at the last match
    $page_from = $pageno * $hits_per_page - $hits_per_page;
    $page_to = $page_from + $hits_per_page - 1;
    if ($page_to >= ($number_of_matches - 1))
        {$page_to=$number_of_matches - 1}

    $page_path = "$scriptpath?keywords=$URLkeywords&words=$form{'words'}&from=$page_from&to=$page_to";

    # the path to the page
    $temp=~s/<!--PATH-->/$page_path/g;
    $page_select .=$temp
    }

# With paging switched off, widen the displayed range to every match.
unless ($multiple_pages)
    {
    ($from, $to) = (0, $number_of_matches - 1);
    }

# Expand the page template into the HTML for this response.
$htmlpage = &fillHTMLTemplate($search_results_page);


# The page carries a <!--SEARCHRESULTS--> marker only when the result list
# should be shown; without the marker the page is printed as it is.
if ($htmlpage !~/<!--SEARCHRESULTS-->/)
    {
    # nothing to list: print all the HTML
    print $htmlpage;
    }
else
    {
    # The marker is assumed to occur once: only the text before it and the
    # text up to any second occurrence are kept.
    ($header, $footer) = split('<!--SEARCHRESULTS-->', $htmlpage);

    print $header;          # HTML before the search results
    &printSearchResults;    # the results themselves
    print $footer;          # HTML after the search results
    }

################# FOOTER ###################
# print '<p align="center"><center><small> footer appears here spanky</center></p>';
###################################################
#print '</BODY></HTML>';

# Fixed closing markup for the page: help link, copyright image and the tags
# that close the surrounding layout.  Each fragment is printed in turn, with
# nothing added between them.
foreach my $fragment
    (
    '</font></p><center>',
    '<a href="http://www.daytonprogress.com/search_help.html" class="link"><font size="-1" face="Arial">',
    'Search Help</font></a>',
    '<br><img src="../images/spacer.gif" width="455" height="15"></center>',
    '</td><td align="center" valign="top" bgcolor="#FFFFFF">',
    '<img src="../images/spacer.gif" align="left" hspace="0" width="10" height="10"></td>',
    '</tr><tr><td bgcolor="#FFFFFF">&nbsp;</td><td bgcolor="#FFFFFF">',
    '<a href="http://www.daytonprogress.com">',
    '<img src="../images/copyright.gif" alt="Return to the Dayton Progress home page..." border="0" width="450" height="39"></a></td>',
    '<td bgcolor="#FFFFFF">&nbsp;</td></tr><tr>',
    '<td colspan="3"><img src="../images/barbottom.gif" width="475" height="9"></td>',
    '</tr></table></div></body></html>',
    )
    {
    print $fragment;
    }


# Tests one index record against the current query and registers it with
# FoundMatch when it matches.
#
# Arguments (the fields of one index record, in order):
#   $filename, $title, $description, $lines (the text that is searched),
#   $size, $mtime
#
# Reads the query from %form: 'words' selects the mode (phrase, any, all or
# boolean), 'keywords' is the query text and a true 'case' makes matching
# case-sensitive.  Boolean mode delegates to $boolean_query->evaluate.
sub SearchInFile # called for each file in the index file
	{
	my ($filename, $title, $description, $lines, $size, $mtime) = @_;

	# a short record has no text field; search an empty text instead of undef
	$lines = '' unless defined $lines;

	my $mode    = $form{'words'};
	my $matched = 0;

	if ($mode eq "phrase")
		{
		# the whole query must appear as one literal string
		$matched = &_keywordFoundIn($lines, $form{'keywords'});
		}
	elsif ($mode eq "any" || $mode eq "all")
		{
		# Split into individual words.  split ' ' skips leading whitespace
		# and treats a run of whitespace as one separator, so no empty
		# keywords are produced; split('\s') yielded them for queries such
		# as " foo" or "foo  bar".
		my @keywords = defined $form{'keywords'}
			? split(' ', $form{'keywords'})
			: ();

		if ($mode eq "any")
			{
			foreach my $keyword (@keywords)
				{
				if (&_keywordFoundIn($lines, $keyword))
					{
					# one hit is enough; ignore all the other words
					$matched = 1;
					last;
					}
				}
			}
		else
			{
			# default to the page matching
			$matched = 1;
			foreach my $keyword (@keywords)
				{
				if (!&_keywordFoundIn($lines, $keyword))
					{
					# one miss rules the page out; ignore the other keywords
					$matched = 0;
					last;
					}
				}
			}
		}
	elsif ($mode eq "boolean")
		{
		# matches if the text satisfies the boolean expression
		$matched = $boolean_query->evaluate($lines) ? 1 : 0;
		}

	# add the page to the list of pages that match
	&FoundMatch($filename, $title, $description, $lines, $size, $mtime)
		if $matched;
	}

# Private helper: true (1) if $keyword occurs literally in $text, false (0)
# otherwise.  Case is ignored unless $form{'case'} is true.
sub _keywordFoundIn
	{
	my ($text, $keyword) = @_;

	# An empty pattern would make Perl reuse the last successful regex, so
	# answer directly: the empty string occurs in every text.
	return 1 if !defined $keyword || $keyword eq '';

	if ($form{'case'})
		{
		return $text =~/\Q$keyword\E/ ? 1 : 0;
		}
	return $text =~/\Q$keyword\E/i ? 1 : 0;
	}

# Records one matching page: bumps the global match counter, appends the
# path to @paths and stores the page's details in the lookup hashes keyed by
# that path.
#
# Arguments: $filename, $title, $description, $lines, $size, $mtime.
# The page text ($lines, the fourth argument) is accepted but not stored.
sub FoundMatch # called if the keyword(s) is/are found in the file
    {
    my ($page, $heading, $summary, $bytes, $modified) = @_[0, 1, 2, 4, 5];

    ++$number_of_matches;

    # @paths keeps the order in which the matches were found
    push @paths, $page;

    $sizes{$page}        = $bytes;
    $descriptions{$page} = $summary;
    $titles{$page}       = $heading;

    # assigning to a hash element calls localtime in scalar context, so the
    # formatted date string is what gets stored
    $mtimes{$page} = localtime($modified);
    }


# Expands a page template and returns the resulting HTML.
#
# Argument: the template text.  Three kinds of markers are processed:
#   - value tags such as <!--QUERY-->, replaced by the script's globals;
#   - section pairs such as <!--MATCHES-->...<!--/MATCHES-->, whose content
#     is removed when the section does not apply to this request;
#   - form-state tags (<!--ANY-->, <!--CASE-->, ...) that re-select what the
#     user chose.
# Reads $page_select, $number_of_matches, $user_keywords, $display_from,
# $display_to, $next_url, $prev_url, $scriptpath, $syntaxerror,
# $optionselect_keyword and %form.
sub fillHTMLTemplate
    {
    my $template = shift;

    # Value tags, substituted in this fixed order so the result never
    # depends on hash ordering.  The query text and the error message go
    # last, so text inserted for them is not scanned for further tags.
    my @value_tags =
        (
         [ 'PAGESELECT', $page_select       ],
         [ 'NEXT',       $next_url          ],
         [ 'PREV',       $prev_url          ],
         [ 'NUMBER',     $number_of_matches ],
         [ 'FROM',       $display_from      ],
         [ 'TO',         $display_to        ],
         [ 'SCRIPTPATH', $scriptpath        ],
         [ 'QUERY',      $user_keywords     ],
         [ 'ERROR',      $syntaxerror       ],
        );

    foreach my $pair (@value_tags)
        {
        my ($tag, $value) = @$pair;

        # an unset value (e.g. no syntax error) becomes empty text
        $value = '' unless defined $value;
        $template =~s/<!--${tag}-->/$value/g;
        }

    # removes a <!--NAME-->...<!--/NAME--> pair together with its content
    my $drop_section = sub
        {
        my $name = shift;
        $template =~s|\Q<!--${name}-->\E.*?\Q<!--/${name}-->\E||sg;
        };

    # Remove the section if no keywords were entered.  Test the length, not
    # the truth value, so that a query of "0" still counts as a query.
    if (!defined $user_keywords || $user_keywords eq '')
        {
        $drop_section->('QUERYENTERED');
        }

    if ($number_of_matches)
        {
        # some documents matched
        $drop_section->('NOMATCHES');

        # if there were any matches, there was not a syntax error
        $drop_section->('SYNTAXERROR');

        if ($number_of_matches <= 1)
            {
            # only one page matched the query
            $drop_section->('MORETHANONE');
            }
        }
    else
        {
        # there were no matches
        $drop_section->('MATCHES');

        # keep whichever of the two syntax-error sections applies
        $drop_section->($syntaxerror ? 'NOSYNTAXERROR' : 'SYNTAXERROR');
        }

    foreach my $name (qw/QUERYENTERED NOMATCHES MORETHANONE MATCHES SYNTAXERROR NOSYNTAXERROR/)
        {
        # remove any remaining tags and their closing pairs
        # the text between them is left in place
        $template =~s/<!--\/?${name}-->//g;
        }

    # Fill in the form with the values the user entered.
    # $optionselect_keyword can be 'SELECTED' for dropdown listboxes, or
    # 'CHECKED' for radio buttons.
    my $mode   = defined $form{'words'} ? $form{'words'} : '';
    my $marker = defined $optionselect_keyword ? $optionselect_keyword : '';

    foreach my $choice (['ANY', 'any'], ['ALL', 'all'],
                        ['PHRASE', 'phrase'], ['BOOLEAN', 'boolean'])
        {
        my ($tag, $value) = @$choice;
        my $fill = ($mode eq $value) ? $marker : '';
        $template =~s/<!--${tag}-->/$fill/g;
        }

    my $checked = (defined $form{'case'} && $form{'case'} ne '') ? 'CHECKED' : '';
    $template =~s/<!--CASE-->/$checked/g;

    return $template;
    }

# Prints one filled-in copy of the $search_result template for every match
# in the selected range.
#
# Takes no arguments.  Reads the zero-based range $from..$to, the match list
# @paths and the per-path hashes %titles, %descriptions, %sizes and %mtimes.
sub printSearchResults
    {
    # $to comes from the request and may be far larger than the number of
    # matches; stop at the last stored path instead of counting up to it.
    my $last = $to;
    $last = $#paths if $last > $#paths;

    for (my $subscript = $from; $subscript <= $last; $subscript++)
        {
        my $path = $paths[$subscript];

        # skip entries without a path
        next unless $path;

        # the template fields, in the order in which they are filled in
        my @fields =
            (
             [ 'NUMBER',      $subscript + 1        ],   # 1-based position
             [ 'PATH',        $path                 ],
             [ 'TITLE',       $titles{$path}        ],
             [ 'DESCRIPTION', $descriptions{$path}  ],
             [ 'SIZE',        $sizes{$path}         ],
             [ 'TIME',        $mtimes{$path}        ],
            );

        # make a copy of the template and fill it in
        my $entry = $search_result;
        foreach my $field (@fields)
            {
            my ($tag, $value) = @$field;

            # a field missing from the index record is shown as empty text
            $value = '' unless defined $value;
            $entry =~s/<!--${tag}-->/$value/g;
            }

        # and print it
        print $entry;
        }
    }

# Runs after the page has been printed and sets each of these globals to
# undef.  NOTE(review): each name otherwise appears only once in this file,
# so this second mention is presumably here to silence the "used only once"
# warnings raised under -w -- confirm before removing.
($next_url, $how_near, $search_results_page, $display_from, $prev_url,
	$search_result, $page_select, $page_select_format, $multiple_pages) = ();

# Escapes, in place, the characters that are significant in HTML so that the
# text can be written into a page, including inside a quoted attribute value.
#
# Argument: a reference to the scalar to escape.  An undefined value is left
# untouched.  Returns nothing useful.
#
# The entities below assume the ISO-8859-1 charset announced in the
# Content-type header; revisit them if the charset changes.
sub HTMLEscape
	{
	my $textref = shift;
	return unless defined $$textref;

	# '&' must go first so the entities added below are not escaped again
	$$textref =~s/&/&amp;/g;
	$$textref =~s/</&lt;/g;
	$$textref =~s/>/&gt;/g;

	# quotes would otherwise end an attribute value the text is placed in
	$$textref =~s/"/&quot;/g;
	$$textref =~s/'/&#39;/g;
	return;
	}

