#!/bin/bash
# makewhatis: create the whatis database
# Created: Sun Jun 14 10:49:37 1992
# Revised: Sat Jan  8 14:12:37 1994 by faith@cs.unc.edu
# Revised: Sat Mar 23 17:56:18 1996 by micheal@actrix.gen.nz
# Copyright 1992, 1993, 1994 Rickard E. Faith (faith@cs.unc.edu)
# May be freely distributed and modified as long as copyright is retained.
#

# Name this script was invoked as; shown by --version.
program=$(basename -- "$0")

# In case both /usr/man and /usr/share/man exist, the former is local
# and should be first.
# It is a bug to add /var/cache/man to DEFCATPATH.

# DEFMANPATH: colon-separated list of the candidate man directories that
# actually exist on this system, in the order listed.
dm=
for d in /usr/man /usr/share/man /usr/X11R6/man /usr/local/man
do
    if [ -d "$d" ]; then
        dm=${dm:+$dm:}$d
    fi
done
DEFMANPATH=$dm

# DEFCATPATH: same construction for the preformatted (cat) page directories.
dc=
for d in /usr/man/preformat /usr/man /usr/share/man/preformat /usr/share/man
do
    if [ -d "$d" ]; then
        dc=${dc:+$dc:}$d
    fi
done
DEFCATPATH=$dc

# make a single big /var/cache/man/whatis file,
DEFWHATIS=/var/cache/man/whatis

# awk interpreter used to parse the pages
AWK=/usr/bin/gawk

# Find a place for our temporary files.

if TMPFILE=$(mktemp /tmp/whatis.XXXXXX)
then

# Make sure TMPFILE is deleted if the program is killed or terminates.
# (Just delete these lines if your shell doesn't know about trap.)
# The commands are single-quoted so TMPFILE is expanded when a trap fires.
# Both traps remove TMPFILE; the signal trap used to reference the undefined
# TMPFILEDIR and therefore removed nothing itself.
trap 'rm -f -- "$TMPFILE"' 0
trap 'rm -f -- "$TMPFILE"; exit 255' 1 2 3 15

# default find arg: no directories, no empty files
findarg0="-type f -size +0"
# extra find arg, set by -u
findarg=

# Name of the list (manpath or catpath) that directory arguments are
# appended to; -c switches it to catpath.
topath=manpath

defmanpath=$DEFMANPATH
defcatpath=

# Option state. Everything is reset explicitly so that identically named
# variables inherited from the environment cannot change the behaviour.
# manpath and catpath must be *unset* (not empty): the default man path is
# only substituted later when manpath was never assigned.
update=
verbose=
setsections=
setwhatis=
unset manpath catpath

# Default section list: every line of /etc/man.config containing "MANSEC",
# minus the first such line (presumably a comment mentioning MANSECT --
# verify against the installed man.config), with the MANSECT keyword
# removed and the colons turned into spaces.
sections=$(grep "MANSEC" /etc/man.config | sed -e "1d" -e "s/MANSECT//" -e "s/:/ /g")
whatisdb=$DEFWHATIS

# parse_args ARG...
# Process the command line.
# Globals read:    program, topath, sections, DEFMANPATH, DEFCATPATH, DEFWHATIS
# Globals written: sections, whatisdb, topath, defmanpath, defcatpath,
#                  findarg, update, verbose, manpath, catpath
#                  (plus the scratch variables name, setsections, setwhatis)
# Exits 0 after printing the version or the usage text, and 1 (message on
# stderr) when a directory argument does not exist.
parse_args() {
    for name in "$@"
    do
        # The previous argument was -s: this one is the section list.
        if [ -n "$setsections" ]; then
            setsections=
            sections=$name
            continue
        fi
        # The previous argument was -o: this one is the whatis file.
        if [ -n "$setwhatis" ]; then
            setwhatis=
            whatisdb=$name
            continue
        fi
        case $name in
            --version|-V)
                echo "$program from man-1.6d"
                exit 0;;
            -c) # directories that follow are cat directories
                topath=catpath
                defmanpath=
                defcatpath=$DEFCATPATH
                continue;;
            -s) setsections=1
                continue;;
            -o) setwhatis=1
                continue;;
            -u) findarg="-ctime 0"
                update=1
                continue;;
            -v) verbose=1
                continue;;
            -w) manpath=$(man --path)
                catpath=$manpath
                continue;;
            -*) echo "Usage: makewhatis [-s sections] [-u] [-v] [-w] [manpath] [-c [catpath]] [-o whatisdb]"
                echo "       This will build the whatis database for the man pages"
                echo "       found in manpath and the cat pages found in catpath."
                echo "       -s: sections (default: $sections)"
                echo "       -u: update database with new pages"
                echo "       -v: verbose"
                echo "       -o: location of whatis database (default: $DEFWHATIS)"
                echo "       -w: use manpath obtained from \`man --path\`"
                echo "       [manpath]: man directories (default: $DEFMANPATH)"
                echo "       [catpath]: cat directories (default: the first existing"
                echo "           directory in $DEFCATPATH)"
                exit;;
             *) if [ -d "$name" ]
                then
                    # Append to the list currently being filled. Plain
                    # assignments replace the former eval, which executed
                    # shell syntax embedded in the argument.
                    if [ "$topath" = catpath ]; then
                        catpath=$catpath:$name
                    else
                        manpath=$manpath:$name
                    fi
                else
                    echo "No such directory $name" >&2
                    exit 1
                fi;;
        esac
    done
}

parse_args "$@"

# Convert the colon-separated directory lists into space-separated ones.
# manpath falls back to the default man path only when it was never set.
manpath=$(echo ${manpath-$defmanpath} | tr : ' ')

# Without an explicit cat path, use the first default cat directory that
# exists (the default list is empty unless -c was given).
if [ -z "$catpath" ]
then
   for d in $(echo $defcatpath | tr : ' ')
   do
      if [ -d $d ]
      then
         catpath=$d
         break
      fi
   done
fi
catpath=$(echo ${catpath} | tr : ' ')

# First truncate the whatis file that will be created anew, then only
# update it - we might visit the same directory twice.
# Unless -u was given, the file is (re)created with one line per installed
# RPM package: each record is emitted with a trailing \r, all newlines are
# deleted, and the \r terminators are then turned into newlines.
# The target is quoted so that an -o path containing whitespace does not
# fail with "ambiguous redirect".
if [ -z "$update" ]; then
   rpm -qa --queryformat '%-19{NAME} (rpm) - %{SUMMARY}\r' | tr -d '\n' | tr '\r' '\n' > "$whatisdb"
fi

# index_page FILE
# Print on stdout the whatis entries found in the NAME section of one man
# or cat page.
# Globals read: AWK, pages ("man" or "cat"), section, verbose, curdir.
# Pages whose (decompressed) content is not valid UTF-8 are skipped.
index_page() {
    local reader

    # Pick the decompressor from the suffix; same suffixes as the awk code.
    case $1 in
        *.gz|*.Z|*.z) reader=zcat ;;
        *.bz2)        reader=bzcat ;;
        *)            reader=cat ;;
    esac

    if "$reader" "$1" | iconv -f utf-8 -t utf-8 -o /dev/null 2>/dev/null
    then
        printf '%s\n' "$1" | "$AWK" '

    # Read the next line of the current page into $0.
    # Returns the getline result: 1 line read, 0 end of input, <0 error.
    function readline() {
      if (use_zcat || use_bzcat) {
        result = (pipe_cmd | getline);
        if (result < 0) {
          print "Pipe error: " pipe_cmd " " ERRNO > "/dev/stderr";
        }
      } else {
        result = (getline < filename);
        if (result < 0) {
          print "Read file error: " filename " " ERRNO > "/dev/stderr";
        }
      }
      return result;
    }

    # Close whichever input (pipe or file) readline() was using.
    function closeline() {
      if (use_zcat || use_bzcat) {
        return close(pipe_cmd);
      } else {
        return close(filename);
      }
    }

    # Process the page named by the global "filename": derive the program
    # name and section from the file name, collect the text of the NAME
    # section and hand each description to handle_entire_line().
    function do_one() {
      insh = 0; thisjoin = 1; done = 0;
      entire_line = "";

      if (verbose) {
        print "adding " filename > "/dev/stderr"
      }

      use_zcat = match(filename,"\\.Z$") ||
                 match(filename,"\\.z$") || match(filename,"\\.gz$");
      if (!use_zcat)
        use_bzcat = match(filename,"\\.bz2$");
      if (use_zcat || use_bzcat) {
        # Strip the compression suffix. The start index must be 1: gawk
        # treats a start of 0 as one position before the string, which
        # used to cut off the last character of the section suffix.
        filename_no_gz = substr(filename, 1, RSTART - 1);
      } else {
        filename_no_gz = filename;
      }
      match(filename_no_gz, "/[^/]+$");
      progname = substr(filename, RSTART + 1, RLENGTH - 1);
      if (match(progname, "\\." section "[A-Za-z]+")) {
        actual_section = substr(progname, RSTART + 1, RLENGTH - 1);
      } else {
        actual_section = section;
      }
      sub(/\..*/, "", progname);
      if (use_zcat || use_bzcat) {
        if (use_zcat) {
          pipe_cmd = "zcat \"" filename "\" 2>/dev/null";
        } else {
          pipe_cmd = "bzcat \"" filename "\" 2>/dev/null";
        }
        # try to avoid suspicious stuff; the double quote is rejected as
        # well because the name is embedded in double quotes in pipe_cmd
        if (filename ~ /[;&|`$("]/) {
          print "ignored strange file name " filename " in " curdir > "/dev/stderr";
          return;
        }
      }

      while (!done && readline() > 0) {
        gsub(/.\b/, "");
        if (($1 ~ /^\.[Ss][Hh]/ &&
           ($2 ~ /[Nn][Aa][Mm][Ee]/ ||   # en/de
            $2 ~ /^JMÉNO/ ||            # cs
            $2 ~ /^NAVN/ ||             # da
            $2 ~ /^NUME/ ||             # ro
            $2 ~ /^BEZEICHNUNG/ ||      # de
            $2 ~ /^NOMBRE/ ||           # es
            $2 ~ /^NIMI/ ||             # fi
            $2 ~ /^NOM/ ||              # fr
            $2 ~ /^IME/ ||              # sh
            $2 ~ /^N[ÉE]V/ ||          # hu
            $2 ~ /^NAMA/ ||             # XXX: what lang is this?
            $2 ~ /^名前/ ||             # ja
            $2 ~ /^이름/ ||             # ko
            $2 ~ /^NAZWA/ ||            # pl
            $2 ~ /^НАЗВАНИЕ/ || # ru
            $2 ~ /^名称/ ||             # zh_CN/ja
            $2 ~ /^名稱/ ||             # zh_TW
            $2 ~ /^NOME/ ||             # it/pt
            $2 ~ /^NAAM/)) ||           # nl
          (pages == "cat" && $1 ~ /^NAME/)) {
            # First NAME header starts the section, a second one ends it.
            if (!insh) {
              insh = 1;
            } else {
              done = 1;
            }
        } else if (insh) {
          if ($1 ~ /^\.[Ss][HhYS]/ ||
            (pages == "cat" &&
            ($1 ~ /^S[yYeE]/ || $1 ~ /^DESCRIPTION/ ||
             $1 ~ /^COMMAND/ || $1 ~ /^OVERVIEW/ ||
             $1 ~ /^STRUCTURES/ || $1 ~ /^INTRODUCTION/ ||
             $0 ~ /^[^ ]/))) {
              # end insh for Synopsis, Syntax, but also for
              # DESCRIPTION (e.g., XFree86.1x),
              # COMMAND (e.g., xspread.1)
              # OVERVIEW (e.g., TclCommandWriting.3)
              # STRUCTURES (e.g., XEvent.3x)
              # INTRODUCTION (e.g., TclX.n)
              # and anything at all that begins in Column 1, so
              # is probably a section header.
            done = 1;
          } else {
            if ($0 ~ progname"-") {  # Fix old cat pages
                sub(progname"-", progname" - ");
            }
            if ($0 ~ /[^ \\]-$/) {
              sub(/-$/, "");      # Handle Hyphenations
              nextjoin = 1;
            } else if ($0 ~ /\\c$/) {
              sub(/\\c$/, "");    # Handle Continuations
              nextjoin = 1;
            } else
              nextjoin = 0;

            sub(/^.[IB] /, "");       # Kill bold and italics
            sub(/^.BI /, "");         #
            sub(/^.SM /, "");         # Kill small
            sub(/^.Nm /, "");         # Kill bold
            sub(/^.Tn /, "");         # Kill normal
            sub(/^.Li /, "");         # Kill .Li
            sub(/^.Dq /, "");         # Kill .Dq
            sub(/^.Nd */, "- ");      # Convert .Nd to dash
            sub(/\\\".*/, "");        # Trim pending comments
            sub(/  *$/, "");          # Trim pending spaces
            sub(/^\.$/, "");          # Kill blank comments
            sub(/^'"'"'.*/, "");      # Kill comment/troff lines
            sub(/^.in .*/, "");       # Kill various macros
            sub(/^.ti .*/, "");
            sub(/^.ta .*/, "");
            sub(/^.Vb .*/, "");
            sub(/^.[PLTH]P$/, "");    # .PP/.LP/.TP/.HP
            sub(/^.Pp$/, "");
            sub(/^.[iI]X .*$/, "");
            sub(/^.nolinks$/, "");
            sub(/^.B$/, "");
            sub(/^.nf$/, "");

            if (($1 ~ /^\.../ || $1 == "") &&
                (entire_line ~ / - / || entire_line ~ / \\- /)) {
              # Assume that this ends the description of one line
              # Sometimes there are several descriptions in one page,
              # as in outb(2).
              handle_entire_line();
              entire_line = "";
              thisjoin = 1;
            } else {
              if (thisjoin) {
                entire_line = entire_line $0;
              } else {
                entire_line = entire_line " " $0;
              }
              thisjoin = nextjoin;
            }
          }
        }
      }
      handle_entire_line();
      closeline();
    }

    # Clean up the collected "names - description" text in entire_line and
    # print one whatis line per comma-separated name before the dash.
    # Prints nothing when the text contains no " - ".
    function handle_entire_line() {
      x = entire_line;             # Keep it short

      gsub(/\015/, "", x);         # Kill DOS remains
      gsub(/\t/, " ", x);          # Translate tabs to spaces
      gsub(/  +/, " ", x);         # Collapse spaces
      gsub(/ *, */, ", ", x);      # Fix comma spacings
      sub(/^ /, "", x);            # Kill initial spaces
      sub(/ $/, "", x);            # Kill trailing spaces
      sub(/__+/, "_", x);          # Collapse underscores

      gsub(/\\f\(../, "", x);         # Kill font changes
      gsub(/\\f[PRIB0123]/, "", x);   # Kill font changes
      gsub(/\\s[-+0-9]*/, "", x);     # Kill size changes
      gsub(/\\&/, "", x);             # Kill \&
      gsub(/\\\|/, "", x);            # Kill \|
      gsub(/\\\((ru|ul)/, "_", x);    # Translate
      gsub(/\\\((mi|hy|em)/, "-", x); # Translate
      gsub(/\\\*\(../, "", x);        # Kill troff strings
      gsub(/\\/, "", x);              # Kill all backslashes
      gsub(/"/, "", x);               # Kill quotes (from .Nd "foo bar")
      sub(/<h1 align=center>/, "", x);# Yuk! HTML cruft
      gsub(/\000.*/, "X", x);         # Binary cruft in LAPACK pages
      gsub(/  +/, " ", x);            # Collapse spaces (again)
      sub(/^ /, "", x);               # Kill initial spaces (again)
      sub(/ $/, "", x);               # Kill trailing spaces (again)
      sub(/\.$/, "", x);              # Kill trailing period

      if (!match(x, / - /))
        return;

      after_dash = substr(x, RSTART);
      head = substr(x, 1, RSTART-1) ", ";
      while (match(head, /, /)) {
        prog = substr(head, 1, RSTART-1);
        head = substr(head, RSTART+2);
        # Names other than the file name are tagged with the page name.
        if (prog != progname)
          prog = prog " [" progname "]";
        printf "%-*s (%s) %s\n", 20, prog, actual_section, after_dash;
      }
    }

    {                  # Main action - process each filename read in.
      filename = $0;
      do_one();
    }

        ' "pages=$pages" "section=$section" "verbose=$verbose" "curdir=$curdir"
    fi
}

# First pass handles the man directories, second pass the cat directories.
for pages in man cat
do
   export pages
   # Select $manpath or $catpath for this pass.
   pathvar=${pages}path
   path=${!pathvar}
   for mandir in $path
   do
     if [ -n "$verbose" ]; then
        echo "about to enter $mandir" >&2
     fi
     here=$(pwd)
     if cd "$mandir"
     then
        # Everything the section loop prints is collected in TMPFILE.
        for i in $sections
        do
           if [ -d "${pages}$i" ] && cd "${pages}$i"
           then
              section=$i
              # Use the absolute path of the section directory so that
              # find also works when mandir was given as a relative path.
              curdir=$(pwd)
              export section verbose curdir

              # findarg0/findarg hold several find arguments each and are
              # deliberately left unquoted.
              find "$curdir" -name '*' $findarg0 $findarg |
              while IFS= read -r x
              do
                 index_page "$x"
              done
              cd ..
           fi
        done > "$TMPFILE"

        cd "$here" || exit 1
     else
        # Directory not accessible: contribute nothing from it.
        : > "$TMPFILE"
     fi

     # Merge the existing database with the new entries, dropping blank
     # lines and duplicates.
     if [ -f "$whatisdb" ]
     then
        cat "$whatisdb" >> "$TMPFILE"
     fi
     tr -s '\n' < "$TMPFILE" | sort -u > "$whatisdb"

     chmod 644 "$whatisdb"
   done
done

# remove the temporary file
rm -f -- "$TMPFILE"

else            # we're here if TMPFILE creation failed
    exit 1
fi