/* -*- Mode: C -*-
 * =====================================================================
 * html-man-page.c -- CGI script to dump a Unix man page in HTML
 * Author          : John Hamer
 * Created On      : Wed May 10 10:37:35 1995
 * Last Modified By: John Hamer
 * Last Modified On: Wed May 10 14:29:29 1995
 * Copyright (c) 1995, John Hamer

This file is distributed in the hope that it will be useful, but
without any warrantee.  Permission is granted to anyone to distribute
verbatim copies of this source code as received, in any medium,
provided that the copyright notice, the nonwarrantee warning, and this
permission notice are preserved.  Permission is granted to distribute
modified versions of this source code, or of portions of it, under the
above conditions, plus the conditions that all changed files carry
prominent notices stating who last changed them and that the derived
material is subject to this same permission notice.  Permission is
granted to include this material in products for which money is
charged, provided that the customer is given written notice that the
code is (or is derived from) material provided by John Hamer, and that
the customer is given this source code on request.

 * Purpose:

This program is intended to reside in the CGI (``Common Gateway
Interface'') for a HTTP (the World-Wide-Web protocol) server, and be
invoked with a URL of the form

      ...path-to-this-binary...?page=PAGE&section=SECTION

E.g. on the department's server, a URL might look like

      http://cs20.cs.auckland.ac.nz/staffbin/html-man-page?page=man

The section part is optional, and defaults to section 1 (good
choice...)

This program reads the query string that is passed in the environment
and invokes the ``man'' program to produce the manual page.  The
output of ``man'' is piped through a filter that converts ugly
``nroff'' formatting sequences into HTML markup (everything is done
with backspaces -- i.e. nroff assumes an old-fashioned line printer!)

This program was written as a C program rather than a shell script to (a) minimise the load on the machine; and (b) to avoid all potential security problems that can arise from shell meta-characters appearing in the query string. * ===================================================================== */ #include /* For strcmp(), strncmp() */ #include /* For puts(), fputs(), */ /* getchar(), putchar() */ #include /* For getenv() */ #include /* For pipe(), dup2(), execl() */ #include /* For wait() */ #include /* For perror() */ void man2html( char *page, char *section ); void main ( void ) { char *query, *page = (char*)0, *section = "1", *sp; puts("Content-type: text/html\n\n"); if( strcmp(getenv("REQUEST_METHOD"), "GET") != 0 ) { puts("This script should be referenced with a METHOD of GET."); puts("Are you sure you know what you are doing?"); exit(0); } query = getenv("QUERY_STRING"); /* Extract page and section from query. We don't bother with decoding escaped characters, because there shouldn't be any, and feeding man the escaped form won't matter (junk is junk...). */ for( sp = query; *sp; sp++ ) { if( strncmp( sp, "page=", 5) == 0 ) page = &sp[5]; else if( strncmp( sp, "section=", 8) == 0 ) section = &sp[8]; while( *sp && *sp != '&' ) sp++; if( *sp == '&' ) *sp = '\0'; } if( page == (char*) 0 ) { puts("No man page specified!"); exit(0); } { int man_pipe[2]; pipe(man_pipe); if( fork() == 0 ) { close(0); close(man_pipe[0]); dup2(man_pipe[1], 1); dup2(man_pipe[1], 2); execl("/usr/ucb/man", "man", "-", section, page, 0); puts("! Unable to exec the man command"); perror("man"); } else { close(man_pipe[1]); dup2(man_pipe[0], 0); /* Filter the man output to look like HTML */ man2html(page, section); { int status; wait( &status ); /* ``man'' must finish first? */ } } } } /* The function man2html() is fed the output of the man command on stdin. We convert sequences of x\bxy\by... (where x is any character other than underscore) to xy... Sequences of _\bx_\by... 
// are converted to <U>xy...</U>. */

/* Nroff sometimes emits a backspace sequence that overwrites one
   character with another (e.g. a `o' and a `+') to produce a special
   effect.  In general, this is used to produce a nice looking `blob',
   so an overstrike of two different characters is shown as `*'. */

typedef enum { S_PLAIN, S_BOLD, S_UNDERLINE } state_t;
typedef enum { ON, OFF } flag_t;

/* Write one character to stdout, replacing the three characters that
   are markup in HTML text (`<', `>' and `&') by character entities. */
static void put_html_char( int c )
{
  switch( c ) {
  case '<': fputs("&lt;", stdout);  break;
  case '>': fputs("&gt;", stdout);  break;
  case '&': fputs("&amp;", stdout); break;
  default:  putchar( c );           break;
  }
}

/* Write a NUL-terminated string to stdout through put_html_char(). */
static void put_html_string( const char *text )
{
  for( ; *text != '\0'; text++ )
    put_html_char( (unsigned char) *text );
}

/* Write the opening (f == ON) or closing (f == OFF) tag that belongs
   to style s.  S_PLAIN has no tag, so nothing is written for it. */
static inline void emit_html_tag( state_t s, flag_t f )
{
  switch( s ) {
  case S_PLAIN:
    break;
  case S_BOLD:
    fputs(f == ON ? "<B>" : "</B>", stdout);
    break;
  case S_UNDERLINE:
    fputs(f == ON ? "<U>" : "</U>", stdout);
    break;
  }
}

/* Switch the current style *s to next: close the tag of the old style
   and open the tag of the new one.  Does nothing when the style is
   already next. */
static inline void change_state( state_t *s, state_t next )
{
  if( *s != next ) {
    emit_html_tag( *s, OFF );
    *s = next;
    emit_html_tag( next, ON );
  }
}

/*
 * Copy stdin to stdout, turning nroff overstrike sequences into HTML.
 *
 * page, section: used only for the document title; they are written
 *                through put_html_string().
 *
 * Input is examined through a window of three characters.  When the
 * middle one is a backspace the triple is an overstrike:
 *
 *     _ \b x   or   x \b _     ->  x, underlined
 *     x \b x                   ->  x, bold
 *     x \b y  (anything else)  ->  `*', in whatever style is current
 *
 * and all three characters are consumed.  Otherwise the first character
 * of the window is written in plain style (a stray backspace is
 * dropped) and the window slides forward by one.
 *
 * The characters are held in ints so that EOF can be told apart from
 * every real character value.
 */
void man2html( char *page, char *section )
{
  int win[3];
  int have = 0;                 /* number of valid entries in win[] */
  int c;
  int i;
  state_t s = S_PLAIN;

  fputs("<TITLE>Unix Manual Entry for ", stdout);
  put_html_string( page );
  fputs(" (", stdout);
  put_html_string( section );
  fputs(")</TITLE>\n<PRE>\n", stdout);

  while( (c = getchar()) != EOF ) {
    win[have++] = c;
    if( have < 3 )
      continue;                 /* keep filling the window */

    if( win[1] == '\b' ) {
      if( win[0] == '_' ) {
        change_state( &s, S_UNDERLINE );
        put_html_char( win[2] );
      } else if( win[2] == '_' ) {
        change_state( &s, S_UNDERLINE );
        put_html_char( win[0] );
      } else if( win[0] == win[2] ) {
        change_state( &s, S_BOLD );
        put_html_char( win[2] );
      } else {
        putchar('*');
      }
      have = 0;                 /* whole triple consumed */
    } else {
      change_state( &s, S_PLAIN );
      if( win[0] != '\b' )
        /*
           Some man pages use o\bo\b+\b+ to make a bold \oplus symbol.
           It is unclear how best to handle this, so we just ignore the \b
           and allow `o+' to appear on the output.
           */
        put_html_char( win[0] );
      win[0] = win[1];
      win[1] = win[2];
      have = 2;
    }
  }

  /* End of input: whatever is still waiting in the window can no longer
     be part of an overstrike, so write it out as plain text. */
  change_state( &s, S_PLAIN );
  for( i = 0; i < have; i++ )
    if( win[i] != '\b' )
      put_html_char( win[i] );

  fputs("\n</PRE>\n", stdout);
}