Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: db2man.ml,v 1.1 1999/06/25 19:57:19 gerd Exp $
 * ----------------------------------------------------------------------
 *
 *)

(* Usage:
 * 
 * db2man < file.xml
 *
 * where file.xml is a docbook document in XML representation.
 * All 'refentry' sections are transformed to manual pages. The
 * manual pages are stored in the current directory.
 *
 * Only a subset of the docbook tags that are allowed in 'refentry'
 * are supported.
 *)

open Xml;;
open Error;;

let devnull = open_out "/dev/null";;

type etree =
    { mutable el : element;
      mutable parent : etree option;
      mutable children : etree list;
      mutable id_index : (string, etree) Hashtbl.t
	(* An index where the "ID" attributes are *)
    } 
;;


let mk_etree e =
  let index = Hashtbl.create 1000 in

  let update al x =
    try
      let id = List.assoc "ID" al in
      Hashtbl.add index id x
    with
      Not_found -> ()
  in

  let rec mk p e =
    let x =
    { el = e;
      parent = p;
      children = [];
      id_index = index;
    } in
    x.children <-
      begin
        match e with
	  Eelement (n,al,el) -> update al x; List.map (mk (Some x)) el
	| Eempty (n,al)      -> update al x; []
	| _                  -> []
      end;
    x
  in
  mk None e
;;


let name_of_parent e =
  match e.parent with
    None -> ""
  | Some p ->
      begin
	match p.el with
	  Eelement (n,_,_) -> n
	| Eempty (n,_) -> n
	| _ -> ""
      end
;;


let child name e =
  let rec search ech_list =
    match ech_list with
      ech :: ech_list' ->
	begin match ech.el with
	  Eelement (n,al,el) -> if n=name then ech else search ech_list'
	| Eempty (n,al)      -> if n=name then ech else search ech_list'
	| _                  -> search ech_list'
	end
    | [] ->
	raise Not_found
  in
  search e.children
;;


let as_string e =
  let rec concat s ech_list =
    match ech_list with
      ech :: ech_list' ->
	begin match ech.el with
	  Echunk s' -> concat (s ^ s') ech_list'
	| Epi _ -> concat s ech_list'
	| _ -> concat (s ^ concat "" ech.children) ech_list'
	end
    | [] ->
	s
  in
  concat "" [e]
;;


let child_string name e default =
  try
    as_string (child name e)
  with
    Not_found -> default
;;


type font = Roman | Bold | Italics;;

type groff_context =
    { mutable section_has_para : bool;  
              (* There is already a paragraph in this section *)
      mutable para_has_content : bool;
              (* There are characters in the current paragraph *)
      mutable nofill : bool;
              (* troff is in 'nofill' mode *)
      mutable verbatim : bool;
              (* whitespace is taken verbatim *)
      mutable font : font;
              (* current troff font *)
      mutable line : string;
              (* In verbatim mode: The current line if it has not been
	       * ended by a newline character
	       *)
      mutable out : out_channel;
              (* the output file of the troff statements *)
    } 
;;


let in_words_ws s =
  (* splits s in words separated by whitespace *)
  let l = String.length s in
  let rec split i j =
    if j < l then
      match s.[j] with
        (' '|'\t'|'\n'|'\r') ->
          if i<j then (String.sub s i (j-i)) :: (split (j+1) (j+1))
                 else split (j+1) (j+1)
      | _ ->
          split i (j+1)
    else
      if i<j then [ String.sub s i (j-i) ] else []
  in
  split 0 0
;;


let in_lines s =
  (* splits s in lines separated by '\n'; empty lines remain *)
  let l = String.length s in
  let rec split i =
    if i < l then
      try
	let j = String.index_from s i '\n' in
	String.sub s i (j-i) :: split (j+1)
      with
	Not_found ->
	  [ String.sub s i (l-i) ]
    else
      [ "" ]
  in
  split 0
;;


let norm_gen allow_spaces_left allow_spaces_right s = 
  (* normalize whitespace;
   * allow_spaces_left: spaces at the beginning of s remain;
   * allow_spaces_right: spaces at the end of s remain
   *)
  let s' = String.concat " " (in_words_ws s) in
  if s <> "" then
    let l = String.length s - 1 in
    let s'' =
      if allow_spaces_right then
	match s.[l] with
	  (' '|'\t'|'\n'|'\r') -> s' ^ " "
	| _ -> s'
      else s'
    in
    if allow_spaces_left then
      match s.[0] with
	(' '|'\t'|'\n'|'\r') -> " " ^ s''
      | _ -> s''
    else
      s''
  else
    ""
;;


let norm = norm_gen false false;;
let norm_lr = norm_gen true true;;


let untabify s =
  (* transform tab characters to spaces *)
  let l = String.length s in
  let rec loop i pos =
    if i < l then begin
      let c = s.[i] in
      match c with
	'\t' -> let n = 8 - pos mod 8 in
	        String.make n ' ' :: loop (i+1) (pos+n)
      |	_    -> String.make 1 c :: loop (i+1) (pos+1)
    end
    else
      []
  in
  String.concat "" (loop 0 0)
;;


let chartrans s =
  (* transform character entities such as &lt; *)
  let l = String.length s in
  let rec loop i =
    if i < l then begin
      let c = s.[i] in
      match c with
	'&' ->
	  begin try
	    let j = String.index_from s i ';' in
	    let s' =
	      match String.sub s (i+1) (j-i-1) with
		"gt" ->  ">" 
	      | "lt" ->  "<"
	      | "amp" -> "&"
	      |	_    -> ""
	    in
	    s' :: loop (j+1)
	  with
	    Not_found -> loop (i+1)
	  end
      | _ ->
	  String.make 1 c :: loop (i+1)
    end
    else []
  in
  String.concat "" (loop 0)
;;


let print_chunk out verbatim s =
  (* output the text 's' as troff line to channel 'out'.
   * verbatim: spaces are treates as unpaddable spaces.
   *)
  let s' = chartrans s in
  let l = String.length s' in
  let rec print k =
    if k < l then
      let c = s'.[k] in
      begin match c with
	(' '|'\t') -> if verbatim then output_string out "\\ " else output_string out " "
      |	'\\' -> output_string out "\\e"
      | _ -> output_char out c
      end;
      print (k+1)
  in 

  if s' <> "" then begin
    let l = String.length s' - 1 in
    if s'.[0] = '.' or s'.[0] = '\'' or s'.[0] = ' ' then output_string out "\\&";
    let spaceatend = 
      let c = s'.[l] in
      c = ' ' or c = '\t' in
    print 0;
    if verbatim or not spaceatend then output_string out "\\c";
    output_string out "\n"
  end
;;


let print_text c s =
  (* output text 's' in context 'c' *)
  let rec print_butlast l =
    match l with
      [] -> ""
    | [s] -> s
    | s :: l' ->
	print_chunk c.out true (untabify s);
	output_string c.out "\\&\n";
	output_string c.out ".br\n";
	print_butlast l'
  in
  
  if c.verbatim then begin
    let lines = in_lines (c.line ^ s) in
    let line' = print_butlast lines in
    { c with line = line';
      para_has_content = true;
      section_has_para = true;
    } 
  end
  else begin
    let s' = norm_gen c.para_has_content true (c.line ^ s) in
    let s'' = if c.para_has_content or s' <> " " then s' else "" in
    print_chunk c.out false s'';
    { c with line = ""; 
      para_has_content = c.para_has_content or s'' <> "";
      section_has_para = c.section_has_para or s'' <> "";
    }
  end
;;


let print_macro out name arguments =
  (* TODO: process special characters *)
  let preprocess s =
    let s' = String.copy s in
    for i = 0 to String.length s - 1 do
      let c = s.[i] in
      match c with
	'"' -> s'.[i] <- '\''     (* a compromise *)
      |	_   -> s'.[i] <- c
    done;
    s'
  in

  output_string out
    (String.concat
       " "
       (name ::
	(List.map
	   (fun a -> "\"" ^ preprocess a ^ "\"")
	   arguments)));
  output_string out "\n"
;;


let switch_context current_context next_context =
  (* handles 'font', 'nofill' switches *)
  let out = current_context.out in
  print_chunk 
    out
    current_context.verbatim 
    (untabify current_context.line);
  if current_context.font <> next_context.font then begin
    match next_context.font with
      Roman   -> output_string out ".ft R\n"
    | Bold    -> output_string out ".ft B\n"
    | Italics -> output_string out ".ft I\n"
  end;
  if current_context.nofill <> next_context.nofill then begin
    match next_context.nofill with
      true  -> output_string out ".nf\n"
    | false -> output_string out ".fi\n"
  end;
  { next_context with line = "" }
;;



let rec process c e =
  (* process element 'e' in context 'c' *)
  let rec process_children c =
    let rec next c ech_list =
      match ech_list with
	[] -> c
      |	ech :: ech_list' -> next (process c ech) ech_list'
    in
    next c e.children
  in

  let new_para c =
    let c' = switch_context c c in
    if c.para_has_content then print_macro c.out ".PP" [];
    output_string c.out ".ft R\n";
    { c' with font = Roman; 
      section_has_para = true;
      para_has_content = false;
    } 
  in

  let process_element n al =
    match n with
      "REFENTRY" -> 
	let title, volume, misc = 
	  try
	    let refmeta = child "REFMETA" e in
	    (child_string "REFENTRYTITLE" refmeta "NONAME"),
	    (child_string "MANVOLNUM" refmeta "1"),
	    (child_string "REFMISCINFO" refmeta "")
	  with
	    Not_found -> "NONAME", "1", "" in
	let out =
	  open_out (title ^ "." ^ volume) in
	print_macro out ".TH" [ norm title; norm volume; norm misc; "User Manual"];
	let c' = process_children { c with out = out } in
	close_out out;
	{c' with out = devnull }
    | "REFMETA" ->
	c
    | "REFNAMEDIV" ->
	c
    | ("REFSYNOPSISDIV" | "REFSECT1") ->
	let title = child_string "TITLE" e "Untitled" in
	print_macro c.out ".SH" [ norm title ];
	output_string c.out ".ft R\n";
	process_children { c with section_has_para = false;
			   para_has_content = false;
			   font = Roman }
    | "REFSECT2" ->
	let title = child_string "TITLE" e "Untitled" in
	print_macro c.out ".SS" [ norm title ];
	output_string c.out ".ft R\n";
	process_children { c with section_has_para = false;
			   para_has_content = false;
			   font = Roman }
    | "REFSECT3" ->
	let title = child_string "TITLE" e "Untitled" in
	if c.section_has_para then print_macro c.out ".PP" [];
	print_macro c.out ".B" [ norm title ];
	output_string c.out ".ft R\n";
	print_macro c.out ".br" [];
	process_children { c with section_has_para = true;
			   para_has_content = false;
			   font = Roman }
    | ("SYNOPSIS" | "PROGRAMLISTING") ->
	let c' = switch_context (new_para c) 
	                        { c with font = Bold;
				  verbatim = true;
				  nofill = true;
				} in
	let c'' = process_children c' in
	switch_context c'' { c'' with verbatim = c.verbatim;
			     line = "";
			     nofill = c.nofill;
			     font = c.font
			   }
    | "PARA" ->
	let c' = new_para c in
	let c'' = switch_context c' { c' with font = Roman;
				      section_has_para = true;
				      para_has_content = false;
				    } in
	process_children c''
    | ("VARIABLELIST" | "ITEMIZEDLIST") ->
	let c' = new_para c in
	let c'' = process_children c' in
	new_para c''
    | "TERM" ->
	let c' = switch_context c c in
	if c'.para_has_content then output_string c.out ".sp\n";
	print_macro c.out ".RS" [ "2m" ];
	output_string c.out ".ft R\n";
	let c'' = switch_context c' { c' with font = Roman;
				      section_has_para = true;
				      para_has_content = false;
				    } in
	let c''' = process_children c'' in
	print_macro c.out ".RE" [];
	output_string c.out ".ft R\n";
	switch_context c''' { c''' with font = Roman;
			      section_has_para = true;
			      para_has_content = true;
			    }
	
    | "LISTITEM" ->
	let mkbullets = (name_of_parent e = "ITEMIZEDLIST") in
	let c' = switch_context c c in
	if c'.para_has_content then output_string c.out ".sp\n";
	print_macro c.out ".RS" [ "7m" ];
	output_string c.out ".ft R\n";
	if mkbullets then begin
	  output_string c.out "\\&\\h'-3m'\\z\\(bu\\h'3m'\\c\n";
	end;
	let c'' = switch_context c' { c' with font = Roman;
				      section_has_para = true;
				      para_has_content = false;
				    } in
	let c''' = process_children c'' in
	print_macro c.out ".RE" [];
	output_string c.out ".ft R\n";
	switch_context c''' { c''' with font = Roman;
			      section_has_para = true;
			      para_has_content = true;
			    }
	
    | "REPLACEABLE" ->
	let c' = switch_context c { c with font = Italics } in
	let c'' = process_children c' in
	switch_context c'' { c'' with font = c.font }
    | "EMPHASIS" ->
	let c' = switch_context c { c with font = Bold } in
	let c'' = process_children c' in
	switch_context c'' { c'' with font = c.font }
    | ("LINK"|"XREF") ->
	let c' = try
	  let endterm = List.assoc "ENDTERM" al in
	  let text = 
	    try
	      as_string (Hashtbl.find e.id_index endterm)
	    with
	      Not_found -> "[UNRESOLVED LINK]"
	  in
	  print_text c text
	with
	  Not_found -> 
	    process_children c 
	in
	c'
    | "TITLE" ->
	c
    | n ->
	prerr_endline ("NOT PROCESSED: " ^ n);
	process_children c
  in


  match e.el with
    Echunk s ->
      print_text c s
  | Epi (n,sl) -> 
      (* prerr_endline ("PI: " ^ n ^ " " ^ String.concat "," sl); *)
      c
  | Eempty (n,al) ->
      process_element n al
  | Eelement (n,al,_) ->
      process_element n al 
;;


let tree ch =
  let position (line,col) = "arround line "
    ^ (string_of_int line)
    ^ ", column "
    ^ (string_of_int col) in

  let lexbuf = Lexing.from_channel ch in
  let doc =
    try
      Xmlscan.init();
      Xmlparse.document Xmlscan.scan lexbuf
    with exn ->
        let pos = position (Xmlscan.position lexbuf) in
        begin match exn with
          | Parsing.Parse_error   -> error ("syntax error " ^ pos)
          | Error msg             -> error ("syntax error " ^ pos ^ ": " ^ msg)
          | Sys_error msg         -> error ("I/O error: " ^ msg)
          | _                     -> raise exn
        end
  in
  let XML(prolog, e, pl) = doc in
  e 
;;

let main() =
  let t = mk_etree(tree stdin) in
  let _ = process { section_has_para = false;
		    para_has_content = false;
		    verbatim = false;
		    nofill = false;
		    line = "";
		    out = devnull;
		    font = Roman } 
                  t
  in
  ()
;;


 main();;

(* ======================================================================
 * History:
 * 
 * $Log: db2man.ml,v $
 * Revision 1.1  1999/06/25 19:57:19  gerd
 * 	Initial revision.
 *
 * 
 *)

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml