(* $Id: db2man.ml,v 1.1 1999/06/25 19:57:19 gerd Exp $ * ---------------------------------------------------------------------- * *) (* Usage: * * db2man < file.xml * * where file.xml is a docbook document in XML representation. * All 'refentry' sections are transformed to manual pages. The * manual pages are stored in the current directory. * * Only a subset of the docbook tags that are allowed in 'refentry' * are supported. *) open Xml;; open Error;; let devnull = open_out "/dev/null";; type etree = { mutable el : element; mutable parent : etree option; mutable children : etree list; mutable id_index : (string, etree) Hashtbl.t (* An index where the "ID" attributes are *) } ;; let mk_etree e = let index = Hashtbl.create 1000 in let update al x = try let id = List.assoc "ID" al in Hashtbl.add index id x with Not_found -> () in let rec mk p e = let x = { el = e; parent = p; children = []; id_index = index; } in x.children <- begin match e with Eelement (n,al,el) -> update al x; List.map (mk (Some x)) el | Eempty (n,al) -> update al x; [] | _ -> [] end; x in mk None e ;; let name_of_parent e = match e.parent with None -> "" | Some p -> begin match p.el with Eelement (n,_,_) -> n | Eempty (n,_) -> n | _ -> "" end ;; let child name e = let rec search ech_list = match ech_list with ech :: ech_list' -> begin match ech.el with Eelement (n,al,el) -> if n=name then ech else search ech_list' | Eempty (n,al) -> if n=name then ech else search ech_list' | _ -> search ech_list' end | [] -> raise Not_found in search e.children ;; let as_string e = let rec concat s ech_list = match ech_list with ech :: ech_list' -> begin match ech.el with Echunk s' -> concat (s ^ s') ech_list' | Epi _ -> concat s ech_list' | _ -> concat (s ^ concat "" ech.children) ech_list' end | [] -> s in concat "" [e] ;; let child_string name e default = try as_string (child name e) with Not_found -> default ;; type font = Roman | Bold | Italics;; type groff_context = { mutable section_has_para : bool; (* There is already a paragraph in this section *) mutable para_has_content : bool; (* There are characters in the current paragraph *) mutable nofill : bool; (* troff is in 'nofill' mode *) mutable verbatim : bool; (* whitespace is taken verbatim *) mutable font : font; (* current troff font *) mutable line : string; (* In verbatim mode: The current line if it has not been * ended by a newline character *) mutable out : out_channel; (* the output file of the troff statements *) } ;; let in_words_ws s = (* splits s in words separated by whitespace *) let l = String.length s in let rec split i j = if j < l then match s.[j] with (' '|'\t'|'\n'|'\r') -> if i<j then (String.sub s i (j-i)) :: (split (j+1) (j+1)) else split (j+1) (j+1) | _ -> split i (j+1) else if i<j then [ String.sub s i (j-i) ] else [] in split 0 0 ;; let in_lines s = (* splits s in lines separated by '\n'; empty lines remain *) let l = String.length s in let rec split i = if i < l then try let j = String.index_from s i '\n' in String.sub s i (j-i) :: split (j+1) with Not_found -> [ String.sub s i (l-i) ] else [ "" ] in split 0 ;; let norm_gen allow_spaces_left allow_spaces_right s = (* normalize whitespace; * allow_spaces_left: spaces at the beginning of s remain; * allow_spaces_right: spaces at the end of s remain *) let s' = String.concat " " (in_words_ws s) in if s <> "" then let l = String.length s - 1 in let s'' = if allow_spaces_right then match s.[l] with (' '|'\t'|'\n'|'\r') -> s' ^ " " | _ -> s' else s' in if allow_spaces_left then match s.[0] with (' '|'\t'|'\n'|'\r') -> " " ^ s'' | _ -> s'' else s'' else "" ;; let norm = norm_gen false false;; let norm_lr = norm_gen true true;; let untabify s = (* transform tab characters to spaces *) let l = String.length s in let rec loop i pos = if i < l then begin let c = s.[i] in match c with '\t' -> let n = 8 - pos mod 8 in String.make n ' ' :: loop (i+1) (pos+n) | _ -> String.make 1 c :: loop (i+1) (pos+1) end else [] in String.concat "" (loop 0 0) ;; let chartrans s = (* transform character entities such as < *) let l = String.length s in let rec loop i = if i < l then begin let c = s.[i] in match c with '&' -> begin try let j = String.index_from s i ';' in let s' = match String.sub s (i+1) (j-i-1) with "gt" -> ">" | "lt" -> "<" | "amp" -> "&" | _ -> "" in s' :: loop (j+1) with Not_found -> loop (i+1) end | _ -> String.make 1 c :: loop (i+1) end else [] in String.concat "" (loop 0) ;; let print_chunk out verbatim s = (* output the text 's' as troff line to channel 'out'. * verbatim: spaces are treates as unpaddable spaces. *) let s' = chartrans s in let l = String.length s' in let rec print k = if k < l then let c = s'.[k] in begin match c with (' '|'\t') -> if verbatim then output_string out "\\ " else output_string out " " | '\\' -> output_string out "\\e" | _ -> output_char out c end; print (k+1) in if s' <> "" then begin let l = String.length s' - 1 in if s'.[0] = '.' or s'.[0] = '\'' or s'.[0] = ' ' then output_string out "\\&"; let spaceatend = let c = s'.[l] in c = ' ' or c = '\t' in print 0; if verbatim or not spaceatend then output_string out "\\c"; output_string out "\n" end ;; let print_text c s = (* output text 's' in context 'c' *) let rec print_butlast l = match l with [] -> "" | [s] -> s | s :: l' -> print_chunk c.out true (untabify s); output_string c.out "\\&\n"; output_string c.out ".br\n"; print_butlast l' in if c.verbatim then begin let lines = in_lines (c.line ^ s) in let line' = print_butlast lines in { c with line = line'; para_has_content = true; section_has_para = true; } end else begin let s' = norm_gen c.para_has_content true (c.line ^ s) in let s'' = if c.para_has_content or s' <> " " then s' else "" in print_chunk c.out false s''; { c with line = ""; para_has_content = c.para_has_content or s'' <> ""; section_has_para = c.section_has_para or s'' <> ""; } end ;; let print_macro out name arguments = (* TODO: process special characters *) let preprocess s = let s' = String.copy s in for i = 0 to String.length s - 1 do let c = s.[i] in match c with '"' -> s'.[i] <- '\'' (* a compromise *) | _ -> s'.[i] <- c done; s' in output_string out (String.concat " " (name :: (List.map (fun a -> "\"" ^ preprocess a ^ "\"") arguments))); output_string out "\n" ;; let switch_context current_context next_context = (* handles 'font', 'nofill' switches *) let out = current_context.out in print_chunk out current_context.verbatim (untabify current_context.line); if current_context.font <> next_context.font then begin match next_context.font with Roman -> output_string out ".ft R\n" | Bold -> output_string out ".ft B\n" | Italics -> output_string out ".ft I\n" end; if current_context.nofill <> next_context.nofill then begin match next_context.nofill with true -> output_string out ".nf\n" | false -> output_string out ".fi\n" end; { next_context with line = "" } ;; let rec process c e = (* process element 'e' in context 'c' *) let rec process_children c = let rec next c ech_list = match ech_list with [] -> c | ech :: ech_list' -> next (process c ech) ech_list' in next c e.children in let new_para c = let c' = switch_context c c in if c.para_has_content then print_macro c.out ".PP" []; output_string c.out ".ft R\n"; { c' with font = Roman; section_has_para = true; para_has_content = false; } in let process_element n al = match n with "REFENTRY" -> let title, volume, misc = try let refmeta = child "REFMETA" e in (child_string "REFENTRYTITLE" refmeta "NONAME"), (child_string "MANVOLNUM" refmeta "1"), (child_string "REFMISCINFO" refmeta "") with Not_found -> "NONAME", "1", "" in let out = open_out (title ^ "." ^ volume) in print_macro out ".TH" [ norm title; norm volume; norm misc; "User Manual"]; let c' = process_children { c with out = out } in close_out out; {c' with out = devnull } | "REFMETA" -> c | "REFNAMEDIV" -> c | ("REFSYNOPSISDIV" | "REFSECT1") -> let title = child_string "TITLE" e "Untitled" in print_macro c.out ".SH" [ norm title ]; output_string c.out ".ft R\n"; process_children { c with section_has_para = false; para_has_content = false; font = Roman } | "REFSECT2" -> let title = child_string "TITLE" e "Untitled" in print_macro c.out ".SS" [ norm title ]; output_string c.out ".ft R\n"; process_children { c with section_has_para = false; para_has_content = false; font = Roman } | "REFSECT3" -> let title = child_string "TITLE" e "Untitled" in if c.section_has_para then print_macro c.out ".PP" []; print_macro c.out ".B" [ norm title ]; output_string c.out ".ft R\n"; print_macro c.out ".br" []; process_children { c with section_has_para = true; para_has_content = false; font = Roman } | ("SYNOPSIS" | "PROGRAMLISTING") -> let c' = switch_context (new_para c) { c with font = Bold; verbatim = true; nofill = true; } in let c'' = process_children c' in switch_context c'' { c'' with verbatim = c.verbatim; line = ""; nofill = c.nofill; font = c.font } | "PARA" -> let c' = new_para c in let c'' = switch_context c' { c' with font = Roman; section_has_para = true; para_has_content = false; } in process_children c'' | ("VARIABLELIST" | "ITEMIZEDLIST") -> let c' = new_para c in let c'' = process_children c' in new_para c'' | "TERM" -> let c' = switch_context c c in if c'.para_has_content then output_string c.out ".sp\n"; print_macro c.out ".RS" [ "2m" ]; output_string c.out ".ft R\n"; let c'' = switch_context c' { c' with font = Roman; section_has_para = true; para_has_content = false; } in let c''' = process_children c'' in print_macro c.out ".RE" []; output_string c.out ".ft R\n"; switch_context c''' { c''' with font = Roman; section_has_para = true; para_has_content = true; } | "LISTITEM" -> let mkbullets = (name_of_parent e = "ITEMIZEDLIST") in let c' = switch_context c c in if c'.para_has_content then output_string c.out ".sp\n"; print_macro c.out ".RS" [ "7m" ]; output_string c.out ".ft R\n"; if mkbullets then begin output_string c.out "\\&\\h'-3m'\\z\\(bu\\h'3m'\\c\n"; end; let c'' = switch_context c' { c' with font = Roman; section_has_para = true; para_has_content = false; } in let c''' = process_children c'' in print_macro c.out ".RE" []; output_string c.out ".ft R\n"; switch_context c''' { c''' with font = Roman; section_has_para = true; para_has_content = true; } | "REPLACEABLE" -> let c' = switch_context c { c with font = Italics } in let c'' = process_children c' in switch_context c'' { c'' with font = c.font } | "EMPHASIS" -> let c' = switch_context c { c with font = Bold } in let c'' = process_children c' in switch_context c'' { c'' with font = c.font } | ("LINK"|"XREF") -> let c' = try let endterm = List.assoc "ENDTERM" al in let text = try as_string (Hashtbl.find e.id_index endterm) with Not_found -> "[UNRESOLVED LINK]" in print_text c text with Not_found -> process_children c in c' | "TITLE" -> c | n -> prerr_endline ("NOT PROCESSED: " ^ n); process_children c in match e.el with Echunk s -> print_text c s | Epi (n,sl) -> (* prerr_endline ("PI: " ^ n ^ " " ^ String.concat "," sl); *) c | Eempty (n,al) -> process_element n al | Eelement (n,al,_) -> process_element n al ;; let tree ch = let position (line,col) = "arround line " ^ (string_of_int line) ^ ", column " ^ (string_of_int col) in let lexbuf = Lexing.from_channel ch in let doc = try Xmlscan.init(); Xmlparse.document Xmlscan.scan lexbuf with exn -> let pos = position (Xmlscan.position lexbuf) in begin match exn with | Parsing.Parse_error -> error ("syntax error " ^ pos) | Error msg -> error ("syntax error " ^ pos ^ ": " ^ msg) | Sys_error msg -> error ("I/O error: " ^ msg) | _ -> raise exn end in let XML(prolog, e, pl) = doc in e ;; let main() = let t = mk_etree(tree stdin) in let _ = process { section_has_para = false; para_has_content = false; verbatim = false; nofill = false; line = ""; out = devnull; font = Roman } t in () ;; main();; (* ====================================================================== * History: * * $Log: db2man.ml,v $ * Revision 1.1 1999/06/25 19:57:19 gerd * Initial revision. * * *)