(* $Id: db2man.ml,v 1.1 1999/06/25 19:57:19 gerd Exp $
* ----------------------------------------------------------------------
*
*)
(* Usage:
*
* db2man < file.xml
*
* where file.xml is a docbook document in XML representation.
* All 'refentry' sections are transformed to manual pages. The
* manual pages are stored in the current directory.
*
* Only a subset of the docbook tags that are allowed in 'refentry'
* are supported.
*)
open Xml;;
open Error;;
let devnull = open_out "/dev/null";;
type etree =
{ mutable el : element;
mutable parent : etree option;
mutable children : etree list;
mutable id_index : (string, etree) Hashtbl.t
(* An index where the "ID" attributes are *)
}
;;
let mk_etree e =
let index = Hashtbl.create 1000 in
let update al x =
try
let id = List.assoc "ID" al in
Hashtbl.add index id x
with
Not_found -> ()
in
let rec mk p e =
let x =
{ el = e;
parent = p;
children = [];
id_index = index;
} in
x.children <-
begin
match e with
Eelement (n,al,el) -> update al x; List.map (mk (Some x)) el
| Eempty (n,al) -> update al x; []
| _ -> []
end;
x
in
mk None e
;;
let name_of_parent e =
match e.parent with
None -> ""
| Some p ->
begin
match p.el with
Eelement (n,_,_) -> n
| Eempty (n,_) -> n
| _ -> ""
end
;;
let child name e =
let rec search ech_list =
match ech_list with
ech :: ech_list' ->
begin match ech.el with
Eelement (n,al,el) -> if n=name then ech else search ech_list'
| Eempty (n,al) -> if n=name then ech else search ech_list'
| _ -> search ech_list'
end
| [] ->
raise Not_found
in
search e.children
;;
let as_string e =
let rec concat s ech_list =
match ech_list with
ech :: ech_list' ->
begin match ech.el with
Echunk s' -> concat (s ^ s') ech_list'
| Epi _ -> concat s ech_list'
| _ -> concat (s ^ concat "" ech.children) ech_list'
end
| [] ->
s
in
concat "" [e]
;;
let child_string name e default =
try
as_string (child name e)
with
Not_found -> default
;;
type font = Roman | Bold | Italics;;
type groff_context =
{ mutable section_has_para : bool;
(* There is already a paragraph in this section *)
mutable para_has_content : bool;
(* There are characters in the current paragraph *)
mutable nofill : bool;
(* troff is in 'nofill' mode *)
mutable verbatim : bool;
(* whitespace is taken verbatim *)
mutable font : font;
(* current troff font *)
mutable line : string;
(* In verbatim mode: The current line if it has not been
* ended by a newline character
*)
mutable out : out_channel;
(* the output file of the troff statements *)
}
;;
let in_words_ws s =
(* splits s in words separated by whitespace *)
let l = String.length s in
let rec split i j =
if j < l then
match s.[j] with
(' '|'\t'|'\n'|'\r') ->
if i<j then (String.sub s i (j-i)) :: (split (j+1) (j+1))
else split (j+1) (j+1)
| _ ->
split i (j+1)
else
if i<j then [ String.sub s i (j-i) ] else []
in
split 0 0
;;
let in_lines s =
(* splits s in lines separated by '\n'; empty lines remain *)
let l = String.length s in
let rec split i =
if i < l then
try
let j = String.index_from s i '\n' in
String.sub s i (j-i) :: split (j+1)
with
Not_found ->
[ String.sub s i (l-i) ]
else
[ "" ]
in
split 0
;;
let norm_gen allow_spaces_left allow_spaces_right s =
(* normalize whitespace;
* allow_spaces_left: spaces at the beginning of s remain;
* allow_spaces_right: spaces at the end of s remain
*)
let s' = String.concat " " (in_words_ws s) in
if s <> "" then
let l = String.length s - 1 in
let s'' =
if allow_spaces_right then
match s.[l] with
(' '|'\t'|'\n'|'\r') -> s' ^ " "
| _ -> s'
else s'
in
if allow_spaces_left then
match s.[0] with
(' '|'\t'|'\n'|'\r') -> " " ^ s''
| _ -> s''
else
s''
else
""
;;
let norm = norm_gen false false;;
let norm_lr = norm_gen true true;;
let untabify s =
(* transform tab characters to spaces *)
let l = String.length s in
let rec loop i pos =
if i < l then begin
let c = s.[i] in
match c with
'\t' -> let n = 8 - pos mod 8 in
String.make n ' ' :: loop (i+1) (pos+n)
| _ -> String.make 1 c :: loop (i+1) (pos+1)
end
else
[]
in
String.concat "" (loop 0 0)
;;
let chartrans s =
(* transform character entities such as < *)
let l = String.length s in
let rec loop i =
if i < l then begin
let c = s.[i] in
match c with
'&' ->
begin try
let j = String.index_from s i ';' in
let s' =
match String.sub s (i+1) (j-i-1) with
"gt" -> ">"
| "lt" -> "<"
| "amp" -> "&"
| _ -> ""
in
s' :: loop (j+1)
with
Not_found -> loop (i+1)
end
| _ ->
String.make 1 c :: loop (i+1)
end
else []
in
String.concat "" (loop 0)
;;
let print_chunk out verbatim s =
(* output the text 's' as troff line to channel 'out'.
* verbatim: spaces are treates as unpaddable spaces.
*)
let s' = chartrans s in
let l = String.length s' in
let rec print k =
if k < l then
let c = s'.[k] in
begin match c with
(' '|'\t') -> if verbatim then output_string out "\\ " else output_string out " "
| '\\' -> output_string out "\\e"
| _ -> output_char out c
end;
print (k+1)
in
if s' <> "" then begin
let l = String.length s' - 1 in
if s'.[0] = '.' or s'.[0] = '\'' or s'.[0] = ' ' then output_string out "\\&";
let spaceatend =
let c = s'.[l] in
c = ' ' or c = '\t' in
print 0;
if verbatim or not spaceatend then output_string out "\\c";
output_string out "\n"
end
;;
let print_text c s =
(* output text 's' in context 'c' *)
let rec print_butlast l =
match l with
[] -> ""
| [s] -> s
| s :: l' ->
print_chunk c.out true (untabify s);
output_string c.out "\\&\n";
output_string c.out ".br\n";
print_butlast l'
in
if c.verbatim then begin
let lines = in_lines (c.line ^ s) in
let line' = print_butlast lines in
{ c with line = line';
para_has_content = true;
section_has_para = true;
}
end
else begin
let s' = norm_gen c.para_has_content true (c.line ^ s) in
let s'' = if c.para_has_content or s' <> " " then s' else "" in
print_chunk c.out false s'';
{ c with line = "";
para_has_content = c.para_has_content or s'' <> "";
section_has_para = c.section_has_para or s'' <> "";
}
end
;;
let print_macro out name arguments =
(* TODO: process special characters *)
let preprocess s =
let s' = String.copy s in
for i = 0 to String.length s - 1 do
let c = s.[i] in
match c with
'"' -> s'.[i] <- '\'' (* a compromise *)
| _ -> s'.[i] <- c
done;
s'
in
output_string out
(String.concat
" "
(name ::
(List.map
(fun a -> "\"" ^ preprocess a ^ "\"")
arguments)));
output_string out "\n"
;;
let switch_context current_context next_context =
(* handles 'font', 'nofill' switches *)
let out = current_context.out in
print_chunk
out
current_context.verbatim
(untabify current_context.line);
if current_context.font <> next_context.font then begin
match next_context.font with
Roman -> output_string out ".ft R\n"
| Bold -> output_string out ".ft B\n"
| Italics -> output_string out ".ft I\n"
end;
if current_context.nofill <> next_context.nofill then begin
match next_context.nofill with
true -> output_string out ".nf\n"
| false -> output_string out ".fi\n"
end;
{ next_context with line = "" }
;;
let rec process c e =
(* process element 'e' in context 'c' *)
let rec process_children c =
let rec next c ech_list =
match ech_list with
[] -> c
| ech :: ech_list' -> next (process c ech) ech_list'
in
next c e.children
in
let new_para c =
let c' = switch_context c c in
if c.para_has_content then print_macro c.out ".PP" [];
output_string c.out ".ft R\n";
{ c' with font = Roman;
section_has_para = true;
para_has_content = false;
}
in
let process_element n al =
match n with
"REFENTRY" ->
let title, volume, misc =
try
let refmeta = child "REFMETA" e in
(child_string "REFENTRYTITLE" refmeta "NONAME"),
(child_string "MANVOLNUM" refmeta "1"),
(child_string "REFMISCINFO" refmeta "")
with
Not_found -> "NONAME", "1", "" in
let out =
open_out (title ^ "." ^ volume) in
print_macro out ".TH" [ norm title; norm volume; norm misc; "User Manual"];
let c' = process_children { c with out = out } in
close_out out;
{c' with out = devnull }
| "REFMETA" ->
c
| "REFNAMEDIV" ->
c
| ("REFSYNOPSISDIV" | "REFSECT1") ->
let title = child_string "TITLE" e "Untitled" in
print_macro c.out ".SH" [ norm title ];
output_string c.out ".ft R\n";
process_children { c with section_has_para = false;
para_has_content = false;
font = Roman }
| "REFSECT2" ->
let title = child_string "TITLE" e "Untitled" in
print_macro c.out ".SS" [ norm title ];
output_string c.out ".ft R\n";
process_children { c with section_has_para = false;
para_has_content = false;
font = Roman }
| "REFSECT3" ->
let title = child_string "TITLE" e "Untitled" in
if c.section_has_para then print_macro c.out ".PP" [];
print_macro c.out ".B" [ norm title ];
output_string c.out ".ft R\n";
print_macro c.out ".br" [];
process_children { c with section_has_para = true;
para_has_content = false;
font = Roman }
| ("SYNOPSIS" | "PROGRAMLISTING") ->
let c' = switch_context (new_para c)
{ c with font = Bold;
verbatim = true;
nofill = true;
} in
let c'' = process_children c' in
switch_context c'' { c'' with verbatim = c.verbatim;
line = "";
nofill = c.nofill;
font = c.font
}
| "PARA" ->
let c' = new_para c in
let c'' = switch_context c' { c' with font = Roman;
section_has_para = true;
para_has_content = false;
} in
process_children c''
| ("VARIABLELIST" | "ITEMIZEDLIST") ->
let c' = new_para c in
let c'' = process_children c' in
new_para c''
| "TERM" ->
let c' = switch_context c c in
if c'.para_has_content then output_string c.out ".sp\n";
print_macro c.out ".RS" [ "2m" ];
output_string c.out ".ft R\n";
let c'' = switch_context c' { c' with font = Roman;
section_has_para = true;
para_has_content = false;
} in
let c''' = process_children c'' in
print_macro c.out ".RE" [];
output_string c.out ".ft R\n";
switch_context c''' { c''' with font = Roman;
section_has_para = true;
para_has_content = true;
}
| "LISTITEM" ->
let mkbullets = (name_of_parent e = "ITEMIZEDLIST") in
let c' = switch_context c c in
if c'.para_has_content then output_string c.out ".sp\n";
print_macro c.out ".RS" [ "7m" ];
output_string c.out ".ft R\n";
if mkbullets then begin
output_string c.out "\\&\\h'-3m'\\z\\(bu\\h'3m'\\c\n";
end;
let c'' = switch_context c' { c' with font = Roman;
section_has_para = true;
para_has_content = false;
} in
let c''' = process_children c'' in
print_macro c.out ".RE" [];
output_string c.out ".ft R\n";
switch_context c''' { c''' with font = Roman;
section_has_para = true;
para_has_content = true;
}
| "REPLACEABLE" ->
let c' = switch_context c { c with font = Italics } in
let c'' = process_children c' in
switch_context c'' { c'' with font = c.font }
| "EMPHASIS" ->
let c' = switch_context c { c with font = Bold } in
let c'' = process_children c' in
switch_context c'' { c'' with font = c.font }
| ("LINK"|"XREF") ->
let c' = try
let endterm = List.assoc "ENDTERM" al in
let text =
try
as_string (Hashtbl.find e.id_index endterm)
with
Not_found -> "[UNRESOLVED LINK]"
in
print_text c text
with
Not_found ->
process_children c
in
c'
| "TITLE" ->
c
| n ->
prerr_endline ("NOT PROCESSED: " ^ n);
process_children c
in
match e.el with
Echunk s ->
print_text c s
| Epi (n,sl) ->
(* prerr_endline ("PI: " ^ n ^ " " ^ String.concat "," sl); *)
c
| Eempty (n,al) ->
process_element n al
| Eelement (n,al,_) ->
process_element n al
;;
let tree ch =
let position (line,col) = "arround line "
^ (string_of_int line)
^ ", column "
^ (string_of_int col) in
let lexbuf = Lexing.from_channel ch in
let doc =
try
Xmlscan.init();
Xmlparse.document Xmlscan.scan lexbuf
with exn ->
let pos = position (Xmlscan.position lexbuf) in
begin match exn with
| Parsing.Parse_error -> error ("syntax error " ^ pos)
| Error msg -> error ("syntax error " ^ pos ^ ": " ^ msg)
| Sys_error msg -> error ("I/O error: " ^ msg)
| _ -> raise exn
end
in
let XML(prolog, e, pl) = doc in
e
;;
let main() =
let t = mk_etree(tree stdin) in
let _ = process { section_has_para = false;
para_has_content = false;
verbatim = false;
nofill = false;
line = "";
out = devnull;
font = Roman }
t
in
()
;;
main();;
(* ======================================================================
* History:
*
* $Log: db2man.ml,v $
* Revision 1.1 1999/06/25 19:57:19 gerd
* Initial revision.
*
*
*)