(* $Id: test_canonxml.ml,v 1.10 2001/06/13 18:42:00 gerd Exp $
* ----------------------------------------------------------------------
*
*)
open Pxp_document;;
open Pxp_yacc;;
open Pxp_types;;
let error_happened = ref false;;
let rec prerr_error e =
prerr_endline (string_of_exn e)
;;
class warner =
object
method warn w =
prerr_endline ("WARNING: " ^ w)
end
;;
let outbuf = String.create 8192;;
let output_utf8 config s =
match config.encoding with
`Enc_utf8 ->
print_string s
| `Enc_iso88591 ->
for i = 0 to String.length s - 1 do
let c = Char.code(s.[i]) in
if c <= 127 then
print_char(Char.chr(c))
else begin
print_char(Char.chr(0xc0 lor (c lsr 6)));
print_char(Char.chr(0x80 lor (c land 0x3f)));
end
done
| _ -> assert false
;;
let re = Str.regexp "[&<>\"\009\010\013]";;
let escaped s =
Str.global_substitute
re
(fun _ ->
match Str.matched_string s with
"&" -> "&"
| "<" -> "<"
| ">" -> ">"
| "\"" -> """
| "\009" -> "	"
| "\010" -> " "
| "\013" -> " "
| _ -> assert false
)
s
;;
let rec output_xml config n =
match n # node_type with
T_super_root ->
n # iter_nodes (output_xml config)
| T_pinstr pi_name ->
let [ pi ] = n # pinstr pi_name in
output_utf8 config "<?";
output_utf8 config (pi # target);
output_utf8 config " ";
output_utf8 config (pi # value);
output_utf8 config "?>";
| T_element name ->
output_utf8 config "<";
output_utf8 config name;
let sorted_attnames =
Sort.list ( <= ) (n # attribute_names) in
List.iter
(fun attname ->
match n # attribute attname with
Value v ->
output_utf8 config " ";
output_utf8 config attname;
output_utf8 config "=\"";
output_utf8 config (escaped v);
output_utf8 config "\"";
| Valuelist vl ->
let v = String.concat " " vl in
output_utf8 config " ";
output_utf8 config attname;
output_utf8 config "=\"";
output_utf8 config (escaped v);
output_utf8 config "\"";
| Implied_value ->
()
)
sorted_attnames;
output_utf8 config ">";
n # iter_nodes (output_xml config);
output_utf8 config "</";
output_utf8 config name;
output_utf8 config ">";
| T_data ->
let v = n # data in
output_utf8 config (escaped v)
| T_comment ->
let v =
match n # comment with
None -> assert false
| Some x -> x
in
output_utf8 config ("<!--" ^ v ^ "-->")
| _ ->
assert false
;;
let parse debug wf iso88591 comments filename =
let spec =
let e = new element_impl default_extension in
make_spec_from_mapping
~super_root_exemplar: (new super_root_impl default_extension)
~default_pinstr_exemplar: (new pinstr_impl default_extension)
~comment_exemplar: (new comment_impl default_extension)
~data_exemplar: (new data_impl default_extension)
~default_element_exemplar: e
~element_mapping: (Hashtbl.create 1)
()
in
let config =
{ default_config with
warner = new warner;
debugging_mode = debug;
enable_pinstr_nodes = true;
enable_super_root_node = true;
enable_comment_nodes = comments;
drop_ignorable_whitespace = false;
encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
idref_pass = true;
}
in
try
let parse_fn =
if wf then parse_wfdocument_entity
else
let index = new hash_index in
parse_document_entity
?transform_dtd:None
~id_index:(index :> 'ext index)
in
let tree =
parse_fn
config
(from_file filename)
spec
in
output_xml config (tree # root)
with
e ->
error_happened := true;
prerr_error e
;;
let main() =
let debug = ref false in
let wf = ref false in
let iso88591 = ref false in
let comments = ref false in
let files = ref [] in
Arg.parse
[ "-d", Arg.Set debug,
" turn debugging mode on";
"-wf", Arg.Set wf,
" check only on well-formedness";
"-iso-8859-1", Arg.Set iso88591,
" use ISO-8859-1 as internal encoding instead of UTF-8";
"-comments", Arg.Set comments,
" output comments, too";
]
(fun x -> files := x :: !files)
"
usage: test_canonxml [options] file ...
List of options:";
files := List.rev !files;
List.iter (parse !debug !wf !iso88591 !comments) !files;
;;
main();
if !error_happened then exit(1);;
(* ======================================================================
* History:
*
* $Log: test_canonxml.ml,v $
* Revision 1.10 2001/06/13 18:42:00 gerd
* Updated
*
* Revision 1.9 2001/05/17 22:31:03 gerd
* Updated
*
* Revision 1.8 2000/08/17 00:51:57 gerd
* Added -comments option to test enable_comment_nodes.
*
* Revision 1.7 2000/08/16 23:44:17 gerd
* Updates because of changes of the PXP API.
*
* Revision 1.6 2000/07/14 14:56:55 gerd
* Updated: warner.
*
* Revision 1.5 2000/07/14 14:17:58 gerd
* Updated because of iterface changes.
*
* Revision 1.4 2000/07/09 01:06:20 gerd
* Updated.
*
* Revision 1.3 2000/06/04 20:31:03 gerd
* Updates because of renamed PXP modules.
*
* Revision 1.2 2000/05/20 20:34:28 gerd
* Changed for UTF-8 support.
*
* Revision 1.1 2000/04/30 20:13:01 gerd
* Initial revision.
*
* Revision 1.3 1999/11/09 22:27:30 gerd
* The programs returns now an exit code of 1 if one of the
* XML files produces an error.
*
* Revision 1.2 1999/09/01 23:09:56 gerd
* Added the option -wf that switches to well-formedness checking
* instead of validation.
*
* Revision 1.1 1999/08/14 22:20:53 gerd
* Initial revision.
*
*
*)