(* $Id: pxp_event.ml 719 2007-05-07 15:04:40Z gerd $
* ----------------------------------------------------------------------
* PXP: The polymorphic XML parser for Objective Caml.
* Copyright by Gerd Stolpmann. See LICENSE for details.
*)
open Pxp_types
open Pxp_dtd
open Pxp_aux
let to_list get_ev =
(* This function must be tail-recursive! *)
let rec collect l =
match get_ev() with
Some ev -> collect (ev::l)
| None -> List.rev l
in
collect []
;;
let of_list l =
let l = ref l in
fun () ->
match !l with
x :: l' ->
l := l';
Some x
| [] ->
None
;;
let concat get_ev_list =
let get_ev_list = ref get_ev_list in
let rec get_ev' () =
match !get_ev_list with
get_ev :: get_ev_list' ->
let ev = get_ev() in
( match ev with
Some _ -> ev
| None -> get_ev_list := get_ev_list';
get_ev'()
)
| [] ->
None
in
get_ev'
;;
let rec iter f get_ev =
match get_ev() with
Some e ->
f e;
iter f get_ev
| None ->
()
;;
let extract e get_ev =
let counter = ref 0 in
let eof = ref false in
let count_event e =
match e with
E_start_doc(_,_)
| E_start_tag(_,_,_,_)
| E_start_super ->
incr counter
| E_end_doc _
| E_end_tag(_,_)
| E_end_super ->
decr counter;
eof := (!counter <= 0);
| E_position(_,_,_) ->
()
| _ ->
eof := (!counter <= 0);
in
let rec get_ev'() =
if !eof then (
if !counter <> 0 then failwith "Pxp_event.extract: Unbalanced events";
None
) else
let e_opt = get_ev() in
match e_opt with
Some e' ->
count_event e';
e_opt
| None ->
if !counter <> 0 then failwith "Pxp_event.extract: Unbalanced events";
None
in
count_event e;
concat [ of_list [e]; get_ev' ]
;;
let pfilter p get_ev =
let rec get_ev'() =
match get_ev () with
Some e as e_opt ->
if p e then
e_opt
else
get_ev'()
| None ->
None
in
get_ev'
;;
type filter = (unit -> event option) -> (unit -> event option)
let norm_cdata_filter get_ev =
let q = Queue.create () in
let rec get_ev' thing =
try
Queue.pop q
with
Queue.Empty ->
let ev = get_ev thing in
match ev with
Some (E_char_data s) ->
if s = "" then
get_ev' thing
else
gather_string [s] thing
| _ ->
ev
and gather_string sl thing =
let ev = get_ev thing in
match ev with
Some (E_char_data s) ->
gather_string (s :: sl) thing
| _ ->
Queue.add (Some(E_char_data(String.concat "" (List.rev sl)))) q;
Queue.add ev q;
get_ev' thing
in
get_ev'
;;
let drop_ignorable_whitespace_filter get_ev =
let found_dtd = ref None in
let elements = Stack.create() in
let pos = ref("",0,0) in
let has_ignorable_ws elname =
match !found_dtd with
Some dtd ->
( try
let el = dtd # element elname in
let cm = el # content_model in
( match cm with
Regexp _ -> true
| Mixed ml -> not (List.mem MPCDATA ml)
| _ -> false
)
with
Undeclared -> false
| Validation_error _ -> false (* element not found *)
)
| None ->
false
in
let pop() =
try
ignore(Stack.pop elements)
with
Stack.Empty ->
failwith "Pxp_event.drop_ignorable_whitespace_filter: bad event stream"
in
let rec get_ev' thing =
let ev = get_ev thing in
match ev with
Some(E_start_doc(_,dtd)) ->
if !found_dtd <> None then
failwith "Pxp_event.drop_ignorable_whitespace_filter: More than one E_start_doc event";
found_dtd := Some dtd;
ev
| Some(E_position(e,line,col)) ->
pos := (e,line,col);
ev
| Some(E_start_tag(name,_,_,_)) ->
let ign_ws = has_ignorable_ws name in
Stack.push ign_ws elements;
ev
| Some(E_end_tag(name,_)) ->
pop();
ev
| Some(E_char_data s) ->
let ign_ws = try Stack.top elements with Stack.Empty -> true in
if ign_ws then (
if not (Pxp_lib.only_whitespace s) then
let (e,line,col) = !pos in
let where = "In entity " ^ e ^ ", at line " ^
string_of_int line ^ ", position " ^
string_of_int col ^ ":\n" in
raise(At(where,WF_error("Data not allowed here")))
else
(* drop this event, and continue with next: *)
get_ev' thing
)
else ev
| _ ->
ev
in
get_ev'
;;
type dtd_style =
[ `Ignore
| `Include
| `Reference
]
;;
let wr_dsp do_display default_prefix dtd_style minimization out enc rep_enc get_ev =
(* do_display: whether [display_events] was called, not [write_events].
* default_prefix: The (optional) default prefix (only [write_events])
*)
let wms =
write_markup_string ~from_enc:rep_enc ~to_enc:enc out in
let write_att prefix (aname, avalue) =
wms ("\n" ^ prefix ^ aname ^ "=\"");
write_data_string ~from_enc:rep_enc ~to_enc:enc out avalue;
wms "\"";
in
let write_stack = Stack.create() in
(* Stack elements:
* (literal tag name, map of declared prefixes => uris, declared default uri)
*)
let write_start_tag name atts scope_opt minimized =
let (_, prefixes, default) =
try Stack.top write_stack
with Stack.Empty -> ("", StringMap.empty, None) in
(* prefixes: namespaces already in scope
* default: current xmlns default declaration
*)
let name_p, name_local = namespace_split name in
let name' =
(* Strip prefix if it is the default prefix.
* Note: we ignore [default], and take always [default_prefix] as
* reference.
*)
match default_prefix with
Some d -> if name_p = d then name_local else name
| None -> name in
(* Output start tag, and contained attributes: *)
wms ("<" ^ name');
List.iter (write_att "") atts;
( match scope_opt with
Some scope ->
(* Namespace case: *)
let mng = scope # namespace_manager in
(* Check whether additional namespace prefixes need to be
* declared:
*)
let (nsdecls, prefixes') =
List.fold_left
(fun (decls_acc, pref_acc) (n,_) ->
let p = extract_prefix n in
if p = "" then
(decls_acc, pref_acc)
else
let uri = mng # get_primary_uri p in
if StringMap.mem p pref_acc then
(decls_acc, pref_acc)
else
( (p,uri) :: decls_acc,
StringMap.add p uri pref_acc )
)
([], prefixes)
((name,"") :: atts)
in
(* Check whether the default namespace needs to be declared: *)
let nsdefault =
match default_prefix with
Some p ->
if default = None then
let uri = mng # get_primary_uri p in
[ uri ]
else []
| None ->
[]
in
(* Print the declarations: *)
List.iter (write_att "xmlns:") nsdecls;
List.iter (fun ( v) -> write_att "" ("xmlns", v)) nsdefault;
(* Push new state to stack:*)
let default' =
match nsdefault with
[ uri ] -> Some uri
| [] -> None
| _ -> assert false in
Stack.push (name', prefixes', default') write_stack;
| None ->
(* Non-namespace case: *)
Stack.push (name', prefixes, default) write_stack;
);
if minimized then (
wms "\n/>";
ignore(Stack.pop write_stack)
)
else
wms "\n>";
in
let display_start_tag name atts scope_opt minimized =
let (_, prefixes, default) =
try Stack.top write_stack
with Stack.Empty -> ("", StringMap.empty, None) in
(* prefixes: namespaces already in scope. The default declaration
* is represented as prefix "".
* default: not used
*)
match scope_opt with
Some scope ->
(* namespace case *)
let name_p, name_local = namespace_split name in
(* Get the required declarations: *)
let mng = scope # namespace_manager in
let eff_decl = scope # effective_declaration in
let eff_decl_to_add =
(* The prefixes in [eff_decl] that are not in [prefixes] *)
List.filter
(fun (dp, uri) ->
try
StringMap.find dp prefixes <> uri
with
Not_found -> true
)
eff_decl in
let eff_decl_to_add' = ref [] in (* further prefixes *)
let prefixes' =
ref (List.fold_left
(fun acc (dp, uri) ->
StringMap.add dp uri acc)
prefixes
eff_decl_to_add) in
let search_prefix uri =
(* Slow! *)
let p =
StringMap.fold
(fun _p _uri y -> if _uri = uri then _p else y)
!prefixes'
"" in
if p = "" then raise Not_found;
p
in
let invent_new_prefix uri =
let n = ref 0 in
while StringMap.mem ("ns" ^ string_of_int !n) !prefixes' do
incr n
done;
let p = "ns" ^ string_of_int !n in
prefixes' := StringMap.add p uri !prefixes';
eff_decl_to_add' := (p, uri) :: !eff_decl_to_add';
p
in
let write_att_remap (aname, avalue) =
let (p,local) = namespace_split aname in
if p = "" then
write_att "" (aname,avalue)
else
let d =
try scope # display_prefix_of_normprefix p
with Namespace_not_in_scope _ ->
(* Display prefix is missing. This is an error, but we
* can search or invent a new prefix on the fly.
*)
( let uri = mng # get_primary_uri p in
try
search_prefix uri
with
Not_found ->
invent_new_prefix uri )
in
write_att (d ^ ":") (local, avalue)
in
let this_display_prefix =
try
scope # display_prefix_of_normprefix name_p
with Namespace_not_in_scope _ ->
(* Display prefix is missing. This is an error, but we
* can search or invent a new prefix on the fly.
*)
( let uri = mng # get_primary_uri name_p in
try
search_prefix uri
with
Not_found ->
invent_new_prefix uri )
in
let name' =
if this_display_prefix = "" then
name_local (* within default namespace *)
else
this_display_prefix ^ ":" ^ name_local in
wms ("<" ^ name');
List.iter write_att_remap atts;
List.iter
(fun (n,v) ->
if n = "" then
write_att "" ("xmlns",v)
else
write_att "xmlns:" (n,v))
(eff_decl_to_add @ !eff_decl_to_add');
if minimized then
wms "\n/>"
else (
wms "\n>";
Stack.push (name', !prefixes', default) write_stack;
)
| None ->
(* non-namespace case *)
(* Output start tag, and contained attributes: *)
wms ("<" ^ name);
List.iter (write_att "") atts;
if minimized then
wms "\n/>"
else (
wms "\n>";
Stack.push (name, prefixes, default) write_stack;
)
in
let write_end_tag() =
let (name, _, _) =
try Stack.pop write_stack
with Stack.Empty ->
failwith "Pxp_event.write/display: Unbalanced start/end tags"
in
wms "</";
wms name;
wms "\n>"
in
let rec wr_dsp_event ev_opt =
let ev =
match ev_opt with
| Some ev -> ev
| None -> get_ev() in
match ev with
| Some(E_start_doc(version,dtd)) ->
wms ("<?xml version=\"" ^ version ^ "\" ");
wms ("encoding=\"" ^ Netconversion.string_of_encoding enc ^ "\" ");
if dtd # standalone_declaration then
wms "standalone=\"yes\" ";
wms "?>\n";
( match dtd_style with
`Ignore -> ()
| `Include ->
dtd # write out enc true
| `Reference ->
( match dtd # id with
Some (External _) ->
dtd # write_ref out enc
| _ ->
failwith "Pxp_event.write/display: Cannot output DTD as reference"
)
);
wr_dsp_event None
| Some (E_end_doc lit_name) ->
wr_dsp_event None
| Some (E_start_tag (name, atts, scope_opt, _)) ->
let minimized, ev_opt =
match minimization with
| `None ->
(false, None)
| `AllEmpty ->
(* Peek at the next event: *)
let ev' = get_ev() in
let do_minimize =
match ev' with
| Some(E_end_tag (name', _)) ->
name = name'
| _ ->
false in
if do_minimize then
(true, None) (* ==> consume ev'! *)
else
(false, Some ev') in
if do_display then
display_start_tag name atts scope_opt minimized
else
write_start_tag name atts scope_opt minimized;
wr_dsp_event ev_opt
| Some (E_end_tag (_, _)) ->
write_end_tag();
wr_dsp_event None
| Some (E_char_data data) ->
write_data_string ~from_enc:rep_enc ~to_enc:enc out data;
wr_dsp_event None
| Some (E_pinstr (target, value, ent_id)) ->
wms "<? "; wms target; wms " "; wms value; wms "?>";
wr_dsp_event None
| Some (E_pinstr_member (target, value, ent_id)) ->
wms "<? "; wms target; wms " "; wms value; wms "?>";
wr_dsp_event None
| Some (E_comment data) ->
wms "<!--"; wms data; wms "-->";
wr_dsp_event None
| Some E_start_super ->
wr_dsp_event None
| Some E_end_super ->
wr_dsp_event None
| Some (E_position (_,_,_)) ->
wr_dsp_event None
| Some (E_error exn) ->
failwith "Pxp_event.write/display: Cannot output E_error event"
| Some E_end_of_stream ->
wr_dsp_event None
| None ->
()
in
wr_dsp_event None
;;
let write_events ?default ?(dtd_style = `Include) ?(minimization=`None) =
wr_dsp false default dtd_style minimization ;;
let display_events ?(dtd_style = `Include) ?(minimization=`None) =
wr_dsp true None dtd_style minimization ;;