Plasma GitLab Archive
Projects Blog Knowledge

(* $Id$
 * ----------------------------------------------------------------------
 * PXP: The polymorphic XML parser for Objective Caml.
 * Copyright by Gerd Stolpmann. See LICENSE for details.
 *)

open Pxp_types
open Pxp_dtd
open Pxp_aux
open Printf

let to_list get_ev =
  (* This function must be tail-recursive! *)
  let rec collect l =
    match get_ev() with
	Some ev -> collect (ev::l)
      | None    -> List.rev l
  in
  collect []
;;


let of_list l =
  let l = ref l in
  fun () ->
    match !l with
	x :: l' ->
	  l := l';
	  Some x
      | [] ->
	  None
;;


let concat get_ev_list =
  let get_ev_list = ref get_ev_list in
  let rec get_ev' () =
    match !get_ev_list with
	get_ev :: get_ev_list' ->
	  let ev = get_ev() in
	  ( match ev with
		Some _ -> ev
	      | None   -> get_ev_list := get_ev_list';
		          get_ev'()
	  )
      | [] ->
	  None
  in
  get_ev'
;;


let rec iter f get_ev =
  match get_ev() with
      Some e ->
	f e;
	iter f get_ev
    | None ->
	()
;;


let extract e get_ev =
  let counter = ref 0 in
  let eof = ref false in
  
  let count_event e =
    match e with
	E_start_doc(_,_)
      | E_start_tag(_,_,_,_)
      | E_start_super ->
	  incr counter
      | E_end_doc _
      | E_end_tag(_,_)
      | E_end_super ->
	  decr counter;
	  eof := (!counter <= 0);
      | E_position(_,_,_) ->
	  ()
      | _ ->
	  eof := (!counter <= 0);
  in

  let rec get_ev'() =
    if !eof then (
      if !counter <> 0 then failwith "Pxp_event.extract: Unbalanced events";
      None
    ) else
      let e_opt = get_ev() in
      match e_opt with
	  Some e' ->
	    count_event e';
	    e_opt
	| None ->
	    if !counter <> 0 then failwith "Pxp_event.extract: Unbalanced events";
	    None
  in

  count_event e;
  concat [ of_list [e]; get_ev' ]
;;


let pfilter p get_ev =
  let rec get_ev'() =
    match get_ev () with
	Some e as e_opt ->
	  if p e then
	    e_opt
	  else
	    get_ev'()
      | None ->
	  None
  in
  get_ev'
;;


type pull_fn = unit -> event option
type filter = pull_fn -> pull_fn

let norm_cdata_filter get_ev =
  let q = Queue.create () in
  let rec get_ev' thing =
    try
      Queue.pop q
    with
	Queue.Empty ->
	  let ev = get_ev thing in
	  match ev with
	      Some (E_char_data s) ->
		if s = "" then
		  get_ev' thing
		else
		  gather_string [s] thing
	    | _ ->
		ev
  and gather_string sl thing =
    let ev = get_ev thing in
    match ev with
	Some (E_char_data s) ->
	  gather_string (s :: sl) thing
      | _ ->
	  Queue.add (Some(E_char_data(String.concat "" (List.rev sl)))) q;
	  Queue.add ev q;
	  get_ev' thing
  in
  get_ev'
;;
	

let drop_ignorable_whitespace_filter get_ev =
  let found_dtd = ref (None : Pxp_dtd.dtd option) in
  let elements = Stack.create() in
  let pos = ref("",0,0) in

  let has_ignorable_ws elname =
    match !found_dtd with
	Some dtd ->
	  ( try 
	      let el = dtd # element elname in
	      let cm = el # content_model  in
	      ( match cm with
		    Regexp _ -> true
		  | Mixed ml -> not (List.mem MPCDATA ml)
		  | _ -> false
	      )
	    with
		Undeclared -> false
	      | Validation_error _ -> false   (* element not found *)
	  )
      | None ->
	  false
  in

  let pop() =
    try
      ignore(Stack.pop elements)
    with
	Stack.Empty ->
	  failwith "Pxp_event.drop_ignorable_whitespace_filter: bad event stream"
  in

  let rec get_ev' thing =
    let ev = get_ev thing in
    match ev with
	Some(E_start_doc(_,dtd)) ->
	  if !found_dtd <> None then
	    failwith "Pxp_event.drop_ignorable_whitespace_filter: More than one E_start_doc event";
	  found_dtd := Some dtd;
	  ev
      | Some(E_position(e,line,col)) ->
	  pos := (e,line,col);
	  ev
      |	Some(E_start_tag(name,_,_,_)) ->
	  let ign_ws = has_ignorable_ws name in
	  Stack.push ign_ws elements;
	  ev
      | Some(E_end_tag(name,_)) ->
	  pop();
	  ev
      | Some(E_char_data s) ->
	  let ign_ws = try Stack.top elements with Stack.Empty -> true in
	  if ign_ws then (
	    if not (Pxp_lib.only_whitespace s) then
	      let (e,line,col) = !pos in
	      let where = "In entity " ^ e ^ ", at line " ^ 
			  string_of_int line ^ ", position " ^ 
			  string_of_int col ^ ":\n" in
	      raise(At(where,WF_error("Data not allowed here")))
	    else
	      (* drop this event, and continue with next: *)
	      get_ev' thing
	  ) 
	  else ev
	  
      | _ ->
	  ev
  in

  get_ev'
;;


let unwrap_document pull =
  let doc_details = ref None in
  let first_event_done = ref false in

  let get_doc_details() =
    if not !first_event_done then (
      match pull() with
	| Some(E_start_doc(v,dtd)) ->
	    doc_details := Some(v,dtd);
	    first_event_done := true
	| _ ->
	    ()   (* Will cause an exception! *)
    );
    match !doc_details with
      | None ->
	  failwith "Pxp_event.unwrap_document: No E_start_doc event found"
      | Some (v,dtd) ->
	  (v,dtd)
  in

  let pull' =
    pfilter
      (function
	 | E_start_doc(v,dtd) ->
	     doc_details := Some(v,dtd);
	     first_event_done := true;
	     false
	 | E_end_doc _ | E_start_super | E_end_super | E_end_of_stream ->
	     false
	 | E_error e ->
	     raise e
	 | _ ->
	     true
      )
      pull
  in

  (get_doc_details, pull')
;;


let namespace_split = Pxp_aux.namespace_split
;;


let extract_prefix = Pxp_aux.extract_prefix
;;


type dtd_style =
    [ `Ignore
    | `Include
    | `Reference
    ]
;;

let wr_dsp do_display default_prefix dtd_style minimization out enc rep_enc get_ev =
  (* do_display: whether [display_events] was called, not [write_events].
   * default_prefix: The (optional) default prefix (only [write_events])
   *)

  let wms =
    write_markup_string ~from_enc:rep_enc ~to_enc:enc out in

  let write_att prefix (aname, avalue) =
    wms ("\n" ^ prefix ^ aname ^ "=\"");
    write_data_string ~from_enc:rep_enc ~to_enc:enc out avalue;
    wms "\"";
  in

  let write_stack = Stack.create() in
    (* Stack elements: 
     * (literal tag name, map of declared prefixes => uris, declared default uri)
     *)

  let write_start_tag name atts scope_opt minimized =
    let (_, prefixes, default) = 
      try Stack.top write_stack
      with Stack.Empty -> ("", StringMap.empty, None) in
    (* prefixes: namespaces already in scope
     * default: current xmlns default declaration
     *)

    let name_p, name_local = namespace_split name in

    let name' = 
      (* Strip prefix if it is the default prefix.
       * Note: we ignore [default], and take always [default_prefix] as
       * reference.
       *)
      match default_prefix with
	  Some d -> if name_p = d then name_local else name
	| None   -> name in

    (* Output start tag, and contained attributes: *)
    wms ("<" ^ name');
    List.iter (write_att "") atts;

    ( match scope_opt with
	  Some scope ->
	    (* Namespace case: *)
	    let mng = scope # namespace_manager in
	    (* Check whether additional namespace prefixes need to be
	     * declared:
	     *)
	    let (nsdecls, prefixes') = 
	      List.fold_left
		(fun (decls_acc, pref_acc) (n,_) ->
		   let p = extract_prefix n in
		   if p = "" then
		     (decls_acc, pref_acc)
		   else
		     let uri = mng # get_primary_uri p in
		     if StringMap.mem p pref_acc then
		       (decls_acc, pref_acc)
		     else
		       ( (p,uri) :: decls_acc,
			 StringMap.add p uri pref_acc )
		)
		([], prefixes)
		((name,"") :: atts)
	    in
	    (* Check whether the default namespace needs to be declared: *)
	    let nsdefault =
	      match default_prefix with
		  Some p ->
		    if default = None then 
		      let uri = mng # get_primary_uri p in
		      [ uri ] 
		    else []
		| None ->
		    []
	    in
	    (* Print the declarations: *)
	    List.iter (write_att "xmlns:") nsdecls;
	    List.iter (fun (  v) -> write_att "" ("xmlns", v)) nsdefault;
	    (* Push new state to stack:*)
	    let default' = 
	      match nsdefault with
		  [ uri ] -> Some uri
		| [] -> None
		| _ -> assert false in
	    Stack.push (name', prefixes', default') write_stack;

	| None ->
	    (* Non-namespace case: *)
	    Stack.push (name', prefixes, default) write_stack;
    );
    if minimized then (
      wms "\n/>";
      ignore(Stack.pop write_stack)
    )
    else
      wms "\n>";
  in

  let display_start_tag name atts scope_opt minimized =
    let (_, prefixes, default) = 
      try Stack.top write_stack
      with Stack.Empty -> ("", StringMap.empty, None) in
    (* prefixes: namespaces already in scope. The default declaration
     *   is represented as prefix "".
     * default: not used
     *)

    match scope_opt with
	Some scope ->
	  (* namespace case *)
	  let name_p, name_local = namespace_split name in
  
	  (* Get the required declarations: *)
	  let mng = scope # namespace_manager in
	  let eff_decl = scope # effective_declaration in
	  let eff_decl_to_add =
            (* The prefixes in [eff_decl] that are not in [prefixes] *)
            List.filter
              (fun (dp, uri) ->
		 try
		   StringMap.find dp prefixes <> uri
		 with
                     Not_found -> true
              )
              eff_decl in
	  let eff_decl_to_add' = ref [] in    (* further prefixes *)
          
	  let prefixes' =
            ref (List.fold_left
		   (fun acc (dp, uri) ->
                      StringMap.add dp uri acc)
		   prefixes
		   eff_decl_to_add) in

	  let search_prefix uri =
            (* Slow! *)
            let p =
              StringMap.fold
		(fun _p _uri y -> if _uri = uri then _p else y)
		!prefixes'
		"" in
            if p = "" then raise Not_found;
            p
	  in

	  let invent_new_prefix uri =
            let n = ref 0 in
            while StringMap.mem ("ns" ^ string_of_int !n) !prefixes' do
              incr n
            done;
            let p = "ns" ^ string_of_int !n in
            prefixes' := StringMap.add p uri !prefixes';
            eff_decl_to_add' := (p, uri) :: !eff_decl_to_add';
            p
	  in

	  let write_att_remap (aname, avalue) =
            let (p,local) = namespace_split aname in
            if p = "" then
              write_att "" (aname,avalue)
            else
              let d =
		try scope # display_prefix_of_normprefix p
		with Namespace_not_in_scope _ ->
		  (* Display prefix is missing. This is an error, but we
		   * can search or invent a new prefix on the fly.
		   *)
		  ( let uri = mng # get_primary_uri p in
                    try
                      search_prefix uri
                    with
			Not_found ->
			  invent_new_prefix uri )
              in
              write_att (d ^ ":") (local, avalue)
	  in

	  let this_display_prefix =
            try 
	      scope # display_prefix_of_normprefix name_p
            with Namespace_not_in_scope _ ->
              (* Display prefix is missing. This is an error, but we
               * can search or invent a new prefix on the fly.
               *)
              ( let uri = mng # get_primary_uri name_p in
		try
		  search_prefix uri
		with
                    Not_found ->
                      invent_new_prefix uri )
	  in
          
	  let name' =
            if this_display_prefix = "" then
              name_local  (* within default namespace *)
            else
              this_display_prefix ^ ":" ^ name_local in

	  wms ("<" ^ name');
	  List.iter write_att_remap atts;
	  List.iter
            (fun (n,v) ->
               if n = "" then
		 write_att "" ("xmlns",v)
               else
		 write_att "xmlns:" (n,v))
	    (eff_decl_to_add @ !eff_decl_to_add');

	  if minimized then
	    wms "\n/>"
	  else (
	    wms "\n>";
	    Stack.push (name', !prefixes', default) write_stack;
	  )

      | None ->
	  (* non-namespace case *)
	  (* Output start tag, and contained attributes: *)
	  wms ("<" ^ name);
	  List.iter (write_att "") atts;
	  if minimized then
	    wms "\n/>"
	  else (
	    wms "\n>";
	    Stack.push (name, prefixes, default) write_stack;
	  )
  in

  let write_end_tag() =
    let (name, _, _) = 
      try Stack.pop write_stack 
      with Stack.Empty -> 
	failwith "Pxp_event.write/display: Unbalanced start/end tags"
    in
    wms "</";
    wms name;
    wms "\n>"
  in

  let rec wr_dsp_event ev_opt =
    let ev =
      match ev_opt with
	| Some ev -> ev
	| None -> get_ev() in
    match ev with
      | Some(E_start_doc(version,dtd)) ->
	  wms ("<?xml version=\"" ^ version ^ "\" ");
	  wms ("encoding=\"" ^ Netconversion.string_of_encoding enc ^ "\" ");
	  if dtd # standalone_declaration then
	    wms "standalone=\"yes\" ";
	  wms "?>\n";
	  ( match dtd_style with
		`Ignore -> ()
	      | `Include -> 
		  dtd # write out enc true
	      | `Reference ->
		  ( match dtd # id with
			Some (External _) -> 
			  dtd # write_ref out enc
		      | _ ->
			  failwith "Pxp_event.write/display: Cannot output DTD as reference"
		  )
	  );
	  wr_dsp_event None
      | Some (E_end_doc lit_name) ->
	  wr_dsp_event None
      | Some (E_start_tag (name, atts, scope_opt, _)) ->
	  let minimized, ev_opt =
	    match minimization with
	      | `None -> 
		  (false, None)
	      | `AllEmpty ->
		  (* Peek at the next event: *)
		  let ev' = get_ev() in
		  let do_minimize =
		    match ev' with
		      | Some(E_end_tag (name', _)) ->
			  name = name'
		      | _ -> 
			  false in
		  if do_minimize then
		    (true, None)   (* ==> consume ev'! *)
		  else
		    (false, Some ev') in
	  if do_display then
	    display_start_tag name atts scope_opt minimized
	  else
	    write_start_tag name atts scope_opt minimized;
	  wr_dsp_event ev_opt
      | Some (E_end_tag (_, _)) ->
	  write_end_tag();
	  wr_dsp_event None
      | Some (E_char_data data) ->
	  write_data_string ~from_enc:rep_enc ~to_enc:enc out data;
	  wr_dsp_event None
      | Some (E_pinstr (target, value, ent_id)) ->
	  wms "<? "; wms target; wms " "; wms value; wms "?>";
	  wr_dsp_event None
      | Some (E_comment data) ->
	  wms "<!--"; wms data; wms "-->";
	  wr_dsp_event None
      | Some E_start_super ->
	  wr_dsp_event None
      | Some E_end_super ->
	  wr_dsp_event None
      | Some (E_position (_,_,_)) ->
	  wr_dsp_event None
      | Some (E_error exn) ->
	  failwith "Pxp_event.write/display: Cannot output E_error event"
      | Some E_end_of_stream ->
	  wr_dsp_event None
      | None ->
	  ()
  in

  wr_dsp_event None
;;


let write_events ?default ?(dtd_style = `Include) ?(minimization=`None) = 
  wr_dsp false default dtd_style minimization ;;
let display_events ?(dtd_style = `Include) ?(minimization=`None) = 
  wr_dsp true None dtd_style minimization ;;


let string_of_event e =
  match e with
    | E_start_doc(v,dtd) ->
	sprintf "E_start_doc(%s,<%d>)" v (Oo.id dtd)
    | E_end_doc name ->
	sprintf "E_end_doc(%s)" name
    | E_start_tag(name,attlist,scope_opt,entid) ->
	sprintf "E_start_tag(%s,%s,%s,<%d>)"
	  name 
	  (String.concat " " (List.map (fun (n,v) -> n ^ "=" ^ v) attlist))
	  (match scope_opt with
	     | None -> "None"
	     | Some scope -> sprintf "<%d>" (Oo.id scope)
	  )
	  (Oo.id entid)
    | E_end_tag(name,entid) ->
	sprintf "E_end_tag(%s,<%d>)" name (Oo.id entid)
    | E_start_super ->
	"E_start_super"
    | E_end_super ->
	"E_end_super"
    | E_char_data data ->
	sprintf "E_char_data(\"%s\")" (String.escaped data)
    | E_pinstr(target,data,entid) ->
	sprintf "E_pinstr(%s,%s,<%d>)" target data (Oo.id entid)
    | E_comment data ->
	sprintf "E_comment(\"%s\")" (String.escaped data)
    | E_position(ent,line,col) ->
	sprintf "E_position(%s,%d,%d)" ent line col
    | E_error e ->
	sprintf "E_error(%s)" (Pxp_types.string_of_exn e)
    | E_end_of_stream ->
	"E_end_of_stream"
;;

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml