Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: pxp_types.ml 707 2004-09-04 17:25:35Z gerd $
 * ----------------------------------------------------------------------
 * PXP: The polymorphic XML parser for Objective Caml.
 * Copyright by Gerd Stolpmann. See LICENSE for details.
 *)

include Pxp_core_types

open Pxp_entity
open Pxp_dtd
open Pxp_entity_manager
open Pxp_reader
open Netchannels


type config =
    { warner : collect_warnings;
      swarner : symbolic_warnings option;
      enable_pinstr_nodes : bool;
      enable_super_root_node : bool;
      enable_comment_nodes : bool;
      drop_ignorable_whitespace : bool;
      encoding : rep_encoding;
      recognize_standalone_declaration : bool;
      store_element_positions : bool;
      idref_pass : bool;
      validate_by_dfa : bool;
      accept_only_deterministic_models : bool;
      disable_content_validation : bool;
      name_pool : pool;
      enable_name_pool_for_element_types    : bool;
      enable_name_pool_for_attribute_names  : bool;
      enable_name_pool_for_attribute_values : bool;
      (* enable_name_pool_for_notation_names   : bool; *)
      enable_name_pool_for_pinstr_targets   : bool;
      enable_namespace_processing : namespace_manager option;
      escape_contents : 
	             (Pxp_lexer_types.token -> entity_manager -> string) option;
      escape_attributes : 
	             (Pxp_lexer_types.token -> int -> entity_manager -> string)
	             option;
      debugging_mode : bool;
    }

let default_config =
  let w = new drop_warnings in
  { warner = w;
    swarner = None;
    enable_pinstr_nodes = false;
    enable_super_root_node = false;
    enable_comment_nodes = false;
    drop_ignorable_whitespace = true;
    encoding = `Enc_iso88591;
    recognize_standalone_declaration = true;
    store_element_positions = true;
    idref_pass = false;
    validate_by_dfa = true;
    accept_only_deterministic_models = true;
    disable_content_validation = false;
    name_pool = make_probabilistic_pool 10;
    enable_name_pool_for_element_types = false;
    enable_name_pool_for_attribute_names = false;
    enable_name_pool_for_pinstr_targets = false;
    enable_name_pool_for_attribute_values = false;
    enable_namespace_processing = None;
    escape_contents = None;
    escape_attributes = None;
    debugging_mode = false;
  }

let default_namespace_config =
  { default_config with
      enable_namespace_processing = Some (new namespace_manager)
  }

type source = Pxp_dtd.source =
    Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
  | ExtID of (ext_id * Pxp_reader.resolver)
  | XExtID of (ext_id * string option * Pxp_reader.resolver)


let from_obj_channel ?(alt = []) ?system_id ?fixenc 
                     ?id:init_id ?system_encoding ch =
  let channel_id = allocate_private_id() in
  let r = new resolve_to_any_obj_channel
	    ~channel_of_id:(fun rid ->
			      if rid.rid_private = Some channel_id then begin
				let active_id =
				  match init_id with
				      None -> 
					(match system_id with
					     None -> None
					   | Some sid ->
					       Some (resolver_id_of_ext_id
						       (System sid))
					)
				    | Some xid ->
					Some (resolver_id_of_ext_id xid)
				in
				(ch, fixenc, active_id)
			      end
			      else
				raise Not_competent)
	    ()
  in
  let r_total = new combine 
		  ( [r] @
		    (if init_id <> None then 
		       [ new resolve_as_file 
			   ?system_encoding 
			   ~base_url_defaults_to_cwd:false
			   ()
		       ] 
		     else
		       []) @
		    alt) in
  ExtID(Private channel_id, r_total)
;;


let from_channel ?alt ?system_id ?fixenc ?id ?system_encoding ch =
  from_obj_channel 
    ?alt ?system_id ?system_encoding ?id ?fixenc 
    (new input_channel ch)
;;
    

let from_file ?(alt = []) ?(system_encoding = `Enc_utf8) ?enc utf8_filename =
  let r =
    new resolve_as_file
      ~system_encoding
      ~base_url_defaults_to_cwd:false
      ()
  in

  let url = make_file_url
	      ~system_encoding
	      ?enc
	      utf8_filename in

  let xid = System (Neturl.string_of_url url) in

  let sysbase =
    let cwd = Sys.getcwd() in
    let cwd_slash = if cwd = "/" then cwd else cwd ^ "/" in
    let sysbase_url = make_file_url 
			~system_encoding ~enc:system_encoding cwd_slash in
    Some(Neturl.string_of_url sysbase_url) 
  in
  XExtID(xid, sysbase, new combine (r :: alt))
;;


let from_string ?(alt = []) ?system_id ?fixenc s =
  let channel_id = allocate_private_id() in
  let r = new resolve_to_any_obj_channel
	    ~channel_of_id:(fun rid ->
			      if rid.rid_private = Some channel_id then begin
				let active_id =
				  match system_id with
				      None -> None
				    | Some sid ->
					Some 
					  (resolver_id_of_ext_id(System sid))
				in
				let ch = new input_string s in
				(ch, fixenc, active_id)
			      end
			      else
				raise Not_competent)
	    ()
  in
  let r_total = new combine ( [r] @ alt ) in
  ExtID(Private channel_id, r_total)
;;


let open_source cfg src use_document_entity dtd =
  let w = cfg.warner in
  let sw = cfg.swarner in
  let r, en =
    match src with
	Entity(m,r')  -> r', m dtd
      | ExtID(xid,r') -> r',
	                 if use_document_entity then
                           new document_entity
			     r' dtd "[toplevel]" sw w xid None
                             cfg.encoding
			 else
                           new external_entity
			     r' dtd "[toplevel]" sw w xid None false
                             cfg.encoding
      | XExtID(xid,sysbase,r') -> r',
	                          if use_document_entity then
				    new document_entity
				      r' dtd "[toplevel]" sw w xid sysbase
				      cfg.encoding
				  else
				    new external_entity
				      r' dtd "[toplevel]" sw w xid sysbase 
				      false cfg.encoding
  in
  r # init_rep_encoding cfg.encoding;
  r # init_warner sw w;
  en # set_debugging_mode (cfg.debugging_mode);
  (r, en)
;;


type entry =
    [ `Entry_document     of [ `Val_mode_dtd | 
			       `Extend_dtd_fully | `Parse_xml_decl ] list
    | `Entry_declarations of [ `Val_mode_dtd | `Extend_dtd_fully ] list
    | `Entry_content      of [ `Dummy ] list
    | `Entry_expr         of [ `Dummy ] list
    ]


type event =
  | E_start_doc of (string * dtd)
  | E_end_doc of string
  | E_start_tag of (string * (string * string) list * 
		    namespace_scope option *
		    Pxp_lexer_types.entity_id)
  | E_end_tag    of (string * Pxp_lexer_types.entity_id)
  | E_char_data of  string
  | E_pinstr of (string * string * Pxp_lexer_types.entity_id)
  | E_pinstr_member of (string * string * Pxp_lexer_types.entity_id)
  | E_comment of string
  | E_start_super
  | E_end_super
  | E_position of (string * int * int)
  | E_error of exn
  | E_end_of_stream



This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml