(* $Id: pxp_document.ml 740 2009-02-02 15:28:19Z gerd $ * ---------------------------------------------------------------------- * PXP: The polymorphic XML parser for Objective Caml. * Copyright by Gerd Stolpmann. See LICENSE for details. *) open Pxp_types open Pxp_lexer_types open Pxp_dtd open Pxp_aux open Pxp_dfa let method_na s = raise (Method_not_applicable s) ;; let nsmethod_na s = (* Use this only if the method is not available AND if the method is a * namespace method AND if the class is not namespace-aware *) raise (Namespace_method_not_applicable s) ;; exception Skip type node_type = T_element of string | T_data | T_super_root | T_pinstr of string | T_comment | T_none | T_attribute of string | T_namespace of string ;; type data_node_classification = CD_normal | CD_other | CD_empty | CD_ignorable | CD_error of exn ;; class type ['node] extension = object ('self) method clone : 'self method node : 'node method set_node : 'node -> unit end ;; class type [ 'ext ] node = object ('self) constraint 'ext = 'ext node #extension method extension : 'ext method delete : unit method remove : unit -> unit method remove_nodes : ?pos:int -> ?len:int -> unit -> unit method parent : 'ext node method root : 'ext node method orphaned_clone : 'self method orphaned_flat_clone : 'self method classify_data_node : 'ext node -> data_node_classification method append_node : 'ext node -> unit method insert_nodes : ?pos:int -> 'ext node list -> unit method add_node : ?force:bool -> 'ext node -> unit method add_pinstr : proc_instruction -> unit method pinstr : string -> proc_instruction list method pinstr_names : string list method node_position : int method node_path : int list method sub_nodes : 'ext node list method iter_nodes : ('ext node -> unit) -> unit method iter_nodes_sibl : ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit method nth_node : int -> 'ext node method previous_node : 'ext node method next_node : 'ext node method set_nodes : 'ext node list -> unit method data : string method set_data : string -> unit method node_type : node_type method entity_id : Pxp_lexer_types.entity_id method position : (string * int * int) method attribute : string -> att_value method attribute_names : string list method attribute_type : string -> att_type method attributes : (string * att_value) list method required_string_attribute : string -> string method required_list_attribute : string -> string list method optional_string_attribute : string -> string option method optional_list_attribute : string -> string list method id_attribute_name : string method id_attribute_value : string method idref_attribute_names : string list method quick_set_attributes : (string * att_value) list -> unit method set_attributes : (string * att_value) list -> unit method set_attribute : ?force:bool -> string -> att_value -> unit method reset_attribute : string -> unit method attributes_as_nodes : 'ext node list method set_comment : string option -> unit method comment : string option method normprefix : string method display_prefix : string method localname : string method namespace_uri : string method namespace_scope : namespace_scope method set_namespace_scope : namespace_scope -> unit method namespaces_as_nodes : 'ext node list method namespace_manager : namespace_manager method dtd : dtd method encoding : rep_encoding method create_element : ?name_pool_for_attribute_values:pool -> ?entity_id:Pxp_lexer_types.entity_id -> ?position:(string * int * int) -> ?valcheck:bool -> ?att_values:( (string * att_value) list) -> dtd -> node_type -> (string * string) list -> 'ext node method create_data : dtd -> string -> 'ext node method create_other : ?entity_id:Pxp_lexer_types.entity_id -> ?position:(string * int * int) -> dtd -> node_type -> 'ext node method local_validate : ?use_dfa:bool -> ?check_data_nodes:bool -> unit -> unit method validate_contents : ?use_dfa:bool -> ?check_data_nodes:bool -> unit -> unit method complement_attlist : unit -> unit method validate_attlist : unit -> unit method validate : unit -> unit method write : ?prefixes:string list -> ?default:string -> ?minimization:[`AllEmpty | `DeclaredEmpty | `None] -> output_stream -> encoding -> unit method display : ?prefixes:(string StringMap.t) -> ?minimization:[`AllEmpty | `DeclaredEmpty | `None] -> output_stream -> encoding -> unit method internal_adopt : 'ext node option -> int -> unit method internal_set_pos : int -> unit method internal_delete : 'ext node -> unit method internal_init : Pxp_lexer_types.entity_id -> (string * int * int) -> pool option -> bool -> dtd -> string -> (string * string) list -> (string * att_value) list -> unit method internal_init_other : Pxp_lexer_types.entity_id -> (string * int * int) -> dtd -> node_type -> unit method dump : Format.formatter -> unit end ;; type 'ext spec_table = { mapping : (string, 'ext node) Hashtbl.t; data_node : 'ext node; default_element : 'ext node; super_root_node : 'ext node option; pinstr_mapping : (string, 'ext node) Hashtbl.t; default_pinstr_node : 'ext node option; comment_node : 'ext node option; } ;; type 'ext spec = Spec_table of 'ext spec_table ;; let make_spec_from_mapping ?super_root_exemplar ?comment_exemplar ?default_pinstr_exemplar ?pinstr_mapping ~data_exemplar ~default_element_exemplar ~element_mapping () = Spec_table { mapping = element_mapping; data_node = data_exemplar; default_element = default_element_exemplar; super_root_node = super_root_exemplar; comment_node = comment_exemplar; default_pinstr_node = default_pinstr_exemplar; pinstr_mapping = (match pinstr_mapping with None -> Hashtbl.create 1 | Some m -> m ) } ;; let make_spec_from_alist ?super_root_exemplar ?comment_exemplar ?default_pinstr_exemplar ?(pinstr_alist = []) ~data_exemplar ~default_element_exemplar ~element_alist () = let m = List.length pinstr_alist in let pinstr_mapping = Hashtbl.create m in List.iter (fun (name,ex) -> Hashtbl.add pinstr_mapping name ex) pinstr_alist; let n = List.length element_alist in let element_mapping = Hashtbl.create n in List.iter (fun (name,ex) -> Hashtbl.add element_mapping name ex) element_alist; make_spec_from_mapping ?super_root_exemplar: super_root_exemplar ?comment_exemplar: comment_exemplar ?default_pinstr_exemplar: default_pinstr_exemplar ~pinstr_mapping: pinstr_mapping ~data_exemplar: data_exemplar ~default_element_exemplar: default_element_exemplar ~element_mapping: element_mapping () ;; (**********************************************************************) (* * CLASS HIERARCHY: * * common_node_features * | * +- tree_features * | | * | +- leaf_features * | | | * | | +- data_impl pinstr_features * | | +- comment_impl | * | | +- pinstr_impl -----------------+ * | | | * | +- container_features | * | | | * | +- element_impl ----------------+ * | | | | * | | +- namespace_element_impl | * | | | * | +- super_root_impl--------------+ * | * +-- attribute_impl * | | * | +- namespace_attribute_impl * | * +-- namespace_impl * * The classes ending in _impl are real classes that can be * instantiated. These classes have all the type 'ext node. * The classes ending in _features are virtual only and define * some properties that are needed in several classes (mixin classes). * There are also (not shown) many classes called no_*_feature * that define methods such that a feature is turned off. *) (**********************************************************************) (* common_node_features *) (**********************************************************************) (* These features are inherited by all implementations: * * - Every node has a DTD * - Every node may have a parent *) class virtual ['ext] common_node_features = object (self) val mutable parent = (None : 'ext node option) val mutable node_position = -1 val mutable dtd = (None : dtd option) method virtual remove : unit -> unit method delete = self # remove() (* Not every class defines [remove]! *) (* --------------- DTD -------------------- *) method dtd = match dtd with None -> failwith "Pxp_document.common_node_features#dtd: No DTD available" | Some d -> d method encoding = match dtd with None -> failwith "Pxp_document.common_node_features#encoding: No DTD available" | Some d -> d # encoding (* ---------- PARENT ------------- *) method parent = match parent with None -> raise Not_found | Some p -> p method root = match parent with None -> (self : 'ext #node :> 'ext node) | Some p -> p # root method node_position = if node_position >= 0 then node_position else raise Not_found method node_path = let rec collect n path = try let n' = n # parent in (* may raise Not_found *) let p = n' # node_position in collect n' (p :: path) with Not_found -> (* n is the root *) path in collect (self : 'ext #node :> 'ext node) (self # local_node_path) method private local_node_path = (* to be overridden *) try [self # node_position] with Not_found -> [] method internal_adopt (new_parent : 'ext node option) pos = begin match parent with None -> () | Some p -> if new_parent <> None then failwith "Pxp_document.common_node_features#internal_adopt: Tried to add a bound element" end; parent <- new_parent; node_position <- pos method internal_set_pos pos = node_position <- pos end ;; (**********************************************************************) (* tree_features *) (**********************************************************************) (* All nodes that occur in the regular XML tree have these features: * * - There is an extension object * - The node can be removed * - There are siblings *) class virtual ['ext] tree_features an_ext = object (self) constraint 'ext = 'ext node #extension inherit ['ext] common_node_features val mutable extension = an_ext initializer extension # set_node (self : 'ext #node :> 'ext node) method extension = (extension : 'ext) method remove () = match parent with None -> () | Some p -> p # internal_delete (self : 'ext #node :> 'ext node) method previous_node = self # parent # nth_node (self # node_position - 1) method next_node = self # parent # nth_node (self # node_position + 1) end ;; (**********************************************************************) (* leaf_features *) (**********************************************************************) class virtual ['ext] leaf_features an_ext = object (self) constraint 'ext = 'ext node #extension inherit ['ext] tree_features an_ext (* Cloning leaves is very simple: *) method orphaned_clone = let x = extension # clone in let n = {< parent = None; node_position = -1; extension = x; >} in x # set_node (n : 'ext #node :> 'ext node); n method orphaned_flat_clone = let x = extension # clone in let n = {< parent = None; node_position = -1; extension = x; >} in x # set_node (n : 'ext #node :> 'ext node); n end ;; (**********************************************************************) (* no_pinstr_feature *) (**********************************************************************) class virtual ['ext] no_pinstr_feature = object (self) (* Methods returning constant values: *) method pinstr (_:string) : proc_instruction list = [] method pinstr_names : string list = [] method add_pinstr (_:proc_instruction) : unit = method_na "add_pinstr" end ;; (**********************************************************************) (* no_subnodes_feature *) (**********************************************************************) (* These Node types do not implement methods accessing or * modifying the subnodes. *) class virtual ['ext] no_subnodes_feature = object (self) (* Methods returning constant values: *) method sub_nodes : 'ext node list = [] method iter_nodes (_:'ext node -> unit) : unit = () method iter_nodes_sibl (_:'ext node option -> 'ext node -> 'ext node option -> unit) : unit = () method nth_node (_:int) : 'ext node = raise Not_found (* Unavailable methods: *) method set_nodes (_:'ext node list) : unit = method_na "set_nodes" method remove_nodes ?(pos:int option) ?(len:int option) () : unit = method_na "remove_nodes" method append_node (_:'ext node) : unit = method_na "append_node" method insert_nodes ?(pos:int option) (_:'ext node list) : unit = method_na "insert_nodes" method add_node ?(force:bool option) (_:'ext node) : unit = method_na "add_node" (* Impossible methods: *) method internal_delete (_:'ext node) : unit = assert false end ;; (**********************************************************************) (* no_attributes_feature *) (**********************************************************************) (* Node types without attributes do not implement the attribute * methods. *) class virtual ['ext] no_attributes_feature = object (self) (* Methods returning constant values: *) method attribute (_:string) : att_value = raise Not_found method attribute_names : string list = [] method attribute_type (_:string) : att_type = raise Not_found method attributes : (string * att_value) list = [] method required_string_attribute (_:string) : string = failwith "Pxp_document#required_string_attribute: not found" method required_list_attribute (_:string) : string list = failwith "Pxp_document#required_list_attribute: not found" method optional_string_attribute (_:string) : string option = None method optional_list_attribute (_:string) : string list = [] method id_attribute_name : string = raise Not_found method id_attribute_value : string = raise Not_found method idref_attribute_names : string list = [] method attributes_as_nodes : 'ext node list = [] (* Unavailable methods: *) method quick_set_attributes (_ : (string * att_value) list) : unit = method_na "quick_set_attributes" method set_attributes (_ : (string * att_value) list) : unit = method_na "set_attributes" method set_attribute ?(force : bool option) (_:string) (_:att_value) : unit= method_na "set_attribute" method reset_attribute (_:string) : unit = method_na "reset_attribute" end ;; (**********************************************************************) (* no_validation_feature *) (**********************************************************************) (* Implementations that are always valid *) class virtual ['ext] no_validation_feature = object (self) method local_validate ?(use_dfa:bool option) ?(check_data_nodes:bool option) () = () method validate_contents ?(use_dfa:bool option) ?(check_data_nodes:bool option) () = () method complement_attlist () = () method validate_attlist () = () method validate () = () method classify_data_node (_:'ext node) : data_node_classification = method_na "classify_data_node" end ;; (**********************************************************************) (* no_namespace_feature *) (**********************************************************************) class virtual ['ext] no_namespace_feature = object (self) method normprefix : string = nsmethod_na "normprefix" method display_prefix : string = nsmethod_na "display_prefix" method localname : string = nsmethod_na "localname" method namespace_uri : string = nsmethod_na "namespace_uri" method namespace_scope : namespace_scope = nsmethod_na "namespace_scope" method set_namespace_scope : namespace_scope -> unit = nsmethod_na "set_namespace_scope" method namespaces_as_nodes : 'ext node list = nsmethod_na "namespaces_as_nodes" method namespace_manager : namespace_manager = nsmethod_na "namespace_manager" end ;; (**********************************************************************) (* no_comments_feature *) (**********************************************************************) class virtual ['ext] no_comments_feature = object (self) method comment : string option = method_na "comment" method set_comment (_ : string option) : unit = method_na "set_comment" end ;; (**********************************************************************) let null_entity_id = Pxp_dtd.Entity.create_entity_id() ;; let no_position = ("?", 0, 0) ;; let format_att_value fmt v = match v with Implied_value -> Format.pp_print_string fmt "Implied_value" | Value s -> Format.pp_print_string fmt ("Value \"" ^ String.escaped s ^ "\"") | Valuelist l -> Format.pp_print_string fmt "Valuelist ["; Format.pp_print_string fmt (String.concat "; " (List.map (fun s -> "\"" ^ String.escaped s ^ "\"" ) l ) ); Format.pp_print_string fmt "]"; ;; (**********************************************************************) (* data_impl *) (**********************************************************************) class ['ext] data_impl an_ext : ['ext] node = object (self) (* Inherited features: *) inherit ['ext] leaf_features an_ext inherit ['ext] no_subnodes_feature inherit ['ext] no_pinstr_feature inherit ['ext] no_attributes_feature inherit ['ext] no_validation_feature inherit ['ext] no_namespace_feature inherit ['ext] no_comments_feature val mutable content = ("" : string) method position = no_position method entity_id = null_entity_id method data = content method node_type = T_data method create_element ?name_pool_for_attribute_values ?entity_id ?position ?valcheck ?att_values _ _ _ = method_na "create_element" method create_other ?entity_id ?position _ _ = method_na "create_other" method create_data new_dtd new_str = let x = extension # clone in let n = ( {< parent = None; extension = x; dtd = Some new_dtd; content = new_str; >} : 'ext #node :> 'ext node) in x # set_node n; n method set_data str = content <- str method write ?(prefixes = ([]: string list)) ?default ?minimization os enc = let encoding = self # encoding in write_data_string ~from_enc:encoding ~to_enc:enc os content method display ?prefixes ?minimization os enc = let encoding = self # encoding in write_data_string ~from_enc:encoding ~to_enc:enc os content method internal_init _ _ _ _ _ _ _ _ = assert false method internal_init_other _ _ _ _ = assert false method dump fmt = Format.pp_open_hbox fmt (); Format.pp_print_string fmt "* T_data \""; Format.pp_print_string fmt (String.escaped content); Format.pp_print_string fmt "\""; Format.pp_close_box fmt (); end ;; (**********************************************************************) (* attribute_impl *) (**********************************************************************) class ['ext] attribute_impl ~element ~name value init_dtd : ['ext] node = object (self) inherit ['ext] common_node_features inherit ['ext] no_subnodes_feature inherit ['ext] no_pinstr_feature inherit ['ext] no_validation_feature inherit ['ext] no_namespace_feature inherit ['ext] no_comments_feature val mutable element_name = element val mutable att_name = name val mutable att_value = value initializer dtd <- Some init_dtd method private local_node_path = (* overrides definition from common_node_features: *) [ -1; self # node_position ] method orphaned_clone = {< parent = None; node_position = -1 >} method orphaned_flat_clone = {< parent = None; node_position = -1 >} method node_type = T_attribute att_name method attribute n = if n = att_name then att_value else raise Not_found method attribute_names = [ att_name ] method attribute_type n = let eltype = self # dtd # element element_name in ( try let atype, adefault = eltype # attribute n in atype with Undeclared -> A_cdata ) method attributes = [ att_name, att_value ] method required_string_attribute n = if n = att_name then match att_value with Value s -> s | Valuelist l -> String.concat " " l | Implied_value -> raise Not_found else failwith "Pxp_document.attribute_impl#required_string_attribute: not found" method required_list_attribute n = if n = att_name then match att_value with Value s -> [ s ] | Valuelist l -> l | Implied_value -> raise Not_found else failwith "Pxp_document.attribute_impl#required_list_attribute: not found" method optional_string_attribute n = if n = att_name then match att_value with Value s -> Some s | Valuelist l -> Some(String.concat " " l) | Implied_value -> None else None method optional_list_attribute n = if n = att_name then match att_value with Value s -> [ s ] | Valuelist l -> l | Implied_value -> [] else [] method data = match att_value with Value s -> s | Valuelist l -> String.concat " " l | Implied_value -> raise Not_found method dump fmt = Format.pp_open_hbox fmt (); Format.pp_print_string fmt "+ T_attribute "; Format.pp_print_string fmt att_name; Format.pp_print_string fmt "="; format_att_value fmt att_value; Format.pp_close_box fmt (); method position = no_position method entity_id = self # parent # entity_id method previous_node = self # parent # nth_node (self # node_position - 1) method next_node = self # parent # nth_node (self # node_position + 1) (* Non-applicable attribute methods: *) method quick_set_attributes _ = method_na "quick_set_attributes" method set_attributes _ = method_na "set_attributes" method set_attribute ?force _ _ = method_na "set_attribute" method reset_attribute _ = method_na "reset_attribute" method attributes_as_nodes = method_na "attributes_as_nodes" method id_attribute_name = method_na "id_attribute_name" method id_attribute_value = method_na "id_attribute_value" method idref_attribute_names = method_na "idref_attribute_names" (* Non-applicable methods: *) method remove _ = method_na "remove" method extension = method_na "extension" method set_data _ = method_na "set_data" method internal_init _ _ _ _ _ _ _ _ = assert false method internal_init_other _ _ _ _ = assert false method create_element ?name_pool_for_attribute_values ?entity_id ?position ?valcheck ?att_values _ _ _ = method_na "create_element" method create_data _ _ = method_na "create_data" method create_other ?entity_id ?position _ _ = method_na "create_other" method write ?prefixes ?default ?minimization _ _ = method_na "write" method display ?prefixes ?minimization _ _ = method_na "display" end ;; let attribute_name n = match n # node_type with T_attribute name -> name | _ -> invalid_arg "Pxp_document.attribute_name" ;; let attribute_value n = match n # node_type with T_attribute name -> n # attribute name | _ -> invalid_arg "Pxp_document.attribute_value" ;; let attribute_string_value n = match n # node_type with T_attribute name -> n # data | _ -> invalid_arg "Pxp_document.attribute_string_value" ;; (**********************************************************************) (* comment_impl *) (**********************************************************************) class [ 'ext ] comment_impl an_ext : ['ext] node = object(self) (* Inherited features: *) inherit ['ext] leaf_features an_ext inherit ['ext] no_subnodes_feature inherit ['ext] no_pinstr_feature inherit ['ext] no_attributes_feature inherit ['ext] no_validation_feature inherit ['ext] no_namespace_feature val mutable position = no_position val mutable comment = None val mutable ent_id = null_entity_id method node_type = T_comment method position = position method entity_id = ent_id method comment = comment method set_comment c = comment <- c method data = match comment with None -> raise Not_found | Some s -> s method write ?prefixes ?default ?minimization os enc = let encoding = self # encoding in let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in wms ("<!--"); ( match comment with None -> () | Some c -> wms c; ); wms ("-->"); method display ?prefixes ?minimization os enc = self # write os enc method dump fmt = Format.pp_open_vbox fmt 2; Format.pp_print_string fmt "* T_comment"; Format.pp_close_box fmt (); method create_element ?name_pool_for_attribute_values ?entity_id ?position ?valcheck ?att_values _ _ _ = method_na "create_element" method create_data _ _ = method_na "create_data" method create_other ?(entity_id = null_entity_id) ?(position = no_position) new_dtd new_ntype = if new_ntype <> T_comment then failwith "Pxp_document.comment_impl#create_other: bad type"; let x = extension # clone in let obj = ( {< parent = None; node_position = -1; extension = x; >} : 'ext #node :> 'ext node ) in x # set_node obj; obj # internal_init_other entity_id position new_dtd new_ntype; obj method internal_init _ new_pos attval_name_pool new_dtd new_name new_attlist = assert false method internal_init_other new_ent_id new_pos new_dtd new_ntype = (* resets the contents of the object *) parent <- None; node_position <- -1; position <- new_pos; ent_id <- new_ent_id; dtd <- Some new_dtd; comment <- None; method set_data _ = method_na "set_data" end ;; (**********************************************************************) (* pinstr_features *) (**********************************************************************) class virtual [ 'ext ] pinstr_features = object (self) val mutable pinstr = (StringMap.empty : proc_instruction list StringMap.t) (* Maps have several advantages: * - The empty map does not allocate any memory * - The one-element map is very fast (better than hashtable) * - Maps are purely functional, and we do not care about them * in clone operations. *) method virtual encoding : rep_encoding method add_pinstr pi = if pi # encoding <> self # encoding then failwith "Pxp_document.pinstr_features # add_pinstr: Inconsistent encodings"; let name = pi # target in let old_list = try StringMap.find name pinstr with Not_found -> [] in pinstr <- StringMap.add name (old_list @ [pi]) pinstr; method pinstr name = try StringMap.find name pinstr with Not_found -> [] method pinstr_names = let nl = ref [] in StringMap.iter (fun n _ -> nl := n :: !nl) pinstr; !nl (* To be included in subclasses: *) method private write_pinstr os enc = StringMap.iter (fun n pilist -> List.iter (fun pi -> pi # write os enc) pilist ) pinstr; method private dump_pinstr fmt = StringMap.iter (fun _ pilist -> List.iter (fun pi -> Format.pp_print_cut fmt (); Format.pp_print_string fmt "+ <?"; Format.pp_print_string fmt (pi # target); Format.pp_print_string fmt " "; Format.pp_print_string fmt (pi # value); Format.pp_print_string fmt "?>" ) pilist ) pinstr end ;; (**********************************************************************) (* pinstr_impl *) (**********************************************************************) class [ 'ext ] pinstr_impl an_ext : ['ext] node = object(self) (* Inherited features: *) inherit ['ext] leaf_features an_ext inherit ['ext] pinstr_features as super inherit ['ext] no_subnodes_feature inherit ['ext] no_comments_feature inherit ['ext] no_attributes_feature inherit ['ext] no_validation_feature inherit ['ext] no_namespace_feature val mutable position = no_position val mutable pinstr_name = "" val mutable ent_id = null_entity_id method node_type = T_pinstr pinstr_name method position = position method entity_id = ent_id method data = match self # pinstr pinstr_name with [ pi ] -> pi # value | _ -> assert false method write ?prefixes ?default ?minimization os enc = self # write_pinstr os enc method display ?prefixes ?minimization os enc = self # write_pinstr os enc method dump fmt = Format.pp_open_vbox fmt 2; Format.pp_print_string fmt "* T_pinstr \""; Format.pp_print_string fmt pinstr_name; Format.pp_print_string fmt "\""; self # dump_pinstr fmt; Format.pp_close_box fmt (); method add_pinstr pi = (* Overrides the definition in pinstr_features: * fail if applied more than once *) if pinstr <> StringMap.empty then failwith "Pxp_document.pinstr_impl # add_pinstr: the node can only contain one processing instruction"; super # add_pinstr pi method create_element ?name_pool_for_attribute_values ?entity_id ?position ?valcheck ?att_values _ _ _ = method_na "create_element" method create_data _ _ = method_na "create_data" method create_other ?(entity_id = null_entity_id) ?(position = no_position) new_dtd new_ntype = ( match new_ntype with T_pinstr _ -> () | _ -> failwith "Pxp_document.pinstr_impl#create_other: bad type"; ); let x = extension # clone in let obj = ( {< parent = None; node_position = -1; extension = x; >} : 'ext #node :> 'ext node ) in x # set_node obj; obj # internal_init_other entity_id position new_dtd new_ntype; obj method internal_init _ new_pos attval_name_pool new_dtd new_name new_attlist = assert false method internal_init_other new_ent_id new_pos new_dtd new_ntype = (* resets the contents of the object *) parent <- None; node_position <- -1; position <- new_pos; ent_id <- new_ent_id; dtd <- Some new_dtd; pinstr <- StringMap.empty; (match new_ntype with T_pinstr n -> pinstr_name <- n | _ -> assert false ) method set_data _ = method_na "set_data" end ;; let pinstr n = match n # node_type with T_pinstr pi -> ( match n # pinstr pi with [ pi_obj ] -> pi_obj | _ -> assert false ) | _ -> invalid_arg "Pxp_document.pinstr" ;; (**********************************************************************) let flag_ext_decl = 1;; (* Whether the element is externally declared *) (* REMOVED in PXP 1.1: *) (* let flag_keep_always_whitespace = 2;; *) (* Whether the "keep whitespace mode" is on *) type 'a list_or_array = LA_not_available | LA_list of 'a list | LA_array of 'a array ;; (* Perhaps we need also the hybrid LA_list_array storing both representations. *) let list_split n l = (* Returns l1, l2 with l = List.rev l1 @ l2 and length l1 = n *) let rec split n l h = if n = 0 then h, l else match l with [] -> failwith "list_split" | x :: l' -> split (n-1) l' (x::h) in split n l [] ;; (**********************************************************************) (* container_features *) (**********************************************************************) class virtual ['ext] container_features an_ext = (* For elements and super root nodes *) object (self:'self) inherit ['ext] tree_features an_ext as super val mutable rev_nodes = ([] : 'c list) val mutable nodes = LA_not_available val mutable size = 0 val mutable position = no_position val mutable ent_id = null_entity_id method position = position method entity_id = ent_id method append_node n = (* general DTD check: *) begin match dtd with None -> () | Some d -> if n # dtd != d then failwith "Pxp_document.container_features # append_node: the sub node has a different DTD"; end; (* Add the node: *) n # internal_adopt (Some (self : 'ext #node :> 'ext node)) size; rev_nodes <- n :: rev_nodes; nodes <- LA_not_available; size <- size + 1 method add_node ?(force = false) n = (* compatibility with PXP 1.0: *) if force then self # append_node n else match self # classify_data_node n with CD_other | CD_normal -> self # append_node n | CD_empty | CD_ignorable -> () | CD_error ex -> raise ex method insert_nodes ?(pos = size) new_nodes = if pos < 0 || pos > size then invalid_arg "insert_nodes: ~pos"; List.iter (fun n -> try ignore(n # parent); invalid_arg "insert_nodes: new node is not orphan" with Not_found -> () ) new_nodes; let succ_nodes, rev_pred_nodes = list_split (size-pos) rev_nodes in (* succ_nodes: the nodes at position pos * rev_pred_nodes: the nodes before position pos in reverse order *) rev_nodes <- List.rev_append succ_nodes (List.rev_append new_nodes rev_pred_nodes); let k = ref 0 in List.iter (fun n -> n # internal_adopt (Some (self : 'ext #node :> 'ext node)) (pos + !k); (* internal_adopt cannot fail because of the above orphan check *) incr k ) new_nodes; let number_new_nodes = !k in (* renumber succ_nodes: *) List.iter (fun n -> n # internal_set_pos (pos + !k); incr k ) succ_nodes; nodes <- LA_not_available; size <- size + number_new_nodes; () method remove_nodes ?(pos = 0) ?(len = size) () = if pos < 0 || len < 0 || pos + len > size then invalid_arg "remove_nodes: ~pos or ~len"; let succ_nodes, rev_pred_nodes = list_split (size-pos) rev_nodes in (* succ_nodes: the nodes at position pos * rev_pred_nodes: the nodes before position pos in reverse order * (nodes to keep) *) let rev_pred_nodes', succ_nodes' = list_split len succ_nodes in (* succ_nodes': the nodes at position pos+len * (nodes to keep) * rev_pred_nodes': the nodes from position pos to pos+len-1 (rev order) * (nodes to delete) *) List.iter (fun n -> n # internal_adopt None (-1) (* internal_adopt cannot fail because of an invariant *) ) rev_pred_nodes'; rev_nodes <- List.rev_append succ_nodes' rev_pred_nodes; nodes <- LA_not_available; size <- size - len; let k = ref pos in List.iter (fun n -> n # internal_set_pos !k; incr k ) succ_nodes'; () method sub_nodes = match nodes with LA_not_available -> if rev_nodes = [] then [] else begin let cl = List.rev rev_nodes in nodes <- LA_list cl; cl end | LA_list cl -> cl | LA_array a -> Array.to_list a (* does this lead to performance problems ? *) method iter_nodes f = match nodes with LA_not_available -> if rev_nodes <> [] then begin let cl = List.rev rev_nodes in nodes <- LA_list cl; List.iter f cl end | LA_list cl -> List.iter f cl | LA_array a -> Array.iter f a method iter_nodes_sibl f = let rec next last_node l = match l with [] -> () | [x] -> f last_node x None | x :: y :: l' -> f last_node x (Some y); next (Some x) l' in match nodes with LA_not_available -> if rev_nodes <> [] then begin let cl = List.rev rev_nodes in nodes <- LA_list cl; next None cl end | LA_list cl -> next None cl | LA_array a -> ( match a with [| |] -> () | [| a0 |] -> f None a0 None | _ -> let previous = ref None in for i = 0 to Array.length a - 2 do let nextnode = Some (a.(i+1)) in f !previous a.(i) nextnode; previous := Some a.(i); done; f !previous a.(Array.length a - 1) None ) method nth_node p = if p < 0 or p >= size then raise Not_found; match nodes with LA_not_available -> if rev_nodes = [] then invalid_arg "Array.get" else begin let a = Array.of_list (List.rev rev_nodes) in nodes <- LA_array a; a.(p) end | LA_list l -> let a = Array.of_list l in nodes <- LA_array a; a.(p) | LA_array a -> a.(p) method set_nodes nl = (* For every node in nl must hold: * (a) the node is already a child of this node, or * (b) the node is a root node *) List.iter (fun n -> try let parent = n # parent in if parent != (self : 'ext #node :> 'ext node) then failwith "Pxp_document.container_features # set_nodes: node is already a member of another tree"; with Not_found -> () ) nl; (* All ok. We set now the parent of the old subnodes to None, * then the parent of the new subnodes to self. *) List.iter (fun n -> n # internal_adopt None (-1)) rev_nodes; let k = ref 0 in List.iter (fun n -> n # internal_adopt (Some (self : 'ext #node :> 'ext node)) !k; incr k ) nl; size <- !k; rev_nodes <- List.rev nl; nodes <- LA_not_available; method orphaned_clone : 'self = let sub_clones = List.map (fun m -> m # orphaned_clone) rev_nodes in let x = extension # clone in let n = {< parent = None; node_position = -1; extension = x; rev_nodes = sub_clones; nodes = LA_not_available; >} in let pos = ref (size - 1) in List.iter (fun m -> m # internal_adopt (Some (n : 'ext #node :> 'ext node)) !pos; decr pos ) sub_clones; x # set_node (n : 'ext #node :> 'ext node); n method orphaned_flat_clone : 'self = let x = extension # clone in let n = {< parent = None; node_position = -1; extension = x; rev_nodes = []; nodes = LA_not_available; size = 0; >} in x # set_node (n : 'ext #node :> 'ext node); n method internal_delete n = rev_nodes <- List.filter (fun n' -> n' != n) rev_nodes; size <- size - 1; let p = ref (size-1) in List.iter (fun n' -> n' # internal_set_pos !p; decr p) rev_nodes; nodes <- LA_not_available; n # internal_adopt None (-1); method data = let cl = self # sub_nodes in String.concat "" (List.map (fun n -> match n # node_type with T_element _ | T_super_root | T_data -> n # data | _ -> "" ) cl ) end ;; (**********************************************************************) (* element_impl *) (**********************************************************************) (*------------------------- attributes -------------------------------*) type 'ext attlist = | No_atts | Atts of (att_value array * (* declared attribs *) att_value StringMap.t) (* undeclared attribs *) | Atts_with_nodes of ((att_value * 'ext node) array * (att_value * 'ext node) StringMap.t) ;; (* - Declared attributes: * They are simply in an array; the index of the array element is the position * vr.att_lookup contains for the attribute. * As an exception of the rule, the array may be empty indicating that * currently all attributes are treated as undeclared attributes. * - Undeclared attributes: * They are contained in a StringMap.t mapping from attribute names to * attribute values. *) let att_assoc vr n l = let assoc a m = try if a = [| |] then raise Not_found; let k = Str_hashtbl.find vr.att_lookup n in a.(k) with Not_found -> StringMap.find n m in match l with No_atts -> raise Not_found | Atts (a,m) -> assoc a m | Atts_with_nodes (a,m) -> fst(assoc a m) ;; let set_att ?(force=false) vr l n v = (* Does not work with Atts_with_nodes *) match l with No_atts -> Atts ( [| |], StringMap.add n v StringMap.empty ) | Atts_with_nodes(_,_) -> assert false | Atts(a,m) -> try if a = [| |] then raise Not_found; let k = Str_hashtbl.find vr.att_lookup n in a.(k) <- v; l with Not_found -> if not force then ( if not(StringMap.mem n m) then failwith "Pxp_document # set_attribute: no such attribute"; ); Atts(a, StringMap.add n v m) ;; let reset_att vr l n = (* Does not work with Atts_with_nodes *) match l with No_atts -> failwith "Pxp_document # reset_attribute: no such attribute"; | Atts_with_nodes(_,_) -> assert false | Atts(a,m) -> try if a = [| |] then raise Not_found; let k = Str_hashtbl.find vr.att_lookup n in let (_,v) = vr.init_att_vals.(k) in a.(k) <- v; l with Not_found -> ( if not(StringMap.mem n m) then failwith "Pxp_document # reset_attribute: no such attribute"; ); Atts(a, StringMap.remove n m) ;; let stringmap_to_list f m = let l = ref [] in StringMap.iter (fun n v -> l := f n v :: !l) m; !l ;; let array_to_list vr f a = let l = ref [] in for k = Array.length a - 1 downto 0 do let n = fst vr.init_att_vals.(k) in let v = a.(k) in l := f n v :: !l done; !l ;; let attlist_to_list vr f l = match l with No_atts -> [] | Atts (a,m) -> array_to_list vr f a @ stringmap_to_list f m | Atts_with_nodes (a,m) -> let f' n (v,_) = f n v in array_to_list vr f' a @ stringmap_to_list f' m ;; let attlist_iter vr f l = let iter f a m = Array.iteri (fun k v -> let n = fst vr.init_att_vals.(k) in f n v ) a; StringMap.iter f m in match l with No_atts -> () | Atts (a,m) -> iter f a m | Atts_with_nodes (a,m) -> iter (fun n (v,_) -> f n v) a m ;; let attlist_of_list l = let m = ref StringMap.empty in List.iter (fun (n,v) -> m := StringMap.add n v !m ) l; Atts( [| |], !m) ;; let attlist_with_nodes vr f l = match l with No_atts -> No_atts | Atts(a,m) -> let a' = Array.mapi (fun k v -> let n = fst vr.init_att_vals.(k) in let node = f n v k in (v,node) ) a in let k = ref (Array.length a) in let m' = StringMap.mapi (fun n v -> let node = f n v !k in incr k; (v,node) ) m in Atts_with_nodes(a',m') | Atts_with_nodes(_,_) -> assert false ;; let nodes_of_attlist vr l = match l with No_atts -> [] | Atts (_,_) -> assert false | Atts_with_nodes (a,m) -> let f n (v,node) = node in array_to_list vr f a @ stringmap_to_list f m ;; let attlist_without_nodes l = match l with No_atts -> No_atts | Atts (_,_) -> l | Atts_with_nodes (a,m) -> let a' = Array.map fst a in let m' = StringMap.map fst m in Atts(a',m') ;; let attlist_has_nodes l = match l with Atts_with_nodes (_,_) -> true | _ -> false ;; (*------------------------- validation -------------------------------*) exception Found;; let rec is_empty_node_list cl = (* Whether the node list counts as empty or not. *) match cl with [] -> true | n :: cl' -> ( match n # node_type with | T_element _ -> false | _ -> is_empty_node_list cl' (* ignore other nodes *) ) ;; let only_whitespace error_name s = (* Checks that the string "s" contains only whitespace. On failure, * Validation_error is raised. *) if not (Pxp_lib.only_whitespace s) then raise(Validation_error(error_name() ^ " must not have character contents")); () ;; let rec run_regexp cl ml = (* Validates regexp content models ml against instances cl. This * function works for deterministic and non-determninistic models. * The implementation uses backtracking and may sometimes be slow. * * cl: the list of children that will have to be matched * ml: the list of regexps that will have to match (to be read as * sequence) * returns () meaning that no match has been found, or raises Found. *) match ml with [] -> if cl = [] then raise Found; (* Frequent case *) if is_empty_node_list cl then raise Found; (* General condition *) | Seq seq :: ml' -> assert (seq <> []); (* necessary to ensure termination *) run_regexp cl (seq @ ml') | Alt alts :: ml' -> let rec find alts = match alts with [] -> () | alt :: alts' -> run_regexp cl (alt :: ml'); find alts' in assert (alts <> []); (* Alt [] matches nothing *) find alts | Repeated re :: ml' -> let rec norm re = (* to avoid infinite loops *) match re with Repeated subre -> norm subre (* necessary *) | Optional subre -> norm subre (* necessary *) | Repeated1 subre -> norm subre (* an optimization *) | _ -> re in let re' = norm re in run_regexp cl (re' :: Repeated re' :: ml'); run_regexp cl ml' | Repeated1 re :: ml' -> run_regexp cl (re :: Repeated re :: ml') | Optional re :: ml' -> run_regexp cl (re :: ml'); run_regexp cl ml'; | Child chld :: ml' -> match cl with [] -> () | sub_el :: cl' -> begin match sub_el # node_type with T_data -> (* Ignore data *) run_regexp cl' ml (* Note: It can happen that we find a data node here * if the 'keep_always_whitespace' mode is turned on. *) | T_element nt -> if nt = chld then run_regexp cl' ml' | _ -> (* Ignore this element *) run_regexp cl' ml end ;; let run_dfa cl dfa = (* Validates regexp content models ml against instances cl. This * function works ONLY for deterministic models. * The implementation executes the automaton. *) let current_vertex = ref dfa.dfa_start in let rec next_step cl = match cl with el :: cl' -> begin match el # node_type with T_data -> (* Ignore data *) next_step cl' (* Note: It can happen that we find a data node here * if the 'keep_always_whitespace' mode is turned on. *) | T_element nt -> begin try current_vertex := Graph.follow_edge !current_vertex nt; next_step cl' with Not_found -> false end | _ -> (* Ignore this node *) next_step cl' end | [] -> VertexSet.mem !current_vertex dfa.dfa_stops in next_step cl ;; let validate_content ?(use_dfa=None) model (el : 'a node) = (* checks that the nodes of 'el' matches the DTD. Returns 'true' * on success and 'false' on failure. *) match model with Unspecified -> true | Any -> true | Empty -> let cl = el # sub_nodes in is_empty_node_list cl | Mixed (MPCDATA :: mix) -> let mix' = List.map (function MPCDATA -> assert false | MChild x -> x) mix in begin try el # iter_nodes (fun sub_el -> let nt = sub_el # node_type in match nt with | T_element name -> if not (List.mem name mix') then raise Not_found; | _ -> () ); true with Not_found -> false end | Regexp re -> let cl = el # sub_nodes in begin match use_dfa with None -> (* General backtracking implementation: *) begin try run_regexp cl [re]; false with Found -> true end | Some dfa -> run_dfa cl dfa end | _ -> assert false ;; let no_validation = { content_model = Any; content_dfa = lazy None; id_att_name = None; idref_att_names = []; init_att_vals = [| |]; att_lookup = Str_hashtbl.create 1; att_info = [| |]; att_required = []; accept_undeclared_atts = true; } ;; (*--------------------------------------------------------------------*) (* Make that some slots are visible in namespace_element_impl: *) class type [ 'ext ] element_node = object ('self) inherit ['ext] node val mutable attributes : 'ext attlist val mutable vr : validation_record method private get_nsdecls : string list -> (string * att_value) list method private get_nsname : string -> string -> string method private make_attribute_node : string -> string -> att_value -> dtd -> 'ext node end ;; class [ 'ext ] element_impl an_ext (* : ['ext] element_node *) = object(self) inherit ['ext] container_features an_ext inherit ['ext] pinstr_features inherit ['ext] no_comments_feature inherit ['ext] no_namespace_feature val mutable vr = no_validation val mutable flags = 0 (* bit string of flags; see the values flag_* above *) val mutable ntype = T_none val mutable attributes = No_atts method node_type = ntype method dump fmt = Format.pp_open_vbox fmt 2; ( match ntype with T_none -> Format.pp_print_string fmt "* T_none"; | T_element n -> Format.pp_print_string fmt "* T_element \""; Format.pp_print_string fmt n; Format.pp_print_string fmt "\""; | _ -> assert false ); attlist_iter vr (fun n v -> Format.pp_print_cut fmt (); Format.pp_print_string fmt n; Format.pp_print_string fmt "="; format_att_value fmt v; ) attributes; if attlist_has_nodes attributes then begin List.iter (fun n -> Format.pp_print_cut fmt (); n # dump fmt; ) (nodes_of_attlist vr attributes); end; self # dump_pinstr fmt; List.iter (fun n -> Format.pp_print_cut fmt (); n # dump fmt; ) (List.rev rev_nodes); Format.pp_close_box fmt (); method private set_flag which value = flags <- (flags land (lnot which)) lor (if value then which else 0) method private error_name = match ntype with T_element n -> "Element `" ^ n ^ "'" | _ -> assert false method classify_data_node n = try begin match n # node_type with T_data -> begin match vr.content_model with Any -> CD_normal | Unspecified -> CD_normal | Empty -> if n # data <> "" then CD_error(Validation_error(self # error_name ^ " must be empty")) else CD_empty | Mixed _ -> CD_normal | Regexp _ -> (* May raise an exception: *) only_whitespace (fun()->self # error_name) (n # data); (* TODO: following check faster *) if n # dtd # standalone_declaration && n # data <> "" then begin (* The standalone declaration is violated if the * element declaration is contained in an external * entity. *) if flags land flag_ext_decl <> 0 then raise (Validation_error (self # error_name ^ " violates standalone declaration" ^ " because extra white space separates" ^ " the sub elements")); end; CD_ignorable end | _ -> (* n is not a data node: *) CD_other end; with ex -> CD_error ex method attributes = attlist_to_list vr (fun an ax -> (an,ax)) attributes method attribute n = att_assoc vr n attributes method attribute_names = attlist_to_list vr (fun an _ -> an) attributes method attribute_type n = match ntype with T_element name -> let d = match dtd with None -> assert false | Some d -> d in let eltype = d # element name in ( try let atype, adefault = eltype # attribute n in atype with Undeclared -> A_cdata ) | _ -> method_na "attribute_type" method required_string_attribute n = try match att_assoc vr n attributes with Value s -> s | Valuelist l -> String.concat " " l | Implied_value -> raise Not_found with Not_found -> failwith "Pxp_document, method required_string_attribute: not found" method optional_string_attribute n = try match att_assoc vr n attributes with Value s -> Some s | Valuelist l -> Some (String.concat " " l) | Implied_value -> None with Not_found -> None method required_list_attribute n = try match att_assoc vr n attributes with Value s -> [ s ] | Valuelist l -> l | Implied_value -> raise Not_found with Not_found -> failwith "Pxp_document, method required_list_attribute: not found" method optional_list_attribute n = try match att_assoc vr n attributes with Value s -> [ s ] | Valuelist l -> l | Implied_value -> [] with Not_found -> [] method id_attribute_name = match vr.id_att_name with None -> raise Not_found | Some name -> name method id_attribute_value = match vr.id_att_name with None -> raise Not_found | Some name -> begin match att_assoc vr name attributes (* may raise Not_found *) with Value s -> s | _ -> raise Not_found end method idref_attribute_names = vr.idref_att_names method quick_set_attributes atts = attributes <- attlist_of_list atts; method set_attributes atts = attributes <- attlist_of_list atts; method set_attribute ?force n v = attributes <- set_att ?force vr (attlist_without_nodes attributes) n v method reset_attribute n = attributes <- reset_att vr (attlist_without_nodes attributes) n method private make_attribute_node element_name att_name value dtd = (* to be overridden *) new attribute_impl ~element:element_name ~name:att_name value dtd method attributes_as_nodes = match attributes with | No_atts -> [] | Atts (_,_) -> let dtd = self # dtd in let element_name = match ntype with T_element n -> n | _ -> assert false in let atts' = attlist_with_nodes vr (fun n v pos -> let a = self # make_attribute_node element_name n v dtd in a # internal_adopt (Some (self : 'ext #node :> 'ext node)) pos; a ) attributes in attributes <- atts'; nodes_of_attlist vr atts' | _ -> nodes_of_attlist vr attributes method create_element ?name_pool_for_attribute_values ?(entity_id = null_entity_id) ?(position = no_position) ?(valcheck = true) ?(att_values = []) new_dtd new_type new_attlist = let x = extension # clone in let obj = ( {< parent = None; extension = x; pinstr = StringMap.empty; >} : 'ext #node :> 'ext node ) in (* It is ok that obj does not reset rev_nodes because the init_* * methods do that. *) x # set_node obj; match new_type with | T_element name -> obj # internal_init entity_id position name_pool_for_attribute_values valcheck new_dtd name new_attlist att_values; obj | T_none -> (* a special case to make create_no_node work *) obj # internal_init entity_id position None false new_dtd "_xxx_" [] []; obj | _ -> failwith "create_element: Cannot create such node" (* New attribute parsing algorithm: * * Get the attribute declaration of the element type. This is a * record containing: * - init_att_vals: an array of initial attribute values (default values * or implied values) * - init_att_found: an array of booleans storing whether a default * value has been replaced by an actual value. Initially, an array * of 'false'. * - att_info: an array of attribute types and other per-attribute * information. All arrays have the same size, and for the same * attribute the same array position is used. * - att_lookup: a hashtable that allows quick lookup of the array * position for a given attribute name * - att_required: a list of indexes of attributes that are required * * DECLARED ATTRIBUTES: * * In the first round the array init_att_vals is modified such that * the actual values replace the defaults. Furthermore, the undeclared * attributes are accumulated in a list undeclared_atts: * * (Round 1 can be left out if there are no declared attributes.) * * Input: The list of actual attributes new_attlist * * Initialization: init_att_vals is a copy of the array returned by * the DTD. Also, init_att_found must be initialized. * * For every element att of new_attlist: * (1) Look the index k of the attribute att up (by using att_lookup). * If the attribute is undeclared, add it to undeclared_atts and * continue with the next attribute. Otherwise continue with (2). * (2) If init_att_found.(k): failure (attribute has been defined twice) * (3) Get the type t of the attribute (by using att_info) * (4) Get the normalized value v of the attribute (by calling * value_of_attribute with the lexical value and t) * (5) Try to overwrite the init_att_vals.(k). This may fail because the * default value is #FIXED. * (6) If necessary: Check whether the stand-alone declaration is * violated by the attribute * (7) Set init_att_found.(k) to true. * * After that loop, check for every member of att_required whether * init_att_found is set. If not, a required attribute is missing. * * If the DTD does not allow undeclared attributes, there is no * second round: undeclared_atts <> [] is a violation of the * validation constraint. * * UNDECLARED ATTRIBUTES: * * These are simply collected in a StringMap which is also useful * for detecting duplicates. * * MERGE RESULTS: * * Finally, init_att_vals and extra_att_vals are merged. *) method internal_init new_ent_id new_pos attval_name_pool valcheck_element_exists new_dtd new_name new_attlist new_attvalues = (* resets the contents of the object *) parent <- None; node_position <- -1; rev_nodes <- []; nodes <- LA_not_available; size <- 0; ntype <- T_element new_name; position <- new_pos; ent_id <- new_ent_id; dtd <- Some new_dtd; pinstr <- StringMap.empty; let lfactory = new_dtd # lexer_factory in let sadecl = new_dtd # standalone_declaration in let mk_pool_value av0 = match attval_name_pool with None -> av0 | Some pool -> (match av0 with Implied_value -> Implied_value | Value s -> Value (pool_string pool s) | Valuelist l -> Valuelist (List.map (pool_string pool) l) ) in let undeclared_atts = ref StringMap.empty in let add_undeclared_att n v = if StringMap.mem n !undeclared_atts then raise (WF_error("Attribute `" ^ n ^ "' occurs twice")) else undeclared_atts := StringMap.add n v !undeclared_atts in let add_undeclared_att_string n s = add_undeclared_att n (mk_pool_value(Value s)) in let add_undeclared_att_pair (n,v) = add_undeclared_att n v in let add_undeclared_att_string_pair (n,s) = add_undeclared_att n (mk_pool_value(Value s)) in let att_vals = (* att_vals: the array of declared attributes or [| |] *) try (* catch Undeclared in the following block: *) let eltype = if valcheck_element_exists then new_dtd # element new_name (* may raise Undeclared *) else raise Undeclared (* raise Undeclared anyway *) in vr <- eltype # internal_vr; self # set_flag flag_ext_decl (eltype # externally_declared); (* init_att_vals, cur_att_vals: about declared attributes only *) let init_att_vals = vr.init_att_vals in (* contains pairs (att_name, att_default_value) *) let cur_att_vals = Array.map snd init_att_vals in (* contains current value *) let m = Array.length init_att_vals in if m > 0 then begin (* round 1 *) let att_found = Array.create m false in (* whether the declared attribute has been found *) (* First iterate over new_attlist, then over new_attvalues: *) (* new_attlist: *) List.iter (fun (att_name, att_val) -> let bad = ref false in (* if !bad = true, Not_found must not happen *) try let k = Str_hashtbl.find vr.att_lookup att_name in (* or raise Not_found *) bad := true; if att_found.(k) then raise (WF_error("Attribute `" ^ att_name ^ "' occurs twice in element `" ^ new_name ^ "'")); let att_type, att_fixed = vr.att_info.(k) in let v0 = value_of_attribute lfactory new_dtd att_name att_type att_val in let v = mk_pool_value v0 in if att_fixed then begin let v' = cur_att_vals.(k) in if v <> v' then raise (Validation_error ("Attribute `" ^ att_name ^ "' is fixed, but has here a different value")); end else begin cur_att_vals.(k) <- v end; (* If necessary, check whether normalization violates * the standalone declaration. *) if sadecl && eltype # attribute_violates_standalone_declaration att_name (Some att_val) then raise (Validation_error ("Attribute `" ^ att_name ^ "' of element type `" ^ new_name ^ "' violates standalone declaration")); att_found.(k) <- true; with Not_found -> assert(not !bad); (* Raised by Hashtbl.find *) add_undeclared_att_string att_name att_val; ) new_attlist; (* new_attvalues: Almost the same. *) List.iter (fun (att_name, v) -> let bad = ref false in try let k = Str_hashtbl.find vr.att_lookup att_name in (* or raise Not_found *) bad := true; if att_found.(k) then raise (WF_error("Attribute `" ^ att_name ^ "' occurs twice in element `" ^ new_name ^ "'")); let att_type, att_fixed = vr.att_info.(k) in check_value_of_attribute lfactory new_dtd att_name att_type v; (* Check: Only atts flagged as #IMPLIED * can be set to Implied_value *) if v = Implied_value then begin let d = cur_att_vals.(k) in if d = Implied_value then begin if List.mem k vr.att_required then raise (Validation_error ("Attribute `" ^ att_name ^ "' has Implied_value, but is declared as #REQUIRED")); end else raise (Validation_error ("Attribute `" ^ att_name ^ "' has Implied_value, but is not declared as #IMPLIED")); end; if att_fixed then begin let v' = cur_att_vals.(k) in if v <> v' then raise (Validation_error ("Attribute `" ^ att_name ^ "' is fixed, but has here a different value")); end else begin cur_att_vals.(k) <- v end; att_found.(k) <- true; with Not_found -> assert(not !bad); (* Raised by Hashtbl.find *) add_undeclared_att att_name v; ) new_attvalues; (* Check required attributes: *) List.iter (fun k -> if not att_found.(k) then begin let n, _ = init_att_vals.(k) in raise(Validation_error("Required attribute `" ^ n ^ "' is missing")) end ) vr.att_required; (* Check standalone declaration of missing atts: *) if sadecl then begin for k = 0 to m - 1 do if not att_found.(k) then begin let n, _ = init_att_vals.(k) in if eltype # attribute_violates_standalone_declaration n None then raise (Validation_error ("Attribute `" ^ n ^ "' of element type `" ^ new_name ^ "' violates standalone declaration")); end done end; end (* of round 1 *) else begin (* m = 0 *) List.iter add_undeclared_att_string_pair new_attlist; List.iter add_undeclared_att_pair new_attvalues; end; cur_att_vals (* result *) with Undeclared -> (* raised by #element *) vr <- no_validation; List.iter add_undeclared_att_string_pair new_attlist; List.iter add_undeclared_att_pair new_attvalues; [| |] in if !undeclared_atts <> StringMap.empty && not vr.accept_undeclared_atts then begin raise (Validation_error ("The following attributes are not declared: " ^ String.concat ", " (stringmap_to_list (fun n v -> n) !undeclared_atts))) end; attributes <- Atts(att_vals, !undeclared_atts) method private update_vr = if vr == no_validation then begin match ntype with T_element name -> let dtd = self # dtd in ( try let eltype = dtd # element name (* may raise Undeclared *) in vr <- eltype # internal_vr; self # set_flag flag_ext_decl (eltype # externally_declared); with Undeclared -> () ) | _ -> () end method validate_contents ?(use_dfa=false) ?(check_data_nodes=true) () = (* validates that the content of this element matches the model *) self # update_vr; if vr.content_model <> Any then begin if check_data_nodes then begin List.iter (fun n -> match n # node_type with T_data -> ( match self # classify_data_node n with CD_normal | CD_other | CD_empty | CD_ignorable -> () | CD_error e -> raise e ) | _ -> () ) rev_nodes; () end; let dfa = if use_dfa then Lazy.force vr.content_dfa else None in if not (validate_content ~use_dfa:dfa vr.content_model (self : 'ext #node :> 'ext node)) then raise(Validation_error(self # error_name ^ " does not match its content model")) end method local_validate ?use_dfa ?check_data_nodes () = self # validate_contents ?use_dfa ?check_data_nodes () method complement_attlist () = (* Iterate over init_att_vals of the validation record vr. Add every * missing attribute to the current set of attributes. *) self # update_vr; let old_atts = attlist_without_nodes attributes in let new_array, new_map = match old_atts with No_atts -> [| |], ref StringMap.empty | Atts(a,m) -> a, ref m | _ -> assert false in Array.iter (fun (name,value) -> try ignore(att_assoc vr name old_atts) with Not_found -> new_map := StringMap.add name value !new_map; ) vr.init_att_vals; attributes <- Atts(new_array, !new_map) method validate_attlist () = (* Only defined for elements: * Create a duplicate node and call internal_init for it. The * resulting validated attlist must be equal to the current attlist. *) let atts = self # attributes in let dup = {< parent = None; node_position = -1; rev_nodes = []; nodes = LA_not_available; size = 0; >} in (* dup: flat duplicate without duplicated extension *) let name = match ntype with T_element n -> n | _ -> assert false in dup # internal_init null_entity_id no_position None true (self#dtd) name [] atts; (* It is possible that dup has now more attributes than atts, * because values in atts are missing for which the DTD defines a * default. (This is an error.) * The order of attributes is unspecified, so we must compare * attribute by attribute. *) let dup_atts = dup # attributes in let l_atts = List.length atts in let l_dup_atts = List.length dup_atts in if l_atts <> l_dup_atts then begin assert(l_atts < l_dup_atts); List.iter (fun (n,_) -> if not (List.mem_assoc n atts) then raise(Validation_error("Attribute `" ^ n ^ "' is missing")) ) dup_atts; assert false; end method validate () = self # validate_attlist(); self # validate_contents ~use_dfa:true (); (* Use DFA if available *) method private get_nsdecls prefixes = (* to be overridden *) [] method private get_nsname name default = (* to be overridden *) name method write ?(prefixes = ([] : string list)) ?default ?(minimization=`None) os enc = let encoding = self # encoding in let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in let write_att p aname avalue = match avalue with Implied_value -> () | Value v -> wms ("\n" ^ p ^ aname ^ "=\""); write_data_string ~from_enc:encoding ~to_enc:enc os v; wms "\""; | Valuelist l -> let v = String.concat " " l in wms ("\n" ^ p ^ aname ^ "=\""); write_data_string ~from_enc:encoding ~to_enc:enc os v; wms "\"" in let nsdecls = self # get_nsdecls prefixes in let nsdefault = match default with None -> [] | Some d -> if List.mem "" prefixes then [] else let d_uri = self # namespace_manager # get_primary_uri d in [Value d_uri] in let name' = match ntype with T_element name -> ( match default with Some d -> self # get_nsname name d | None -> name ) | _ -> assert false in wms ("<" ^ name'); attlist_iter vr (write_att "") attributes; List.iter (fun (n,v) -> write_att "xmlns:" n v) nsdecls; List.iter (fun ( v) -> write_att "" "xmlns" v) nsdefault; let sub_nodes = self # sub_nodes in (* Check for minimization: *) let can_minimize = (pinstr = StringMap.empty) && (sub_nodes = []) in let do_minimize = can_minimize && match minimization with | `None -> false | `AllEmpty -> true | `DeclaredEmpty -> vr.content_model = Empty in if do_minimize then wms "\n/>" else ( wms "\n>"; self # write_pinstr os enc; let prefixes' = (List.map fst nsdecls) @ prefixes in let prefixes'' = if nsdefault <> [] then "" :: prefixes' else prefixes' in List.iter (fun n -> n # write ?prefixes:(Some prefixes'') ?default ?minimization:(Some minimization) os enc ) sub_nodes; wms ("</" ^ name' ^ "\n>"); ) method display ?prefixes ?minimization os enc = (* Overriden in namespace_element_impl, so this is only for the * non-namespace case: *) self # write ?minimization os enc method internal_init_other _ new_pos new_dtd new_ntype = method_na "internal_init_other" method set_data _ = method_na "set_data" method create_other ?entity_id ?position _ _ = method_na "create_other" method create_data _ _ = method_na "create_data" end ;; (**********************************************************************) (* super_root_impl *) (**********************************************************************) class [ 'ext ] super_root_impl an_ext : ['ext] node = object(self) inherit ['ext] container_features an_ext inherit ['ext] pinstr_features inherit ['ext] no_comments_feature inherit ['ext] no_namespace_feature inherit ['ext] no_attributes_feature inherit ['ext] no_validation_feature method node_type = T_super_root method create_other ?(entity_id = null_entity_id) ?(position = no_position) new_dtd new_ntype = let x = extension # clone in let obj = ( {< parent = None; extension = x; >} : 'ext #node :> 'ext node ) in (* It is ok that obj does not reset rev_nodes because the init_* * methods do that. *) x # set_node obj; match new_ntype with | (T_super_root) -> obj # internal_init_other entity_id position new_dtd new_ntype; obj | _ -> failwith "create_other: Cannot create such node" method internal_init_other new_ent_id new_pos new_dtd new_ntype = (* resets the contents of the object *) parent <- None; node_position <- -1; rev_nodes <- []; nodes <- LA_not_available; size <- 0; position <- new_pos; ent_id <- new_ent_id; dtd <- Some new_dtd; pinstr <- StringMap.empty (* TODO: Super root nodes are always roots *) method dump fmt = Format.pp_open_vbox fmt 2; Format.pp_print_string fmt "* T_super_root"; self # dump_pinstr fmt; List.iter (fun n -> Format.pp_print_cut fmt (); n # dump fmt; ) (List.rev rev_nodes); Format.pp_close_box fmt (); method write ?prefixes ?default ?minimization os enc = self # write_pinstr os enc; List.iter (fun n -> n # write ?prefixes ?default ?minimization os enc) (self # sub_nodes); method display ?prefixes ?minimization os enc = self # write_pinstr os enc; List.iter (fun n -> n # display ?prefixes ?minimization os enc) (self # sub_nodes); method create_element ?name_pool_for_attribute_values ?entity_id ?position ?valcheck ?att_values _ _ _ = method_na "create_element" method create_data _ _ = method_na "create_data" method internal_init _ new_pos attval_name_pool new_dtd new_name new_attlist = method_na "internal_init" method set_data _ = method_na "set_data" end ;; (**********************************************************************) class ['ext] namespace_attribute_impl ~element ~name value dtd = object (self) inherit [ 'ext ] attribute_impl ~element ~name value dtd as super (* Note: Inheriting from an *_impl class can be problematic * as not all methods and/or values may be visible *) val mutable normprefix = "" val mutable localname = "" initializer let (p,l) = namespace_split name in normprefix <- p; localname <- l; method normprefix = normprefix method localname = localname method display_prefix = self # parent # namespace_scope # display_prefix_of_normprefix normprefix method namespace_uri = self # namespace_manager # get_primary_uri normprefix method namespace_scope = self # parent # namespace_scope method set_namespace_scope x = method_na "set_namespace_scope" method namespaces_as_nodes = [] method namespace_manager = self # dtd # namespace_manager end ;; (**********************************************************************) (* namespace_impl *) (**********************************************************************) class [ 'ext ] namespace_impl srcprefix normprefix init_dtd : ['ext] node = object (self) inherit ['ext] common_node_features inherit ['ext] no_attributes_feature inherit ['ext] no_subnodes_feature inherit ['ext] no_pinstr_feature inherit ['ext] no_comments_feature inherit ['ext] no_validation_feature val normprefix = normprefix val srcprefix = srcprefix initializer dtd <- Some init_dtd method private local_node_path = (* Overrides definition in common_node_features: *) [ -2; self # node_position ] method orphaned_clone = {< parent = None; node_position = -1 >} method orphaned_flat_clone = {< parent = None; node_position = -1 >} method node_type = T_namespace srcprefix method data = (self # namespace_manager) # get_primary_uri normprefix method normprefix = normprefix (* CHECK in the light of new namespace impl *) (* This is a hack to ensure whenever there is no srcprefix we will * not have a normprefix, either. * However, there may be a namespace URI *) (* if srcprefix = "" then "" else normprefix *) method display_prefix = srcprefix method namespace_uri = (* XPath requires this to be null: *) raise Not_found method namespace_manager = self # dtd # namespace_manager method namespace_scope = self # parent # namespace_scope method namespaces_as_nodes = [] method dump fmt = Format.pp_open_vbox fmt 2; Format.pp_print_string fmt "+ T_namespace"; Format.pp_print_cut fmt (); Format.pp_print_string fmt "normprefix="; Format.pp_print_string fmt (self # normprefix); Format.pp_print_cut fmt (); Format.pp_print_string fmt "display prefix="; Format.pp_print_string fmt srcprefix; Format.pp_print_cut fmt (); Format.pp_print_string fmt "uri="; ( try Format.pp_print_string fmt (self # data); with Not_found -> Format.pp_print_string fmt "<Not found>" ); Format.pp_close_box fmt () (* Senseless methods: *) method position = no_position method entity_id = null_entity_id (* Non-applicable methods: *) method extension = method_na "extension" method internal_init _ _ _ _ _ _ _ _ = method_na "internal_init" method internal_init_other _ _ _ _ = method_na "internal_init_other" method set_data _ = method_na "set_data" method set_namespace_scope _ = method_na "set_namespace_scope" method create_element ?name_pool_for_attribute_values ?entity_id ?position ?valcheck ?att_values _ _ _ = method_na "create_element" method create_data _ _ = method_na "create_data" method create_other ?entity_id ?position _ _ = method_na "create_other" method write ?prefixes ?default ?minimization _ _ = method_na "write" method display ?prefixes ?minimization _ _ = method_na "display" method localname = method_na "localname" method previous_node = method_na "previous_node" method next_node = method_na "next_node" method remove _ = method_na "remove" end ;; let namespace_normprefix n = match n # node_type with T_namespace _ -> n # normprefix | _ -> invalid_arg "Pxp_document.namespace_normprefix" ;; let namespace_display_prefix n = match n # node_type with T_namespace _ -> n # display_prefix | _ -> invalid_arg "Pxp_document.namespace_display_prefix" ;; let namespace_uri n = match n # node_type with T_namespace _ -> n # data (* sic! *) | _ -> invalid_arg "Pxp_document.namespace_uri" ;; (**********************************************************************) (* namespace_element_impl *) (**********************************************************************) class [ 'ext ] namespace_element_impl an_ext = object (self) inherit [ 'ext ] element_impl an_ext as super (* Note: Inheriting from an *_impl class can be problematic * as not all methods and/or values may be visible *) val mutable normprefix = "" val mutable localname = "" val mutable scope = None val mutable nsnodes = None method normprefix = normprefix method localname = localname method namespace_uri = (* IDEA: Map Not_found to Namespace_not_declared *) self # namespace_manager # get_primary_uri normprefix method display_prefix = self # namespace_scope # display_prefix_of_normprefix normprefix method namespace_scope = match scope with None -> ( let empty_scope = new namespace_scope_impl self#namespace_manager None [] in scope <- Some empty_scope; empty_scope ) | Some x -> x method set_namespace_scope x = let m = self#namespace_manager in if m <> x # namespace_manager then failwith "set_namespace_scope: Invalid namespace manager"; scope <- Some x; nsnodes <- None; (* force recomputation *) method namespace_manager = self # dtd # namespace_manager method namespaces_as_nodes = match nsnodes with None -> let dtd = self#dtd in let m = self#namespace_manager in let s = self#namespace_scope in let l1 = s#effective_declaration in (* pairs (dsp_prefix, uri) *) let pos = ref 0 in let l2 = List.map (fun (dsp_prefix, uri) -> let norm_prefix = try m#get_normprefix uri with Not_found -> "<unknown>" (* CHECK *) in let n = new namespace_impl dsp_prefix norm_prefix dtd in n # internal_adopt (Some (self : 'ext #node :> 'ext node)) !pos; incr pos; n ) l1 in nsnodes <- Some l2; l2 | Some l -> l method internal_init new_ent_id new_pos attval_name_pool valcheck_element_exists new_dtd new_name new_attlist new_attvalues = super # internal_init ent_id new_pos attval_name_pool valcheck_element_exists new_dtd new_name new_attlist new_attvalues; let (p,l) = namespace_split new_name in normprefix <- p; localname <- l; (* TODO: Use pools *) (* CHECK: It is possible to create elements with non-existing * normprefixes. This may cause errors later (namespace URI not * found). Maybe it is better to catch this case here. *) method private get_nsname name default = (* Overrides the definition in element_impl *) (* If the prefix of [name] is [default], strip the prefix: *) let prefix, localname = namespace_split name in if prefix = default then localname else name method private get_nsdecls prefixes = (* Overrides the definition in element_impl *) (* This method modifies the behaviour of 'write'. In 'prefixes' the * list of already printed namespace declarations is passed to this * method. The task is to check whether additional declarations are * necessary and to pass them back as list of pairs (normprefix, uri). *) let scan_att name value = (* return prefix of attribute *) extract_prefix name in let rec add_prefixes prefixes candidates = match candidates with [] -> [] | "" :: candidates' -> add_prefixes prefixes candidates' | p :: candidates' -> if List.mem p prefixes then add_prefixes prefixes candidates' else p :: (add_prefixes (p :: prefixes) candidates') in let p_candidates = normprefix :: (attlist_to_list vr scan_att attributes) in let prefixes' = add_prefixes prefixes p_candidates in let mng = self # namespace_manager in List.map (fun p -> p, Value (mng # get_primary_uri p) ) prefixes' method private make_attribute_node element_name att_name value dtd = (* This method modifies the behaviour of attributes_as_nodes *) new namespace_attribute_impl ~element:element_name ~name:att_name value dtd method display ?(prefixes = StringMap.empty) ?(minimization=`None) os enc = let encoding = self # encoding in let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in (* Get the required declarations: *) let mng = self # namespace_manager in let scope = self # namespace_scope in let eff_decl = scope # effective_declaration in let eff_decl_to_add = (* The prefixes in [eff_decl] that are not in [prefixes] *) List.filter (fun (dp, uri) -> try StringMap.find dp prefixes <> uri with Not_found -> true ) eff_decl in let eff_decl_to_add' = ref [] in (* further prefixes *) let prefixes' = ref (List.fold_left (fun acc (dp, uri) -> StringMap.add dp uri acc) prefixes eff_decl_to_add) in let search_prefix uri = (* Slow! *) let p = StringMap.fold (fun _p _uri y -> if _uri = uri then _p else y) !prefixes' "" in if p = "" then raise Not_found; p in let invent_new_prefix uri = let n = ref 0 in while StringMap.mem ("ns" ^ string_of_int !n) !prefixes' do incr n done; let p = "ns" ^ string_of_int !n in prefixes' := StringMap.add p uri !prefixes'; eff_decl_to_add' := (p, uri) :: !eff_decl_to_add'; p in let write_att p aname avalue = match avalue with Implied_value -> () | Value v -> wms ("\n" ^ p ^ aname ^ "=\""); write_data_string ~from_enc:encoding ~to_enc:enc os v; wms "\""; | Valuelist l -> let v = String.concat " " l in wms ("\n" ^ p ^ aname ^ "=\""); write_data_string ~from_enc:encoding ~to_enc:enc os v; wms "\"" in let write_att_remap aname avalue = let (p,local) = namespace_split aname in if p = "" then write_att "" aname avalue else let d = try scope # display_prefix_of_normprefix p with Namespace_not_in_scope _ -> (* Display prefix is missing. This is an error, but we * can search or invent a new prefix on the fly. *) ( let uri = mng # get_primary_uri p in try search_prefix uri with Not_found -> invent_new_prefix uri ) in write_att (d ^ ":") local avalue in let this_display_prefix = try self # display_prefix with Namespace_not_in_scope _ -> (* Display prefix is missing. This is an error, but we * can search or invent a new prefix on the fly. *) ( let uri = mng # get_primary_uri self#normprefix in try search_prefix uri with Not_found -> invent_new_prefix uri ) in let this_localname = self # localname in let name = if this_display_prefix = "" then this_localname (* within default namespace *) else this_display_prefix ^ ":" ^ this_localname in wms ("<" ^ name); attlist_iter vr write_att_remap attributes; List.iter (fun (n,v) -> if n = "" then write_att "" "xmlns" (Value v) else write_att "xmlns:" n (Value v)) (eff_decl_to_add @ !eff_decl_to_add'); let sub_nodes = self # sub_nodes in (* Check for minimization: *) let can_minimize = (pinstr = StringMap.empty) && (sub_nodes = []) in let do_minimize = can_minimize && match minimization with | `None -> false | `AllEmpty -> true | `DeclaredEmpty -> vr.content_model = Empty in if do_minimize then wms "\n/>" else ( wms "\n>"; super # write_pinstr os enc; List.iter (fun n -> n # display ?prefixes:(Some !prefixes') ?minimization:(Some minimization) os enc) sub_nodes; wms ("</" ^ name ^ "\n>") ) end ;; (**********************************************************************) let spec_table_find_exemplar tab eltype = try Hashtbl.find tab.mapping eltype with Not_found -> tab.default_element ;; let create_data_node spec dtd str = match spec with Spec_table tab -> let exemplar = tab.data_node in exemplar # create_data dtd str ;; let get_data_exemplar spec = match spec with Spec_table tab -> tab.data_node ;; let create_element_node ?name_pool_for_attribute_values ?entity_id ?position ?valcheck ?att_values spec dtd eltype atts = match spec with Spec_table tab -> let exemplar = spec_table_find_exemplar tab eltype in exemplar # create_element ?name_pool_for_attribute_values ?entity_id ?position ?valcheck ?att_values dtd (T_element eltype) atts ;; let get_element_exemplar spec eltype atts = match spec with Spec_table tab -> spec_table_find_exemplar tab eltype ;; let create_super_root_node ?entity_id ?position spec dtd = match spec with Spec_table tab -> ( match tab.super_root_node with None -> failwith "Pxp_document.create_super_root_node: No exemplar" | Some x -> x # create_other ?entity_id ?position:position dtd T_super_root ) ;; let get_super_root_exemplar spec = match spec with Spec_table tab -> ( match tab.super_root_node with None -> raise Not_found | Some x -> x ) ;; (* TODO: This function is broken, because an element will no longer * accept the type T_none *) let create_no_node ?entity_id ?position spec dtd = match spec with Spec_table tab -> let x = tab.default_element in x # create_element ?entity_id ?position:position dtd T_none [] ;; let create_comment_node ?entity_id ?position spec dtd text = match spec with Spec_table tab -> ( match tab.comment_node with None -> failwith "Pxp_document.create_comment_node: No exemplar" | Some x -> let e = x # create_other ?entity_id ?position dtd T_comment in e # set_comment (Some text); e ) ;; let get_comment_exemplar spec = match spec with Spec_table tab -> ( match tab.comment_node with None -> raise Not_found | Some x -> x ) ;; let create_pinstr_node ?entity_id ?position spec dtd pi = let target = pi # target in let exemplar = match spec with Spec_table tab -> ( try Hashtbl.find tab.pinstr_mapping target with Not_found -> ( match tab.default_pinstr_node with None -> failwith "Pxp_document.create_pinstr_node: No exemplar" | Some x -> x ) ) in let el = exemplar # create_other ?entity_id ?position dtd (T_pinstr target) in el # add_pinstr pi; el ;; let get_pinstr_exemplar spec pi = let target = pi # target in match spec with Spec_table tab -> ( try Hashtbl.find tab.pinstr_mapping target with Not_found -> ( match tab.default_pinstr_node with None -> raise Not_found | Some x -> x ) ) ;; (* TODO: try to avoid sub_nodes in the following; replace it with * iter_nodes. *) (**********************************************************************) let find ?(deeply=false) f base = let rec search_flat children = match children with [] -> raise Not_found | n :: children' -> if f n then n else search_flat children' in let rec search_deep children = match children with [] -> raise Not_found | n :: children' -> if f n then n else try search_deep (n # sub_nodes) with Not_found -> search_deep children' in (if deeply then search_deep else search_flat) (base # sub_nodes) ;; let find_all ?(deeply=false) f base = let rec search_flat children = match children with [] -> [] | n :: children' -> if f n then n :: search_flat children' else search_flat children' in let rec search_deep children = match children with [] -> [] | n :: children' -> let rest = search_deep (n # sub_nodes) @ search_deep children' in if f n then n :: rest else rest in (if deeply then search_deep else search_flat) (base # sub_nodes) ;; let find_element ?deeply eltype base = find ?deeply:deeply (fun n -> match n # node_type with T_element name -> name = eltype | _ -> false) base ;; let find_all_elements ?deeply eltype base = find_all ?deeply:deeply (fun n -> match n # node_type with T_element name -> name = eltype | _ -> false) base ;; exception Skip;; let map_tree ~pre ?(post=(fun x -> x)) base = let rec map_rec n = let n' = pre n in ( match n' # node_type with T_element _ | T_super_root -> let children = n # sub_nodes in let children' = map_children children in n' # set_nodes children'; | _ -> () ); post n' and map_children l = match l with [] -> [] | child :: l' -> (try let child' = map_rec child in child' :: map_children l' with Skip -> map_children l' ) in try map_rec base with Skip -> raise Not_found ;; let map_tree_sibl ~pre ?(post=(fun _ x _ -> x)) base = let rec map_rec l n r = let n' = pre l n r in ( match n' # node_type with T_element _ | T_super_root -> let children = n # sub_nodes in let children' = map_children None children in let children'' = postprocess_children None children' in n' # set_nodes children''; | _ -> () ); n' and map_children predecessor l = (match l with [] -> [] | child :: l' -> let successor = match l' with [] -> None | x :: _ -> Some x in (try let child' = map_rec predecessor child successor in child' :: map_children (Some child) l' with Skip -> map_children (Some child) l' ) ) and postprocess_children predecessor l = (match l with [] -> [] | child :: l' -> let successor = match l' with [] -> None | x :: _ -> Some x in (try let child' = post predecessor child successor in child' :: postprocess_children (Some child) l' with Skip -> postprocess_children (Some child) l' ) ) in try let base' = map_rec None base None in post None base' None with Skip -> raise Not_found ;; let iter_tree ?(pre=(fun x -> ())) ?(post=(fun x -> ())) base = let rec iter_rec n = pre n; let children = n # sub_nodes in iter_children children; post n and iter_children l = match l with [] -> () | child :: l' -> (try iter_rec child; iter_children l' with Skip -> iter_children l' ) in try iter_rec base with Skip -> () ;; let iter_tree_sibl ?(pre=(fun _ _ _ -> ())) ?(post=(fun _ _ _ -> ())) base = let rec iter_rec l n r = pre l n r; let children = n # sub_nodes in iter_children None children; post l n r and iter_children predecessor l = (match l with [] -> () | child :: l' -> let successor = match l' with [] -> None | x :: _ -> Some x in (try iter_rec predecessor child successor; iter_children (Some child) l' with Skip -> iter_children (Some child) l' ) ) in try iter_rec None base None with Skip -> () ;; (**********************************************************************) let validate tree = iter_tree ~pre:(fun n -> n # validate()) tree ;; (**********************************************************************) let compare a b = let rec cmp p1 p2 = match p1, p2 with [], [] -> 0 | [], _ -> -1 | _, [] -> 1 | x::p1', y::p2' -> if x = y then cmp p1' p2' else x - y in let a_path = a # node_path in let b_path = b # node_path in cmp a_path b_path ;; type 'ext ord_index = ('ext node, int) Hashtbl.t;; let create_ord_index base = (* Note: Attribute and namespace nodes are not entered into the * ordinal index; but some ordinal numbers are reserved for them. *) let n = ref 0 in iter_tree ~pre:(fun _ -> incr n) base; let idx = Hashtbl.create !n in let k = ref 0 in iter_tree ~pre:(fun node -> match node # node_type with T_element _ -> Hashtbl.add idx node (!k + 2); k := !k + 3 (* Reserve !k for namespace nodes, and !k+1 for * attribute nodes *) | _ -> Hashtbl.add idx node !k; incr k ) base; idx ;; let ord_number idx node = Hashtbl.find idx node ;; let ord_compare idx a b = let get_index x = match x # node_type with T_attribute _ -> Hashtbl.find idx (x # parent) - 1, x # node_position | T_namespace _ -> Hashtbl.find idx (x # parent) - 2, x # node_position | _ -> Hashtbl.find idx x, 0 in let ord_a, subord_a = get_index a in let ord_b, subord_b = get_index b in let d = ord_a - ord_b in if d = 0 then subord_a - subord_b else d ;; (**********************************************************************) type stripping_mode = [ `Strip_one_lf | `Strip_one | `Strip_seq | `Disabled ] let strip_whitespace ?(force = false) ?(left = `Disabled) ?(right = `Disabled) ?(delete_empty_nodes = true) start = let strip_left = match left with `Disabled -> (fun s -> s) | `Strip_one_lf -> (fun s -> if String.length s > 0 && s.[0] = '\n' then String.sub s 1 (String.length s - 1) else s ) | `Strip_one -> (fun s -> if String.length s > 0 then let c = s.[0] in if c = ' ' || c = '\n' || c = '\r' || c = '\t' then String.sub s 1 (String.length s - 1) else s else s ) | `Strip_seq -> (fun s -> let k = ref 0 in while !k < String.length s && (let c = s.[ !k ] in c = ' ' || c = '\n' || c = '\r' || c = '\t') do incr k done; if !k > 0 then String.sub s !k (String.length s - !k) else s ) in let strip_right = match right with `Disabled -> (fun s -> s) | `Strip_one_lf -> (fun s -> let l = String.length s in if l > 0 && s.[l - 1] = '\n' then String.sub s 0 (l - 1) else s ) | `Strip_one -> (fun s -> let l = String.length s in if l > 0 then let c = s.[ l - 1] in if c = ' ' || c = '\n' || c = '\r' || c = '\t' then String.sub s 0 (l - 1) else s else s ) | `Strip_seq -> (fun s -> let l = String.length s in let k = ref (l-1) in while !k >= 0 && (let c = s.[ !k ] in c = ' ' || c = '\n' || c = '\r' || c = '\t') do decr k done; if !k < l-1 then String.sub s 0 (!k + 1) else s ) in let rec strip_elements n preserve_space = let preserve_space' = not force && try let space = n # attribute "xml:space" in let preserve = (space = Value "preserve") in let default = (space = Value "default") in if not (preserve || default) then raise Not_found; preserve with Not_found -> preserve_space in if not preserve_space' then begin let left_side_done = ref false in let right_side = ref None in n # iter_nodes (fun sub -> match sub # node_type with T_data -> if not !left_side_done then begin let s = strip_left (sub # data) in sub # set_data s; if s = "" && delete_empty_nodes then sub # delete; left_side_done := true end; right_side := Some sub; (* candidate for right side *) | T_element _ -> left_side_done := true; right_side := None; strip_elements sub preserve_space' | T_comment -> () | _ -> left_side_done := true; right_side := None; ); match !right_side with None -> () | Some sub -> let s = strip_right (sub # data) in sub # set_data s; if s = "" && delete_empty_nodes then sub # delete; end else begin (* It is possible that there is a sub node that says again * xml:space = "default". *) n # iter_nodes (fun sub -> match sub # node_type with T_element _ -> strip_elements sub preserve_space' | _ -> () ) end in let rec preserve_mode n = try let space = n # attribute "xml:space" in let preserve = (space = Value "preserve") in let default = (space = Value "default") in if not (preserve || default) then raise Not_found; preserve with Not_found -> let p = try Some (n # parent) with Not_found -> None in match p with Some parent -> preserve_mode parent | None -> false in let preserve_space = not force && preserve_mode start in match start # node_type with T_data -> if not preserve_space then begin let s = start # data in let s' = strip_left (strip_right s) in start # set_data s'; if s' = "" && delete_empty_nodes then start # delete; end | T_element _ | T_super_root -> strip_elements start preserve_space | _ -> () ;; (**********************************************************************) let normalize tree = (* Concatenate consecutive data nodes: *) iter_tree_sibl ~pre:(fun l n r -> match l with None -> (* No left sibling: Nothing to do *) () | Some ln -> (* If ln and n are both data nodes, concatenate them *) if n # node_type = T_data && ln # node_type = T_data then begin n # set_data (ln # data ^ n # data); ln # set_data ""; end ) tree; (* Remove empty data nodes: *) iter_tree ~pre:(fun n -> if n # node_type = T_data && n # data = "" then begin n # delete; raise Skip; end ) tree ;; (**********************************************************************) (* document *) (**********************************************************************) class ['ext] document ?swarner the_warner enc = object (self) inherit ['ext] pinstr_features val mutable xml_version = "1.0" val mutable dtd = (None : dtd option) val mutable root = (None : 'ext node option) val mutable raw_root_name = "" val encoding = (enc : rep_encoding) val warner = (the_warner : collect_warnings) val swarner = (swarner : symbolic_warnings option) method init_xml_version s = if s <> "1.0" then warn swarner warner (`W_XML_version_not_supported s); xml_version <- s method init_root r real_root_element_name = let dtd_r = r # dtd in if dtd_r # encoding <> encoding then failwith "Pxp_document.document#init_root: encoding mismatch"; match r # node_type with (**************** CASE: We have a super root element ***************) | T_super_root -> if not (dtd_r # arbitrary_allowed) then begin match dtd_r # root with Some declared_root_element_name -> let _real_root_element = try List.find (fun r' -> match r' # node_type with | T_element _ -> true | _ -> false) (r # sub_nodes) with Not_found -> failwith "Pxp_document.document#init_root: Super root does not contain root element" (* TODO: Check also that there is at most one * element in the super root node *) in if real_root_element_name <> declared_root_element_name then raise (Validation_error ("The root element is `" ^ real_root_element_name ^ "' but is declared as `" ^ declared_root_element_name ^ "'")); | None -> () end; (* All is okay, so store dtd and root node: *) dtd <- Some dtd_r; root <- Some r; raw_root_name <- real_root_element_name; (**************** CASE: No super root element **********************) | T_element root_element_name -> if not (dtd_r # arbitrary_allowed) then begin match dtd_r # root with Some declared_root_element_name -> if real_root_element_name <> declared_root_element_name then raise (Validation_error ("The root element is `" ^ real_root_element_name ^ "' but is declared as `" ^ declared_root_element_name ^ "'")) | None -> (* This may happen if you initialize your DTD yourself. * The value 'None' means that the method 'set_root' was * never called for the DTD; we interpret it here as: * The root element does not matter. *) () end; (* All is okay, so store dtd and root node: *) dtd <- Some dtd_r; root <- Some r; raw_root_name <- real_root_element_name; | _ -> failwith "Pxp_document.document#init_root: the root node must be an element or super-root" method private top_element = (* Top-most element node *) match root with | None -> failwith "Pxp_document.document: No top-level element found" | Some r -> ( match r # node_type with | T_super_root -> ( try List.find (fun r' -> match r' # node_type with | T_element _ -> true | _ -> false) (r # sub_nodes) with Not_found -> failwith "Pxp_document.document: No top-level element found" ) | T_element _ -> r | _ -> failwith "Pxp_document.document: No top-level element found" ) method xml_version = xml_version method xml_standalone = match dtd with None -> false | Some d -> d # standalone_declaration method dtd = match dtd with None -> failwith "Pxp_document.document#dtd: Document has no DTD" | Some d -> d method encoding = encoding method root = match root with None -> failwith "Pxp_document.document#root: Document has no root element" | Some r -> r method raw_root_name = match root with None -> failwith "Pxp_document.document#raw_root_name: Document has no root element" | Some _ -> raw_root_name method write ?default ?(prefer_dtd_reference = false) ?(dtd_style=`Included) ?minimization os enc = let (dtd_style : [`Omit|`Reference|`Included|`Auto]) = if prefer_dtd_reference then `Reference else dtd_style in let encoding = self # encoding in let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in let r = self # root in wms ("<?xml version='1.0' encoding='" ^ Netconversion.string_of_encoding enc ^ "'?>\n"); begin match dtd with None -> () | Some d -> let have_dtd_root = d # root <> None in let eff_dtd_style = match dtd_style with | `Omit -> `Omit | `Reference -> ( match d # id with | Some (External _) -> `Reference | _ -> `Included ) | `Included -> `Included | `Auto -> if have_dtd_root then match d # id with | Some (External _) -> `Reference | _ -> `Included else `Omit in let root_to_write = lazy ( match d # root with | None -> (* No DTD root: Look at the tree, and find out what will be printed for the topmost element *) let e = self # top_element in ( match e # node_type with | T_element name -> ( match default with | None -> name | Some defns -> (* our best effort... *) let prefix, localname = namespace_split name in if prefix = defns then localname else name ) | _ -> assert false ) | Some r -> (* If there is a DTD root, always write this *) r ) in ( match eff_dtd_style with | `Omit -> () | `Reference -> let root = Lazy.force root_to_write in d # write_ref ~root os enc | `Included -> let root = Lazy.force root_to_write in d # write ~root os enc true ) end; self # write_pinstr os enc; r # write ?default ?minimization os enc; wms "\n"; method display ?(prefer_dtd_reference = false) ?(dtd_style=`Included) ?minimization os enc = let encoding = self # encoding in let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in let (dtd_style : [`Omit|`Reference|`Included|`Auto]) = if prefer_dtd_reference then `Reference else dtd_style in let r = self # root in wms ("<?xml version='1.0' encoding='" ^ Netconversion.string_of_encoding enc ^ "'?>\n"); begin match dtd with None -> () | Some d -> let have_dtd_root = d # root <> None in let eff_dtd_style = match dtd_style with | `Omit -> `Omit | `Reference -> ( match d # id with | Some (External _) -> `Reference | _ -> `Included ) | `Included -> `Included | `Auto -> if have_dtd_root then match d # id with | Some (External _) -> `Reference | _ -> `Included else `Omit in let root_to_write = lazy ( match d # root with | None -> (* No DTD root: Look at the tree, and find out what will be printed for the topmost element *) let e = self # top_element in ( match e # node_type with | T_element name -> ( try let pr = e # display_prefix in (* If now a Namespace_not_in_scope is raised, we cannot do anything! *) if pr = "" then e # localname else pr ^ ":" ^ e # localname with | Namespace_method_not_applicable _ -> name ) | _ -> assert false ) | Some r -> (* If there is a DTD root, always write this *) r ) in ( match eff_dtd_style with | `Omit -> () | `Reference -> let root = Lazy.force root_to_write in d # write_ref ~root os enc | `Included -> let root = Lazy.force root_to_write in d # write ~root os enc true ) end; self # write_pinstr os enc; r # display ?minimization os enc; wms "\n"; method dump fmt = Format.pp_open_vbox fmt 2; Format.pp_print_string fmt "* document"; self # dump_pinstr fmt; (match root with None -> () | Some r -> Format.pp_print_cut fmt (); r # dump fmt ); Format.pp_close_box fmt (); end ;; (**********************************************************************) let print_node (n : 'ext node) = n # dump (Format.std_formatter) ;; let print_doc (n : 'ext document) = n # dump (Format.std_formatter) ;; (**********************************************************************) exception Error_event of exn type state = Null | Start_seen (* Start tag seen *) | End_seen (* End tag seen *) | NA (* Not applicable: Start/end tag cannot occur any longer *) ;; type 'ext solid_xml = [ `Node of 'ext node | `Document of 'ext document ] ;; let solidify ?dtd cfg spec next_ev : 'ext solid_xml = let eff_dtd = ref (match dtd with Some d -> if d#encoding <> cfg.encoding then failwith "Pxp_document.solidify: Encoding mismatch"; d | None -> let d = new dtd ?swarner:cfg.swarner cfg.warner cfg.encoding in d # allow_arbitrary; ( match cfg.enable_namespace_processing with Some m -> d # set_namespace_manager m; | None -> () ); d) in let return = ref None in let return_doc = ref None in let stack = Stack.create() in let depth = ref 0 in (* Stack depth, without super root node *) let super_root_details = ref None in let doc_state = ref Null in let super_state = ref Null in let root_state = ref Null in let pos = ref None in let eof = ref false in let unexpected txt = failwith ("Pxp_document.solidify: Unexpected " ^ txt ^ " event") in let create_delayed_super_root entity_id (srpos, children, pilist) = let n = create_super_root_node ~entity_id ?position:srpos spec !eff_dtd in List.iter (fun pi -> n # add_pinstr pi) (List.rev pilist); n # set_nodes (List.rev children); n in while not !eof do let ev = next_ev() in match ev with Some (E_start_doc(xml_version,found_dtd)) -> if !doc_state <> Null then unexpected "E_start_doc"; doc_state := Start_seen; pos := None; let doc = new document ?swarner:cfg.swarner cfg.warner cfg.encoding in doc # init_xml_version xml_version; if dtd = None then ( if found_dtd#encoding <> cfg.encoding then failwith "Pxp_document.solidify: Encoding mismatch"; eff_dtd := found_dtd; ); return_doc := Some doc; | Some (E_end_doc lit_root) -> if ((!doc_state <> Start_seen) || (!super_state = Start_seen) || (!root_state <> End_seen)) then unexpected "T_end_doc"; doc_state := End_seen; pos := None; assert(Stack.is_empty stack); (match (!return, !return_doc) with | (Some r, Some r_doc) -> let r' = match !super_root_details with | None -> r | Some srparams -> create_delayed_super_root r#entity_id srparams in return := Some r'; r_doc # init_root r' lit_root; | _ -> assert false ); | Some E_start_super -> if !super_state <> Null then unexpected "E_start_super"; if !doc_state = Null then doc_state := NA; super_state := Start_seen; if cfg.enable_super_root_node then ( (* The creation of the super root node is delayed until we know the entity ID of the top element *) super_root_details := Some(!pos, [], []); (* first list: the children in reverse order second list: the PI's to attach in reverse order *) ) | Some E_end_super -> if (!super_state <> Start_seen || !root_state <> End_seen) then unexpected "E_end_super"; super_state := End_seen; if cfg.enable_super_root_node then ( assert(Stack.is_empty stack); assert(!super_root_details <> None); ) | Some (E_start_tag(name,atts,scope_opt,eid)) -> let is_root = (!depth = 0) in if is_root then ( if !root_state <> Null then unexpected "E_start_tag"; root_state := Start_seen; if !doc_state = Null then doc_state := NA; if !super_state = Null then super_state := NA; ); let n = create_element_node ?name_pool_for_attribute_values: (if cfg.enable_name_pool_for_attribute_values then Some cfg.name_pool else None) ~entity_id:eid ?position:!pos spec !eff_dtd name atts in ( match scope_opt with None -> () | Some scope -> n # set_namespace_scope scope ); ( try let parent = Stack.top stack in parent # append_node n with Stack.Empty -> return := Some n; ( match !super_root_details with | None -> () | Some(srpos,children,pilist) -> super_root_details := Some(srpos,n::children,pilist) ) ); Stack.push n stack; incr depth; pos := None | Some (E_end_tag(name,eid)) -> decr depth; let is_root = (!depth = 0) in if is_root then ( if !root_state <> Start_seen then unexpected "E_end_tag"; root_state := End_seen; ); if Stack.is_empty stack then unexpected "E_end_tag"; let top = Stack.pop stack in ( match top # node_type with T_element _ -> () | _ -> unexpected "E_end_tag" ); if not cfg.disable_content_validation then top # validate_contents ~use_dfa:cfg.validate_by_dfa ~check_data_nodes:true (); pos := None | Some (E_char_data data) -> if !root_state <> Start_seen then unexpected "E_char_data"; ( try let n = create_data_node spec !eff_dtd data in (Stack.top stack) # append_node n with Stack.Empty -> assert false ); pos := None | Some (E_pinstr(target,value,eid)) -> (* A PI may occur everywhere between start_doc and end_doc. *) if !doc_state = End_seen then unexpected "E_pinstr"; if !doc_state = Null then doc_state := NA; if !super_state = Null then super_state := NA; if !super_state <> Start_seen && !root_state <> Start_seen then unexpected "E_pinstr"; let pi = new proc_instruction target value !eff_dtd#encoding in if !depth = 0 then ( match !super_root_details with | None -> (* Add processing instruction to document, if any *) ( match !return_doc with Some doc -> doc # add_pinstr pi | None -> () (* PI is lost *) ) | Some (srpos, children, pilist) -> (* Add PI to super root node (attached, or as regular child) *) if cfg.enable_pinstr_nodes then ( let n = create_pinstr_node ~entity_id:eid ?position:!pos spec !eff_dtd pi in super_root_details := Some(srpos, n::children, pilist) ) else ( super_root_details := Some(srpos, children, pi::pilist) ) ) else ( (* Add PI to parent element (attached, or as regular child) *) if cfg.enable_pinstr_nodes then ( let n = create_pinstr_node ~entity_id:eid ?position:!pos spec !eff_dtd pi in (Stack.top stack) # append_node n ) else (Stack.top stack) # add_pinstr pi ); pos := None | Some (E_comment data) -> (* A comment may occur everywhere between start_doc and end_doc. * Only below the super root or the simple root node it is * accepted, however. *) if !doc_state = End_seen then unexpected "E_comment"; if !doc_state = Null then doc_state := NA; if !super_state = Null then super_state := NA; if !super_state <> Start_seen && !root_state <> Start_seen then unexpected "E_comment"; if cfg.enable_comment_nodes then ( if !depth = 0 then ( match !super_root_details with | None -> () | Some (srpos, children, pilist) -> (* Add comment to super root node, if enabled *) let n = create_comment_node ?position:!pos spec !eff_dtd data in super_root_details := Some(srpos, n::children, pilist); ) else ( let n = create_comment_node ?position:!pos spec !eff_dtd data in (Stack.top stack) # append_node n ) ); pos := None | Some (E_position(ent,line,colpos)) -> pos := Some (ent,line,colpos) | Some (E_error err) -> raise(Error_event err) | Some E_end_of_stream | None -> if (!doc_state = Start_seen) || (!super_state = Start_seen) || (!root_state <> End_seen) then unexpected "E_end_of_stream/actual end"; pos := None; eof := (ev = None) done; ( match !return, !return_doc with _, Some doc -> `Document doc | Some n, None -> let n' = match !super_root_details with | None -> n | Some srparams -> create_delayed_super_root n#entity_id srparams in `Node n' | _ -> assert false ) ;; type 'ext flux_state = [ `Node_start of 'ext node | `Node_end of 'ext node | `Output of (event list * 'ext flux_state) | `EOS | `None ] let liquefy_node ?(omit_end = false) ?(omit_positions = false) (init_fstate : 'ext flux_state) (init_node : 'ext node) = let fstate = ref init_fstate in let rec generate arg = match !fstate with | `Node_start n -> (* Find next node: *) let fstate' = ( match n#sub_nodes with [] -> `Node_end n | n' :: _ -> `Node_start n' ) in fstate := fstate'; (* Do action for n: *) ( match n # node_type with | T_element name -> let atts = List.flatten (List.map (fun (n,v) -> match v with Value s -> [n, s] | Valuelist l -> [n, String.concat " " l] | Implied_value -> [] ) n # attributes) in let scope_opt = try Some(n # namespace_scope) with Namespace_method_not_applicable _ -> None in let (entity,line,colpos) = n # position in let pos = E_position(entity,line,colpos) in let eid = n # entity_id in let tag = E_start_tag(name, atts, scope_opt, eid) in let out = if omit_positions then [ tag ] else [ pos; tag ] in let out_pinstr = List.flatten (List.map (fun target -> List.map (fun pi -> E_pinstr(target,pi#value,eid) ) (n # pinstr target) ) n # pinstr_names ) in fstate := `Output(out @ out_pinstr, !fstate); generate arg | T_data -> Some(E_char_data(n # data)) | T_super_root -> let out = [ E_start_super ] in let eid = n # entity_id in let out_pinstr = List.flatten (List.map (fun target -> List.map (fun pi -> E_pinstr(target,pi#value,eid) ) (n # pinstr target) ) n # pinstr_names ) in fstate := `Output(out @ out_pinstr, !fstate); generate arg | T_pinstr target -> let (entity,line,colpos) = n # position in let pos = E_position(entity,line,colpos) in let value = (List.hd (n # pinstr target)) # value in let eid = n # entity_id in let ev = E_pinstr(target,value,eid) in let out = if omit_positions then [ ev ] else [ pos; ev ] in fstate := `Output(out, !fstate); generate arg | T_comment -> let (entity,line,colpos) = n # position in let pos = E_position(entity,line,colpos) in let s = try n # data with Not_found -> "" in let ev = E_comment s in let out = if omit_positions then [ ev ] else [ pos; ev ] in fstate := `Output(out, !fstate); generate arg | _ -> failwith "Pxp_document.liquefy_node: Unexpected node type" ); | `Node_end n -> let fstate' = if n = init_node then ( if omit_end then `None else `EOS) else ( try `Node_start(n # next_node) with Not_found -> ( try `Node_end(n # parent) with Not_found -> if omit_end then `None else `EOS ) ) in fstate := fstate'; (* Do action for n: *) ( match n # node_type with T_element name -> let eid = n # entity_id in Some(E_end_tag(name,eid)) | T_super_root -> Some E_end_super | _ -> generate arg ); | `Output(events, fstate') -> ( match events with ev :: events' -> fstate := `Output(events', fstate'); Some ev | [] -> fstate := fstate'; generate arg ) | `EOS -> fstate := `None; Some E_end_of_stream | `None -> None in generate ;; let liquefy_doc ?(omit_end = false) ?(omit_positions = false) (doc : 'ext document) = let eid = Pxp_dtd.Entity.create_entity_id() in let fstate = ref `Start in let rec generate arg = match !fstate with `Start -> let out_pinstr = List.flatten (List.map (fun target -> List.map (fun pi -> E_pinstr(target,pi#value,eid) ) (doc # pinstr target) ) doc # pinstr_names ) in let node_fstate = `Output(out_pinstr, `Node_start(doc#root)) in fstate := `Nodes (liquefy_node ~omit_end:true ~omit_positions node_fstate doc#root); Some(E_start_doc(doc#xml_version,doc#dtd)) | `Nodes g -> let e = g arg in if e = None then ( fstate := `End; generate arg ) else e | `End -> fstate := if omit_end then `None else `EOS; Some(E_end_doc doc#raw_root_name) | `EOS -> fstate := `None; Some E_end_of_stream | `None -> None in generate ;; let liquefy ?omit_end ?omit_positions solid = match solid with `Node n -> liquefy_node ?omit_end ?omit_positions (`Node_start n) n | `Document d -> liquefy_doc ?omit_end ?omit_positions d ;;