(* $Id$ * ---------------------------------------------------------------------- * PXP: The polymorphic XML parser for Objective Caml. * Copyright by Gerd Stolpmann. See LICENSE for details. *) open Pxp_core_types.I open Pxp_lexer_types open Pxp_lexers open Pxp_entity open Pxp_aux open Pxp_dfa (**********************************************************************) type validation_record = { content_model : content_model_type; content_dfa : dfa_definition option Lazy.t; id_att_name : string option; idref_att_names : string list; att_lookup : int Str_hashtbl.t; init_att_vals : (string * att_value) array; att_info : (att_type * bool) array; att_required : int list; accept_undeclared_atts : bool; } ;; (* class type? *) class namespace_manager = object (self) val uri_of_prefix = Hashtbl.create 10 (* not unique *) val prefix_of_uri = Hashtbl.create 10 (* unique *) val primary_uri_of_prefix = Hashtbl.create 10 (* unique *) initializer ignore(self # add_namespace "xml" "http://www.w3.org/XML/1998/namespace") method add_uri (np:string) (uri:string) = if not (Hashtbl.mem uri_of_prefix np) then raise(Namespace_prefix_not_managed np); try let np' = Hashtbl.find prefix_of_uri uri in if np <> np' then raise(Namespace_error "add_uri: the URI is already managed") with Not_found -> Hashtbl.add uri_of_prefix np uri; Hashtbl.add prefix_of_uri uri np; () method add_namespace np uri = let l = Hashtbl.find_all uri_of_prefix np in if l = [] then begin if Hashtbl.mem prefix_of_uri uri then raise(Namespace_error "add_namespace: the URI is already managed"); Hashtbl.add uri_of_prefix np uri; Hashtbl.add primary_uri_of_prefix np uri; Hashtbl.add prefix_of_uri uri np; end else if l <> [ uri ] then raise(Namespace_error "add_namespace: the namespace does already exist") method lookup_or_add_namespace prefix (uri:string) = let rec add_loop n = let p = prefix ^ (if n=0 then "" else string_of_int n) in if Hashtbl.mem uri_of_prefix p then begin add_loop (n+1) end else begin Hashtbl.add uri_of_prefix p uri; Hashtbl.add primary_uri_of_prefix p uri; Hashtbl.add prefix_of_uri uri p; p end in try Hashtbl.find prefix_of_uri uri with Not_found -> add_loop (if prefix = "" then 1 else 0) (* prefix = "": make sure that such a prefix is never added *) method get_primary_uri normprefix = try Hashtbl.find primary_uri_of_prefix normprefix with Not_found -> raise(Namespace_prefix_not_managed normprefix) method get_uri_list normprefix = Hashtbl.find_all uri_of_prefix normprefix method get_normprefix uri = try Hashtbl.find prefix_of_uri uri with Not_found -> raise(Namespace_not_managed uri) method iter_namespaces f = Hashtbl.iter (fun p uri -> f p) primary_uri_of_prefix method as_declaration = let l = ref [] in Hashtbl.iter (fun p uri -> l := (p, uri) :: !l) primary_uri_of_prefix; !l end ;; let create_namespace_manager () = new namespace_manager;; class type namespace_scope = object method namespace_manager : namespace_manager method parent_scope : namespace_scope option method declaration : (string * string) list method effective_declaration : (string * string) list method display_prefix_of_uri : string -> string method display_prefix_of_normprefix : string -> string method uri_of_display_prefix : string -> string method normprefix_of_display_prefix : string -> string end ;; module StrSet = Set.Make(String);; class namespace_scope_impl mng parent_opt decl : namespace_scope = object(self) method namespace_manager = mng method parent_scope = parent_opt method declaration = decl method effective_declaration = let rec collect visible d s = match d with | ("", "") :: d' -> if StrSet.mem "" visible then collect visible d' s (* no effect *) else collect (StrSet.add "" visible) d' s (* hide inner default *) | (dp, uri) :: d' -> if StrSet.mem dp visible then collect visible d' s else (dp, uri) :: collect (StrSet.add dp visible) d' s | [] -> ( match s # parent_scope with Some s' -> collect visible s'#declaration s' | None -> [] ) in collect StrSet.empty self#declaration (self : #namespace_scope :> namespace_scope) method display_prefix_of_uri uri = try fst(List.find (fun (p,u) -> u = uri) decl) with Not_found -> ( match parent_opt with Some pa -> pa # display_prefix_of_uri uri | None -> raise(Namespace_not_in_scope uri) ) method display_prefix_of_normprefix np = let uris = mng # get_uri_list np in if uris = [] then raise(Namespace_prefix_not_managed np); try fst(List.find (fun (p,u) -> List.mem u uris) decl) with Not_found -> ( match parent_opt with Some pa -> pa # display_prefix_of_normprefix np | None -> raise(Namespace_not_in_scope (List.hd(List.rev uris))) ) method uri_of_display_prefix dp = try List.assoc dp decl with Not_found -> ( match parent_opt with Some pa -> pa # uri_of_display_prefix dp | None -> raise Not_found ) method normprefix_of_display_prefix dp = let uri = self # uri_of_display_prefix dp in mng # get_normprefix uri end ;; let create_namespace_scope ?parent ?(decl = []) mng = new namespace_scope_impl mng parent decl ;; class dtd ?swarner the_warner init_encoding = object (self) val mutable root = (None : string option) val mutable id = (None : dtd_id option) val mutable mng = (None : namespace_manager option) val warner = (the_warner : collect_warnings) val swarner = (swarner : symbolic_warnings option) val encoding = init_encoding val lfactory = Pxp_lexers.get_lexer_factory init_encoding val elements = (Str_hashtbl.create 100 : dtd_element Str_hashtbl.t) val gen_entities = (Str_hashtbl.create 100 : (entity * bool) Str_hashtbl.t) val par_entities = (Str_hashtbl.create 100 : entity Str_hashtbl.t) val notations = (Str_hashtbl.create 100 : dtd_notation Str_hashtbl.t) val pinstr = (Str_hashtbl.create 100 : proc_instruction Str_hashtbl.t) val mutable element_names = [] val mutable gen_entity_names = [] val mutable par_entity_names = [] val mutable notation_names = [] val mutable pinstr_names = [] val mutable allow_arbitrary = false val mutable standalone_declaration = false val mutable validated = false initializer let w = new drop_warnings in self # add_gen_entity (new internal_entity self "lt" None w "&#60;" false false encoding) false; self # add_gen_entity (new internal_entity self "gt" None w ">" false false encoding) false; self # add_gen_entity (new internal_entity self "amp" None w "&#38;" false false encoding) false; self # add_gen_entity (new internal_entity self "apos" None w "'" false false encoding) false; self # add_gen_entity (new internal_entity self "quot" None w """ false false encoding) false; method encoding = encoding method lexer_factory = lfactory method warner = warner method swarner = swarner method set_root r = if root = None then root <- Some r else assert false method set_id j = if id = None then id <- Some j else assert false method standalone_declaration = standalone_declaration method set_standalone_declaration b = standalone_declaration <- b method allow_arbitrary = allow_arbitrary <- true method disallow_arbitrary = allow_arbitrary <- false method arbitrary_allowed = allow_arbitrary method root = root method id = id method namespace_manager = match mng with None -> raise(Namespace_method_not_applicable "namespace_manager") | Some m -> m method set_namespace_manager m = mng <- Some m method add_element el = (* raises Not_found if 'el' has already been added *) (* Note: 'el' is encoded in the same way as 'self'! *) let name = el # name in check_name ?swarner warner name; if Str_hashtbl.mem elements name then raise Not_found; Str_hashtbl.add elements name el; element_names <- name :: element_names; validated <- false method add_gen_entity en extdecl = (* The following is commented out; perhaps there should be an option * to reactivate it on demand *) (* raises Validation_error if the predefines entities 'lt', 'gt', 'amp', * 'quot', and 'apos' are redeclared with an improper value. *) if en # encoding <> encoding then failwith "Pxp_dtd.dtd # add_gen_entity: Inconsistent encodings"; let name = en # name in check_name ?swarner warner name; if Str_hashtbl.mem gen_entities name then begin if List.mem name [ "lt"; "gt"; "amp"; "quot"; "apos" ] then begin (* These are allowed to be declared several times *) let (rt,_) = en # replacement_text in let toks = tokens_of_content_string lfactory rt in try begin match toks with [CRef 60] -> if name <> "lt" then raise Not_found | [CharData ">"] -> if name <> "gt" then raise Not_found | [CRef 62] -> if name <> "gt" then raise Not_found | [CRef 38] -> if name <> "amp" then raise Not_found | [CharData "'"] -> if name <> "apos" then raise Not_found | [CRef 39] -> if name <> "apos" then raise Not_found | [CharData "\""] -> if name <> "quot" then raise Not_found | [CRef 34] -> if name <> "quot" then raise Not_found | _ -> raise Not_found end with Not_found -> raise (Validation_error("Predefined entity `" ^ name ^ "' redeclared")) end else warn swarner warner (`W_entity_declared_twice name) end else begin Str_hashtbl.add gen_entities name (en, extdecl); gen_entity_names <- name :: gen_entity_names end method add_par_entity en = if en # encoding <> encoding then failwith "Pxp_dtd.dtd # add_par_entity: Inconsistent encodings"; let name = en # name in check_name ?swarner warner name; if not (Str_hashtbl.mem par_entities name) then begin Str_hashtbl.add par_entities name en; par_entity_names <- name :: par_entity_names end else warn swarner warner (`W_entity_declared_twice name) method add_notation no = (* raises Validation_error if 'no' already added *) if no # encoding <> encoding then failwith "Pxp_dtd.dtd # add_notation: Inconsistent encodings"; let name = no # name in check_name ?swarner warner name; if Str_hashtbl.mem notations name then raise (Validation_error("Notation `" ^ name ^ "' declared twice")); Str_hashtbl.add notations name no; notation_names <- name :: notation_names method add_pinstr pi = if pi # encoding <> encoding then failwith "Pxp_dtd.dtd # add_pinstr: Inconsistent encodings"; let name = pi # target in check_name ?swarner warner name; if String.length name >= 4 && String.sub name 0 4 = "pxp:" then begin match name with "pxp:dtd" -> let _, optname, atts = pi # parse_pxp_option in begin match optname with "optional-element-and-notation-declarations" -> self # allow_arbitrary | "optional-attribute-declarations" -> let el_string = try List.assoc "elements" atts with Not_found -> raise(Error("Missing `elements' attribute for pxp:dtd")) in let el = split_attribute_value lfactory el_string in List.iter (fun e_name -> let e = try Str_hashtbl.find elements e_name with Not_found -> raise(Error("Reference to unknown element `" ^ e_name ^ "'")) in e # allow_arbitrary ) el | "namespace" -> let prefix = try List.assoc "prefix" atts with Not_found -> raise(Error("Missing `prefix' attribute for pxp:dtd")) in let uri = try List.assoc "uri" atts with Not_found -> raise(Error("Missing `uri' attribute for pxp:dtd")) in ( match mng with None -> raise(Error("Cannot do pxp:dtd instruction: namespaces not enabled")) | Some m -> ( try m # add_uri prefix uri with Namespace_prefix_not_managed _ -> m # add_namespace prefix uri ) ) | _ -> raise(Error("Unknown PXP option `" ^ optname ^ "'")) end | _ -> raise(Error("The processing instruction target `" ^ name ^ "' is not defined by this PXP version")) end; Str_hashtbl.add pinstr name pi; if not (List.mem name pinstr_names) then pinstr_names <- pinstr_names @ [name]; method element name = (* returns the element 'name' or raises Validation_error if not found *) try Str_hashtbl.find elements name with Not_found -> if allow_arbitrary then raise Undeclared else raise(Validation_error("Reference to undeclared element `" ^ name ^ "'")) method element_names = (* returns the list of all names of element declarations *) element_names method gen_entity name = (* returns the entity 'name' or raises WF_error if not found *) try Str_hashtbl.find gen_entities name with Not_found -> raise(WF_error("Reference to undeclared general entity `" ^ name ^ "'")) method gen_entity_names = gen_entity_names method par_entity name = (* returns the entity 'name' or raises WF_error if not found *) try Str_hashtbl.find par_entities name with Not_found -> raise(WF_error("Reference to undeclared parameter entity `" ^ name ^ "'")) method par_entity_names = par_entity_names method notation name = (* returns the notation 'name' or raises Validation_error if not found *) try Str_hashtbl.find notations name with Not_found -> if allow_arbitrary then raise Undeclared else raise(Validation_error("Reference to undeclared notation `" ^ name ^ "'")) method notation_names = notation_names method pinstr name = (* returns the list of all processing instructions contained in the DTD * with target 'name' *) Str_hashtbl.find_all pinstr name method pinstr_names = pinstr_names method write_ref ?root:proot os enc = let write_sysid s = write_markup_string ~from_enc:`Enc_utf8 ~to_enc:enc os ( if String.contains s '"' then "'" ^ s ^ "'" else "\"" ^ s ^ "\"" ) in let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in wms "<!DOCTYPE "; ( match proot with | None -> ( match root with None -> failwith "#write: DTD without root"; | Some r -> wms r ) | Some r -> wms r ); begin match id with None -> failwith "#write_ref: DTD does not have an ID" | Some (External (Public (p,s))) -> wms " PUBLIC "; write_sysid p; wms " "; write_sysid s | Some (External (System s)) -> wms " SYSTEM "; write_sysid s | Some (External _) -> failwith "#write_ref: External ID cannot be represented" | Some Internal -> failwith "#write_ref: Cannot write internal ID" | Some (Derived _) -> failwith "#write_ref: Cannot write derived ID" end; wms ">\n"; method write ?root:proot os enc doctype = let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in let write_sysid s = write_markup_string ~from_enc:`Enc_utf8 ~to_enc:enc os ( if String.contains s '"' then "'" ^ s ^ "'" else "\"" ^ s ^ "\"" ) in if doctype then begin wms "<!DOCTYPE "; ( match proot with | None -> ( match root with None -> failwith "#write: DTD without root"; | Some r -> wms r ); | Some r -> wms r ); wms " [\n"; end; (* Notations: *) List.iter (fun name -> let notation = try Str_hashtbl.find notations name with Not_found -> assert false in notation # write os enc) (List.sort compare notation_names); (* Unparsed entities: *) List.iter (fun name -> let ent,_ = try Str_hashtbl.find gen_entities name with Not_found -> assert false in if ent # is_ndata then begin let xid = ent # ext_id in let notation = ent # notation in wms ("<!ENTITY " ^ name ^ " " ); ( match xid with System s -> wms "SYSTEM "; write_sysid s; | Public (p,s) -> wms "PUBLIC "; write_sysid p; if (s <> "") then begin wms " "; write_sysid s; end; | Anonymous -> failwith "#write: External ID `Anonymous' cannot be represented" | Private _ -> failwith "#write: External ID `Private' cannot be represented" ); wms (" NDATA " ^ notation ^ ">\n"); end ) (List.sort compare gen_entity_names); (* Elements: *) List.iter (fun name -> let element = try Str_hashtbl.find elements name with Not_found -> assert false in element # write os enc) (List.sort compare element_names); (* Processing instructions: *) List.iter (fun name -> List.iter (fun pi -> pi # write os enc) (Str_hashtbl.find_all pinstr name) ) (List.sort compare pinstr_names); if doctype then wms "]>\n"; (************************************************************) (* VALIDATION *) (************************************************************) method only_deterministic_models = Str_hashtbl.iter (fun n el -> let cm = el # content_model in match cm with Regexp _ -> if el # content_dfa = None then raise(Validation_error("The content model of element `" ^ n ^ "' is not deterministic")) | _ -> () ) elements; method validate = if validated || allow_arbitrary then () else begin (* Validity constraint: Notations in NDATA entity declarations must * be declared *) List.iter (fun name -> let ent,_ = try Str_hashtbl.find gen_entities name with Not_found -> assert false in if ent # is_ndata then begin let _xid = ent # ext_id in let notation = ent # notation in try ignore(self # notation notation) (* Raises Validation_error if the constraint is violated *) with Undeclared -> () end ) gen_entity_names; (* Validate the elements: *) Str_hashtbl.iter (fun n el -> el # validate) elements; (* Check the root element: This check is commented out because: * - it is performed anyway when the root element is validated * - it does not work in namespace mode (missing ns normalization) *) (* -- begin match root with None -> () | Some r -> begin try let _ = Str_hashtbl.find elements r in () with Not_found -> raise(Validation_error("The root element is not declared")) end end; --*) validated <- true; end method invalidate = validated <- false (************************************************************) end (**********************************************************************) and dtd_element the_dtd the_name = object (self) val dtd = (the_dtd : dtd) val name = the_name val lfactory = the_dtd # lexer_factory val mutable content_model = Unspecified val mutable content_model_validated = false val mutable content_dfa = lazy None val mutable externally_declared = false val mutable attributes = ([] : (string * ((att_type * att_default) * bool)) list) val mutable attributes_validated = false val mutable id_att_name = None val mutable idref_att_names = [] val mutable allow_arbitrary = false val mutable vr = (None : validation_record option) method name = name method set_cm_and_extdecl m extdecl = if content_model = Unspecified then begin content_model <- m; content_model_validated <- false; content_dfa <- lazy (self # compute_content_dfa); externally_declared <- extdecl; self # update_vr; dtd # invalidate end else raise(Validation_error("Element `" ^ name ^ "' has already a content model")) method content_model = content_model method content_dfa = Lazy.force content_dfa method private compute_content_dfa = match content_model with Regexp re -> ( try Some (dfa_of_regexp_content_model re) with Not_found -> None ) | _ -> None method externally_declared = externally_declared method encoding = dtd # encoding method allow_arbitrary = allow_arbitrary <- true; self # update_vr; method disallow_arbitrary = allow_arbitrary <- false; self # update_vr; method arbitrary_allowed = allow_arbitrary method add_attribute aname t d extdecl = let swarner = dtd#swarner and warner = dtd#warner in if aname <> "xml:lang" && aname <> "xml:space" then check_name ?swarner warner aname; if List.mem_assoc aname attributes then warn swarner warner (`W_multiple_attribute_declarations(name,aname)) else begin begin match aname with "xml:space" -> begin match t with A_enum l -> let ok = List.for_all (fun tok -> List.mem tok ["default";"preserve"]) l in if not ok then raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification")) | _ -> raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification")) end | _ -> () end; begin match t with A_id -> id_att_name <- Some aname; | (A_idref | A_idrefs) -> idref_att_names <- aname :: idref_att_names; | _ -> () end; attributes <- (aname, ((t,d),extdecl)) :: attributes; attributes_validated <- false; dtd # invalidate; self # update_vr; end method attribute attname = try fst (List.assoc attname attributes) with Not_found -> if allow_arbitrary then raise Undeclared else raise(Validation_error("Attribute `" ^ attname ^ "' of element `" ^ name ^ "' not declared")) method attribute_violates_standalone_declaration attname v = try let (atype, adefault), extdecl = List.assoc attname attributes in extdecl && ( match v with None -> adefault <> D_required && adefault <> D_implied (* i.e. adefault matches D_default or D_fixed *) | Some s -> atype <> A_cdata && normalization_changes_value lfactory atype s ) with Not_found -> if allow_arbitrary then raise Undeclared else raise(Validation_error("Attribute `" ^ attname ^ "' of element `" ^ name ^ "' not declared")) method attribute_names = List.map fst attributes method names_of_required_attributes = List.flatten (List.map (fun (n,((t,d),_)) -> if d = D_required then [n] else []) attributes) method id_attribute_name = id_att_name method idref_attribute_names = idref_att_names method private update_vr = vr <- None method internal_vr = ( match vr with None -> let n = List.length attributes in let init_att_vals = Array.make n ("", Implied_value) in let att_lookup = Str_hashtbl.create n in let att_info = Array.make n (A_cdata, false) in let att_required = ref [] in let k = ref 0 in List.iter (fun (n, ((t,d), ext)) -> Str_hashtbl.add att_lookup n !k; let init_val = match d with (D_required | D_implied) -> Implied_value | D_default v -> value_of_attribute lfactory dtd n t v | D_fixed v -> value_of_attribute lfactory dtd n t v in init_att_vals.( !k ) <- (n, init_val); att_info.( !k ) <- (t, match d with D_fixed _ -> true | _ -> false); if d = D_required then att_required := !k :: !att_required; incr k; ) attributes; vr <- Some { content_model = content_model; content_dfa = content_dfa; id_att_name = id_att_name; idref_att_names = idref_att_names; init_att_vals = init_att_vals; att_lookup = att_lookup; att_info = att_info; att_required = !att_required; accept_undeclared_atts = allow_arbitrary; } | _ -> () ); ( match vr with None -> assert false | Some vr' -> vr' ) method write os enc = let encoding = self # encoding in let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in let rec write_contentspec cs = match cs with Unspecified -> failwith "#write: Unspecified content model found" | Empty -> wms "EMPTY" | Any -> wms "ANY" | Mixed ml -> wms "("; write_mixedspec_list ml; wms ")*"; | Regexp re -> write_children re false and write_mixedspec_list ml = match ml with MPCDATA :: ml' -> wms "#PCDATA"; if ml' <> [] then wms "|"; write_mixedspec_list ml'; | MChild s :: ml' -> wms s; if ml' <> [] then wms "|"; write_mixedspec_list ml'; | [] -> () and write_children re cp = match re with Optional re' -> let p = needs_parens re' in if p then wms "("; write_children re' cp; if p then wms ")"; wms "?"; | Repeated re' -> let p = needs_parens re' in if p then wms "("; write_children re' cp; if p then wms ")"; wms "*"; | Repeated1 re' -> let p = needs_parens re' in if p then wms "("; write_children re' cp; if p then wms ")"; wms "+"; | Alt re' -> wms "("; ( match re' with re1' :: rer' -> write_children re1' true; List.iter (fun ren' -> wms "|"; write_children ren' true; ) rer'; | [] -> failwith "#write: Illegal content model" ); wms ")"; | Seq re' -> wms "("; ( match re' with re1' :: rer' -> write_children re1' true; List.iter (fun ren' -> wms ","; write_children ren' true; ) rer'; | [] -> failwith "#write: Illegal content model" ); wms ")"; | Child ch -> if not cp then wms "("; wms ch; if not cp then wms ")"; and needs_parens re = match re with (Optional _ | Repeated _ | Repeated1 _ ) -> true | _ -> false in wms ("<!ELEMENT " ^ name ^ " "); write_contentspec content_model; wms ">\n"; wms ("<!ATTLIST " ^ name); List.iter (fun (n,((t,d),_)) -> wms ("\n " ^ n); ( match t with A_cdata -> wms " CDATA"; | A_id -> wms " ID"; | A_idref -> wms " IDREF"; | A_idrefs -> wms " IDREFS"; | A_entity -> wms " ENTITY"; | A_entities -> wms " ENTITIES"; | A_nmtoken -> wms " NMTOKEN"; | A_nmtokens -> wms " NMTOKENS"; | A_notation nl -> wms " NOTATION ("; ( match nl with nl1:: nl' -> wms nl1; List.iter (fun n -> wms ("|" ^ n); ) nl' | [] -> failwith "#write: Illegal content model"; ); wms ")"; | A_enum el -> wms " ("; ( match el with el1:: el' -> wms el1; List.iter (fun e -> wms ("|" ^ e); ) el' | [] -> failwith "#write: Illegal content model"; ); wms ")"; ); ( match d with D_required -> wms " #REQUIRED" | D_implied -> wms " #IMPLIED" | D_default s -> wms " \""; write_data_string ~from_enc:encoding ~to_enc:enc os s; wms "\""; | D_fixed s -> wms " FIXED \""; write_data_string ~from_enc:encoding ~to_enc:enc os s; wms "\""; ); ) (List.sort (fun (n1,x1) (n2,x2) -> compare n1 n2) attributes); wms ">\n"; (************************************************************) (* VALIDATION *) (************************************************************) method validate = self # validate_attributes(); self # validate_content_model() method private validate_attributes() = if attributes_validated then () else begin (* Validity Constraint: One ID per Element Type *) let n = count (fun (n,((t,d),_)) -> t = A_id) attributes in if n > 1 then raise(Validation_error("More than one ID attribute for element `" ^ name ^ "'")); (* Validity Constraint: ID Attribute Default *) if List.exists (fun (n,((t,d),_)) -> t = A_id && (d <> D_required && d <> D_implied)) attributes then raise(Validation_error("ID attribute must be #IMPLIED or #REQUIRED; element `" ^ name ^ "'")); (* Validity Constraint: One Notation per Element Type *) let n = count (fun (n,((t,d),_)) -> match t with A_notation _ -> true | _ -> false) attributes in if n > 1 then raise(Validation_error("More than one NOTATION attribute for element `" ^ name ^ "'")); (* Validity Constraint: Notation Attributes [second part] *) List.iter (fun (n,((t,d),_)) -> match t with A_notation l -> List.iter (fun nname -> let _ = dtd # notation nname in ()) l | _ -> ()) attributes; (* Validity Constraint: Attribute Default Legal *) List.iter (fun (n,((t,d),_)) -> let check v = let lexical_error() = lazy (raise(Validation_error("Default value for attribute `" ^ n ^ "' is lexically malformed"))) in check_attribute_value_lexically lfactory (lexical_error()) t v; begin match t with (A_entity|A_entities) -> List.iter (fun nd -> let en, extdecl = dtd # gen_entity nd in if not (en # is_ndata) then raise(Validation_error("Attribute default value must be the name of an NDATA entity; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'")); (* if dtd # standalone_declaration && extdecl then raise(Validation_error("Attribute default value violates the standalone declaration; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'")); -- This is checked anyway when the attribute value is normalized *) ) (split_attribute_value lfactory v) | A_notation nl -> if not (List.mem v nl) then raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'")); | A_enum nl -> if not (List.mem v nl) then raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'")); | _ -> () end in match d with D_required -> () | D_implied -> () | D_default v -> check v | D_fixed v -> check v ) attributes; (* Ok: This element declaration is valid *) attributes_validated <- true; end method private validate_content_model () = (* checks: * - Validity Constraint: No Duplicate Types * It is not an error if there is a child in the declaration for which * no element declaration is provided. *) match content_model with Unspecified -> warn (dtd#swarner) (dtd#warner) (`W_element_mentioned_but_not_declared name); () | Empty -> () | Any -> () | Mixed (pcdata :: l) -> (* MPCDATA is always the first element by construction *) assert (pcdata = MPCDATA); if check_dups l then raise (Validation_error("Double children in declaration for element `" ^ name ^ "'")) | Regexp _ -> () | _ -> assert false (************************************************************) end and dtd_notation the_name the_xid init_encoding = object (self) val name = the_name val xid = (the_xid : ext_id) val encoding = (init_encoding : Pxp_core_types.I.rep_encoding) method name = name method ext_id = xid method encoding = encoding method write os enc = let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in let write_sysid s = if String.contains s '"' then wms ("'" ^ s ^ "'") else wms ("\"" ^ s ^ "\""); in wms ("<!NOTATION " ^ name ^ " "); ( match xid with System s -> wms "SYSTEM "; write_sysid s; | Public (p,s) -> wms "PUBLIC "; write_sysid p; if (s <> "") then begin wms " "; write_sysid s; end; | Anonymous -> failwith "#write: External ID `Anonymous' cannot be represented" | Private _ -> failwith "#write: External ID `Private' cannot be represented" ); wms ">\n"; end and proc_instruction the_target the_value init_encoding = object (self) val target = the_target val value = (the_value : string) val encoding = (init_encoding : Pxp_core_types.I.rep_encoding) initializer match target with ("xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML") -> (* This is an error, not a warning, because I do not have a * "warner" object by hand. *) raise(WF_error("Reserved processing instruction")) | _ -> () method target = target method value = value method encoding = encoding method write os enc = let wms = write_markup_string ~from_enc:encoding ~to_enc:enc os in wms "<?"; wms target; wms " "; wms value; wms "?>"; method parse_pxp_option = let lfactory = get_lexer_factory encoding in try let toks = tokens_of_xml_pi lfactory value in (* may raise WF_error *) begin match toks with (Pro_name option_name) :: toks' -> let atts = decode_xml_pi toks' in (* may raise WF_error *) (target, option_name, atts) | _ -> raise(Error("Bad PXP processing instruction")) end with WF_error _ -> raise(Error("Bad PXP processing instruction")) end ;; let create_dtd ?swarner ?(warner = new drop_warnings) enc = new dtd ?swarner warner enc ;; type source = Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver) | ExtID of (ext_id * Pxp_reader.resolver) | XExtID of (ext_id * string option * Pxp_reader.resolver) ;; module Entity = struct let get_name ent = ent # name let get_full_name ent = ent # full_name let get_encoding ent = ent # encoding let get_type ent = if ent # is_ndata then `NDATA else try ignore(ent # ext_id); `External with Not_found -> `Internal let replacement_text ent = fst(ent # replacement_text) let get_xid ent = try Some(ent # ext_id) with Not_found -> None let get_resolver_id ent = try Some(ent # resolver_id) with Not_found -> None let get_notation ent = if ent # is_ndata then Some (ent # notation) else None let create_internal_entity ~name ~value dtd = new internal_entity dtd name (dtd # swarner) (dtd # warner) value false false (dtd # encoding) let create_ndata_entity ~name ~xid ~notation dtd = new ndata_entity name xid notation dtd#encoding let create_external_entity ?(doc_entity = false) ?system_base ~name ~xid ~resolver dtd = if doc_entity then new document_entity resolver dtd name dtd#swarner dtd#warner xid system_base dtd#encoding else new external_entity resolver dtd name dtd#swarner dtd#warner xid system_base false dtd#encoding let from_external_source ?doc_entity ~name dtd src = match src with ExtID(xid,resolver) -> create_external_entity ?doc_entity ~name ~xid ~resolver dtd | XExtID(xid,system_base,resolver) -> create_external_entity ?doc_entity ?system_base ~name ~xid ~resolver dtd | Entity(make,resolver) -> make dtd (* resolver ignored *) let entity_id ent = (ent :> entity_id) class fake = object method pxp_magic_coercion() : unit = raise Not_found end let create_entity_id () = new fake let lookup eid = try let () = eid#pxp_magic_coercion() in assert false with | Not_found -> invalid_arg "Pxp_dtd.Event.lookup" | Pxp_entity.Coerced_entity e -> e end