Plasma GitLab Archive
Projects Blog Knowledge

(* $Id$
 * ----------------------------------------------------------------------
 * PXP: The polymorphic XML parser for Objective Caml.
 * Copyright by Gerd Stolpmann. See LICENSE for details.
 *)

open Pxp_core_types.I
open Pxp_lexer_types
open Pxp_lexers
open Pxp_entity
open Pxp_aux
open Pxp_dfa

(**********************************************************************)

type validation_record =
    { content_model   : content_model_type;
      content_dfa     : dfa_definition option Lazy.t;
      id_att_name     : string option;
      idref_att_names : string list;
      att_lookup      : int Str_hashtbl.t;
      init_att_vals   : (string * att_value) array;
      att_info        : (att_type * bool) array;
      att_required    : int list;
      accept_undeclared_atts : bool;
    }
;;


(* class type? *)

class namespace_manager =
object (self)
    val uri_of_prefix = Hashtbl.create 10  (* not unique *)
    val prefix_of_uri = Hashtbl.create 10  (* unique *)
    val primary_uri_of_prefix = Hashtbl.create 10  (* unique *)

    initializer
      ignore(self # add_namespace "xml" "http://www.w3.org/XML/1998/namespace")

    method add_uri (np:string) (uri:string) =
      if not (Hashtbl.mem uri_of_prefix np) then 
	raise(Namespace_prefix_not_managed np);
      try
	let np' = Hashtbl.find prefix_of_uri uri in
	if np <> np' then
	  raise(Namespace_error "add_uri: the URI is already managed")
      with
	  Not_found ->
	    Hashtbl.add uri_of_prefix np uri;
	    Hashtbl.add prefix_of_uri uri np;
	    ()

    method add_namespace np uri =
      let l = Hashtbl.find_all uri_of_prefix np in
      if l = [] then begin
	if Hashtbl.mem prefix_of_uri uri then
	  raise(Namespace_error "add_namespace: the URI is already managed");
	Hashtbl.add uri_of_prefix np uri;
	Hashtbl.add primary_uri_of_prefix np uri;
	Hashtbl.add prefix_of_uri uri np;
      end
      else 
	if l <> [ uri ] then
	  raise(Namespace_error "add_namespace: the namespace does already exist")

    method lookup_or_add_namespace prefix (uri:string) =
      let rec add_loop n =
	let p = prefix ^ (if n=0 then "" else string_of_int n) in
	if Hashtbl.mem uri_of_prefix p then begin
	  add_loop (n+1)
	end
	else begin
	  Hashtbl.add uri_of_prefix p uri;
	  Hashtbl.add primary_uri_of_prefix p uri;
	  Hashtbl.add prefix_of_uri uri p;
	  p
	end
      in
      try
	Hashtbl.find prefix_of_uri uri
      with
	  Not_found ->
	    add_loop (if prefix = "" then 1 else 0)
	      (* prefix = "": make sure that such a prefix is never added *)

    method get_primary_uri normprefix =
      try
	Hashtbl.find primary_uri_of_prefix normprefix
      with
	  Not_found -> 
	    raise(Namespace_prefix_not_managed normprefix)

    method get_uri_list normprefix =
      Hashtbl.find_all uri_of_prefix normprefix

    method get_normprefix uri =
      try
	Hashtbl.find prefix_of_uri uri
      with
	  Not_found ->
	    raise(Namespace_not_managed uri)

    method iter_namespaces f =
      Hashtbl.iter 
	(fun p uri -> f p)
	primary_uri_of_prefix

    method as_declaration =
      let l = ref [] in
      Hashtbl.iter 
	(fun p uri -> l := (p, uri) :: !l)
	primary_uri_of_prefix;
      !l
      
  end
;;


let create_namespace_manager () = new namespace_manager;;


class type namespace_scope =
object
  method namespace_manager : namespace_manager
  method parent_scope : namespace_scope option
  method declaration : (string * string) list
  method effective_declaration : (string * string) list
  method display_prefix_of_uri : string -> string
  method display_prefix_of_normprefix : string -> string
  method uri_of_display_prefix : string -> string
  method normprefix_of_display_prefix : string -> string
end
;;



module StrSet = Set.Make(String);;

class namespace_scope_impl mng parent_opt decl : namespace_scope =
object(self)
  method namespace_manager = mng
  method parent_scope = parent_opt
  method declaration = decl

  method effective_declaration =
    let rec collect visible d s =
      match d with
	| ("", "") :: d' ->
	    if StrSet.mem "" visible then
	      collect visible d' s  (* no effect *)
	    else
	      collect (StrSet.add "" visible) d' s  (* hide inner default *)
	| (dp, uri) :: d' ->
	    if StrSet.mem dp visible then
	      collect visible d' s
	    else
	      (dp, uri) :: collect (StrSet.add dp visible) d' s
	| [] ->
	    ( match s # parent_scope with
		  Some s' ->
		    collect visible s'#declaration s'
		| None ->
		    []
	    )
    in
    collect StrSet.empty self#declaration (self : #namespace_scope :> namespace_scope)

  method display_prefix_of_uri uri =
    try
      fst(List.find (fun (p,u) -> u = uri) decl)
    with
	Not_found ->
	  ( match parent_opt with
		Some pa -> pa # display_prefix_of_uri uri
	      | None    -> raise(Namespace_not_in_scope uri)
	  )

  method display_prefix_of_normprefix np =
    let uris = mng # get_uri_list np in
    if uris = [] then raise(Namespace_prefix_not_managed np);
    try
      fst(List.find (fun (p,u) -> List.mem u uris) decl)
    with
	Not_found ->
	  ( match parent_opt with
		Some pa -> pa # display_prefix_of_normprefix np
	      | None    -> raise(Namespace_not_in_scope
				   (List.hd(List.rev uris)))
	  )

  method uri_of_display_prefix dp =
    try
      List.assoc dp decl
    with
	Not_found -> 
	  ( match parent_opt with
		Some pa -> pa # uri_of_display_prefix dp
	      | None    -> raise Not_found
	  )

  method normprefix_of_display_prefix dp =
    let uri = self # uri_of_display_prefix dp in
    mng # get_normprefix uri

end
;;


let create_namespace_scope ?parent ?(decl = []) mng =
  new namespace_scope_impl mng parent decl ;;


class dtd  ?swarner the_warner init_encoding =
  object (self)
    val mutable root = (None : string option)
    val mutable id =   (None : dtd_id option)
    val mutable mng =  (None : namespace_manager option)

    val warner       = (the_warner : collect_warnings)
    val swarner      = (swarner : symbolic_warnings option)
    val encoding     = init_encoding
    val lfactory     = Pxp_lexers.get_lexer_factory init_encoding

    val elements     = (Str_hashtbl.create 100 : dtd_element Str_hashtbl.t)
    val gen_entities = (Str_hashtbl.create 100 : (entity * bool) Str_hashtbl.t)
    val par_entities = (Str_hashtbl.create 100 : entity Str_hashtbl.t)
    val notations    = (Str_hashtbl.create 100 : dtd_notation Str_hashtbl.t)
    val pinstr       = (Str_hashtbl.create 100 : proc_instruction Str_hashtbl.t)
    val mutable element_names = []
    val mutable gen_entity_names = []
    val mutable par_entity_names = []
    val mutable notation_names = []
    val mutable pinstr_names = []

    val mutable allow_arbitrary = false
    val mutable standalone_declaration = false

    val mutable validated = false

    initializer
    let w = new drop_warnings in
    self # add_gen_entity 
      (new internal_entity self "lt"   None w "&#38;#60;" false false encoding)
      false;
    self # add_gen_entity 
      (new internal_entity self "gt"   None w "&#62;"     false false encoding)
      false;
    self # add_gen_entity 
      (new internal_entity self "amp"  None w "&#38;#38;" false false encoding)
      false;
    self # add_gen_entity 
      (new internal_entity self "apos" None w "&#39;"     false false encoding)
      false;
    self # add_gen_entity 
      (new internal_entity self "quot" None w "&#34;"     false false encoding)
      false;


    method encoding = encoding

    method lexer_factory = lfactory

    method warner = warner

    method swarner = swarner

    method set_root r =
      if root = None then
	root <- Some r
      else
	assert false


    method set_id j =
      if id = None then
	id <- Some j
      else
	assert false


    method standalone_declaration = standalone_declaration

    method set_standalone_declaration b =
      standalone_declaration <- b

    method allow_arbitrary =
      allow_arbitrary <- true

    method disallow_arbitrary =
      allow_arbitrary <- false

    method arbitrary_allowed = allow_arbitrary

    method root = root
    method id = id

    method namespace_manager =
      match mng with
	  None -> raise(Namespace_method_not_applicable "namespace_manager")
	| Some m -> m

    method set_namespace_manager m =
      mng <- Some m

    method add_element el =
      (* raises Not_found if 'el' has already been added *)
      (* Note: 'el' is encoded in the same way as 'self'! *)
      let name = el # name in
      check_name ?swarner warner name;
      if Str_hashtbl.mem elements name then
	raise Not_found;
      Str_hashtbl.add elements name el;
      element_names <- name :: element_names;
      validated <- false


    method add_gen_entity en extdecl =
      (* The following is commented out; perhaps there should be an option
       * to reactivate it on demand
       *)
      (* raises Validation_error if the predefines entities 'lt', 'gt', 'amp',
       * 'quot', and 'apos' are redeclared with an improper value.
       *)
      if en # encoding <> encoding then
	failwith "Pxp_dtd.dtd # add_gen_entity: Inconsistent encodings";
      let name = en # name in
      check_name ?swarner warner name;
      if Str_hashtbl.mem gen_entities name then begin
	if List.mem name [ "lt"; "gt"; "amp"; "quot"; "apos" ] then begin
	  (* These are allowed to be declared several times *)
	  let (rt,_) = en # replacement_text in
	  let toks = tokens_of_content_string lfactory rt in
	  try
	    begin match toks with
	      [CRef 60]       -> if name <> "lt"   then raise Not_found
	    | [CharData ">"]  -> if name <> "gt"   then raise Not_found
	    | [CRef 62]       -> if name <> "gt"   then raise Not_found
	    | [CRef 38]       -> if name <> "amp"  then raise Not_found
	    | [CharData "'"]  -> if name <> "apos" then raise Not_found
	    | [CRef 39]       -> if name <> "apos" then raise Not_found
	    | [CharData "\""] -> if name <> "quot" then raise Not_found
	    | [CRef 34]       -> if name <> "quot" then raise Not_found
	    | _               -> raise Not_found
	    end
	  with
	      Not_found ->
		raise (Validation_error("Predefined entity `" ^ name ^
					"' redeclared"))
	end
	else
	  warn swarner warner (`W_entity_declared_twice name)
      end
      else begin
	Str_hashtbl.add gen_entities name (en, extdecl);
	gen_entity_names <- name :: gen_entity_names
      end


    method add_par_entity en =
      if en # encoding <> encoding then
	failwith "Pxp_dtd.dtd # add_par_entity: Inconsistent encodings";
      let name = en # name in
      check_name ?swarner warner name;
      if not (Str_hashtbl.mem par_entities name) then begin
	Str_hashtbl.add par_entities name en;
	par_entity_names <- name :: par_entity_names
      end
      else
	warn swarner warner (`W_entity_declared_twice name)


    method add_notation no =
      (* raises Validation_error if 'no' already added *)
      if no # encoding <> encoding then
	failwith "Pxp_dtd.dtd # add_notation: Inconsistent encodings";
      let name = no # name in
      check_name ?swarner warner name;
      if Str_hashtbl.mem notations name then
	raise (Validation_error("Notation `" ^ name ^ "' declared twice"));
      Str_hashtbl.add notations name no;
      notation_names <- name :: notation_names


    method add_pinstr pi =
      if pi # encoding <> encoding then
	failwith "Pxp_dtd.dtd # add_pinstr: Inconsistent encodings";
      let name = pi # target in
      check_name ?swarner warner name;

      if String.length name >= 4 && String.sub name 0 4 = "pxp:" then begin
	match name with
	    "pxp:dtd" -> 
	      let _, optname, atts = pi # parse_pxp_option in
	      begin match optname with
		  "optional-element-and-notation-declarations" ->
		    self # allow_arbitrary
		| "optional-attribute-declarations" ->
		    let el_string = 
		      try List.assoc "elements" atts
		      with Not_found ->
			raise(Error("Missing `elements' attribute for pxp:dtd"))
		    in
		    let el = split_attribute_value lfactory el_string in
		    List.iter
		      (fun e_name ->
			 let e =
			   try Str_hashtbl.find elements e_name
			   with
			       Not_found ->
				 raise(Error("Reference to unknown element `" ^
					     e_name ^ "'"))
			 in
			 e # allow_arbitrary
		      )
		      el
		| "namespace" ->
		    let prefix = 
		      try List.assoc "prefix" atts
		      with Not_found ->
			raise(Error("Missing `prefix' attribute for pxp:dtd"))
		    in
		    let uri =
		      try List.assoc "uri" atts
		      with Not_found ->
			raise(Error("Missing `uri' attribute for pxp:dtd"))
		    in
		    ( match mng with
			  None ->
			    raise(Error("Cannot do pxp:dtd instruction: namespaces not enabled"))
			| Some m ->
			    ( try m # add_uri prefix uri
			      with Namespace_prefix_not_managed _ ->
				m # add_namespace prefix uri
			    )
		    )
		| _ ->
		    raise(Error("Unknown PXP option `" ^ 
				optname ^ "'"))
	      end
	  | _ ->
	      raise(Error("The processing instruction target `" ^ 
			  name ^ "' is not defined by this PXP version"))
      end;
      Str_hashtbl.add pinstr name pi;
      if not (List.mem name pinstr_names) then
	pinstr_names <- pinstr_names @ [name];


    method element name =
      (* returns the element 'name' or raises Validation_error if not found *)
      try
	Str_hashtbl.find elements name
      with
	  Not_found ->
	    if allow_arbitrary then
	      raise Undeclared
	    else
	      raise(Validation_error("Reference to undeclared element `" ^ name ^ "'"))

    method element_names =
      (* returns the list of all names of element declarations *)
      element_names


    method gen_entity name =
      (* returns the entity 'name' or raises WF_error if not found *)
      try
	Str_hashtbl.find gen_entities name
      with
	  Not_found ->
	    raise(WF_error("Reference to undeclared general entity `" ^ name ^ "'"))


    method gen_entity_names = gen_entity_names


    method par_entity name =
      (* returns the entity 'name' or raises WF_error if not found *)
      try
	Str_hashtbl.find par_entities name
      with
	  Not_found ->
	    raise(WF_error("Reference to undeclared parameter entity `" ^ name ^ "'"))


    method par_entity_names = par_entity_names


    method notation name =
      (* returns the notation 'name' or raises Validation_error if not found *)
      try
	Str_hashtbl.find notations name
      with
	  Not_found ->
	    if allow_arbitrary then
	      raise Undeclared
	    else
	      raise(Validation_error("Reference to undeclared notation `" ^ name ^ "'"))


    method notation_names = notation_names


    method pinstr name =
      (* returns the list of all processing instructions contained in the DTD
       * with target 'name'
       *)
      Str_hashtbl.find_all pinstr name


    method pinstr_names = pinstr_names

    method write_ref ?root:proot os enc =
      let write_sysid s =
	write_markup_string 
	  ~from_enc:`Enc_utf8 ~to_enc:enc os
	  ( if String.contains s '"' then
	      "'" ^ s ^ "'"
	    else
	      "\"" ^ s ^ "\""
	  )
      in
      let wms = 
	write_markup_string ~from_enc:encoding ~to_enc:enc os in

      wms "<!DOCTYPE ";
      ( match proot with
	  | None ->
	      ( match root with
		    None -> failwith "#write: DTD without root";
		  | Some r -> wms r
	      )
	  | Some r -> wms r
      );
      begin match id with
	  None ->
	    failwith "#write_ref: DTD does not have an ID"
	| Some (External (Public (p,s))) ->
	    wms " PUBLIC ";
	    write_sysid p;
	    wms " ";
	    write_sysid s
	| Some (External (System s)) ->
	    wms " SYSTEM ";
	    write_sysid s
	| Some (External _) ->
	    failwith "#write_ref: External ID cannot be represented"
	| Some Internal ->
	    failwith "#write_ref: Cannot write internal ID"
	| Some (Derived _) ->
	    failwith "#write_ref: Cannot write derived ID"
      end;
      wms ">\n";


    method write ?root:proot os enc doctype = 
      let wms = 
	write_markup_string ~from_enc:encoding ~to_enc:enc os in

      let write_sysid s =
	write_markup_string 
	  ~from_enc:`Enc_utf8 ~to_enc:enc os
	  ( if String.contains s '"' then
	      "'" ^ s ^ "'"
	    else
	      "\"" ^ s ^ "\""
	  )
      in

      if doctype then begin
	wms "<!DOCTYPE ";
	( match proot with
	    | None ->
		( match root with
		      None -> failwith "#write: DTD without root";
		    | Some r -> wms r
		);
	    | Some r -> wms r
	);
	wms " [\n";
      end;

      (* Notations: *)
      List.iter
	(fun name ->
	   let notation = 
	     try Str_hashtbl.find notations name with Not_found -> assert false in
	   notation # write os enc)
	(List.sort compare notation_names);

      (* Unparsed entities: *)
      List.iter
	(fun name ->
	   let ent,_ = 
	     try Str_hashtbl.find gen_entities name with Not_found -> assert false 
	   in
	   if ent # is_ndata then begin
	     let xid = ent # ext_id in
	     let notation = ent # notation in
	     wms ("<!ENTITY " ^ name ^ " " );
	     ( match xid with
		   System s ->
		     wms "SYSTEM ";
		     write_sysid s;
		 | Public (p,s) ->
		     wms "PUBLIC ";
		     write_sysid p;
		     if (s <> "") then begin
		       wms " ";
		       write_sysid s;
		     end;
		 | Anonymous ->
		     failwith "#write: External ID `Anonymous' cannot be represented"
		 | Private _ ->
		     failwith "#write: External ID `Private' cannot be represented"
	     );
	     wms (" NDATA " ^ notation ^ ">\n");
	   end
	)
	(List.sort compare gen_entity_names);

      (* Elements: *)
      List.iter
	(fun name ->
	   let element = 
	     try Str_hashtbl.find elements name with Not_found -> assert false in
	   element # write os enc)
	(List.sort compare element_names);

      (* Processing instructions: *)
      List.iter
	(fun name ->
	   List.iter
	     (fun pi ->
		pi # write os enc)
	     (Str_hashtbl.find_all pinstr name)
	)
	(List.sort compare pinstr_names);

      if doctype then 
	wms "]>\n";


    (************************************************************)
    (*                    VALIDATION                            *)
    (************************************************************)

    method only_deterministic_models =
      Str_hashtbl.iter
	(fun n el ->
	   let cm = el # content_model in
	   match cm with
	       Regexp _ ->
		 if el # content_dfa = None then
		   raise(Validation_error("The content model of element `" ^
					  n ^ "' is not deterministic"))
	     | _ ->
		 ()
	)
	elements;
      

    method validate =
      if validated || allow_arbitrary then
	()
      else begin
	(* Validity constraint: Notations in NDATA entity declarations must
	 * be declared
	 *)
	List.iter
	  (fun name ->
	     let ent,_ = 
	       try Str_hashtbl.find gen_entities name with Not_found -> assert false 
	     in
	     if ent # is_ndata then begin
	       let _xid = ent # ext_id in
	       let notation = ent # notation in
	       try
		 ignore(self # notation notation)
		   (* Raises Validation_error if the constraint is violated *)
	       with
		   Undeclared -> ()
	     end
	  )
	  gen_entity_names;

	(* Validate the elements: *)
	Str_hashtbl.iter
	  (fun n el ->
	     el # validate)
	  elements;

	(* Check the root element: This check is commented out because:
	 * - it is performed anyway when the root element is validated
	 * - it does not work in namespace mode (missing ns normalization)
	 *)
(* --
	begin match root with
	    None -> ()
	  | Some r ->
	      begin try
		let _ = Str_hashtbl.find elements r in ()
	      with
		  Not_found ->
		    raise(Validation_error("The root element is not declared"))
	      end
	end;
  --*)
	validated <- true;
      end

    method invalidate =
      validated <- false

    (************************************************************)

  end


(**********************************************************************)

and dtd_element the_dtd the_name =
  object (self)
    val dtd = (the_dtd : dtd)
    val name = the_name
    val lfactory = the_dtd # lexer_factory
    val mutable content_model = Unspecified
    val mutable content_model_validated = false
    val mutable content_dfa = lazy None

    val mutable externally_declared = false

    val mutable attributes = 
	    ([] : (string * ((att_type * att_default) * bool)) list)
    val mutable attributes_validated = false

    val mutable id_att_name = None
    val mutable idref_att_names = []

    val mutable allow_arbitrary = false

    val mutable vr = (None : validation_record option)

    method name = name

    method set_cm_and_extdecl m extdecl =
      if content_model = Unspecified then begin
	content_model <- m;
	content_model_validated <- false;
	content_dfa <- lazy (self # compute_content_dfa);
	externally_declared <- extdecl;
	self # update_vr;
	dtd # invalidate
      end
      else
	raise(Validation_error("Element `" ^ name ^ "' has already a content model"))

    method content_model = content_model

    method content_dfa = Lazy.force content_dfa
      
    method private compute_content_dfa =
      match content_model with
	  Regexp re ->
	    ( try Some (dfa_of_regexp_content_model re)
	      with Not_found -> None
	    )
	| _ ->
	    None

    method externally_declared = externally_declared

    method encoding = dtd # encoding

    method allow_arbitrary =
      allow_arbitrary <- true;
      self # update_vr;

    method disallow_arbitrary =
      allow_arbitrary <- false;
      self # update_vr;

    method arbitrary_allowed = allow_arbitrary

    method add_attribute aname t d extdecl =
      let swarner = dtd#swarner 
      and warner = dtd#warner in
      if aname <> "xml:lang" && aname <> "xml:space" then
	check_name ?swarner warner aname;
      if List.mem_assoc aname attributes then
	warn swarner warner (`W_multiple_attribute_declarations(name,aname))
      else begin
	begin match aname with
	    "xml:space" ->
	      begin match t with
		  A_enum l ->
		    let ok =
		      List.for_all
			(fun tok -> List.mem tok ["default";"preserve"])
			l
		    in
		    if not ok then
		      raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
		| _ ->
		    raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
	      end
	  | _ -> ()
	end; 
	begin match t with
	    A_id ->
	      id_att_name <- Some aname;
	  | (A_idref | A_idrefs) ->
	      idref_att_names <- aname :: idref_att_names;
	  | _ ->
	      ()
	end;
	attributes <- (aname, ((t,d),extdecl)) :: attributes;
	attributes_validated <- false;
	dtd # invalidate;
	self # update_vr;
      end

    method attribute attname =
      try
	fst (List.assoc attname attributes)
      with
	  Not_found ->
	    if allow_arbitrary then
	      raise Undeclared
	    else
	      raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
				     ^ name ^ "' not declared"))

    method attribute_violates_standalone_declaration attname v =
      try
	let (atype, adefault), extdecl = List.assoc attname attributes in
	extdecl &&
	( match v with
	      None -> 
		adefault <> D_required && adefault <> D_implied
		(* i.e. adefault matches D_default or D_fixed *)
	    | Some s ->
		atype <> A_cdata &&
		normalization_changes_value lfactory atype s
	)
      with
	  Not_found ->
	    if allow_arbitrary then
	      raise Undeclared
	    else
	      raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
				     ^ name ^ "' not declared"))


    method attribute_names =
      List.map fst attributes

    method names_of_required_attributes =
      List.flatten
	(List.map
	   (fun (n,((t,d),_)) ->
	      if d = D_required then
		[n]
	      else
		[])
	   attributes)

    method id_attribute_name = id_att_name

    method idref_attribute_names = idref_att_names


    method private update_vr =
      vr <- None

    method internal_vr =
      (	match vr with
	    None ->
	      let n = List.length attributes in
	      let init_att_vals = Array.make n ("", Implied_value) in
	      let att_lookup = Str_hashtbl.create n in
	      let att_info = Array.make n (A_cdata, false) in
	      let att_required = ref [] in
	      let k = ref 0 in
	      List.iter
		(fun (n, ((t,d), ext)) ->

		   Str_hashtbl.add att_lookup n !k;

		   let init_val = 
		     match d with
			 (D_required | D_implied) -> Implied_value
		       | D_default v ->
			   value_of_attribute lfactory dtd n t v
		       | D_fixed v ->
			   value_of_attribute lfactory dtd n t v
		   in

		   init_att_vals.( !k ) <- (n, init_val);
		   att_info.( !k ) <- (t, match d with D_fixed _ -> true 
				                     | _         -> false);
		   if d = D_required then
		     att_required := !k :: !att_required;
		   incr k;
		)
		attributes;
		
	      vr <- Some { content_model = content_model;
			   content_dfa =  content_dfa;
			   id_att_name = id_att_name;
			   idref_att_names = idref_att_names;
			   init_att_vals = init_att_vals;
			   att_lookup = att_lookup;
			   att_info = att_info;
			   att_required = !att_required;
			   accept_undeclared_atts = allow_arbitrary;
			 }
	  | _ -> ()
      );
      ( match vr with
	    None -> assert false
	  | Some vr' -> vr'
      )

    method write os enc = 
      let encoding = self # encoding in
      let wms = 
	write_markup_string ~from_enc:encoding ~to_enc:enc os in

      let rec write_contentspec cs =
	match cs with
	    Unspecified ->
	      failwith "#write: Unspecified content model found"
	  | Empty ->
	      wms "EMPTY"
	  | Any ->
	      wms "ANY"
	  | Mixed ml ->
	      wms "(";
	      write_mixedspec_list ml;
	      wms ")*";
	  | Regexp re ->
	      write_children re false

      and write_mixedspec_list ml =
	match ml with
	    MPCDATA :: ml' ->
	      wms "#PCDATA";
	      if ml' <> [] then wms "|";
	      write_mixedspec_list ml';
	  | MChild s :: ml' ->
	      wms s;
	      if ml' <> [] then wms "|";
	      write_mixedspec_list ml';
	  | [] ->
	      ()

      and write_children re cp =
	match re with
	    Optional re' ->
	      let p = needs_parens re' in
	      if p then wms "(";
	      write_children re' cp;
	      if p then wms ")";
	      wms "?";
	  | Repeated re' ->
	      let p = needs_parens re' in
	      if p then wms "(";
	      write_children re' cp;
	      if p then wms ")";
	      wms "*";
	  | Repeated1 re' ->
	      let p = needs_parens re' in
	      if p then wms "(";
	      write_children re' cp;
	      if p then wms ")";
	      wms "+";
	  | Alt re' ->
	      wms "(";
	      ( match re' with
		    re1' :: rer' ->
		      write_children re1' true;
		      List.iter
			(fun ren' ->
			   wms "|";
			   write_children ren' true;
			)
			rer';
		  | [] ->
		      failwith "#write: Illegal content model"
	      );
	      wms ")";
	  | Seq re' ->
	      wms "(";
	      ( match re' with
		    re1' :: rer' ->
		      write_children re1' true;
		      List.iter
			(fun ren' ->
			   wms ",";
			   write_children ren' true;
			)
			rer';
		  | [] ->
		      failwith "#write: Illegal content model"
	      );
	      wms ")";
	  | Child ch ->
	      if not cp then wms "(";
	      wms ch;
	      if not cp then wms ")";

      and needs_parens re =
	match re with
	    (Optional _ | Repeated _ | Repeated1 _ ) -> true
	  | _ -> false
      in

      wms ("<!ELEMENT " ^ name ^ " ");
      write_contentspec content_model;
      wms ">\n";

      wms ("<!ATTLIST " ^ name);
      List.iter
	(fun (n,((t,d),_)) ->
	   wms ("\n  " ^ n);
	   ( match t with
		 A_cdata       -> wms " CDATA";
	       | A_id          -> wms " ID";
	       | A_idref       -> wms " IDREF";
	       | A_idrefs      -> wms " IDREFS";
	       | A_entity      -> wms " ENTITY";
	       | A_entities    -> wms " ENTITIES";
	       | A_nmtoken     -> wms " NMTOKEN";
	       | A_nmtokens    -> wms " NMTOKENS";
	       | A_notation nl -> 
		   wms " NOTATION (";
		   ( match nl with
			 nl1:: nl' ->
			   wms nl1;
			   List.iter
			     (fun n ->
				wms ("|" ^ n);
			     )
			     nl'
		       | [] ->
			   failwith "#write: Illegal content model";
		   );
		   wms ")";
	       | A_enum el     ->
		   wms " (";
		   ( match el with
			 el1:: el' ->
			   wms el1;
			   List.iter
			     (fun e ->
				wms ("|" ^ e);
			     )
			     el'
		       | [] ->
			   failwith "#write: Illegal content model";
		   );
		   wms ")";
	   );
	   ( match d with
		 D_required -> wms " #REQUIRED"
	       | D_implied  -> wms " #IMPLIED"
	       | D_default s ->
		   wms " \"";
		   write_data_string ~from_enc:encoding ~to_enc:enc os s;
		   wms "\"";
	       | D_fixed s ->
		   wms " FIXED \"";
		   write_data_string ~from_enc:encoding ~to_enc:enc os s;
		   wms "\"";
	   );
	)
	(List.sort (fun (n1,x1) (n2,x2) -> compare n1 n2) attributes);

      wms ">\n";

    (************************************************************)
    (*                    VALIDATION                            *)
    (************************************************************)

    method validate =
      self # validate_attributes();
      self # validate_content_model()

    method private validate_attributes() =
      if attributes_validated then
	()
      else begin
	(* Validity Constraint: One ID per Element Type *)
	let n = count (fun (n,((t,d),_)) -> t = A_id) attributes in
	if n > 1 then
	  raise(Validation_error("More than one ID attribute for element `" ^ name ^ "'"));
	(* Validity Constraint: ID Attribute Default *)
	if List.exists
	     (fun (n,((t,d),_)) ->
		t = A_id && (d <> D_required && d <> D_implied))
	     attributes
	then
	  raise(Validation_error("ID attribute must be #IMPLIED or #REQUIRED; element `" ^ name ^ "'"));
	(* Validity Constraint: One Notation per Element Type *)
	let n = count (fun (n,((t,d),_)) ->
			 match t with A_notation _ -> true | _ -> false)
		      attributes in
	if n > 1 then
	  raise(Validation_error("More than one NOTATION attribute for element `" ^ name ^ "'"));
	(* Validity Constraint: Notation Attributes [second part] *)
	List.iter
	  (fun (n,((t,d),_)) ->
	     match t with
		 A_notation l ->
		   List.iter
		     (fun nname ->
			let _ = dtd # notation nname in ())
		     l
	       | _ -> ())
	  attributes;
	(* Validity Constraint: Attribute Default Legal *)
	List.iter
	  (fun (n,((t,d),_)) ->

	     let check v =
	       let lexical_error() =
		 lazy (raise(Validation_error("Default value for attribute `" ^ n ^ "' is lexically malformed"))) in
	       check_attribute_value_lexically lfactory (lexical_error()) t v;
	       begin match t with
		   (A_entity|A_entities) ->
		     List.iter
		       (fun nd ->
			  let en, extdecl = dtd # gen_entity nd in
			  if not (en # is_ndata) then
			    raise(Validation_error("Attribute default value must be the name of an NDATA entity; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
(*			  if dtd # standalone_declaration && extdecl then
			    raise(Validation_error("Attribute default value violates the standalone declaration; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'")); 
-- This is checked anyway when the attribute value is normalized
*)
		       )
		       (split_attribute_value lfactory v)
		 | A_notation nl ->
		     if not (List.mem v nl) then
		       raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
		 | A_enum nl ->
		     if not (List.mem v nl) then
		       raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
		 | _          -> ()
	       end
	     in

	     match d with
		 D_required -> ()
	       | D_implied -> ()
	       | D_default v -> check v
	       | D_fixed v   -> check v
	  )
	  attributes;

	(* Ok: This element declaration is valid *)
	attributes_validated <- true;

      end

    method private validate_content_model () =
      (* checks:
       * - Validity Constraint: No Duplicate Types
       * It is not an error if there is a child in the declaration for which
       * no element declaration is provided.
       *)
      match content_model with
	  Unspecified ->
	    warn (dtd#swarner) (dtd#warner)
 	         (`W_element_mentioned_but_not_declared name);
	    ()
	| Empty -> ()
	| Any -> ()
	| Mixed (pcdata :: l) ->
	    (* MPCDATA is always the first element by construction *)
	    assert (pcdata = MPCDATA);
	    if check_dups l then
	      raise (Validation_error("Double children in declaration for element `" ^ name ^ "'"))
	| Regexp _ -> ()
	| _ -> assert false



    (************************************************************)

  end

and dtd_notation the_name the_xid init_encoding =
object (self)
    val name = the_name
    val xid = (the_xid : ext_id)
    val encoding = (init_encoding : Pxp_core_types.I.rep_encoding)
    method name = name
    method ext_id = xid
    method encoding = encoding

    method write os enc = 
      let wms = 
	write_markup_string ~from_enc:encoding ~to_enc:enc os in

      let write_sysid s =
	if String.contains s '"' then
	  wms ("'" ^ s ^ "'")
	else
	  wms ("\"" ^ s ^ "\"");
      in

      wms ("<!NOTATION " ^ name ^ " ");
      ( match xid with
	    System s ->
	      wms "SYSTEM ";
	      write_sysid s;
	  | Public (p,s) ->
	      wms "PUBLIC ";
	      write_sysid p;
	      if (s <> "") then begin
		wms " ";
		write_sysid s;
	      end;
	  | Anonymous ->
	      failwith "#write: External ID `Anonymous' cannot be represented"
	  | Private _ ->
	      failwith "#write: External ID `Private' cannot be represented"
      );
      wms ">\n";

  end

and proc_instruction the_target the_value init_encoding =
object (self)
    val target = the_target
    val value = (the_value : string)
    val encoding = (init_encoding : Pxp_core_types.I.rep_encoding)

    initializer
      match target with
	  ("xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML") ->
	    (* This is an error, not a warning, because I do not have a
	     * "warner" object by hand.
	     *)
	    raise(WF_error("Reserved processing instruction"))
	| _ -> ()

    method target = target
    method value = value
    method encoding = encoding

    method write os enc = 
      let wms = 
	write_markup_string ~from_enc:encoding ~to_enc:enc os in

      wms "<?";
      wms target;
      wms " ";
      wms value;
      wms "?>";

    method parse_pxp_option =
      let lfactory = get_lexer_factory encoding in
      try
	let toks = tokens_of_xml_pi lfactory value in (* may raise WF_error *)
	begin match toks with
	    (Pro_name option_name) :: toks' ->
	      let atts = decode_xml_pi toks' in       (* may raise WF_error *)
	      (target, option_name, atts)
	  | _ ->
	      raise(Error("Bad PXP processing instruction"))
	end
      with
	  WF_error _ ->
	    raise(Error("Bad PXP processing instruction"))

  end
;;

let create_dtd ?swarner ?(warner = new drop_warnings) enc =
  new dtd ?swarner warner enc ;;


type source =
    Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
  | ExtID of (ext_id * Pxp_reader.resolver)
  | XExtID of (ext_id * string option * Pxp_reader.resolver)
;;

module Entity = struct
  let get_name ent = ent # name
  let get_full_name ent = ent # full_name
  let get_encoding ent = ent # encoding
  let get_type ent =
    if ent # is_ndata then `NDATA else
      try ignore(ent # ext_id); `External with Not_found -> `Internal
  let replacement_text ent = fst(ent # replacement_text)
  let get_xid ent =
    try Some(ent # ext_id) with Not_found -> None
  let get_resolver_id ent =
    try Some(ent # resolver_id) with Not_found -> None
  let get_notation ent =
    if ent # is_ndata then Some (ent # notation) else None
  let create_internal_entity ~name ~value dtd =
    new internal_entity dtd name (dtd # swarner) (dtd # warner) value 
        false false (dtd # encoding)
  let create_ndata_entity ~name ~xid ~notation dtd =
    new ndata_entity name xid notation dtd#encoding
  let create_external_entity ?(doc_entity = false) ?system_base 
                             ~name ~xid ~resolver dtd =
    if doc_entity then
      new document_entity resolver dtd name dtd#swarner dtd#warner xid
	                  system_base dtd#encoding
    else
      new external_entity resolver dtd name dtd#swarner dtd#warner xid
	                  system_base false dtd#encoding
  let from_external_source ?doc_entity ~name dtd src =
    match src with
	ExtID(xid,resolver) -> 
	  create_external_entity ?doc_entity ~name ~xid ~resolver dtd
      | XExtID(xid,system_base,resolver) -> 
	  create_external_entity ?doc_entity ?system_base 
                                 ~name ~xid ~resolver dtd
      | Entity(make,resolver) ->
	  make dtd  (* resolver ignored *)

  let entity_id ent = (ent :> entity_id)

  class fake = object method pxp_magic_coercion() : unit = raise Not_found end

  let create_entity_id () = new fake

  let lookup eid =
    try 
      let () = eid#pxp_magic_coercion() in
      assert false
    with
      | Not_found -> invalid_arg "Pxp_dtd.Event.lookup"
      | Pxp_entity.Coerced_entity e -> e

end



This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml