(* $Id: pxp_dtd.ml,v 1.21 2002/03/10 23:39:28 gerd Exp $
* ----------------------------------------------------------------------
* PXP: The polymorphic XML parser for Objective Caml.
* Copyright by Gerd Stolpmann. See LICENSE for details.
*)
open Pxp_types
open Pxp_lexer_types
open Pxp_lexers
open Pxp_entity
open Pxp_aux
open Pxp_dfa
(**********************************************************************)
type validation_record =
{ content_model : content_model_type;
content_dfa : dfa_definition option Lazy.t;
id_att_name : string option;
idref_att_names : string list;
att_lookup : int Str_hashtbl.t;
init_att_vals : (string * att_value) array;
att_info : (att_type * bool) array;
att_required : int list;
accept_undeclared_atts : bool;
}
;;
class namespace_manager =
object (self)
val uri_of_prefix = Hashtbl.create 10 (* not unique *)
val prefix_of_uri = Hashtbl.create 10 (* unique *)
val primary_uri_of_prefix = Hashtbl.create 10 (* unique *)
initializer
ignore(self # add_namespace "xml" "http://www.w3.org/XML/1998/namespace")
method add_uri (np:string) (uri:string) =
if not (Hashtbl.mem uri_of_prefix np) then raise Not_found;
try
let np' = Hashtbl.find prefix_of_uri uri in
if np <> np' then
raise(Namespace_error "add_uri: the URI is already managed")
with
Not_found ->
Hashtbl.add uri_of_prefix np uri;
Hashtbl.add prefix_of_uri uri np;
()
method add_namespace np uri =
let l = Hashtbl.find_all uri_of_prefix np in
if l = [] then begin
if Hashtbl.mem prefix_of_uri uri then
raise(Namespace_error "add_namespace: the URI is already managed");
Hashtbl.add uri_of_prefix np uri;
Hashtbl.add primary_uri_of_prefix np uri;
Hashtbl.add prefix_of_uri uri np;
end
else
if l <> [ uri ] then
raise(Namespace_error "add_namespace: the namespace does already exist")
method lookup_or_add_namespace prefix (uri:string) =
let rec add_loop n =
let p = prefix ^ (if n=0 then "" else string_of_int n) in
if Hashtbl.mem uri_of_prefix p then begin
add_loop (n+1)
end
else begin
Hashtbl.add uri_of_prefix p uri;
Hashtbl.add primary_uri_of_prefix p uri;
Hashtbl.add prefix_of_uri uri p;
p
end
in
try
Hashtbl.find prefix_of_uri uri
with
Not_found ->
add_loop (if prefix = "" then 1 else 0)
(* prefix = "": make sure that such a prefix is never added *)
method get_primary_uri normprefix =
Hashtbl.find primary_uri_of_prefix normprefix
method get_uri_list normprefix =
Hashtbl.find_all uri_of_prefix normprefix
method get_normprefix uri =
Hashtbl.find prefix_of_uri uri
method iter_namespaces f =
Hashtbl.iter
(fun p uri -> f p)
primary_uri_of_prefix
end
;;
class dtd the_warner init_encoding =
object (self)
val mutable root = (None : string option)
val mutable id = (None : dtd_id option)
val mutable mng = (None : namespace_manager option)
val warner = (the_warner : collect_warnings)
val encoding = init_encoding
val lexerset = Pxp_lexers.get_lexer_set init_encoding
val elements = (Str_hashtbl.create 100 : dtd_element Str_hashtbl.t)
val gen_entities = (Str_hashtbl.create 100 : (entity * bool) Str_hashtbl.t)
val par_entities = (Str_hashtbl.create 100 : entity Str_hashtbl.t)
val notations = (Str_hashtbl.create 100 : dtd_notation Str_hashtbl.t)
val pinstr = (Str_hashtbl.create 100 : proc_instruction Str_hashtbl.t)
val mutable element_names = []
val mutable gen_entity_names = []
val mutable par_entity_names = []
val mutable notation_names = []
val mutable pinstr_names = []
val mutable allow_arbitrary = false
val mutable standalone_declaration = false
val mutable validated = false
initializer
let w = new drop_warnings in
self # add_gen_entity
(new internal_entity self "lt" w "&#60;" false false encoding)
false;
self # add_gen_entity
(new internal_entity self "gt" w ">" false false encoding)
false;
self # add_gen_entity
(new internal_entity self "amp" w "&#38;" false false encoding)
false;
self # add_gen_entity
(new internal_entity self "apos" w "'" false false encoding)
false;
self # add_gen_entity
(new internal_entity self "quot" w """ false false encoding)
false;
method encoding = encoding
method warner = warner
method set_root r =
if root = None then
root <- Some r
else
assert false
method set_id j =
if id = None then
id <- Some j
else
assert false
method standalone_declaration = standalone_declaration
method set_standalone_declaration b =
standalone_declaration <- b
method allow_arbitrary =
allow_arbitrary <- true
method disallow_arbitrary =
allow_arbitrary <- false
method arbitrary_allowed = allow_arbitrary
method root = root
method id = id
method namespace_manager =
match mng with
None -> raise(Namespace_method_not_applicable "namespace_manager")
| Some m -> m
method set_namespace_manager m =
mng <- Some m
method add_element el =
(* raises Not_found if 'el' has already been added *)
(* Note: 'el' is encoded in the same way as 'self'! *)
let name = el # name in
check_name warner name;
if Str_hashtbl.mem elements name then
raise Not_found;
Str_hashtbl.add elements name el;
element_names <- name :: element_names;
validated <- false
method add_gen_entity en extdecl =
(* The following is commented out; perhaps there should be an option
* to reactivate it on demand
*)
(* raises Validation_error if the predefines entities 'lt', 'gt', 'amp',
* 'quot', and 'apos' are redeclared with an improper value.
*)
if en # encoding <> encoding then
failwith "Pxp_dtd.dtd # add_gen_entity: Inconsistent encodings";
let name = en # name in
check_name warner name;
if Str_hashtbl.mem gen_entities name then begin
if List.mem name [ "lt"; "gt"; "amp"; "quot"; "apos" ] then begin
(* These are allowed to be declared several times *)
let (rt,_) = en # replacement_text in
let toks = tokens_of_content_string lexerset rt in
try
begin match toks with
[CRef 60] -> if name <> "lt" then raise Not_found
| [CharData ">"] -> if name <> "gt" then raise Not_found
| [CRef 62] -> if name <> "gt" then raise Not_found
| [CRef 38] -> if name <> "amp" then raise Not_found
| [CharData "'"] -> if name <> "apos" then raise Not_found
| [CRef 39] -> if name <> "apos" then raise Not_found
| [CharData "\""] -> if name <> "quot" then raise Not_found
| [CRef 34] -> if name <> "quot" then raise Not_found
| _ -> raise Not_found
end
with
Not_found ->
raise (Validation_error("Predefined entity `" ^ name ^
"' redeclared"))
end
else
warner # warn ("Entity `" ^ name ^ "' declared twice")
end
else begin
Str_hashtbl.add gen_entities name (en, extdecl);
gen_entity_names <- name :: gen_entity_names
end
method add_par_entity en =
if en # encoding <> encoding then
failwith "Pxp_dtd.dtd # add_par_entity: Inconsistent encodings";
let name = en # name in
check_name warner name;
if not (Str_hashtbl.mem par_entities name) then begin
Str_hashtbl.add par_entities name en;
par_entity_names <- name :: par_entity_names
end
else
warner # warn ("Entity `" ^ name ^ "' declared twice")
method add_notation no =
(* raises Validation_error if 'no' already added *)
if no # encoding <> encoding then
failwith "Pxp_dtd.dtd # add_notation: Inconsistent encodings";
let name = no # name in
check_name warner name;
if Str_hashtbl.mem notations name then
raise (Validation_error("Notation `" ^ name ^ "' declared twice"));
Str_hashtbl.add notations name no;
notation_names <- name :: notation_names
method add_pinstr pi =
if pi # encoding <> encoding then
failwith "Pxp_dtd.dtd # add_pinstr: Inconsistent encodings";
let name = pi # target in
check_name warner name;
if String.length name >= 4 && String.sub name 0 4 = "pxp:" then begin
match name with
"pxp:dtd" ->
let _, optname, atts = pi # parse_pxp_option in
begin match optname with
"optional-element-and-notation-declarations" ->
self # allow_arbitrary
| "optional-attribute-declarations" ->
let lexers = Pxp_lexers.get_lexer_set encoding in
let el_string =
try List.assoc "elements" atts
with Not_found ->
raise(Error("Missing `elements' attribute for pxp:dtd"))
in
let el = split_attribute_value lexers el_string in
List.iter
(fun e_name ->
let e =
try Str_hashtbl.find elements e_name
with
Not_found ->
raise(Error("Reference to unknown element `" ^
e_name ^ "'"))
in
e # allow_arbitrary
)
el
| "namespace" ->
let prefix =
try List.assoc "prefix" atts
with Not_found ->
raise(Error("Missing `prefix' attribute for pxp:dtd"))
in
let uri =
try List.assoc "uri" atts
with Not_found ->
raise(Error("Missing `uri' attribute for pxp:dtd"))
in
( match mng with
None ->
raise(Error("Cannot do pxp:dtd instruction: namespaces not enabled"))
| Some m ->
( try m # add_uri prefix uri
with Not_found ->
m # add_namespace prefix uri
)
)
| _ ->
raise(Error("Unknown PXP option `" ^
optname ^ "'"))
end
| _ ->
raise(Error("The processing instruction target `" ^
name ^ "' is not defined by this PXP version"))
end;
Str_hashtbl.add pinstr name pi;
if not (List.mem name pinstr_names) then
pinstr_names <- name :: pinstr_names;
method element name =
(* returns the element 'name' or raises Validation_error if not found *)
try
Str_hashtbl.find elements name
with
Not_found ->
if allow_arbitrary then
raise Undeclared
else
raise(Validation_error("Reference to undeclared element `" ^ name ^ "'"))
method element_names =
(* returns the list of all names of element declarations *)
element_names
method gen_entity name =
(* returns the entity 'name' or raises WF_error if not found *)
try
Str_hashtbl.find gen_entities name
with
Not_found ->
raise(WF_error("Reference to undeclared general entity `" ^ name ^ "'"))
method gen_entity_names = gen_entity_names
method par_entity name =
(* returns the entity 'name' or raises WF_error if not found *)
try
Str_hashtbl.find par_entities name
with
Not_found ->
raise(WF_error("Reference to undeclared parameter entity `" ^ name ^ "'"))
method par_entity_names = par_entity_names
method notation name =
(* returns the notation 'name' or raises Validation_error if not found *)
try
Str_hashtbl.find notations name
with
Not_found ->
if allow_arbitrary then
raise Undeclared
else
raise(Validation_error("Reference to undeclared notation `" ^ name ^ "'"))
method notation_names = notation_names
method pinstr name =
(* returns the list of all processing instructions contained in the DTD
* with target 'name'
*)
Str_hashtbl.find_all pinstr name
method pinstr_names = pinstr_names
method write_ref os enc =
let write_sysid s =
write_markup_string
~from_enc:`Enc_utf8 ~to_enc:enc os
( if String.contains s '"' then
"'" ^ s ^ "'"
else
"\"" ^ s ^ "\""
)
in
let wms =
write_markup_string ~from_enc:encoding ~to_enc:enc os in
wms "<!DOCTYPE ";
( match root with
None -> failwith "#write: DTD without root";
| Some r -> wms r
);
begin match id with
None ->
failwith "#write_ref: DTD does not have an ID"
| Some (External (Public (p,s))) ->
wms " PUBLIC ";
write_sysid p;
wms " ";
write_sysid s
| Some (External (System s)) ->
wms " SYSTEM ";
write_sysid s
| Some (External _) ->
failwith "#write_ref: External ID cannot be represented"
| Some Internal ->
failwith "#write_ref: Cannot write internal ID"
| Some (Derived _) ->
failwith "#write_ref: Cannot write derived ID"
end;
wms ">\n";
method write os enc doctype =
let wms =
write_markup_string ~from_enc:encoding ~to_enc:enc os in
let write_sysid s =
write_markup_string
~from_enc:`Enc_utf8 ~to_enc:enc os
( if String.contains s '"' then
"'" ^ s ^ "'"
else
"\"" ^ s ^ "\""
)
in
if doctype then begin
wms "<!DOCTYPE ";
( match root with
None -> failwith "#write: DTD without root";
| Some r -> wms r
);
wms " [\n";
end;
(* Notations: *)
List.iter
(fun name ->
let notation =
try Str_hashtbl.find notations name with Not_found -> assert false in
notation # write os enc)
(List.sort compare notation_names);
(* Unparsed entities: *)
List.iter
(fun name ->
let ent,_ =
try Str_hashtbl.find gen_entities name with Not_found -> assert false
in
if ent # is_ndata then begin
let xid = ent # ext_id in
let notation = ent # notation in
wms ("<!ENTITY " ^ name ^ " " );
( match xid with
System s ->
wms "SYSTEM ";
write_sysid s;
| Public (p,s) ->
wms "PUBLIC ";
write_sysid p;
if (s <> "") then begin
wms " ";
write_sysid s;
end;
| Anonymous ->
failwith "#write: External ID `Anonymous' cannot be represented"
| Private _ ->
failwith "#write: External ID `Private' cannot be represented"
);
wms (" NDATA " ^ notation ^ ">\n");
end
)
(List.sort compare gen_entity_names);
(* Elements: *)
List.iter
(fun name ->
let element =
try Str_hashtbl.find elements name with Not_found -> assert false in
element # write os enc)
(List.sort compare element_names);
(* Processing instructions: *)
List.iter
(fun name ->
List.iter
(fun pi ->
pi # write os enc)
(Str_hashtbl.find_all pinstr name)
)
(List.sort compare pinstr_names);
if doctype then
wms "]>\n";
(************************************************************)
(* VALIDATION *)
(************************************************************)
method only_deterministic_models =
Str_hashtbl.iter
(fun n el ->
let cm = el # content_model in
match cm with
Regexp _ ->
if el # content_dfa = None then
raise(Validation_error("The content model of element `" ^
n ^ "' is not deterministic"))
| _ ->
()
)
elements;
method validate =
if validated or allow_arbitrary then
()
else begin
(* Validity constraint: Notations in NDATA entity declarations must
* be declared
*)
List.iter
(fun name ->
let ent,_ =
try Str_hashtbl.find gen_entities name with Not_found -> assert false
in
if ent # is_ndata then begin
let xid = ent # ext_id in
let notation = ent # notation in
try
ignore(self # notation notation)
(* Raises Validation_error if the constraint is violated *)
with
Undeclared -> ()
end
)
gen_entity_names;
(* Validate the elements: *)
Str_hashtbl.iter
(fun n el ->
el # validate)
elements;
(* Check the root element: *)
(* TODO: Check if this piece of code is executed at all! *)
begin match root with
None -> ()
| Some r ->
begin try
let _ = Str_hashtbl.find elements r in ()
with
Not_found ->
raise(Validation_error("The root element is not declared"))
end
end;
validated <- true;
end
method invalidate =
validated <- false
(************************************************************)
end
(**********************************************************************)
and dtd_element the_dtd the_name =
object (self)
val dtd = (the_dtd : dtd)
val name = the_name
val lexerset = Pxp_lexers.get_lexer_set (the_dtd # encoding)
val mutable content_model = Unspecified
val mutable content_model_validated = false
val mutable content_dfa = lazy None
val mutable externally_declared = false
val mutable attributes =
([] : (string * ((att_type * att_default) * bool)) list)
val mutable attributes_validated = false
val mutable id_att_name = None
val mutable idref_att_names = []
val mutable allow_arbitrary = false
val mutable vr = (None : validation_record option)
method name = name
method set_cm_and_extdecl m extdecl =
if content_model = Unspecified then begin
content_model <- m;
content_model_validated <- false;
content_dfa <- lazy (self # compute_content_dfa);
externally_declared <- extdecl;
self # update_vr;
dtd # invalidate
end
else
raise(Validation_error("Element `" ^ name ^ "' has already a content model"))
method content_model = content_model
method content_dfa = Lazy.force content_dfa
method private compute_content_dfa =
match content_model with
Regexp re ->
( try Some (dfa_of_regexp_content_model re)
with Not_found -> None
)
| _ ->
None
method externally_declared = externally_declared
method encoding = dtd # encoding
method allow_arbitrary =
allow_arbitrary <- true;
self # update_vr;
method disallow_arbitrary =
allow_arbitrary <- false;
self # update_vr;
method arbitrary_allowed = allow_arbitrary
method add_attribute aname t d extdecl =
if aname <> "xml:lang" & aname <> "xml:space" then
check_name (dtd#warner) aname;
if List.mem_assoc aname attributes then
dtd # warner # warn ("More than one declaration for attribute `" ^
aname ^ "' of element type `" ^ name ^ "'")
else begin
begin match aname with
"xml:space" ->
begin match t with
A_enum l ->
let ok =
List.for_all
(fun tok -> List.mem tok ["default";"preserve"])
l
in
if not ok then
raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
| _ ->
raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
end
| _ -> ()
end;
begin match t with
A_id ->
id_att_name <- Some aname;
| (A_idref | A_idrefs) ->
idref_att_names <- aname :: idref_att_names;
| _ ->
()
end;
attributes <- (aname, ((t,d),extdecl)) :: attributes;
attributes_validated <- false;
dtd # invalidate;
self # update_vr;
end
method attribute attname =
try
fst (List.assoc attname attributes)
with
Not_found ->
if allow_arbitrary then
raise Undeclared
else
raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
^ name ^ "' not declared"))
method attribute_violates_standalone_declaration attname v =
try
let (atype, adefault), extdecl = List.assoc attname attributes in
extdecl &&
( match v with
None ->
adefault <> D_required && adefault <> D_implied
(* i.e. adefault matches D_default or D_fixed *)
| Some s ->
atype <> A_cdata &&
normalization_changes_value lexerset atype s
)
with
Not_found ->
if allow_arbitrary then
raise Undeclared
else
raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
^ name ^ "' not declared"))
method attribute_names =
List.map fst attributes
method names_of_required_attributes =
List.flatten
(List.map
(fun (n,((t,d),_)) ->
if d = D_required then
[n]
else
[])
attributes)
method id_attribute_name = id_att_name
method idref_attribute_names = idref_att_names
method private update_vr =
vr <- None
method internal_vr =
( match vr with
None ->
let n = List.length attributes in
let init_att_vals = Array.create n ("", Implied_value) in
let att_lookup = Str_hashtbl.create n in
let att_info = Array.create n (A_cdata, false) in
let att_required = ref [] in
let k = ref 0 in
List.iter
(fun (n, ((t,d), ext)) ->
Str_hashtbl.add att_lookup n !k;
let init_val =
match d with
(D_required | D_implied) -> Implied_value
| D_default v ->
value_of_attribute lexerset dtd n t v
| D_fixed v ->
value_of_attribute lexerset dtd n t v
in
init_att_vals.( !k ) <- (n, init_val);
att_info.( !k ) <- (t, match d with D_fixed _ -> true
| _ -> false);
if d = D_required then
att_required := !k :: !att_required;
incr k;
)
attributes;
vr <- Some { content_model = content_model;
content_dfa = content_dfa;
id_att_name = id_att_name;
idref_att_names = idref_att_names;
init_att_vals = init_att_vals;
att_lookup = att_lookup;
att_info = att_info;
att_required = !att_required;
accept_undeclared_atts = allow_arbitrary;
}
| _ -> ()
);
( match vr with
None -> assert false
| Some vr' -> vr'
)
method write os enc =
let encoding = self # encoding in
let wms =
write_markup_string ~from_enc:encoding ~to_enc:enc os in
let rec write_contentspec cs =
match cs with
Unspecified ->
failwith "#write: Unspecified content model found"
| Empty ->
wms "EMPTY"
| Any ->
wms "ANY"
| Mixed ml ->
wms "(";
write_mixedspec_list ml;
wms ")*";
| Regexp re ->
write_children re false
and write_mixedspec_list ml =
match ml with
MPCDATA :: ml' ->
wms "#PCDATA";
if ml' <> [] then wms "|";
write_mixedspec_list ml';
| MChild s :: ml' ->
wms s;
if ml' <> [] then wms "|";
write_mixedspec_list ml';
| [] ->
()
and write_children re cp =
match re with
Optional re' ->
let p = needs_parens re' in
if p then wms "(";
write_children re' cp;
if p then wms ")";
wms "?";
| Repeated re' ->
let p = needs_parens re' in
if p then wms "(";
write_children re' cp;
if p then wms ")";
wms "*";
| Repeated1 re' ->
let p = needs_parens re' in
if p then wms "(";
write_children re' cp;
if p then wms ")";
wms "+";
| Alt re' ->
wms "(";
( match re' with
re1' :: rer' ->
write_children re1' true;
List.iter
(fun ren' ->
wms "|";
write_children ren' true;
)
rer';
| [] ->
failwith "#write: Illegal content model"
);
wms ")";
| Seq re' ->
wms "(";
( match re' with
re1' :: rer' ->
write_children re1' true;
List.iter
(fun ren' ->
wms ",";
write_children ren' true;
)
rer';
| [] ->
failwith "#write: Illegal content model"
);
wms ")";
| Child ch ->
if not cp then wms "(";
wms ch;
if not cp then wms ")";
and needs_parens re =
match re with
(Optional _ | Repeated _ | Repeated1 _ ) -> true
| _ -> false
in
wms ("<!ELEMENT " ^ name ^ " ");
write_contentspec content_model;
wms ">\n";
wms ("<!ATTLIST " ^ name);
List.iter
(fun (n,((t,d),_)) ->
wms ("\n " ^ n);
( match t with
A_cdata -> wms " CDATA";
| A_id -> wms " ID";
| A_idref -> wms " IDREF";
| A_idrefs -> wms " IDREFS";
| A_entity -> wms " ENTITY";
| A_entities -> wms " ENTITIES";
| A_nmtoken -> wms " NMTOKEN";
| A_nmtokens -> wms " NMTOKENS";
| A_notation nl ->
wms " NOTATION (";
( match nl with
nl1:: nl' ->
wms nl1;
List.iter
(fun n ->
wms ("|" ^ n);
)
nl'
| [] ->
failwith "#write: Illegal content model";
);
wms ")";
| A_enum el ->
wms " (";
( match el with
el1:: el' ->
wms el1;
List.iter
(fun e ->
wms ("|" ^ e);
)
el'
| [] ->
failwith "#write: Illegal content model";
);
wms ")";
);
( match d with
D_required -> wms " #REQUIRED"
| D_implied -> wms " #IMPLIED"
| D_default s ->
wms " \"";
write_data_string ~from_enc:encoding ~to_enc:enc os s;
wms "\"";
| D_fixed s ->
wms " FIXED \"";
write_data_string ~from_enc:encoding ~to_enc:enc os s;
wms "\"";
);
)
(List.sort (fun (n1,x1) (n2,x2) -> compare n1 n2) attributes);
wms ">\n";
(************************************************************)
(* VALIDATION *)
(************************************************************)
method validate =
self # validate_attributes();
self # validate_content_model()
method private validate_attributes() =
if attributes_validated then
()
else begin
(* Validity Constraint: One ID per Element Type *)
let n = count (fun (n,((t,d),_)) -> t = A_id) attributes in
if n > 1 then
raise(Validation_error("More than one ID attribute for element `" ^ name ^ "'"));
(* Validity Constraint: ID Attribute Default *)
if List.exists
(fun (n,((t,d),_)) ->
t = A_id & (d <> D_required & d <> D_implied))
attributes
then
raise(Validation_error("ID attribute must be #IMPLIED or #REQUIRED; element `" ^ name ^ "'"));
(* Validity Constraint: One Notation per Element Type *)
let n = count (fun (n,((t,d),_)) ->
match t with A_notation _ -> true | _ -> false)
attributes in
if n > 1 then
raise(Validation_error("More than one NOTATION attribute for element `" ^ name ^ "'"));
(* Validity Constraint: Notation Attributes [second part] *)
List.iter
(fun (n,((t,d),_)) ->
match t with
A_notation l ->
List.iter
(fun nname ->
let _ = dtd # notation nname in ())
l
| _ -> ())
attributes;
(* Validity Constraint: Attribute Default Legal *)
List.iter
(fun (n,((t,d),_)) ->
let check v =
let lexical_error() =
lazy (raise(Validation_error("Default value for attribute `" ^ n ^ "' is lexically malformed"))) in
check_attribute_value_lexically lexerset (lexical_error()) t v;
begin match t with
(A_entity|A_entities) ->
List.iter
(fun nd ->
let en, extdecl = dtd # gen_entity nd in
if not (en # is_ndata) then
raise(Validation_error("Attribute default value must be the name of an NDATA entity; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
(* if dtd # standalone_declaration && extdecl then
raise(Validation_error("Attribute default value violates the standalone declaration; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
-- This is checked anyway when the attribute value is normalized
*)
)
(split_attribute_value lexerset v)
| A_notation nl ->
if not (List.mem v nl) then
raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
| A_enum nl ->
if not (List.mem v nl) then
raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
| _ -> ()
end
in
match d with
D_required -> ()
| D_implied -> ()
| D_default v -> check v
| D_fixed v -> check v
)
attributes;
(* Ok: This element declaration is valid *)
attributes_validated <- true;
end
method private validate_content_model () =
(* checks:
* - Validity Constraint: No Duplicate Types
* It is not an error if there is a child in the declaration for which
* no element declaration is provided.
*)
match content_model with
Unspecified ->
dtd # warner # warn ("Element type `" ^ name ^ "' mentioned but not declared");
()
| Empty -> ()
| Any -> ()
| Mixed (pcdata :: l) ->
(* MPCDATA is always the first element by construction *)
assert (pcdata = MPCDATA);
if check_dups l then
raise (Validation_error("Double children in declaration for element `" ^ name ^ "'"))
| Regexp _ -> ()
| _ -> assert false
(************************************************************)
end
and dtd_notation the_name the_xid init_encoding =
object (self)
val name = the_name
val xid = (the_xid : ext_id)
val encoding = (init_encoding : Pxp_types.rep_encoding)
method name = name
method ext_id = xid
method encoding = encoding
method write os enc =
let wms =
write_markup_string ~from_enc:encoding ~to_enc:enc os in
let write_sysid s =
if String.contains s '"' then
wms ("'" ^ s ^ "'")
else
wms ("\"" ^ s ^ "\"");
in
wms ("<!NOTATION " ^ name ^ " ");
( match xid with
System s ->
wms "SYSTEM ";
write_sysid s;
| Public (p,s) ->
wms "PUBLIC ";
write_sysid p;
if (s <> "") then begin
wms " ";
write_sysid s;
end;
| Anonymous ->
failwith "#write: External ID `Anonymous' cannot be represented"
| Private _ ->
failwith "#write: External ID `Private' cannot be represented"
);
wms ">\n";
end
and proc_instruction the_target the_value init_encoding =
object (self)
val target = the_target
val value = (the_value : string)
val encoding = (init_encoding : Pxp_types.rep_encoding)
initializer
match target with
("xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML") ->
(* This is an error, not a warning, because I do not have a
* "warner" object by hand.
*)
raise(WF_error("Reserved processing instruction"))
| _ -> ()
method target = target
method value = value
method encoding = encoding
method write os enc =
let wms =
write_markup_string ~from_enc:encoding ~to_enc:enc os in
wms "<?";
wms target;
wms " ";
wms value;
wms "?>";
method parse_pxp_option =
let lexers = get_lexer_set encoding in
try
let toks = tokens_of_xml_pi lexers value in (* may raise WF_error *)
begin match toks with
(Pro_name option_name) :: toks' ->
let atts = decode_xml_pi toks' in (* may raise WF_error *)
(target, option_name, atts)
| _ ->
raise(Error("Bad PXP processing instruction"))
end
with
WF_error _ ->
raise(Error("Bad PXP processing instruction"))
end
;;
type source =
Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
| ExtID of (ext_id * Pxp_reader.resolver)
;;
module Entity = struct
let get_name ent = ent # name
let get_full_name ent = ent # full_name
let get_encoding ent = ent # encoding
let get_type ent =
if ent # is_ndata then `NDATA else
try ignore(ent # ext_id); `External with Not_found -> `Internal
let replacement_text ent = fst(ent # replacement_text)
let get_xid ent =
try Some(ent # ext_id) with Not_found -> None
let get_notation ent =
if ent # is_ndata then Some (ent # notation) else None
let create_internal_entity ~name ~value dtd =
new internal_entity dtd name (dtd # warner) value false false
(dtd # encoding)
let create_ndata_entity ~name ~xid ~notation dtd =
new ndata_entity name xid notation dtd#encoding
let create_external_entity ?(doc_entity = false) ~name ~xid ~resolver dtd =
if doc_entity then
new document_entity resolver dtd name dtd#warner xid dtd#encoding
else
new external_entity resolver dtd name dtd#warner xid false dtd#encoding
let from_external_source ?doc_entity ~name dtd src =
match src with
ExtID(xid,resolver) ->
create_external_entity ?doc_entity ~name ~xid ~resolver dtd
| Entity(make,resolver) ->
make dtd (* resolver ignored *)
end
(* ======================================================================
* History:
*
* $Log: pxp_dtd.ml,v $
* Revision 1.21 2002/03/10 23:39:28 gerd
* Extended the Entity module
*
* Revision 1.20 2001/12/03 23:45:55 gerd
* new method [write_ref]
*
* Revision 1.19 2001/07/02 23:21:40 gerd
* Added the Entity module.
*
* Revision 1.18 2001/06/29 13:57:30 gerd
* Weakened the xml:space check.
*
* Revision 1.17 2001/06/08 01:15:46 gerd
* Moved namespace_manager from Pxp_document to Pxp_dtd. This
* makes it possible that the DTD can recognize the processing instructions
* <?pxp:dtd namespace prefix="..." uri="..."?>, and add the namespace
* declaration to the manager.
*
* Revision 1.16 2001/06/07 22:48:38 gerd
* Improvement: 'write' writes sorted attributes. This makes
* many regression tests simpler.
*
* Revision 1.15 2001/04/22 14:14:41 gerd
* Updated to support private IDs.
*
* Revision 1.14 2000/10/01 19:47:19 gerd
* Using Str_hashtbl instead of Hashtbl.
*
* Revision 1.13 2000/09/22 22:54:30 gerd
* Optimized the attribute checker (internal_init of element
* nodes). The validation_record has now more fields to support
* internal_init.
*
* Revision 1.12 2000/09/16 22:40:50 gerd
* Bug processing processing instructions: Method
* pinstr_names returned wrong results; method write wrote
* the wrong instructions.
*
* Revision 1.11 2000/09/09 16:41:32 gerd
* New type validation_record.
*
* Revision 1.10 2000/08/18 21:18:45 gerd
* Updated wrong comments for methods par_entity and gen_entity.
* These can raise WF_error and not Validation_error, and this is the
* correct behaviour.
*
* Revision 1.9 2000/07/25 00:30:01 gerd
* Added support for pxp:dtd PI options.
*
* Revision 1.8 2000/07/23 02:16:34 gerd
* Support for DFAs.
*
* Revision 1.7 2000/07/16 17:50:01 gerd
* Fixes in 'write'
*
* Revision 1.6 2000/07/16 16:34:41 gerd
* New method 'write', the successor of 'write_compact_as_latin1'.
*
* Revision 1.5 2000/07/14 13:56:48 gerd
* Added methods id_attribute_name and idref_attribute_names.
*
* Revision 1.4 2000/07/09 00:13:37 gerd
* Added methods gen_entity_names, par_entity_names.
*
* Revision 1.3 2000/07/04 22:10:55 gerd
* Update: collect_warnings -> drop_warnings.
* Update: Case ext_id = Anonymous.
*
* Revision 1.2 2000/06/14 22:19:06 gerd
* Added checks such that it is impossible to mix encodings.
*
* Revision 1.1 2000/05/29 23:48:38 gerd
* Changed module names:
* Markup_aux into Pxp_aux
* Markup_codewriter into Pxp_codewriter
* Markup_document into Pxp_document
* Markup_dtd into Pxp_dtd
* Markup_entity into Pxp_entity
* Markup_lexer_types into Pxp_lexer_types
* Markup_reader into Pxp_reader
* Markup_types into Pxp_types
* Markup_yacc into Pxp_yacc
* See directory "compatibility" for (almost) compatible wrappers emulating
* Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
*
* ======================================================================
*
* Revision 1.18 2000/05/28 17:24:55 gerd
* Bugfixes.
*
* Revision 1.17 2000/05/27 19:21:25 gerd
* Implemented the changes of rev. 1.10 of markup_dtd.mli.
*
* Revision 1.16 2000/05/20 20:31:40 gerd
* Big change: Added support for various encodings of the
* internal representation.
*
* Revision 1.15 2000/05/14 21:50:07 gerd
* Updated: change in internal_entity.
*
* Revision 1.14 2000/05/06 23:08:46 gerd
* It is possible to allow undeclared attributes.
*
* Revision 1.13 2000/05/01 20:42:46 gerd
* New method write_compact_as_latin1.
*
* Revision 1.12 2000/05/01 15:16:57 gerd
* The errors "undeclared parameter/general entities" are
* well-formedness errors, not validation errors.
*
* Revision 1.11 2000/03/11 22:58:15 gerd
* Updated to support Markup_codewriter.
*
* Revision 1.10 2000/01/20 20:53:47 gerd
* Changed such that it runs with Markup_entity's new interface.
*
* Revision 1.9 1999/11/09 22:15:41 gerd
* Added method "arbitrary_allowed".
*
* Revision 1.8 1999/09/01 22:52:22 gerd
* If 'allow_arbitrary' is in effect, no validation happens anymore.
*
* Revision 1.7 1999/09/01 16:21:24 gerd
* Added several warnings.
* The attribute type of "xml:space" is now strictly checked.
*
* Revision 1.6 1999/08/15 20:34:21 gerd
* Improved error messages.
* Bugfix: It is no longer allowed to create processing instructions
* with target "xml".
*
* Revision 1.5 1999/08/15 02:20:16 gerd
* New feature: a DTD can allow arbitrary elements.
*
* Revision 1.4 1999/08/15 00:21:39 gerd
* Comments have been updated.
*
* Revision 1.3 1999/08/14 22:12:52 gerd
* Several functions have now a "warner" as argument which is
* an object with a "warn" method. This is used to warn about characters
* that cannot be represented in the Latin 1 alphabet.
* Bugfix: if two general entities with the same name are definied,
* the first counts, not the second.
*
* Revision 1.2 1999/08/11 14:56:35 gerd
* Declaration of the predfined entities {lt,gt,amp,quot,apos}
* is no longer forbidden; but the original definition cannot be overriddden.
* TODO: If these entities are redeclared with problematic values,
* the user should be warned.
*
* Revision 1.1 1999/08/10 00:35:51 gerd
* Initial revision.
*
*
*)