module Pxp_document: Tree representation of XML documents
sig
..end
Intro_trees
.type
node_type =
| |
T_element of |
(* | An element node with this element type | *) |
| |
T_data |
(* | A data node | *) |
| |
T_super_root |
(* | The super root node | *) |
| |
T_pinstr of |
(* | A processing instruction with this target | *) |
| |
T_comment |
(* | A comment | *) |
| |
T_none |
(* | Sometimes used if the nodes are non-standard | *) |
| |
T_attribute of |
(* | An attribute node for this attribute name | *) |
| |
T_namespace of |
(* | A namespace node for this normalized prefix | *) |
T_element name
: The node is an element and has element type name
T_data
: The node is a data node
T_super_root
: The node is a super root node
T_pinstr name
: The node contains a processing instruction with
target name
T_comment
: The node is a comment
T_attribute name
: The node contains an attribute called name
T_namespace prefix
: The node identifies a namespace for the
normalized prefix
T_none
: This is a "bottom value" used if there is no reasonable
type.
type
data_node_classification =
| |
CD_normal |
| |
CD_other |
| |
CD_empty |
| |
CD_ignorable |
| |
CD_error of |
Result values of classify_data_node:
CD_normal
: Adding n does not violate any validation constraint.
CD_other
: n is not a data node.
CD_empty
: The element obj is declared as EMPTY, and n contains the empty string.
It is allowed to append n, but it does not make sense.
CD_ignorable
: The element obj is declared such that
it is forbidden to put character data into it. However,
the node n only contains white space, which is allowed
as an exception to this rule. This means that it is allowed
to append n, but n would not contain any information
except formatting hints.
CD_error e
: It is an error to append n. The exception e, usually a Validation_error,
contains details about the problem.
class type['a]
extension =object
..end
extension
is, as the name says, the extensible part of the
nodes.
class type['a node #extension as 'a]
node =object
..end
node
defines the interface of the nodes that are part
of XML document trees.
class['a node #extension as 'a]
data_impl :'a ->
['a]
node
node
which
realizes data nodes.
class['a node #extension as 'a]
element_impl :'a ->
['a]
node
node
which
realizes element nodes.
class['a node #extension as 'a]
comment_impl :'a ->
['a]
node
node
which
realizes comment nodes.
class['a node #extension as 'a]
pinstr_impl :'a ->
['a]
node
node
which
realizes processing instruction nodes.
class['a node #extension as 'a]
super_root_impl :'a ->
['a]
node
node
which
realizes super root nodes.
class['a node #extension as 'a]
attribute_impl :element:string -> name:string -> Pxp_types.att_value -> Pxp_dtd.dtd ->
['a]
node
node
which
realizes attribute nodes.
class['a node #extension as 'a]
namespace_impl :string -> string -> Pxp_dtd.dtd ->
['a]
node
class['a node #extension as 'a]
namespace_element_impl :'a ->
['a]
node
node
which
realizes element nodes.
class['a node #extension as 'a]
namespace_attribute_impl :element:string -> name:string -> Pxp_types.att_value -> Pxp_dtd.dtd ->
['a]
node
val pinstr : ('a node #extension as 'a) node ->
Pxp_dtd.proc_instruction
pinstr n
:
Returns the processing instruction contained in a
processing instruction node.
This function raises Invalid_argument
if invoked for a different node
type than T_pinstr
.val attribute_name : ('a node #extension as 'a) node ->
string
attribute_name n
Returns the name of the attribute contained in an attribute
node. Raises Invalid_argument
if n
does not have node type
T_attribute
.val attribute_value : ('a node #extension as 'a) node ->
Pxp_types.att_value
attribute_value n
:
Returns the value of the attribute contained in an attribute
node. Raises Invalid_argument
if n
does not have node type
T_attribute
.val attribute_string_value : ('a node #extension as 'a) node ->
string
attribute_string_value n
:
Returns the string value of the attribute contained in an attribute
node. Raises Invalid_argument
if n
does not have node type
T_attribute
.val namespace_normprefix : ('a node #extension as 'a) node ->
string
Invalid_argument
if n
does not have node type
T_namespace
.val namespace_display_prefix : ('a node #extension as 'a) node ->
string
Invalid_argument
if n
does not have node type
T_namespace
.val namespace_uri : ('a node #extension as 'a) node ->
string
Invalid_argument
if n
does not have node type
T_namespace
.type 'a node #extension as 'a
spec
val make_spec_from_mapping : ?super_root_exemplar:('a node #extension as 'a)
node ->
?comment_exemplar:'a node ->
?default_pinstr_exemplar:'a node ->
?pinstr_mapping:(string, 'a node) Hashtbl.t ->
data_exemplar:'a node ->
default_element_exemplar:'a node ->
element_mapping:(string, 'a node) Hashtbl.t ->
unit -> 'a spec
make_spec_from_mapping
~super_root_exemplar ~comment_exemplar ~default_pinstr_exemplar
~pinstr_mapping ~data_exemplar ~default_element_exemplar
~element_mapping
()
:
Creates a spec
from the arguments. Some arguments are optional,
some arguments are mandatory.
super_root_exemplar
: Specifies the exemplar to be used for
new super root nodes. This exemplar is optional.
comment_exemplar
: Specifies the exemplar to be used for
new comment nodes. This exemplar is optional.
pinstr_mapping
: Specifies the exemplars to be used for
new processing instruction nodes by a hashtable mapping target
names to exemplars. This hashtable is optional.
default_pinstr_exemplar
: Specifies the exemplar to be used for
new processing instruction nodes. This exemplar will be used
for targets that are not contained in the ~pinstr_mapping
hashtable. This exemplar is optional.
data_exemplar
: Specifies the exemplar to be used for
new data nodes. This exemplar is mandatory.
element_mapping
: Specifies the exemplars to be used for
new element nodes by a hashtable mapping element types to
exemplars. This hashtable is mandatory (but may be empty).
default_element_exemplar
: Specifies the exemplar to be used for
new element nodes. This exemplar will be used
for element types that are not contained in the ~element_mapping
hashtable. This exemplar is mandatory.
val make_spec_from_alist : ?super_root_exemplar:('a node #extension as 'a)
node ->
?comment_exemplar:'a node ->
?default_pinstr_exemplar:'a node ->
?pinstr_alist:(string * 'a node) list ->
data_exemplar:'a node ->
default_element_exemplar:'a node ->
element_alist:(string * 'a node) list ->
unit -> 'a spec
make_spec_from_alist
~super_root_exemplar ~comment_exemplar ~default_pinstr_exemplar
~pinstr_alist ~data_exemplar ~default_element_exemplar
~element_alist
()
:
Creates a spec
from the arguments. This is a convenience
function for make_spec_from_mapping
; instead of requiring hashtables,
the function accepts association lists.
val get_data_exemplar : ('a node #extension as 'a) spec ->
'a node
val get_element_exemplar : ('a node #extension as 'a) spec ->
string -> (string * string) list -> 'a node
val get_super_root_exemplar : ('a node #extension as 'a) spec ->
'a node
val get_comment_exemplar : ('a node #extension as 'a) spec ->
'a node
val get_pinstr_exemplar : ('a node #extension as 'a) spec ->
Pxp_dtd.proc_instruction -> 'a node
Not_found
).val create_data_node : ('a node #extension as 'a) spec ->
Pxp_dtd.dtd -> string -> 'a node
create_data_node spec dtd datastring
:
Creates a new data node from the exemplar contained in spec
.
The new node contains datastring
and is connected with the dtd
.val create_element_node : ?name_pool_for_attribute_values:Pxp_types.pool ->
?entity_id:Pxp_types.entity_id ->
?position:string * int * int ->
?valcheck:bool ->
?att_values:(string * Pxp_types.att_value) list ->
('a node #extension as 'a) spec ->
Pxp_dtd.dtd -> string -> (string * string) list -> 'a node
create_element_node ~name_pool_for_attribute_values
~position ~valcheck ~att_values spec dtd eltype
att_list
:
Creates a new element node from the exemplar(s) contained in
spec
:dtd
.eltype
.att_list
and att_values
; att_list
passes attribute values
as strings while att_values
passes attribute values as
type att_value
~position
(if passed)~name_pool_for_attribute_values
will be used, if passed.
If ~valcheck = true
(the default), the attribute list is
immediately validated. If ~valcheck = false
, the validation
is left out; in this case you can pass any element type
and any attributes, and it does not matter whether and how
they are declared.
Even in well-formedness mode it is fine to pass valcheck=true
as this mode is implemented by weakening the validation
constraints in the DTD object. See
Parsing in well-formedness mode for explanations.
val create_super_root_node : ?entity_id:Pxp_types.entity_id ->
?position:string * int * int ->
('a node #extension as 'a) spec ->
Pxp_dtd.dtd -> 'a node
create_super_root_node ~position spec dtd
:
Creates a new super root node from the exemplar contained in
spec
. The new node is connected to dtd
, and the position
triple is set to ~position
.
The function fails if there is no super root exemplar in spec
.
val create_comment_node : ?entity_id:Pxp_types.entity_id ->
?position:string * int * int ->
('a node #extension as 'a) spec ->
Pxp_dtd.dtd -> string -> 'a node
create_comment_node ~position spec dtd commentstring
:
Creates a new comment node from the exemplar contained in
spec
. The new node is connected to dtd
, and the position
triple is set to ~position
. The contents of the node are set
to commentstring
.
The function fails if there is no comment exemplar in spec
.
val create_pinstr_node : ?entity_id:Pxp_types.entity_id ->
?position:string * int * int ->
('a node #extension as 'a) spec ->
Pxp_dtd.dtd -> Pxp_dtd.proc_instruction -> 'a node
create_pinstr_node ~position spec dtd pi
:
Creates a new processing instruction node from the exemplar
contained in spec
. The new node is connected to dtd
, and the
position triple is set to ~position
. The contents of the node are set
to pi
.
The function fails if there is no processing instruction exemplar in
spec
.
val create_no_node : ?entity_id:Pxp_types.entity_id ->
?position:string * int * int ->
('a node #extension as 'a) spec ->
Pxp_dtd.dtd -> 'a node
The functions compare
and ord_compare
implement the so-called
"document order". The basic principle is that the nodes are linearly
ordered by their occurrence in the textual XML representation of the
tree. While this is clear for element nodes, data nodes, comments, and
processing instructions, a more detailed definition is necessary for the
other node types. In particular, attribute nodes of an element node
occur before any regular subnode of the element, and namespace nodes
of that element occur even before the attribute nodes. So the order
of nodes of
<sample a1="5" a2="6"><subnode/></sample>
is: the element node sample, then its attribute nodes a1 and a2, and
finally the element node subnode.
If there is a super root node, it will be handled as the very first
node.
val compare : ('a node #extension as 'a) node ->
'a node -> int
compare n1 n2
:
Returns -1 if n1
occurs before n2
, or +1 if n1
occurs
after n2
, or 0 if both nodes are identical.
If the nodes are unrelated (do not have a common ancestor), the result
is undefined (Note: this case is different from ord_compare
).
This test is rather slow, but it works even if the XML tree changes
dynamically (in contrast to ord_compare
below).type 'a node #extension as 'a
ord_index
val create_ord_index : ('a node #extension as 'a) node ->
'a ord_index
create_ord_index startnode
:
Creates an ordinal index for the subtree starting at startnode
.
This index assigns to every node an ordinal number (beginning with 0) such
that nodes are numbered upon the order of the first character in the XML
representation (document order).
Note that the index is not automatically updated when the tree is
modified.val ord_number : ('a node #extension as 'a) ord_index ->
'a node -> int
Not_found
.
Note that attribute nodes and namespace nodes are treated specially:
All attribute nodes for a certain element node have the _same_
ordinal index. All namespace nodes for a certain element node
have the _same_ ordinal index.
(So ord_number x = ord_number y does not imply x == y for these
nodes. However, this is true for the other node types.)
It is not recommended to work with the ordinal number directly but
to call ord_compare which already handles the special cases.
val ord_compare : ('a node #extension as 'a) ord_index ->
'a node -> 'a node -> int
ord_compare idx n1 n2
:
Compares two nodes like compare
:
Returns -1 if n1
occurs before n2
, or +1 if n1
occurs
after n2
, or 0 if both nodes are identical.
If one of the nodes does not occur in the ordinal index, Not_found
is raised. (Note that this is a different behaviour than what compare
would do.)
This test is much faster than compare
.
val find : ?deeply:bool ->
(('a node #extension as 'a) node ->
bool) ->
'a node -> 'a node
find ~deeply f startnode
Searches the first node in the tree below startnode
for which
the predicate f is true, and returns it. Raises Not_found
if there is no such node.
By default, ~deeply=false
. In this case, only the children of
startnode
are searched.
If passing ~deeply=true
, the children are searched recursively
(depth-first search). Note that even in this case startnode
itself
is not checked.
Attribute and namespace nodes are ignored.
val find_all : ?deeply:bool ->
(('a node #extension as 'a) node ->
bool) ->
'a node -> 'a node list
find_all ~deeply f startnode
:
Searches all nodes in the tree below startnode
for which
the predicate f is true, and returns them.
By default, ~deeply=false
. In this case, only the children of
startnode
are searched.
If passing ~deeply=true
, the children are searched recursively
(depth-first search). Note that even in this case startnode
itself
is not checked.
Attribute and namespace nodes are ignored.
val find_element : ?deeply:bool ->
string ->
('a node #extension as 'a) node ->
'a node
find_element ~deeply eltype startnode
:
Searches the first element in the tree below startnode
that has the element type eltype
, and returns it. Raises Not_found
if there is no such node.
By default, ~deeply=false
. In this case, only the children of
startnode
are searched.
If passing ~deeply=true
, the children are searched recursively
(depth-first search). Note that even in this case startnode
itself
is not checked.
val find_all_elements : ?deeply:bool ->
string ->
('a node #extension as 'a) node ->
'a node list
find_all_elements ~deeply eltype startnode
:
Searches all elements in the tree below startnode
having the element type eltype
, and returns them.
By default, ~deeply=false
. In this case, only the children of
startnode
are searched.
If passing ~deeply=true
, the children are searched recursively
(depth-first search). Note that even in this case startnode
itself
is not checked.
exception Skip
This exception can be raised by the functions passed to map_tree
, map_tree_sibl
, iter_tree
, and iter_tree_sibl
to skip the current node, and to proceed with the next node.
See these functions for details.
val map_tree : pre:(('a node #extension as 'a) node ->
('b node #extension as 'b) node) ->
?post:('b node -> 'b node) ->
'a node -> 'b node
map_tree ~pre ~post startnode
Maps the tree beginning at startnode
to a second tree
using the following algorithm.
startnode
and the whole tree below it are recursively traversed.
After entering a node, the function ~pre is called. The result of
this function must be a new node; it must not have children nor a
parent. For example, you can pass
~pre:(fun n -> n # orphaned_flat_clone)
to copy the original node. After that, the children are processed
in the same way (from left to right) resulting in a list of
mapped children. These are added to the mapped node as its
children.
Now, the ~post function is invoked with the mapped node as argument, and the result is the result of the function (~post should return a root node, too; if not specified, the identity is the ~post function).
Both ~pre and ~post may raise Skip
, which causes the node to be
left out (i.e. the mapped tree contains neither the node nor
any children of the node).
If the top node is skipped, the exception Not_found
is
raised.
For example, the following piece of code duplicates a tree, but removes all comment nodes:
map_tree ~pre:(fun n -> if n # node_type = T_comment then raise Skip else n # orphaned_flat_clone) startnode
Attribute and namespace nodes are ignored.
val map_tree_sibl : pre:(('a node #extension as 'a) node
option ->
'a node ->
'a node option ->
('b node #extension as 'b) node) ->
?post:('b node option ->
'b node ->
'b node option -> 'b node) ->
'a node -> 'b node
map_tree_sibl ~pre ~post startnode
:
Maps the tree beginning at startnode
to a second tree
using the following algorithm.
startnode
and the whole tree below it are recursively traversed.
After entering a node, the function ~pre is called with three
arguments: some previous node, the current node, and some next node.
The previous and the next node may not exist because the current
node is the first or the last in the current list of nodes.
In this case, None
is passed as previous or next node, resp.
The result of this function invocation must be a new node;
it must not have children nor a parent. For example, you can pass
~pre:(fun prev n next -> n # orphaned_flat_clone)
to copy the original node. After that, the children are processed
in the same way (from left to right) resulting in a list of
mapped children.
Now, the ~post function is applied to the list of mapped children
resulting in a list of postprocessed children. (Note: this part
works rather differently than map_tree
.) ~post has three arguments:
some previous child, the current child, and some next child.
The previous and the next child are None
if non-existing.
The postprocessed children are appended to the mapped node resulting
in the mapped tree.
Both ~pre and ~post may raise Skip
, which causes the node to be
left out (i.e. the mapped tree contains neither the node nor
any children of the node).
If the top node is skipped, the exception Not_found
is
raised.
Attribute and namespace nodes are ignored.
val iter_tree : ?pre:(('a node #extension as 'a) node ->
unit) ->
?post:('a node -> unit) -> 'a node -> unit
iter_tree ~pre ~post startnode
:
Iterates over the tree beginning at startnode
using the following algorithm.
startnode
and the whole tree below it are recursively traversed.
After entering a node, the function ~pre is called. Now, the children
are processed recursively. Finally, the ~post function is invoked.
The ~pre function may raise Skip
; in that case the children
and the invocation of the ~post function are skipped.
If the ~post function raises Skip
, nothing special happens.
Attribute and namespace nodes are ignored.
val iter_tree_sibl : ?pre:(('a node #extension as 'a) node
option -> 'a node -> 'a node option -> unit) ->
?post:('a node option ->
'a node -> 'a node option -> unit) ->
'a node -> unit
iter_tree_sibl ~pre ~post startnode
:
Iterates over the tree beginning at startnode
using the following algorithm.
startnode
and the whole tree below it are recursively traversed.
After entering a node, the function ~pre is called with three
arguments: some previous node, the current node, and some next node.
The previous and the next node may be None
if non-existing.
Now, the children are processed recursively.
Finally, the ~post function is invoked with the same three
arguments.
The ~pre function may raise Skip
; in that case the children
and the invocation of the ~post function are skipped.
If the ~post function raises Skip
, nothing special happens.
Attribute and namespace nodes are ignored.
type stripping_mode =
[ `Disabled | `Strip_one | `Strip_one_lf | `Strip_seq ]
`Strip_one_lf
: If there is a linefeed character at the beginning/at
the end, it will be removed. If there are more linefeed characters,
only the first/the last is removed.
(This is the SGML rule to strip whitespace.)
`Strip_one
: If there is a whitespace character at the beginning/at
the end, it will be removed. If there are more whitespace characters,
only the first/the last is removed. Whitespace characters are space,
newline, carriage return, and tab.
`Strip_seq
: All whitespace characters at the beginning/at the end are
removed.
`Disabled
: Do not strip whitespace.
val strip_whitespace : ?force:bool ->
?left:stripping_mode ->
?right:stripping_mode ->
?delete_empty_nodes:bool ->
('a node #extension as 'a) node ->
unit
strip_whitespace ~force ~left ~right ~delete_empty_nodes startnode
:
Modifies the passed tree in-place by the following rules:
- Whitespace stripping is not applied to nodes inside an xml:space="preserve"
region, unless ~force:true
is passed
to the function (default is ~force:false
). Only if whitespace
stripping is allowed, the following rules are carried out.
Note that the detection of regions with preserved whitespace takes
the parent nodes of the passed startnode
into account.
- For a data node, whitespace at the beginning of the node
is removed according to ~left
, and whitespace at the end of the node
is removed according to ~right
.
- For an element, whitespace at the beginning of the first data subnode
is removed according to ~left
, and whitespace at the end
of the last data subnode is removed according to ~right
. Furthermore,
these rules are recursively applied to all subelements (but not to
other node types).
- Option ~delete_empty_nodes
(default true):
If data nodes become empty after removal of whitespace, they are
deleted from the XML tree.
Defaults:
~force:false
~left:`Disabled
~right:`Disabled
val normalize : ('a node #extension as 'a) node ->
unit
normalize startnode
:
Normalizes the tree denoted by startnode
such that
neither empty data nodes nor adjacent data nodes exist. Normalization
works in-place.
The tree parsers always return normalized trees. This function may
still be useful to enforce normalized trees after modifying them.
val validate : ('a node #extension as 'a) node ->
unit
validate startnode
:
Validates the tree denoted by startnode
. In contrast to
startnode # validate()
this function validates recursively.
class['a node #extension as 'a]
document :?swarner:Pxp_types.symbolic_warnings -> Pxp_types.collect_warnings -> Pxp_types.rep_encoding ->
object
..end
#install_printer
directive of the toploop
val print_node : ('a node #extension as 'a) node ->
unit
val print_doc : ('a node #extension as 'a) document ->
unit
exception Error_event of exn
E_error
event
type 'a node #extension as 'a
solid_xml =[ `Document of 'a document | `Node of 'a node ]
`Node n
, or a closed `Document
val solidify : ?dtd:Pxp_dtd.dtd ->
Pxp_types.config ->
('a node #extension as 'a) spec ->
(unit -> Pxp_types.event option) -> 'a solid_xml
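Reads the event stream by calling the unit->event
function, and
creates a node tree according to config, dtd, spec.
The event stream may be either:
- A document stream (as generated with `Entry_document
).
In this case `Document d
is returned.
- An element stream (as generated with `Entry_element_content
).
In this case `Node n
is returned.
Other kinds of streams are not accepted (for example those generated with `Entry_content
). An attempt will result in an exception.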
Document streams contain a DTD. The found DTD is used for the node tree. Content streams, on the contrary, do not contain DTDs. In this case, an empty DTD is created (in well-formedness mode).
The dtd
argument overrides any DTD, no matter whether found
in the stream or freshly created.
If the DTD allows validation, the returned tree is validated.
The data nodes are not normalized unless the arriving data events
are already normalized. To get this effect, filter the stream
with Pxp_event.norm_cdata_filter
before calling solidify.
Ignorable whitespace is not automatically removed. To get this
effect, filter the stream with
Pxp_event.drop_ignorable_whitespace_filter
before calling solidify.
The uniqueness of ID attributes is not checked.
val liquefy : ?omit_end:bool ->
?omit_positions:bool ->
('a node #extension as 'a) solid_xml ->
'b -> Pxp_types.event option
The inverse of solidify
: The passed node or document is transformed
into an event stream.
omit_end
: If true, the E_end_of_stream
event is omitted at the end.
Useful to concatenate several streams. Default: false.
omit_positions
: If true, no E_position
events are generated.
Default: false.