Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: pxp_event.mli 719 2007-05-07 15:04:40Z gerd $
 * ----------------------------------------------------------------------
 * PXP: The polymorphic XML parser for Objective Caml.
 * Copyright by Gerd Stolpmann. See LICENSE for details.
 *)

open Pxp_types
open Pxp_dtd

(**********************************************************************)
(*                   Event streams and lists                          *)
(**********************************************************************)

val to_list : (unit -> event option) -> event list
  (* Fetch all events from the event stream, and return the corresponding 
   * list of events.
   *)

val of_list : event list -> (unit -> event option)
  (* Pull the events from the input list *)

val concat : (unit -> event option) list -> 
             (unit -> event option) 
  (* Pull the events from the streams in turn *)

val iter : (event -> unit) -> (unit -> event option) -> unit
  (* Iterates over the events of the stream and calls the function *)

(* Missing: map, fold, ... *)

val extract : event -> (unit -> event option) -> (unit -> event option)
  (* let next' = extract e next:
   * Extracts a subexpression from the stream [next] prepended by [e].
   * A subexpression consists of either
   * - a single data, comment, PI, or error event
   * - a start tag, either of an element, a super root, or a document,
   *   until the corresponding end tag
   * - a position event followed by a subexpression
   * The returned stream contains all events of the subexpression.
   * When the extracted stream is read, the original stream is read, too.
   *
   * Example:
   * let l = [ E_pinstr; E_start_tag; E_data; E_start_tag; E_end_tag;
   *           E_comment; E_end_tag; E_data ];;
   * let g = of_list l;;
   * g();;
   * let Some e = g();;         (* e = E_start_tag *)
   * let g' = extract e g;;
   * g'();;                     (* returns Some E_start_tag *)
   * ...
   * g'();;                     (* returns Some E_end_tag *)
   * g'();;                     (* returns None, end of subexpression *)
   * g();;                      (* returns Some E_data *)
   * g();;                      (* returns None *)
   *)


(**********************************************************************)
(*                            Filters                                 *)
(**********************************************************************)

type filter = (unit -> event option) -> (unit -> event option)

val norm_cdata_filter : filter
  (* This filter
   *  - removes empty E_char_data events
   *  - concatenates adjacent E_char_data events
   * but does not touch any other parts of the event stream.
   *)

val drop_ignorable_whitespace_filter : filter
  (* This filter 
   *  - checks whether character data between elements in a 
   *    "regexp" or "non-PCDATA mixed" content model consists 
   *    only of whitespace, and
   *  - removes these whitespace characters from the event stream.
   * If the check fails, a WF_Error will be raised.
   *
   * This filter works only if the DTD found in the event stream
   * actually contains element declarations. This is usually enabled
   * by including the `Extend_dtd_fully or `Val_mode_dtd options to 
   * the [entry] passed to the [create_pull_parser] call. Furthermore, 
   * there must be an E_start_doc event.
   *
   * This filter does not perform any other validation checks.
   *)

val pfilter : (event -> bool) -> filter
  (* Filters an event stream by a predicate
   *
   * Example: Remove comments:
   * pfilter (function E_comment _ -> false | _ -> true) g
   *)


(* Missing: ID check *)

(**********************************************************************)
(*                            Printing                                *)
(**********************************************************************)

type dtd_style =
    [ `Ignore
    | `Include
    | `Reference
    ]

val write_events : 
  ?default:string ->        (* Default: none *)
  ?dtd_style:dtd_style ->   (* Default: `Include *)
  ?minimization:[`AllEmpty | `None] ->   (* Default: `None *)
  output_stream -> 
  encoding -> 
  rep_encoding -> 
  (unit -> event option) -> 
    unit
  (* Writes the events to the [output_stream]. The events must be encoded
   * as indicated by the [rep_encoding] argument, but the output is written
   * as specified by the [encoding] argument.
   *
   * The normalized namespace prefixes are declared as needed. Additionally,
   * one can set the default namespace by passing [default], which must be
   * the normalized prefix of the default namespace.
   *
   * For E_doc_start events, the DTD may be written. This is controlled by
   * [dtd_style]:
   * - `Ignore: No DOCTYPE clause is written
   * - `Include: The DOCTYPE clause is written, and the DTD is included
   *   in the internal subset
   * - `Reference: The DOCTYPE clause is written as a reference to an
   *   external DTD
   *
   * Option [~minimization]: How to write out empty elements. [`AllEmpty]
   * means that all empty elements are minimized (using the <name/>
   * form). [`None] does not minimize at all and is the default.
   *)

val display_events : 
  ?dtd_style:dtd_style ->   (* Default: `Include *)
  ?minimization:[`AllEmpty | `None] ->   (* Default: `None *)
  output_stream -> 
  encoding -> 
  rep_encoding -> 
  (unit -> event option) -> 
    unit
  (* Writes the events to the [output_stream]. The events must be encoded
   * as indicated by the [rep_encoding] argument, but the output is written
   * as specified by the [encoding] argument.
   *
   * Namespace prefixes are declared as defined in the namespace scopes.
   * Missing prefixes are invented on the fly. 
   *
   * The way the DTD is printed can be set as in [write_events].
   *)

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml