Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: pxp_types.ml,v 1.13 2001/06/27 23:33:53 gerd Exp $
 * ----------------------------------------------------------------------
 * PXP: The polymorphic XML parser for Objective Caml.
 * Copyright by Gerd Stolpmann. See LICENSE for details.
 *)

class private_id = object end;;

type ext_id =
    System of string
  | Public of (string * string)
  | Anonymous
  | Private of private_id


let allocate_private_id () = new private_id;;
  (* private_id is a class because allocate_private_id becomes thread-safe in
   * this case (without needing to lock a global variable).
   * Note that when you compare objects (=, < etc) you actually compare
   * the object IDs. Hence, private_id has the intended semantics.
   *)


type dtd_id =
    External of ext_id
  | Derived of ext_id
  | Internal
;;

type content_model_type =
    Unspecified
  | Empty
  | Any
  | Mixed of mixed_spec list
  | Regexp of regexp_spec

and mixed_spec =
    MPCDATA
  | MChild of string

and regexp_spec =
    Optional of regexp_spec
  | Repeated of regexp_spec
  | Repeated1 of regexp_spec
  | Alt of regexp_spec list
  | Seq of regexp_spec list
  | Child of string
;;


type att_type =
    A_cdata
  | A_id
  | A_idref
  | A_idrefs
  | A_entity
  | A_entities
  | A_nmtoken
  | A_nmtokens
  | A_notation of string list
  | A_enum of string list
;;


type att_default =
    D_required
  | D_implied
  | D_default of string  (* The default value is already expanded *)
  | D_fixed of string    (* The default value is already expanded *)
;;


type att_value =
    Value of string
  | Valuelist of string list
  | Implied_value
;;


class type collect_warnings =
  object 
    method warn : string -> unit
  end
;;


class drop_warnings =
  object 
    method warn (w:string) = ()
  end
;;


type encoding = Netconversion.encoding;;

type rep_encoding =
  (* The subset of 'encoding' that may be used for internal representation
   * of strings.
   *)
  [  `Enc_utf8       (* UTF-8 *)
  |  `Enc_iso88591   (* ISO-8859-1 *)
  ]
;;


exception Validation_error of string

exception WF_error of string

exception Namespace_error of string

exception Error of string

exception Character_not_supported

exception At of (string * exn)

exception Undeclared

exception Method_not_applicable of string

exception Namespace_method_not_applicable of string

let rec string_of_exn x0 =
  match x0 with
      At (s, x) ->
        s ^ string_of_exn x
    | Validation_error s ->
        "ERROR (Validity constraint): "  ^ s
    | WF_error s ->
        "ERROR (Well-formedness constraint): " ^ s
    | Error s ->
	"ERROR: " ^ s
    | Character_not_supported ->
        "RESTRICTION: Character not supported"
    | Netconversion.Malformed_code ->
        "ERROR: Bad character stream"
    | Undeclared ->
        "INFORMATION: Undeclared"
    | Method_not_applicable mname ->
	"INTERNAL ERROR (method `" ^ mname ^ "' not applicable)"
    | Parsing.Parse_error ->
	"SYNTAX ERROR"
    | _ ->
        "Other exception: " ^ Printexc.to_string x0
;;


type output_stream =
  [ `Out_buffer of Buffer.t
  | `Out_channel of out_channel
  | `Out_function of (string -> int -> int -> unit)
  ]
;;


let write os str pos len =
  match os with
      `Out_buffer b -> Buffer.add_substring b str pos len
    | `Out_channel ch -> output ch str pos len
    | `Out_function f -> f str pos len
;;


type pool =
    { mutable pool_start : pool_node;
      mutable pool_end   : pool_node;
      mutable pool_array : pool_node_record array;
      mutable pool_count : int;
      mutable pool_max   : int;
    }
and pool_node =
    No_node
  | Pool_node of pool_node_record
and pool_node_record =
    { mutable previous_pool_node : pool_node;
      mutable next_pool_node : pool_node;
      mutable pool_string : string option;
    }
;;


let make_probabilistic_pool ?(fraction = 0.3) size =
  let make_node _ =
    { previous_pool_node = No_node;
      next_pool_node = No_node;
      pool_string = None;
    }
  in
  let m = truncate (fraction *. float size) in
  if size < 0 || m < 1 || m > size then
    invalid_arg "make_probabilistic_pool";
  { pool_start = No_node;
    pool_end = No_node;
    pool_array = Array.init size make_node;
    pool_count = 0;
    pool_max = m;
  }
;;


let pool_string p s =
  let size = Array.length p.pool_array in
  let h = Hashtbl.hash s mod size in
  let n =  p.pool_array.(h) in
  match n.pool_string with
      None ->
	(* The pool node is free. We occupy the node, and insert it at the
	 * beginning of the node list. 
	 *)
	n.pool_string <- Some s;
	n.previous_pool_node <- No_node;
	n.next_pool_node <- p.pool_start;
	begin match p.pool_start with
	    No_node -> ()
	  | Pool_node start ->
	      start.previous_pool_node <- Pool_node n
	end;
	p.pool_start <- Pool_node n;
	if p.pool_end = No_node then p.pool_end <- Pool_node n;
	p.pool_count <- p.pool_count + 1;
	(* If the node list is longer than pool_max, the last node of the
	 * list is deleted.
	 *)
	if p.pool_count > p.pool_max then begin
	  (* ==> p.pool_count >= 2 *)
	  let last = 
	    match p.pool_end with
		No_node -> assert false
	      | Pool_node x -> x in
	  let last' =
	    match last.previous_pool_node with
		No_node -> assert false
	      | Pool_node x -> x in
	  last'.next_pool_node <- No_node;
	  p.pool_end <- Pool_node last';
	  p.pool_count <- p.pool_count - 1;
	  last.pool_string <- None;
	end;
	s

    | Some s' ->
	if s = s' then begin
	  (* We have found the pool string. To make it more unlikely that n
	   * is removed from the pool, n is moved to the beginning of the
	   * node list.
	   *)
	  begin match n.previous_pool_node with
	      No_node ->
		(* n is already at the beginning *)
		()
	    | Pool_node n_pred ->
		n_pred.next_pool_node <- n.next_pool_node;
		begin match n.next_pool_node with
		    No_node ->
		      (* n is at the end of the list *)
		      p.pool_end <- Pool_node n_pred;
		  | Pool_node n_succ ->
		      n_succ.previous_pool_node <- Pool_node n_pred
		end;
		(* Now n is deleted from the list. Insert n again at the
		 * beginning of the list.
		 *)
		n.previous_pool_node <- No_node;
		n.next_pool_node <- p.pool_start;
		begin match p.pool_start with
		    No_node -> ()
		  | Pool_node start ->
		      start.previous_pool_node <- Pool_node n
		end;
		p.pool_start <- Pool_node n;
	  end;
	  (* Return the found pool string: *)
	  s'             
	end
	else begin
	  (* n contains a different string which happened to have the same
	   * hash index.
	   *)
	  s
	end
;;


(* ======================================================================
 * History:
 *
 * $Log: pxp_types.ml,v $
 * Revision 1.13  2001/06/27 23:33:53  gerd
 * 	Type output_stream is now a polymorphic variant
 *
 * Revision 1.12  2001/06/07 22:49:51  gerd
 * 	New namespace exceptions.
 *
 * Revision 1.11  2001/04/26 23:57:04  gerd
 * 	New exception Method_not_applicable. It is raised if there are
 * classes A and B both conforming to class type C, but A does not implement
 * a method required by the class type. In this case, invoking the method
 * in A raises Method_not_applicable.
 * 	This feature is mainly used in Pxp_document.
 *
 * Revision 1.10  2001/04/22 14:14:41  gerd
 * 	Updated to support private IDs.
 *
 * Revision 1.9  2000/09/17 00:12:19  gerd
 * 	Bugfix in the pool implementation.
 *
 * Revision 1.8  2000/09/09 16:38:47  gerd
 * 	New type 'pool'.
 *
 * Revision 1.7  2000/08/14 22:24:55  gerd
 * 	Moved the module Pxp_encoding to the netstring package under
 * the new name Netconversion.
 *
 * Revision 1.6  2000/07/27 00:41:15  gerd
 * 	new 8 bit codes
 *
 * Revision 1.5  2000/07/16 18:31:09  gerd
 * 	The exception Illegal_character has been dropped.
 *
 * Revision 1.4  2000/07/14 21:25:27  gerd
 * 	Simplified the type 'collect_warnings'.
 *
 * Revision 1.3  2000/07/08 16:23:50  gerd
 * 	Added the exception 'Error'.
 *
 * Revision 1.2  2000/07/04 22:14:05  gerd
 * 	Implemented the changes of rev. 1.2 of pxp_types.mli.
 *
 * Revision 1.1  2000/05/29 23:48:38  gerd
 * 	Changed module names:
 * 		Markup_aux          into Pxp_aux
 * 		Markup_codewriter   into Pxp_codewriter
 * 		Markup_document     into Pxp_document
 * 		Markup_dtd          into Pxp_dtd
 * 		Markup_entity       into Pxp_entity
 * 		Markup_lexer_types  into Pxp_lexer_types
 * 		Markup_reader       into Pxp_reader
 * 		Markup_types        into Pxp_types
 * 		Markup_yacc         into Pxp_yacc
 * See directory "compatibility" for (almost) compatible wrappers emulating
 * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
 *
 * ======================================================================
 * Old logs from markup_types.ml:
 *
 * Revision 1.7  2000/05/29 21:14:57  gerd
 * 	Changed the type 'encoding' into a polymorphic variant.
 *
 * Revision 1.6  2000/05/20 20:31:40  gerd
 * 	Big change: Added support for various encodings of the
 * internal representation.
 *
 * Revision 1.5  2000/05/01 20:43:19  gerd
 * 	New type output_stream; new function 'write'.
 *
 * Revision 1.4  1999/09/01 16:25:35  gerd
 * 	Dropped Illegal_token and Content_not_allowed_here. WF_error can
 * be used instead.
 *
 * Revision 1.3  1999/08/15 02:22:33  gerd
 * 	Added exception Undeclared.
 *
 * Revision 1.2  1999/08/14 22:14:58  gerd
 * 	New class "collect_warnings".
 *
 * Revision 1.1  1999/08/10 00:35:52  gerd
 * 	Initial revision.
 *
 *
 *)

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml