Plasma GitLab Archive
Projects Blog Knowledge

(* $Id$
 * ----------------------------------------------------------------------
 * PXP: The polymorphic XML parser for Objective Caml.
 * Copyright by Gerd Stolpmann. See LICENSE for details.
 *)

(* Syntax extension to construct XML trees *)

open Netulex.Ulexing
module Ulexing = Netulex.Ulexing

(**********************************************************************)
(* pa_macro:
 *
 * OCAML_NEW_LOC: If defined, camlp4 locations have type
 *                (Lexing.position * Lexing.position) (O'Caml >= 3.08)
 *                Otherwise, locations have type int*int (O'Caml <= 3.07).
 *)

(**********************************************************************)
(* Lexer bases on ulex *)

type pos = int * int * int
  (* Triple (line, line start, char pos):
   * - line: line number. First line has number 1
   * - line start: Character count where the current line started
   * - char pos: Character count of the position
   *
   * When OCAML_NEW_LOC, the triple is fully used. Otherwise,
   * we always have line=1, line_start=0, and only char_pos contains
   * useful information.
   *)

exception Lex_error of (string * pos)
  (* Message, position *)

type token =
    [ `Langle | `Rangle | `Rangle_empty | `Lbracket | `Rbracket
    | `Equal | `Lparen | `Rparen | `List_concat | `String_concat
    | `Comment | `PI | `Super | `End_ocaml_comment | `Other | `EOF
    | `Data | `Langle_colon
    | `Literal of string | `Name of string | `Anti of string ]

  (* Tokens are always encoded in UTF-8! *)

let regexp int = [ '0'-'9' ]+ | "0" ['X' 'x'] ['0'-'9' 'a'-'f' 'A'-'F']+ ;;

let regexp int_entity = "&" "#" int ";" ;;

let regexp name = (xml_letter | '_' | ':' | int_entity) 
                  (xml_letter | xml_digit | '.' | ':' | '-' | '_' | 
		   xml_combining_char | xml_extender | int_entity) *
;;

let rec scan line line_start =
  let pos1 lexbuf = 
    (!line, !line_start, lexeme_start lexbuf) in
  let pos2 lexbuf = 
    (!line, !line_start, lexeme_end lexbuf) in
  lexer
    "<"    -> `Langle, (pos1 lexbuf), (pos2 lexbuf)
  | "<:"   -> `Langle_colon, (pos1 lexbuf), (pos2 lexbuf)
  | ">"    -> `Rangle, (pos1 lexbuf), (pos2 lexbuf)
  | "/>"   -> `Rangle_empty, (pos1 lexbuf), (pos2 lexbuf)
  | "["    -> `Lbracket, (pos1 lexbuf), (pos2 lexbuf)
  | "]"    -> `Rbracket, (pos1 lexbuf), (pos2 lexbuf)
  | "="    -> `Equal, (pos1 lexbuf), (pos2 lexbuf)
  | "("    -> `Lparen, (pos1 lexbuf), (pos2 lexbuf)
  | ")"    -> `Rparen, (pos1 lexbuf), (pos2 lexbuf)
  | "@"    -> `List_concat, (pos1 lexbuf), (pos2 lexbuf)
  | "^"    -> `String_concat, (pos1 lexbuf), (pos2 lexbuf)
  | "<!>"  -> `Comment, (pos1 lexbuf), (pos2 lexbuf)
  | "<?>"  -> `PI, (pos1 lexbuf), (pos2 lexbuf)
  | "<^>"  -> `Super, (pos1 lexbuf), (pos2 lexbuf)
  | "<*>"  -> `Data, (pos1 lexbuf), (pos2 lexbuf)
  | "(:"   -> let p1 = pos1 lexbuf in
              let text = 
		String.concat "" (scan_antiquot p1 line line_start lexbuf) in
	      let p2 = pos2 lexbuf in
	      (`Anti text, p1, p2)
  | "(*"   -> let p1 = pos1 lexbuf in
              skip_comment p1 line line_start lexbuf;
              scan line line_start lexbuf
  | "*)"   -> `End_ocaml_comment, (pos1 lexbuf), (pos2 lexbuf)
  | '"' [^ '"']* '"' -> 
              let n = lexeme_length lexbuf in
              let s = utf8_sub_lexeme lexbuf 1 (n-2) in
              `Literal s, (pos1 lexbuf), (pos2 lexbuf)
  | '"'      -> raise(Lex_error("Unterminated string literal",
				(pos1 lexbuf)))
  | name     -> `Name (utf8_lexeme lexbuf), (pos1 lexbuf), (pos2 lexbuf)
  | "\r\n" 
  | "\r"
  | "\n"     -> IFDEF OCAML_NEW_LOC THEN 
                  incr line; 
                  line_start := lexeme_end lexbuf
		ELSE
		  ()		  
		END;
		scan line line_start lexbuf
  | [ ' ' '\t' ]+ -> scan line line_start lexbuf
  | _        -> `Other, (pos1 lexbuf), (pos2 lexbuf) (* May occur in comments *)
  | eof      -> `EOF, (pos1 lexbuf), (pos2 lexbuf)

and scan_antiquot start_pos line line_start = 
  lexer
    ":)"     -> []
  | [^ ':' '\r' '\n' ]+ -> 
                let s = utf8_lexeme lexbuf in
                s :: scan_antiquot start_pos line line_start lexbuf
  | ":"      -> ":" :: scan_antiquot start_pos line line_start lexbuf
  | "\r\n"
  | "\r"
  | "\n"     -> IFDEF OCAML_NEW_LOC THEN
                  incr line; 
                  line_start := lexeme_end lexbuf
                ELSE
                  ()
		END;
		let s = utf8_lexeme lexbuf in
		s :: scan_antiquot start_pos line line_start lexbuf
  | eof      -> raise(Lex_error("Unterminated antiquotation",
				start_pos))

and skip_comment start_pos line line_start lexbuf =
  match scan line line_start lexbuf with
      (`EOF,_ ,_) ->  raise(Lex_error("Unterminated comment", start_pos))
    | (`End_ocaml_comment,_,_) -> ()
    | _ -> skip_comment start_pos line line_start lexbuf
;;


type charset_decl =
    { source_enc : Netconversion.encoding;
      rep_enc : Netconversion.encoding;
    }
;;


let default_decl =
  { source_enc = `Enc_iso88591;
    rep_enc = `Enc_iso88591
  } ;;

let current_decl = ref default_decl ;;

let reset_decl() =
  current_decl := default_decl ;;


let current_file = ref "" ;;

let check_file() =
  if !Pcaml.input_file <> !current_file then (
    reset_decl();
    current_file := !Pcaml.input_file
  )
;;


let unichar = Netconversion.ustring_of_uchar `Enc_utf8;;


let scan_entities line line_start offset =
  lexer
    "&#" int ";" ->
      let l = lexeme_length lexbuf in
      let s = utf8_sub_lexeme lexbuf 2 (l-3) in
      unichar (int_of_string s)
  | "&lt;" ->
      "<"
  | "&gt;" ->
      ">"
  | "&apos;" ->
      "'"
  | "&amp;" ->
      "&"
  | "&quot;" ->
      "\""
  | "&" ->
      raise(Lex_error("'&' must be written as '&amp;'",
		      (!line,
		       !line_start,
		       lexeme_start lexbuf + offset)))
  | "\r\n"
  | "\r"
  | "\n"     -> IFDEF OCAML_NEW_LOC THEN
                  incr line; 
                  line_start := lexeme_end lexbuf + offset
                ELSE
		  ()
		END;
		utf8_lexeme lexbuf
  | _ ->
      utf8_lexeme lexbuf
  | eof ->
      raise End_of_file
;;


let convert_entities (line,line_start,pos) offset s =
  let lexbuf = from_ulb_lexbuf (Netulex.ULB.from_string `Enc_utf8 s) in
  let rline = ref line in
  let rline_start = ref line_start in
  let b = Buffer.create 200 in
  try
    while true do
      Buffer.add_string b (scan_entities rline rline_start (pos+offset) lexbuf)
    done;
    assert false;
  with
      End_of_file ->
	Buffer.contents b
;;


let scan_string s : (token * pos * pos) Stream.t =
  let src_enc = !current_decl.source_enc in
  let line = ref 1 in
  let line_start = ref 0 in
  let lexbuf = from_ulb_lexbuf (Netulex.ULB.from_string src_enc s) in
  Stream.from
    (fun count ->
       try
	 ( match scan line line_start lexbuf with
	       (`Name s, p1, p2) ->
		 Some (`Name (convert_entities p1 0 s), p1, p2)
	     | (`Literal s, p1, p2) ->
		 Some (`Literal (convert_entities p1 1 s), p1, p2)
	     | other ->
		 Some other
	 )
       with
	   Error ->
	     raise(Lex_error("Lexical error",
			     (!line,
			      !line_start,
			      lexeme_start lexbuf)))
    )
;;


(**********************************************************************)
(* Stream Parser *)

exception Syntax_error of pos * pos;;


type ast_node0 =
    [ `Element of (ast_string * ast_attr list * ast_node_list)
    | `Data of ast_string
    | `Comment of ast_string
    | `PI of (ast_string * ast_string)
    | `Super of ast_node_list
    | `Meta of (string * ast_attr list * ast_node)
    | `Ident of string
    | `Anti of string
       (* The following are the same as ast_string0. They are interpreted
	* as data node:
	*)
    | `Literal of string
    | `Concat of ast_node list
    ]
and ast_node = (ast_node0 * pos * pos)

and ast_node_list0 =
    [ `Nodes of ast_node list
    | `Concat of ast_node_list list
    | `Ident of string
    | `Anti of string
    ]
and ast_node_list = (ast_node_list0 * pos * pos)

and ast_string0 =
    [ `Literal of string
    | `Concat of ast_string list
    | `Ident of string
    | `Anti of string
    ]
and ast_string = (ast_string0 * pos * pos)

and ast_attr0 =
    [ `Attr of ast_string * ast_string
    | `Anti of string
    ]
and ast_attr = (ast_attr0 * pos * pos)

and ast_any_node =
    [ `Node of ast_node
    | `Nodelist of ast_node_list
    ]
;;


(* Note that the syntax allows that strings are interpreted as data
 * nodes, e.g.
 *
 * <a>[ "x" ]: Element "a" with data sub node "x"
 *
 * ==> Every string can also be interpreted as data node.
 *
 * At some locations we do not allow that because _only_ strings
 * are reasonable. Then string_restr=true ("string restriction").
 * E.g. 
 *
 * <a x="x"/>  is ok, but
 * <a x=<b/>/> is nonsense
 *
 * Not every expression composed of strings and nodes is sound. E.g.
 * "x" ^ "y" is ok (string concatenation), but "x" ^ <a/> is not.
 * The latter is accepted by the grammar, but must be rejected by
 * an additional type check.
 *)


let ensure_at_end s =
  match Stream.peek s with
      Some(`EOF, _, _) -> ()
    | _ -> raise Stream.Failure
;;


let last_pos s =
  match Stream.peek s with
      Some(_, _, pos) -> pos
    | _ -> assert false
;;
  

let check_meta name atts =
  match name with
      "scope" ->
	()
    | "autoscope" ->
	if atts <> [] then raise Stream.Failure;
    | "emptyscope" ->
	if atts <> [] then raise Stream.Failure;
    | _ ->
	raise Stream.Failure
;;


let rec parse_any_expr (s : (token * pos * pos) Stream.t) : ast_any_node =
  match Stream.peek s with
      Some(`Lbracket, _, _) ->
	let v = `Nodelist(parse_nodelist_expr s) in
	ensure_at_end s;
	v
    | Some _ ->
	let v = `Node(parse_expr false s) in
	ensure_at_end s;
	v
    | None ->
	raise Stream.Failure

and parse_expr string_restr : (token * pos * pos) Stream.t -> ast_node =
  parser
      [< (nl1, p1, p2) as nl = parse_factor string_restr;
	 c1 = parse_cont string_restr;
      >] ->
	match c1 with
	    None ->
	      nl
	  | Some(`Concat l, p1', p2') ->
	      (`Concat(nl :: l), p1, p2')
	  | Some(other, p1', p2') ->
	      (`Concat([nl; (other, p1', p2')]), p1, p2')

and parse_cont string_restr : (token * pos * pos) Stream.t -> ast_node option =
  parser
      [< '(`String_concat, p1, p2);
	 e = parse_expr string_restr;
      >] ->
	Some e
    | [< >] ->
	None

and parse_factor string_restr : (token * pos * pos) Stream.t -> ast_node =
  parser
      [< '(`Langle, p1, p2) when not string_restr; 
	 name = parse_element_name;
	 attrs, flag, p' = parse_attrs;
	 (subnodes0, p1', p2') as subnodes = 
	   if flag then
	     (fun _ -> `Nodes [], p', p')
	   else
	     parse_nodelist_expr;
      >] ->
	( `Element(name, attrs, subnodes), p1, p2'  )
    | [< '(`Langle_colon, p1, p2) when not string_restr; 
	 '(`Name name, _, _);
	 attrs, flag, p' = parse_attrs;
	 (subnode0, p1', p2') as subnode = parse_expr string_restr;
      >] ->
	( check_meta name attrs;
	  `Meta(name, attrs, subnode), p1, p2'  )
    | [< '(`Comment, p1, p2) when not string_restr;
	 (contents0, p1', p2') as contents = parse_string_expr
      >] ->
	( `Comment contents, p1, p2' )
    | [< '(`PI, p1, p2) when not string_restr;
	 (contents0, p1', p2') as contents = parse_string_expr;
	 (contents0', p1'', p2'') as contents' = parse_string_expr
      >] ->
	( `PI(contents,contents'), p1, p2'' )
    | [< '(`Super, p1, p2) when not string_restr;
	 (subnodes0, p1', p2') as subnodes = parse_nodelist_expr;
      >] -> 
	( `Super subnodes, p1, p2' )
    | [< '(`Data, p1, p2) when not string_restr;
         (contents0, p1', p2') as contents = parse_string_expr
      >] ->
         ( `Data contents, p1, p2' )
    | [< '(`Lparen, p1, p2);
	 (inner, p1', p2') = parse_expr string_restr;
	 '(`Rparen, p1'', p2'') 
      >] ->
	(inner, p1, p2'')
    | [< '(`Anti text, p1, p2) >] ->
	( `Anti text, p1, p2 )
    | [< '(`Literal s, p1, p2) >] ->
	(`Literal s, p1, p2)
    | [< '(`Name n, p1, p2) >] ->
	( `Ident n, p1, p2)

and parse_element_name : (token * pos * pos) Stream.t -> ast_string =
  parser
      [< '(`Name n, p1, p2) >] ->
	( `Literal n, p1, p2 )
    | [< '(`Anti text, p1, p2) >] ->
	( `Anti text, p1, p2 )
    | [< '(`Lparen, p1, p2);
	 s = parse_string_expr;
	 '(`Rparen, p1', p2') 
      >] ->
	let (str, _, _) = s in
	(str, p1, p2')

and parse_attrs : (token * pos * pos) Stream.t -> ast_attr list * bool * pos =
  parser
      [< '(`Name n, p1, p2);
	 '(`Equal, p1', p2');
	 (s, p1'', p2'') = parse_string_expr;
	 (rest, flag, p) = parse_attrs
      >] ->
	let name_str = (`Literal n, p1, p2) in
	let val_str  = (s, p1'', p2'') in
	( `Attr(name_str, val_str), p1, p2'' ) :: rest, flag, p
    | [< '(`Lparen, p1, p2);
	 (s1, p1', p2') = parse_string_expr;
	 '(`Rparen, p1'', p2'');
	 '(`Equal, _, _);
	 (s2, p1''', p2''') = parse_string_expr;
	 (rest, flag, p) = parse_attrs
      >] ->
	let name_str = (s1, p1', p2') in
	let val_str  = (s2, p1''', p2''') in
	( `Attr(name_str, val_str), p1, p2''' ) :: rest, flag, p
    | [< '(`Anti text, p1, p2);
	 (rest, flag, p) = parse_attrs
      >] ->
	( `Anti text, p1, p2 ) :: rest, flag, p
    | [< '(`Rangle, p1, p2) >] ->
	[], false, p2
    | [< '(`Rangle_empty, p1, p2) >] ->
	[], true, p2

and parse_nodelist_expr : (token * pos * pos) Stream.t -> ast_node_list =
  parser
      [< (nl1, p1, p2) as nl = parse_nodelist_factor;
	 c1 = parse_nodelist_cont;
      >] ->
	match c1 with
	    None ->
	      nl
	  | Some(`Concat l, p1', p2') ->
	      (`Concat(nl :: l), p1, p2')
	  | Some(other, p1', p2') ->
	      (`Concat([nl; (other, p1', p2')]), p1, p2')

and parse_nodelist_cont : (token * pos * pos) Stream.t -> ast_node_list option =
  parser
      [< '(`List_concat, p1, p2);
	 e = parse_nodelist_expr;
      >] ->
	Some e
    | [< >] ->
	None

and parse_nodelist_factor : (token * pos * pos) Stream.t -> ast_node_list =
  parser
      [< '(`Lbracket, p1, p2);
	 (l, p1', p2') = parse_bracket_expr
      >] ->
	( `Nodes l, p1, p2' )
    | [< '(`Lparen, p1, p2);
	 (e, p1', p2') = parse_nodelist_expr;
	 '(`Rparen, p1'', p2'')
      >] ->
	( e, p1, p2'' )
    | [< '(`Anti text, p1, p2) >] ->
	( `Anti text, p1, p2 )
    | [< '(`Name n, p1, p2) >] ->
	( `Ident n, p1, p2)
    | [< (n, p1, p2) = parse_expr false >] ->
	`Nodes [n, p1, p2], p1, p2

and parse_bracket_expr : (token * pos * pos) Stream.t -> ast_node list * pos * pos =
  parser
      [< '(`Rbracket, p1, p2) >] ->
	( [], p1, p2 )
    | [< (n, p1, p2) = parse_expr false;
	 (b, p1', p2') = parse_bracket_expr >] ->
	((n, p1, p2) :: b, p1, p2')

and parse_string_expr (s : (token * pos * pos) Stream.t) : ast_string =
  let rec coerce (e : ast_node) : ast_string =
    match e with
	(`Concat l, p1, p2) ->
	  `Concat (List.map coerce l), p1, p2
      | `Anti _, _, _ as e' -> e'
      | `Ident _, _, _ as e' -> e'
      | `Literal _, _, _ as e' -> e'
      | _ -> assert false
  in
  coerce(parse_expr true s)
;;


let rec parse_charset_decl0 : (token * pos * pos) Stream.t -> charset_decl =
  parser
      [< '(`Name n, p1, p2);
	 '(`Equal, _, _);
	 '(`Literal s, p1', p2');
	 cur = parse_charset_decl0 >] ->
	
	( match n with
	      "source" ->
		let e = Netconversion.encoding_of_string s in
		{ cur with source_enc = e }
	    | "representation" ->
		let e = Netconversion.encoding_of_string s in
		{ cur with rep_enc = e }
	    | _ ->
		raise Stream.Failure
	)

    | [< >] ->
	(* default: *)
	!current_decl
;;


let parse_charset_decl s =
  let v = parse_charset_decl0 s in
  ensure_at_end s;
  v
;;


let call_parser f s =
  try
    f s 
  with
      Stream.Failure
    | Stream.Error _ ->
	( match Stream.peek s with
	      Some(_, p1, p2) ->
		raise(Syntax_error(p1,p2))
	    | None ->
		assert false  (* must not happen *)
	)
;;


(**********************************************************************)
(* Type checker *)

(* Typing checking also transforms the ast such that 
 * `Literal and `Concat are never used in the context of a
 * node expression.
 *)

exception Typing_error of (string * pos * pos);;

let rec check_any_expr : ast_any_node -> ast_any_node =
  function
      `Node n -> `Node(check_node_expr n)
    | `Nodelist l -> `Nodelist(check_nodelist_expr l)

and check_nodelist_expr : ast_node_list -> ast_node_list =
  function
      (`Nodes l,p1,p2) -> (`Nodes(List.map check_node_expr l),p1,p2)
    | (`Concat l,p1,p2) -> (`Concat(List.map check_nodelist_expr l),p1,p2)
    | ((`Ident _|`Anti _),_,_) as nl -> nl

and check_node_expr : ast_node -> ast_node =
  function
      ((`Data _ | `Comment _ | `PI _ | `Ident _ | `Anti _),_,_) as n -> n
    | (`Element(name,attrs,children),p1,p2) ->
	(`Element(name,attrs,check_nodelist_expr children),p1,p2)
    | (`Super children,p1,p2) ->
	(`Super(check_nodelist_expr children),p1,p2)
    | (`Literal s,p1,p2) ->
	(`Data(`Literal s,p1,p2),p1,p2)
    | (`Concat l,p1,p2) ->
	(`Data(`Concat(List.map check_node_expr_as_string l),p1,p2),p1,p2)
    | (`Meta(n,a,child),p1,p2) ->
	(`Meta(n,a,check_node_expr child),p1,p2)

and check_node_expr_as_string : ast_node -> ast_string =
  function
      ((`Data _ | `Comment _ | `PI _ | `Element _ | `Super _),p1,p2) ->
	raise(Typing_error("Nodes cannot be used as strings", p1, p2))
    | ((`Ident _| `Anti _ | `Literal _),p1,p2) as n ->
	n
    | (`Concat l,p1,p2) ->
	(`Concat(List.map check_node_expr_as_string l),p1,p2)
    | (`Meta(n,a,child),p1,p2) ->
	raise(Typing_error("Meta node not allowed in string context", p1, p2))
;;

(**********************************************************************)
(* Code generator for tree expressions *)

let mkloc ((_p1_line,_p1_line_start,p1_pos)) 
          ((_p2_line,_p2_line_start,p2_pos)) =
  (* Differs in O'Caml 3.07 and 3.08 *)
  IFDEF OCAML_NEW_LOC THEN
    let l1 = { Lexing.pos_fname = "";
	       Lexing.pos_lnum = _p1_line;
	       Lexing.pos_bol = _p1_line_start;
	       Lexing.pos_cnum = p1_pos } in
    let l2 = { Lexing.pos_fname = "";
	       Lexing.pos_lnum = _p2_line;
	       Lexing.pos_bol = _p2_line_start;
	       Lexing.pos_cnum = p2_pos } in
    (l1,l2)
  ELSE
    (p1_pos,p2_pos)
  END
;;


let raise_at (p1:pos) (p2:pos) exn =
(*
  let (p1_l,p1_s,p1_p) = p1 in 
  Printf.eprintf "Raise_at %d %d %d\n" p1_l p1_s p1_p;
  let (p2_l,p2_s,p2_p) = p2 in 
  Printf.eprintf "Raise_at %d %d %d\n" p2_l p2_s p2_p;
*)
  Stdpp.raise_with_loc (mkloc p1 p2) exn
;;


let catch_errors f =
  try f()
  with
      Lex_error(msg, p) ->
	let (l,s,c) = p in
	raise_at p (l,s,c+1) (Failure ("pxp-pp: Lexical error: " ^ msg))
    | Syntax_error(p1,p2) ->
	raise_at p1 p2 (Failure "pxp-pp: Syntax error")
    | Typing_error(msg,p1,p2) ->
	raise_at p1 p2 (Failure("pxp-pp: Typing error: " ^ msg))
;;


let generate_list loc el =
  List.fold_right (fun x l -> <:expr< [$x$ :: $l$] >>) el <:expr< [] >>
;;


let generate_ann_list loc el =
  List.fold_right (fun (ann,x) l -> 
		     match ann with
			 `Single -> <:expr< [$x$ :: $l$] >>
		       | `List   -> <:expr< $x$ @ $l$ >>) 
                  el 
                  <:expr< [] >>
;;


let generate_ident loc name =
  (* TODO: "." separation *)
  (* TODO: Convert back to latin 1 *)
  <:expr< $lid:name$ >>
;;


let expand_tree_expr (valcheck:bool) (s:string) : MLast.expr =
  (* valcheck: Whether to do DTD validation *)

  check_file();

  let valcheck_expr =
    let loc = mkloc (0,0,0) (0,0,0) in
    if valcheck then <:expr< True >> else <:expr< False >> in

  let to_rep s =
    Netconversion.convert 
      ~in_enc:`Enc_utf8 ~out_enc:(!current_decl.rep_enc) s in

  let to_src s =
    Netconversion.convert 
      ~in_enc:`Enc_utf8 ~out_enc:(!current_decl.source_enc) s in

  let rec generate_for_any_expr : ast_any_node -> MLast.expr =
    function
	`Node n -> generate_for_node_expr false n
      | `Nodelist nl -> generate_for_nodelist_expr false nl

  and generate_for_node_expr nsmode : ast_node -> MLast.expr = (
    (* nsmode: Whether there is a variable [scope] in the environment *)
    function
	(`Element(name,attrs,subnodes),p1,p2) ->
	  let loc = mkloc p1 p2 in
	  let name_expr = generate_for_string_expr name in
	  let attrs_expr_l = List.map generate_for_attr_expr attrs in
	  let attrs_expr = generate_ann_list loc attrs_expr_l in
	  let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in
	  let el_only_expr =
	    <:expr< Pxp_document.create_element_node
	              ~valcheck:$valcheck_expr$
	              spec dtd $name_expr$ $attrs_expr$ >> in
	  let do_validation =
	    if valcheck then
	      <:expr< node#validate_contents() >>
	    else
	      <:expr< () >> in
	  let do_set_scope =
	    if nsmode then
	      <:expr< node#set_namespace_scope scope >>
	    else
	      <:expr< () >> in
	  <:expr< let node = $el_only_expr$ in
                  do { node # set_nodes $subnodes_expr$;
		       $do_set_scope$;
		       $do_validation$;
                       node } >>
      | (`Data text,p1,p2) ->
	  let text_expr = generate_for_string_expr text in
	  let loc = mkloc p1 p2 in
	  <:expr< Pxp_document.create_data_node spec dtd $text_expr$ >>
      | (`Comment text,p1,p2) ->
	  let text_expr = generate_for_string_expr text in
	  let loc = mkloc p1 p2 in
	  <:expr< Pxp_document.create_comment_node spec dtd $text_expr$ >>
      | (`PI(target,value),p1,p2) ->
	  let target_expr = generate_for_string_expr target in
	  let value_expr = generate_for_string_expr value in
	  let loc = mkloc p1 p2 in
	  <:expr< Pxp_document.create_pinstr_node spec dtd
	          (new Pxp_dtd.proc_instruction 
                         $target_expr$ $value_expr$ dtd#encoding)
	        >>
      | (`Super subnodes,p1,p2) ->
	  let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in
	  let loc = mkloc p1 p2 in
	  <:expr< let node = Pxp_document.create_super_root_node spec dtd in
                  do { node # set_nodes $subnodes_expr$;
	               node } >>
      | (`Meta(name,attrs,subnode),p1,p2) ->
	  let loc = mkloc p1 p2 in
	  ( match name with
		"scope"      -> generate_scope loc attrs subnode
	      | "autoscope"  -> generate_autoscope loc subnode
	      | "emptyscope" -> generate_emptyscope loc subnode
	      | _            -> assert false (* already caught above *)
	  )
      | (`Ident name,p1,p2) ->
	  let loc = mkloc p1 p2 in
	  generate_ident loc (to_src name)
      | (`Anti text,p1,p2) ->
	  Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text))
      | _ ->
	  (* `Literal and `Concat are impossible after type check *)
	  assert false )

  and generate_for_nodelist_expr nsmode : ast_node_list -> MLast.expr = (
    function
	(`Nodes l, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  let l' = List.map (generate_for_node_expr nsmode) l in
	  generate_list loc l'
      | (`Concat l, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  let l' = List.map (generate_for_nodelist_expr nsmode) l in
	  let l'' = generate_list loc l' in
	  <:expr< List.concat $l''$ >>
      | (`Ident name, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  generate_ident loc (to_src name)
      | (`Anti text, p1, p2) ->
	  Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text))
  )

  and generate_for_attr_expr : ast_attr -> [`Single|`List] * MLast.expr = (
    function
	(`Attr(n,v), p1, p2) ->
	  let loc = mkloc p1 p2 in
	  let n_expr = generate_for_string_expr n in
	  let v_expr = generate_for_string_expr v in
	  `Single, <:expr< ($n_expr$, $v_expr$) >>
      | (`Anti text, p1, p2) ->
	  `List, 
	  Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text))
  )

  and generate_scope loc attrs subnode : MLast.expr = (
    let subexpr = generate_for_node_expr true subnode in
    if attrs = [] then
      subexpr
    else
      let decl_expr_l = List.map generate_for_attr_expr attrs in
      let decl_expr = generate_ann_list loc decl_expr_l in
      <:expr< let scope = 
                new Pxp_dtd.namespace_scope_impl 
		  (dtd # namespace_manager)
		  (Some scope)
		  $decl_expr$ in $subexpr$>>
  )

  and generate_autoscope loc subnode : MLast.expr = (
    let subexpr = generate_for_node_expr true subnode in
    <:expr< let scope =
            ( let mng = dtd # namespace_manager in
	      new Pxp_dtd.namespace_scope_impl 
		mng None mng#as_declaration ) in $subexpr$ >>
  )

  and generate_emptyscope loc subnode : MLast.expr = (
    let subexpr = generate_for_node_expr true subnode in
    <:expr< let scope =
            ( let mng = dtd # namespace_manager in
	      new Pxp_dtd.namespace_scope_impl 
		mng None [] ) in $subexpr$ >>
  )

  and generate_for_string_expr : ast_string -> MLast.expr = (
    function
	(`Literal s, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  let s' = to_rep s in
	  <:expr< $str:s'$ >>
      | (`Concat l, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  let l' = List.map generate_for_string_expr l in
	  let l'' = generate_list loc l' in
	  <:expr< String.concat "" $l''$ >>
      | (`Ident name, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  generate_ident loc (to_src name)
      | (`Anti text, p1, p2) ->
	  Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text))
  )

  in

  catch_errors
    (fun () ->
       let stream = scan_string s in
       let ast = call_parser parse_any_expr stream in
       let ast' = check_any_expr ast in
       let ocaml_expr = generate_for_any_expr ast' in
       let loc = mkloc (1,0,0) (last_pos stream) in
       <:expr< $anti:ocaml_expr$ >>
    )
;;


(**********************************************************************)
(* Code generator for event streams *)


type ann = [`Single|`Tree|`Forest];;

let generate_event_generator
      (generate_tree : (ann * MLast.expr) list -> MLast.expr)
      (generate_forest : (ann * MLast.expr) list -> MLast.expr)
      (s:string) 
      : MLast.expr =
  (* Generates code to generate events. The input arguments
   * [generate_tree] and [generate_forest] process an intermediate
   * representation, the so-called annotated expression lists
   * (type (ann * MLast.expr) list), and return the final code.
   *
   * Kinds of annotations:
   * - `Single: The expression is a single event, i.e. an O'Caml value
   *   of type [event].
   * - `Tree: The expression represents a list of events corresponding
   *   to a node tree. It is left
   *   open how such lists are represented. The expression is either
   *   an O'Caml identifier or a subexpression from an antiquotation.
   * - `Forest: The expression represents a list of events corresponding
   *   to a list of node trees.
   *
   * The argument [generate_tree] is a function that generates the
   * final code for an annotated list of expressions. It can be expected
   * that the input list for [generate_tree] represents a node tree.
   *
   * The argument [generate_forest] does the same for an annotated
   * list of expressions that represents a list of node trees.
   *)

  let to_rep s =
    Netconversion.convert 
      ~in_enc:`Enc_utf8 ~out_enc:(!current_decl.rep_enc) s in

  let to_src s =
    Netconversion.convert 
      ~in_enc:`Enc_utf8 ~out_enc:(!current_decl.source_enc) s in

  let rec generate_for_any_expr loc : ast_any_node -> MLast.expr =
    function
	`Node n -> 
	  let e = generate_tree (generate_for_node_expr false n) in
	  <:expr< let _eid = Pxp_dtd.Entity.create_entity_id() in $e$ >>
      | `Nodelist nl -> 
	  let e = generate_forest (generate_for_nodelist_expr false nl) in
	  <:expr< let _eid = Pxp_dtd.Entity.create_entity_id() in $e$ >>

  and generate_for_node_expr nsmode : ast_node -> (ann * MLast.expr) list = (
    (* nsmode: Whether there is a variable [scope] in the environment *)
    function
	(`Element(name,attrs,subnodes),p1,p2) ->
	  let loc = mkloc p1 p2 in
	  let name_expr = generate_for_string_expr name in
	  let attrs_expr_l = List.map generate_for_attr_expr attrs in
	  let attrs_expr = generate_ann_list loc attrs_expr_l in
	  let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in
	  let scope_opt_expr =
	    if nsmode then <:expr< Some scope >> else <:expr< None >> in

	  let start_tag =
	    <:expr< Pxp_types.E_start_tag($name_expr$,
					  $attrs_expr$,
					  $scope_opt_expr$,
					  _eid) >> in
	  let end_tag =
	    <:expr< Pxp_types.E_end_tag($name_expr$,_eid) >> in

	  [`Single, start_tag] @ subnodes_expr @ [`Single, end_tag]
      | (`Data text,p1,p2) ->
	  let text_expr = generate_for_string_expr text in
	  let loc = mkloc p1 p2 in
	  [ `Single, <:expr< Pxp_types.E_char_data($text_expr$) >> ]
      | (`Comment text,p1,p2) ->
	  let text_expr = generate_for_string_expr text in
	  let loc = mkloc p1 p2 in
	  [ `Single, <:expr< Pxp_types.E_comment($text_expr$) >> ]
      | (`PI(target,value),p1,p2) ->
	  let target_expr = generate_for_string_expr target in
	  let value_expr = generate_for_string_expr value in
	  let loc = mkloc p1 p2 in
	  [ `Single, <:expr< Pxp_types.E_pinstr($target_expr$,$value_expr$,_eid) >> ]
      | (`Super subnodes,p1,p2) ->
	  let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in
	  let loc = mkloc p1 p2 in
	  ( [ `Single, <:expr< Pxp_types.E_start_super >> ] @
	    subnodes_expr @
	    [ `Single, <:expr< Pxp_types.E_end_super >> ] )
      | (`Meta(name,attrs,subnode),p1,p2) ->
	  let loc = mkloc p1 p2 in
	  ( match name with
		"scope"      -> generate_scope loc attrs subnode
	      | "autoscope"  -> generate_autoscope loc subnode
	      | "emptyscope" -> generate_emptyscope loc subnode
	      | _            -> assert false (* already caught above *)
	  )
      | (`Ident name,p1,p2) ->
	  let loc = mkloc p1 p2 in
	  [ `Tree, (generate_ident loc (to_src name)) ]
      | (`Anti text,p1,p2) ->
	  let expr = 
	    Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text))
	  in
	  [ `Tree, expr ]
      | _ ->
	  (* `Literal and `Concat are impossible after type check *)
	  assert false )

  and generate_for_nodelist_expr nsmode : 
                                 ast_node_list -> (ann * MLast.expr) list = (
    function
	(`Nodes l, p1, p2) ->
	  (* let loc = mkloc p1 p2 in *)
	  let l' = List.map (generate_for_node_expr nsmode) l in
	  List.flatten l'
      | (`Concat l, p1, p2) ->
	  (* let loc = mkloc p1 p2 in *)
	  let l' = List.map (generate_for_nodelist_expr nsmode) l in
	  List.flatten l'
      | (`Ident name, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  [ `Forest, (generate_ident loc (to_src name)) ]
      | (`Anti text, p1, p2) ->
	  let expr = 
	    Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text))
	  in
	  [ `Forest, expr ]
  )

  and generate_for_attr_expr : ast_attr -> [`Single|`List] * MLast.expr = (
    function
	(`Attr(n,v), p1, p2) ->
	  let loc = mkloc p1 p2 in
	  let n_expr = generate_for_string_expr n in
	  let v_expr = generate_for_string_expr v in
	  `Single, <:expr< ($n_expr$, $v_expr$) >>
      | (`Anti text, p1, p2) ->
	  `List, 
	  Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text))
  )

  and generate_scope loc attrs subnode : (ann * MLast.expr) list = (
    let subexpr = generate_for_node_expr true subnode in
    if attrs = [] then
      subexpr
    else
      let decl_expr_l = List.map generate_for_attr_expr attrs in
      let decl_expr = generate_ann_list loc decl_expr_l in
      let old_scope_expr = <:expr< Some scope >> in
      let scope_expr =
	<:expr< new Pxp_dtd.namespace_scope_impl 
		  (dtd # namespace_manager)
		  $old_scope_expr$
		  $decl_expr$>> in
      let compiled_subexpr = generate_tree subexpr in
      [ `Tree, ( <:expr< let scope = $scope_expr$ in $compiled_subexpr$ >> ) ]
  )

  and generate_autoscope loc subnode : (ann * MLast.expr) list = (
    let subexpr = generate_for_node_expr true subnode in
    let compiled_subexpr = generate_tree subexpr in
    let scope_expr = 
      <:expr< ( let mng = dtd # namespace_manager in
		new Pxp_dtd.namespace_scope_impl 
		  mng None mng#as_declaration ) >> in
    [ `Tree, ( <:expr< let scope = $scope_expr$ in $compiled_subexpr$ >> ) ]
  )

  and generate_emptyscope loc subnode : (ann * MLast.expr) list = (
    let subexpr = generate_for_node_expr true subnode in
    let compiled_subexpr = generate_tree subexpr in
    let scope_expr = 
      <:expr< ( let mng = dtd # namespace_manager in
		new Pxp_dtd.namespace_scope_impl 
		  mng None [] ) >> in
    [ `Tree, ( <:expr< let scope = $scope_expr$ in $compiled_subexpr$ >> ) ]
  )

  and generate_for_string_expr : ast_string -> MLast.expr = (
    function
	(`Literal s, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  let s' = to_rep s in
	  <:expr< $str:s'$ >>
      | (`Concat l, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  let l' = List.map generate_for_string_expr l in
	  let l'' = generate_list loc l' in
	  <:expr< String.concat "" $l''$ >>
      | (`Ident name, p1, p2) ->
	  let loc = mkloc p1 p2 in
	  generate_ident loc (to_src name)
      | (`Anti text, p1, p2) ->
	  Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text))
  )

  in

  catch_errors
    (fun () ->
       let stream = scan_string s in
       let ast = call_parser parse_any_expr stream in
       let ast' = check_any_expr ast in
       let loc = mkloc (1,0,0) (last_pos stream) in
       let expr = generate_for_any_expr loc ast' in
       <:expr< $anti:expr$ >>
    )
;;


let expand_evlist_expr s =
  let loc = mkloc (0,0,0) (0,0,0) in  (* ??? *)
  let rec generate_tree annlist =
    match annlist with
	(`Single, e) :: annlist' ->
	  let rest = generate_tree annlist' in
	  <:expr< [$e$ :: $rest$] >>
      | ((`Tree | `Forest), e) :: annlist' ->
	  let rest = generate_tree annlist' in
	  <:expr< $e$ @ $rest$ >>
      | [] ->
	  <:expr< [] >>
  in
  let generate_forest annlist = generate_tree annlist in
  check_file();
  generate_event_generator generate_tree generate_forest s
;;


let expand_evpull_expr s =
  let loc = mkloc (0,0,0) (0,0,0) in  (* ??? *)
  let generate_tree annlist =
    let rec generate_match k annlist =
      match annlist with
	(`Single, e) :: annlist' ->
	  ( <:patt< $int:string_of_int k$ >>, 
	    None, 
	    <:expr< let ev = $e$ in
	            do { _state.val := $int:string_of_int(k+1)$;
	                  Some ev } 
            >> ) :: generate_match (k+1) annlist'
      | ((`Tree | `Forest), e) :: annlist' ->
	  ( <:patt< $int:string_of_int k$ >>, 
	    None, 
	    <:expr< match $e$ _arg with
		    [ None -> do { _state.val := $int:string_of_int(k+1)$;
				   _generator _arg }
	            | Some Pxp_types.E_end_of_stream -> _generator _arg
                    | Some ev -> Some ev ]
            >> ) :: generate_match (k+1) annlist'
      | [] ->
	  [ <:patt< $int:string_of_int k$ >>,
	    None,
	    <:expr< None >>;

	    <:patt< _ >>,
	    None,
	    <:expr< assert False >>
	  ]
    in
    <:expr< let rec _generator =
	       let _state = ref 0 in
	       fun _arg ->
	         match _state.val with
	         [$list:generate_match 0 annlist$]
            in _generator >>
  in
  let generate_forest annlist = generate_tree annlist in
  check_file();
  generate_event_generator generate_tree generate_forest s
;;
  


(**********************************************************************)
(* Other expanders *)

let expand_charset_expr s =
  check_file();
  catch_errors
    (fun () ->
       let stream = scan_string s in
       let decl = call_parser parse_charset_decl stream in
       current_decl := decl;
       let loc = mkloc (1,0,0) (last_pos stream) in
       <:expr< () >>
    )
;;


let expand_text_expr s =
  check_file();
  let loc = mkloc (1,0,0) (1,0,String.length s) in
  <:expr< $str:s$ >>
;;


let na_pat _ =
  failwith "not available as pattern"
;;

Quotation.add
  "pxp_charset" (Quotation.ExAst(expand_charset_expr, na_pat)) ;;
Quotation.add 
  "pxp_tree" (Quotation.ExAst(expand_tree_expr false, na_pat)) ;;
Quotation.add 
  "pxp_vtree" (Quotation.ExAst(expand_tree_expr true, na_pat)) ;;
Quotation.add
  "pxp_evlist" (Quotation.ExAst(expand_evlist_expr, na_pat)) ;;
Quotation.add
  "pxp_evpull" (Quotation.ExAst(expand_evpull_expr, na_pat)) ;;
Quotation.add
  "pxp_text" (Quotation.ExAst(expand_text_expr, na_pat)) ;;

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml