(* $Id$ * ---------------------------------------------------------------------- * PXP: The polymorphic XML parser for Objective Caml. * Copyright by Gerd Stolpmann. See LICENSE for details. *) (* Syntax extension to construct XML trees *) open Netulex.Ulexing module Ulexing = Netulex.Ulexing (**********************************************************************) (* pa_macro: * * OCAML_NEW_LOC: If defined, camlp4 locations have type * (Lexing.position * Lexing.position) (O'Caml >= 3.08) * Otherwise, locations have type int*int (O'Caml <= 3.07). *) (**********************************************************************) (* Lexer bases on ulex *) type pos = int * int * int (* Triple (line, line start, char pos): * - line: line number. First line has number 1 * - line start: Character count where the current line started * - char pos: Character count of the position * * When OCAML_NEW_LOC, the triple is fully used. Otherwise, * we always have line=1, line_start=0, and only char_pos contains * useful information. *) exception Lex_error of (string * pos) (* Message, position *) type token = [ `Langle | `Rangle | `Rangle_empty | `Lbracket | `Rbracket | `Equal | `Lparen | `Rparen | `List_concat | `String_concat | `Comment | `PI | `Super | `End_ocaml_comment | `Other | `EOF | `Data | `Langle_colon | `Literal of string | `Name of string | `Anti of string ] (* Tokens are always encoded in UTF-8! *) let regexp int = [ '0'-'9' ]+ | "0" ['X' 'x'] ['0'-'9' 'a'-'f' 'A'-'F']+ ;; let regexp int_entity = "&" "#" int ";" ;; let regexp name = (xml_letter | '_' | ':' | int_entity) (xml_letter | xml_digit | '.' | ':' | '-' | '_' | xml_combining_char | xml_extender | int_entity) * ;; let rec scan line line_start = let pos1 lexbuf = (!line, !line_start, lexeme_start lexbuf) in let pos2 lexbuf = (!line, !line_start, lexeme_end lexbuf) in lexer "<" -> `Langle, (pos1 lexbuf), (pos2 lexbuf) | "<:" -> `Langle_colon, (pos1 lexbuf), (pos2 lexbuf) | ">" -> `Rangle, (pos1 lexbuf), (pos2 lexbuf) | "/>" -> `Rangle_empty, (pos1 lexbuf), (pos2 lexbuf) | "[" -> `Lbracket, (pos1 lexbuf), (pos2 lexbuf) | "]" -> `Rbracket, (pos1 lexbuf), (pos2 lexbuf) | "=" -> `Equal, (pos1 lexbuf), (pos2 lexbuf) | "(" -> `Lparen, (pos1 lexbuf), (pos2 lexbuf) | ")" -> `Rparen, (pos1 lexbuf), (pos2 lexbuf) | "@" -> `List_concat, (pos1 lexbuf), (pos2 lexbuf) | "^" -> `String_concat, (pos1 lexbuf), (pos2 lexbuf) | "<!>" -> `Comment, (pos1 lexbuf), (pos2 lexbuf) | "<?>" -> `PI, (pos1 lexbuf), (pos2 lexbuf) | "<^>" -> `Super, (pos1 lexbuf), (pos2 lexbuf) | "<*>" -> `Data, (pos1 lexbuf), (pos2 lexbuf) | "(:" -> let p1 = pos1 lexbuf in let text = String.concat "" (scan_antiquot p1 line line_start lexbuf) in let p2 = pos2 lexbuf in (`Anti text, p1, p2) | "(*" -> let p1 = pos1 lexbuf in skip_comment p1 line line_start lexbuf; scan line line_start lexbuf | "*)" -> `End_ocaml_comment, (pos1 lexbuf), (pos2 lexbuf) | '"' [^ '"']* '"' -> let n = lexeme_length lexbuf in let s = utf8_sub_lexeme lexbuf 1 (n-2) in `Literal s, (pos1 lexbuf), (pos2 lexbuf) | '"' -> raise(Lex_error("Unterminated string literal", (pos1 lexbuf))) | name -> `Name (utf8_lexeme lexbuf), (pos1 lexbuf), (pos2 lexbuf) | "\r\n" | "\r" | "\n" -> IFDEF OCAML_NEW_LOC THEN incr line; line_start := lexeme_end lexbuf ELSE () END; scan line line_start lexbuf | [ ' ' '\t' ]+ -> scan line line_start lexbuf | _ -> `Other, (pos1 lexbuf), (pos2 lexbuf) (* May occur in comments *) | eof -> `EOF, (pos1 lexbuf), (pos2 lexbuf) and scan_antiquot start_pos line line_start = lexer ":)" -> [] | [^ ':' '\r' '\n' ]+ -> let s = utf8_lexeme lexbuf in s :: scan_antiquot start_pos line line_start lexbuf | ":" -> ":" :: scan_antiquot start_pos line line_start lexbuf | "\r\n" | "\r" | "\n" -> IFDEF OCAML_NEW_LOC THEN incr line; line_start := lexeme_end lexbuf ELSE () END; let s = utf8_lexeme lexbuf in s :: scan_antiquot start_pos line line_start lexbuf | eof -> raise(Lex_error("Unterminated antiquotation", start_pos)) and skip_comment start_pos line line_start lexbuf = match scan line line_start lexbuf with (`EOF,_ ,_) -> raise(Lex_error("Unterminated comment", start_pos)) | (`End_ocaml_comment,_,_) -> () | _ -> skip_comment start_pos line line_start lexbuf ;; type charset_decl = { source_enc : Netconversion.encoding; rep_enc : Netconversion.encoding; } ;; let default_decl = { source_enc = `Enc_iso88591; rep_enc = `Enc_iso88591 } ;; let current_decl = ref default_decl ;; let reset_decl() = current_decl := default_decl ;; let current_file = ref "" ;; let check_file() = if !Pcaml.input_file <> !current_file then ( reset_decl(); current_file := !Pcaml.input_file ) ;; let unichar = Netconversion.ustring_of_uchar `Enc_utf8;; let scan_entities line line_start offset = lexer "&#" int ";" -> let l = lexeme_length lexbuf in let s = utf8_sub_lexeme lexbuf 2 (l-3) in unichar (int_of_string s) | "<" -> "<" | ">" -> ">" | "'" -> "'" | "&" -> "&" | """ -> "\"" | "&" -> raise(Lex_error("'&' must be written as '&'", (!line, !line_start, lexeme_start lexbuf + offset))) | "\r\n" | "\r" | "\n" -> IFDEF OCAML_NEW_LOC THEN incr line; line_start := lexeme_end lexbuf + offset ELSE () END; utf8_lexeme lexbuf | _ -> utf8_lexeme lexbuf | eof -> raise End_of_file ;; let convert_entities (line,line_start,pos) offset s = let lexbuf = from_ulb_lexbuf (Netulex.ULB.from_string `Enc_utf8 s) in let rline = ref line in let rline_start = ref line_start in let b = Buffer.create 200 in try while true do Buffer.add_string b (scan_entities rline rline_start (pos+offset) lexbuf) done; assert false; with End_of_file -> Buffer.contents b ;; let scan_string s : (token * pos * pos) Stream.t = let src_enc = !current_decl.source_enc in let line = ref 1 in let line_start = ref 0 in let lexbuf = from_ulb_lexbuf (Netulex.ULB.from_string src_enc s) in Stream.from (fun count -> try ( match scan line line_start lexbuf with (`Name s, p1, p2) -> Some (`Name (convert_entities p1 0 s), p1, p2) | (`Literal s, p1, p2) -> Some (`Literal (convert_entities p1 1 s), p1, p2) | other -> Some other ) with Error -> raise(Lex_error("Lexical error", (!line, !line_start, lexeme_start lexbuf))) ) ;; (**********************************************************************) (* Stream Parser *) exception Syntax_error of pos * pos;; type ast_node0 = [ `Element of (ast_string * ast_attr list * ast_node_list) | `Data of ast_string | `Comment of ast_string | `PI of (ast_string * ast_string) | `Super of ast_node_list | `Meta of (string * ast_attr list * ast_node) | `Ident of string | `Anti of string (* The following are the same as ast_string0. They are interpreted * as data node: *) | `Literal of string | `Concat of ast_node list ] and ast_node = (ast_node0 * pos * pos) and ast_node_list0 = [ `Nodes of ast_node list | `Concat of ast_node_list list | `Ident of string | `Anti of string ] and ast_node_list = (ast_node_list0 * pos * pos) and ast_string0 = [ `Literal of string | `Concat of ast_string list | `Ident of string | `Anti of string ] and ast_string = (ast_string0 * pos * pos) and ast_attr0 = [ `Attr of ast_string * ast_string | `Anti of string ] and ast_attr = (ast_attr0 * pos * pos) and ast_any_node = [ `Node of ast_node | `Nodelist of ast_node_list ] ;; (* Note that the syntax allows that strings are interpreted as data * nodes, e.g. * * <a>[ "x" ]: Element "a" with data sub node "x" * * ==> Every string can also be interpreted as data node. * * At some locations we do not allow that because _only_ strings * are reasonable. Then string_restr=true ("string restriction"). * E.g. * * <a x="x"/> is ok, but * <a x=<b/>/> is nonsense * * Not every expression composed of strings and nodes is sound. E.g. * "x" ^ "y" is ok (string concatenation), but "x" ^ <a/> is not. * The latter is accepted by the grammar, but must be rejected by * an additional type check. *) let ensure_at_end s = match Stream.peek s with Some(`EOF, _, _) -> () | _ -> raise Stream.Failure ;; let last_pos s = match Stream.peek s with Some(_, _, pos) -> pos | _ -> assert false ;; let check_meta name atts = match name with "scope" -> () | "autoscope" -> if atts <> [] then raise Stream.Failure; | "emptyscope" -> if atts <> [] then raise Stream.Failure; | _ -> raise Stream.Failure ;; let rec parse_any_expr (s : (token * pos * pos) Stream.t) : ast_any_node = match Stream.peek s with Some(`Lbracket, _, _) -> let v = `Nodelist(parse_nodelist_expr s) in ensure_at_end s; v | Some _ -> let v = `Node(parse_expr false s) in ensure_at_end s; v | None -> raise Stream.Failure and parse_expr string_restr : (token * pos * pos) Stream.t -> ast_node = parser [< (nl1, p1, p2) as nl = parse_factor string_restr; c1 = parse_cont string_restr; >] -> match c1 with None -> nl | Some(`Concat l, p1', p2') -> (`Concat(nl :: l), p1, p2') | Some(other, p1', p2') -> (`Concat([nl; (other, p1', p2')]), p1, p2') and parse_cont string_restr : (token * pos * pos) Stream.t -> ast_node option = parser [< '(`String_concat, p1, p2); e = parse_expr string_restr; >] -> Some e | [< >] -> None and parse_factor string_restr : (token * pos * pos) Stream.t -> ast_node = parser [< '(`Langle, p1, p2) when not string_restr; name = parse_element_name; attrs, flag, p' = parse_attrs; (subnodes0, p1', p2') as subnodes = if flag then (fun _ -> `Nodes [], p', p') else parse_nodelist_expr; >] -> ( `Element(name, attrs, subnodes), p1, p2' ) | [< '(`Langle_colon, p1, p2) when not string_restr; '(`Name name, _, _); attrs, flag, p' = parse_attrs; (subnode0, p1', p2') as subnode = parse_expr string_restr; >] -> ( check_meta name attrs; `Meta(name, attrs, subnode), p1, p2' ) | [< '(`Comment, p1, p2) when not string_restr; (contents0, p1', p2') as contents = parse_string_expr >] -> ( `Comment contents, p1, p2' ) | [< '(`PI, p1, p2) when not string_restr; (contents0, p1', p2') as contents = parse_string_expr; (contents0', p1'', p2'') as contents' = parse_string_expr >] -> ( `PI(contents,contents'), p1, p2'' ) | [< '(`Super, p1, p2) when not string_restr; (subnodes0, p1', p2') as subnodes = parse_nodelist_expr; >] -> ( `Super subnodes, p1, p2' ) | [< '(`Data, p1, p2) when not string_restr; (contents0, p1', p2') as contents = parse_string_expr >] -> ( `Data contents, p1, p2' ) | [< '(`Lparen, p1, p2); (inner, p1', p2') = parse_expr string_restr; '(`Rparen, p1'', p2'') >] -> (inner, p1, p2'') | [< '(`Anti text, p1, p2) >] -> ( `Anti text, p1, p2 ) | [< '(`Literal s, p1, p2) >] -> (`Literal s, p1, p2) | [< '(`Name n, p1, p2) >] -> ( `Ident n, p1, p2) and parse_element_name : (token * pos * pos) Stream.t -> ast_string = parser [< '(`Name n, p1, p2) >] -> ( `Literal n, p1, p2 ) | [< '(`Anti text, p1, p2) >] -> ( `Anti text, p1, p2 ) | [< '(`Lparen, p1, p2); s = parse_string_expr; '(`Rparen, p1', p2') >] -> let (str, _, _) = s in (str, p1, p2') and parse_attrs : (token * pos * pos) Stream.t -> ast_attr list * bool * pos = parser [< '(`Name n, p1, p2); '(`Equal, p1', p2'); (s, p1'', p2'') = parse_string_expr; (rest, flag, p) = parse_attrs >] -> let name_str = (`Literal n, p1, p2) in let val_str = (s, p1'', p2'') in ( `Attr(name_str, val_str), p1, p2'' ) :: rest, flag, p | [< '(`Lparen, p1, p2); (s1, p1', p2') = parse_string_expr; '(`Rparen, p1'', p2''); '(`Equal, _, _); (s2, p1''', p2''') = parse_string_expr; (rest, flag, p) = parse_attrs >] -> let name_str = (s1, p1', p2') in let val_str = (s2, p1''', p2''') in ( `Attr(name_str, val_str), p1, p2''' ) :: rest, flag, p | [< '(`Anti text, p1, p2); (rest, flag, p) = parse_attrs >] -> ( `Anti text, p1, p2 ) :: rest, flag, p | [< '(`Rangle, p1, p2) >] -> [], false, p2 | [< '(`Rangle_empty, p1, p2) >] -> [], true, p2 and parse_nodelist_expr : (token * pos * pos) Stream.t -> ast_node_list = parser [< (nl1, p1, p2) as nl = parse_nodelist_factor; c1 = parse_nodelist_cont; >] -> match c1 with None -> nl | Some(`Concat l, p1', p2') -> (`Concat(nl :: l), p1, p2') | Some(other, p1', p2') -> (`Concat([nl; (other, p1', p2')]), p1, p2') and parse_nodelist_cont : (token * pos * pos) Stream.t -> ast_node_list option = parser [< '(`List_concat, p1, p2); e = parse_nodelist_expr; >] -> Some e | [< >] -> None and parse_nodelist_factor : (token * pos * pos) Stream.t -> ast_node_list = parser [< '(`Lbracket, p1, p2); (l, p1', p2') = parse_bracket_expr >] -> ( `Nodes l, p1, p2' ) | [< '(`Lparen, p1, p2); (e, p1', p2') = parse_nodelist_expr; '(`Rparen, p1'', p2'') >] -> ( e, p1, p2'' ) | [< '(`Anti text, p1, p2) >] -> ( `Anti text, p1, p2 ) | [< '(`Name n, p1, p2) >] -> ( `Ident n, p1, p2) | [< (n, p1, p2) = parse_expr false >] -> `Nodes [n, p1, p2], p1, p2 and parse_bracket_expr : (token * pos * pos) Stream.t -> ast_node list * pos * pos = parser [< '(`Rbracket, p1, p2) >] -> ( [], p1, p2 ) | [< (n, p1, p2) = parse_expr false; (b, p1', p2') = parse_bracket_expr >] -> ((n, p1, p2) :: b, p1, p2') and parse_string_expr (s : (token * pos * pos) Stream.t) : ast_string = let rec coerce (e : ast_node) : ast_string = match e with (`Concat l, p1, p2) -> `Concat (List.map coerce l), p1, p2 | `Anti _, _, _ as e' -> e' | `Ident _, _, _ as e' -> e' | `Literal _, _, _ as e' -> e' | _ -> assert false in coerce(parse_expr true s) ;; let rec parse_charset_decl0 : (token * pos * pos) Stream.t -> charset_decl = parser [< '(`Name n, p1, p2); '(`Equal, _, _); '(`Literal s, p1', p2'); cur = parse_charset_decl0 >] -> ( match n with "source" -> let e = Netconversion.encoding_of_string s in { cur with source_enc = e } | "representation" -> let e = Netconversion.encoding_of_string s in { cur with rep_enc = e } | _ -> raise Stream.Failure ) | [< >] -> (* default: *) !current_decl ;; let parse_charset_decl s = let v = parse_charset_decl0 s in ensure_at_end s; v ;; let call_parser f s = try f s with Stream.Failure | Stream.Error _ -> ( match Stream.peek s with Some(_, p1, p2) -> raise(Syntax_error(p1,p2)) | None -> assert false (* must not happen *) ) ;; (**********************************************************************) (* Type checker *) (* Typing checking also transforms the ast such that * `Literal and `Concat are never used in the context of a * node expression. *) exception Typing_error of (string * pos * pos);; let rec check_any_expr : ast_any_node -> ast_any_node = function `Node n -> `Node(check_node_expr n) | `Nodelist l -> `Nodelist(check_nodelist_expr l) and check_nodelist_expr : ast_node_list -> ast_node_list = function (`Nodes l,p1,p2) -> (`Nodes(List.map check_node_expr l),p1,p2) | (`Concat l,p1,p2) -> (`Concat(List.map check_nodelist_expr l),p1,p2) | ((`Ident _|`Anti _),_,_) as nl -> nl and check_node_expr : ast_node -> ast_node = function ((`Data _ | `Comment _ | `PI _ | `Ident _ | `Anti _),_,_) as n -> n | (`Element(name,attrs,children),p1,p2) -> (`Element(name,attrs,check_nodelist_expr children),p1,p2) | (`Super children,p1,p2) -> (`Super(check_nodelist_expr children),p1,p2) | (`Literal s,p1,p2) -> (`Data(`Literal s,p1,p2),p1,p2) | (`Concat l,p1,p2) -> (`Data(`Concat(List.map check_node_expr_as_string l),p1,p2),p1,p2) | (`Meta(n,a,child),p1,p2) -> (`Meta(n,a,check_node_expr child),p1,p2) and check_node_expr_as_string : ast_node -> ast_string = function ((`Data _ | `Comment _ | `PI _ | `Element _ | `Super _),p1,p2) -> raise(Typing_error("Nodes cannot be used as strings", p1, p2)) | ((`Ident _| `Anti _ | `Literal _),p1,p2) as n -> n | (`Concat l,p1,p2) -> (`Concat(List.map check_node_expr_as_string l),p1,p2) | (`Meta(n,a,child),p1,p2) -> raise(Typing_error("Meta node not allowed in string context", p1, p2)) ;; (**********************************************************************) (* Code generator for tree expressions *) let mkloc ((_p1_line,_p1_line_start,p1_pos)) ((_p2_line,_p2_line_start,p2_pos)) = (* Differs in O'Caml 3.07 and 3.08 *) IFDEF OCAML_NEW_LOC THEN let l1 = { Lexing.pos_fname = ""; Lexing.pos_lnum = _p1_line; Lexing.pos_bol = _p1_line_start; Lexing.pos_cnum = p1_pos } in let l2 = { Lexing.pos_fname = ""; Lexing.pos_lnum = _p2_line; Lexing.pos_bol = _p2_line_start; Lexing.pos_cnum = p2_pos } in (l1,l2) ELSE (p1_pos,p2_pos) END ;; let raise_at (p1:pos) (p2:pos) exn = (* let (p1_l,p1_s,p1_p) = p1 in Printf.eprintf "Raise_at %d %d %d\n" p1_l p1_s p1_p; let (p2_l,p2_s,p2_p) = p2 in Printf.eprintf "Raise_at %d %d %d\n" p2_l p2_s p2_p; *) Stdpp.raise_with_loc (mkloc p1 p2) exn ;; let catch_errors f = try f() with Lex_error(msg, p) -> let (l,s,c) = p in raise_at p (l,s,c+1) (Failure ("pxp-pp: Lexical error: " ^ msg)) | Syntax_error(p1,p2) -> raise_at p1 p2 (Failure "pxp-pp: Syntax error") | Typing_error(msg,p1,p2) -> raise_at p1 p2 (Failure("pxp-pp: Typing error: " ^ msg)) ;; let generate_list loc el = List.fold_right (fun x l -> <:expr< [$x$ :: $l$] >>) el <:expr< [] >> ;; let generate_ann_list loc el = List.fold_right (fun (ann,x) l -> match ann with `Single -> <:expr< [$x$ :: $l$] >> | `List -> <:expr< $x$ @ $l$ >>) el <:expr< [] >> ;; let generate_ident loc name = (* TODO: "." separation *) (* TODO: Convert back to latin 1 *) <:expr< $lid:name$ >> ;; let expand_tree_expr (valcheck:bool) (s:string) : MLast.expr = (* valcheck: Whether to do DTD validation *) check_file(); let valcheck_expr = let loc = mkloc (0,0,0) (0,0,0) in if valcheck then <:expr< True >> else <:expr< False >> in let to_rep s = Netconversion.convert ~in_enc:`Enc_utf8 ~out_enc:(!current_decl.rep_enc) s in let to_src s = Netconversion.convert ~in_enc:`Enc_utf8 ~out_enc:(!current_decl.source_enc) s in let rec generate_for_any_expr : ast_any_node -> MLast.expr = function `Node n -> generate_for_node_expr false n | `Nodelist nl -> generate_for_nodelist_expr false nl and generate_for_node_expr nsmode : ast_node -> MLast.expr = ( (* nsmode: Whether there is a variable [scope] in the environment *) function (`Element(name,attrs,subnodes),p1,p2) -> let loc = mkloc p1 p2 in let name_expr = generate_for_string_expr name in let attrs_expr_l = List.map generate_for_attr_expr attrs in let attrs_expr = generate_ann_list loc attrs_expr_l in let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in let el_only_expr = <:expr< Pxp_document.create_element_node ~valcheck:$valcheck_expr$ spec dtd $name_expr$ $attrs_expr$ >> in let do_validation = if valcheck then <:expr< node#validate_contents() >> else <:expr< () >> in let do_set_scope = if nsmode then <:expr< node#set_namespace_scope scope >> else <:expr< () >> in <:expr< let node = $el_only_expr$ in do { node # set_nodes $subnodes_expr$; $do_set_scope$; $do_validation$; node } >> | (`Data text,p1,p2) -> let text_expr = generate_for_string_expr text in let loc = mkloc p1 p2 in <:expr< Pxp_document.create_data_node spec dtd $text_expr$ >> | (`Comment text,p1,p2) -> let text_expr = generate_for_string_expr text in let loc = mkloc p1 p2 in <:expr< Pxp_document.create_comment_node spec dtd $text_expr$ >> | (`PI(target,value),p1,p2) -> let target_expr = generate_for_string_expr target in let value_expr = generate_for_string_expr value in let loc = mkloc p1 p2 in <:expr< Pxp_document.create_pinstr_node spec dtd (new Pxp_dtd.proc_instruction $target_expr$ $value_expr$ dtd#encoding) >> | (`Super subnodes,p1,p2) -> let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in let loc = mkloc p1 p2 in <:expr< let node = Pxp_document.create_super_root_node spec dtd in do { node # set_nodes $subnodes_expr$; node } >> | (`Meta(name,attrs,subnode),p1,p2) -> let loc = mkloc p1 p2 in ( match name with "scope" -> generate_scope loc attrs subnode | "autoscope" -> generate_autoscope loc subnode | "emptyscope" -> generate_emptyscope loc subnode | _ -> assert false (* already caught above *) ) | (`Ident name,p1,p2) -> let loc = mkloc p1 p2 in generate_ident loc (to_src name) | (`Anti text,p1,p2) -> Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text)) | _ -> (* `Literal and `Concat are impossible after type check *) assert false ) and generate_for_nodelist_expr nsmode : ast_node_list -> MLast.expr = ( function (`Nodes l, p1, p2) -> let loc = mkloc p1 p2 in let l' = List.map (generate_for_node_expr nsmode) l in generate_list loc l' | (`Concat l, p1, p2) -> let loc = mkloc p1 p2 in let l' = List.map (generate_for_nodelist_expr nsmode) l in let l'' = generate_list loc l' in <:expr< List.concat $l''$ >> | (`Ident name, p1, p2) -> let loc = mkloc p1 p2 in generate_ident loc (to_src name) | (`Anti text, p1, p2) -> Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text)) ) and generate_for_attr_expr : ast_attr -> [`Single|`List] * MLast.expr = ( function (`Attr(n,v), p1, p2) -> let loc = mkloc p1 p2 in let n_expr = generate_for_string_expr n in let v_expr = generate_for_string_expr v in `Single, <:expr< ($n_expr$, $v_expr$) >> | (`Anti text, p1, p2) -> `List, Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text)) ) and generate_scope loc attrs subnode : MLast.expr = ( let subexpr = generate_for_node_expr true subnode in if attrs = [] then subexpr else let decl_expr_l = List.map generate_for_attr_expr attrs in let decl_expr = generate_ann_list loc decl_expr_l in <:expr< let scope = new Pxp_dtd.namespace_scope_impl (dtd # namespace_manager) (Some scope) $decl_expr$ in $subexpr$>> ) and generate_autoscope loc subnode : MLast.expr = ( let subexpr = generate_for_node_expr true subnode in <:expr< let scope = ( let mng = dtd # namespace_manager in new Pxp_dtd.namespace_scope_impl mng None mng#as_declaration ) in $subexpr$ >> ) and generate_emptyscope loc subnode : MLast.expr = ( let subexpr = generate_for_node_expr true subnode in <:expr< let scope = ( let mng = dtd # namespace_manager in new Pxp_dtd.namespace_scope_impl mng None [] ) in $subexpr$ >> ) and generate_for_string_expr : ast_string -> MLast.expr = ( function (`Literal s, p1, p2) -> let loc = mkloc p1 p2 in let s' = to_rep s in <:expr< $str:s'$ >> | (`Concat l, p1, p2) -> let loc = mkloc p1 p2 in let l' = List.map generate_for_string_expr l in let l'' = generate_list loc l' in <:expr< String.concat "" $l''$ >> | (`Ident name, p1, p2) -> let loc = mkloc p1 p2 in generate_ident loc (to_src name) | (`Anti text, p1, p2) -> Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text)) ) in catch_errors (fun () -> let stream = scan_string s in let ast = call_parser parse_any_expr stream in let ast' = check_any_expr ast in let ocaml_expr = generate_for_any_expr ast' in let loc = mkloc (1,0,0) (last_pos stream) in <:expr< $anti:ocaml_expr$ >> ) ;; (**********************************************************************) (* Code generator for event streams *) type ann = [`Single|`Tree|`Forest];; let generate_event_generator (generate_tree : (ann * MLast.expr) list -> MLast.expr) (generate_forest : (ann * MLast.expr) list -> MLast.expr) (s:string) : MLast.expr = (* Generates code to generate events. The input arguments * [generate_tree] and [generate_forest] process an intermediate * representation, the so-called annotated expression lists * (type (ann * MLast.expr) list), and return the final code. * * Kinds of annotations: * - `Single: The expression is a single event, i.e. an O'Caml value * of type [event]. * - `Tree: The expression represents a list of events corresponding * to a node tree. It is left * open how such lists are represented. The expression is either * an O'Caml identifier or a subexpression from an antiquotation. * - `Forest: The expression represents a list of events corresponding * to a list of node trees. * * The argument [generate_tree] is a function that generates the * final code for an annotated list of expressions. It can be expected * that the input list for [generate_tree] represents a node tree. * * The argument [generate_forest] does the same for an annotated * list of expressions that represents a list of node trees. *) let to_rep s = Netconversion.convert ~in_enc:`Enc_utf8 ~out_enc:(!current_decl.rep_enc) s in let to_src s = Netconversion.convert ~in_enc:`Enc_utf8 ~out_enc:(!current_decl.source_enc) s in let rec generate_for_any_expr loc : ast_any_node -> MLast.expr = function `Node n -> let e = generate_tree (generate_for_node_expr false n) in <:expr< let _eid = Pxp_dtd.Entity.create_entity_id() in $e$ >> | `Nodelist nl -> let e = generate_forest (generate_for_nodelist_expr false nl) in <:expr< let _eid = Pxp_dtd.Entity.create_entity_id() in $e$ >> and generate_for_node_expr nsmode : ast_node -> (ann * MLast.expr) list = ( (* nsmode: Whether there is a variable [scope] in the environment *) function (`Element(name,attrs,subnodes),p1,p2) -> let loc = mkloc p1 p2 in let name_expr = generate_for_string_expr name in let attrs_expr_l = List.map generate_for_attr_expr attrs in let attrs_expr = generate_ann_list loc attrs_expr_l in let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in let scope_opt_expr = if nsmode then <:expr< Some scope >> else <:expr< None >> in let start_tag = <:expr< Pxp_types.E_start_tag($name_expr$, $attrs_expr$, $scope_opt_expr$, _eid) >> in let end_tag = <:expr< Pxp_types.E_end_tag($name_expr$,_eid) >> in [`Single, start_tag] @ subnodes_expr @ [`Single, end_tag] | (`Data text,p1,p2) -> let text_expr = generate_for_string_expr text in let loc = mkloc p1 p2 in [ `Single, <:expr< Pxp_types.E_char_data($text_expr$) >> ] | (`Comment text,p1,p2) -> let text_expr = generate_for_string_expr text in let loc = mkloc p1 p2 in [ `Single, <:expr< Pxp_types.E_comment($text_expr$) >> ] | (`PI(target,value),p1,p2) -> let target_expr = generate_for_string_expr target in let value_expr = generate_for_string_expr value in let loc = mkloc p1 p2 in [ `Single, <:expr< Pxp_types.E_pinstr($target_expr$,$value_expr$,_eid) >> ] | (`Super subnodes,p1,p2) -> let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in let loc = mkloc p1 p2 in ( [ `Single, <:expr< Pxp_types.E_start_super >> ] @ subnodes_expr @ [ `Single, <:expr< Pxp_types.E_end_super >> ] ) | (`Meta(name,attrs,subnode),p1,p2) -> let loc = mkloc p1 p2 in ( match name with "scope" -> generate_scope loc attrs subnode | "autoscope" -> generate_autoscope loc subnode | "emptyscope" -> generate_emptyscope loc subnode | _ -> assert false (* already caught above *) ) | (`Ident name,p1,p2) -> let loc = mkloc p1 p2 in [ `Tree, (generate_ident loc (to_src name)) ] | (`Anti text,p1,p2) -> let expr = Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text)) in [ `Tree, expr ] | _ -> (* `Literal and `Concat are impossible after type check *) assert false ) and generate_for_nodelist_expr nsmode : ast_node_list -> (ann * MLast.expr) list = ( function (`Nodes l, p1, p2) -> (* let loc = mkloc p1 p2 in *) let l' = List.map (generate_for_node_expr nsmode) l in List.flatten l' | (`Concat l, p1, p2) -> (* let loc = mkloc p1 p2 in *) let l' = List.map (generate_for_nodelist_expr nsmode) l in List.flatten l' | (`Ident name, p1, p2) -> let loc = mkloc p1 p2 in [ `Forest, (generate_ident loc (to_src name)) ] | (`Anti text, p1, p2) -> let expr = Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text)) in [ `Forest, expr ] ) and generate_for_attr_expr : ast_attr -> [`Single|`List] * MLast.expr = ( function (`Attr(n,v), p1, p2) -> let loc = mkloc p1 p2 in let n_expr = generate_for_string_expr n in let v_expr = generate_for_string_expr v in `Single, <:expr< ($n_expr$, $v_expr$) >> | (`Anti text, p1, p2) -> `List, Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text)) ) and generate_scope loc attrs subnode : (ann * MLast.expr) list = ( let subexpr = generate_for_node_expr true subnode in if attrs = [] then subexpr else let decl_expr_l = List.map generate_for_attr_expr attrs in let decl_expr = generate_ann_list loc decl_expr_l in let old_scope_expr = <:expr< Some scope >> in let scope_expr = <:expr< new Pxp_dtd.namespace_scope_impl (dtd # namespace_manager) $old_scope_expr$ $decl_expr$>> in let compiled_subexpr = generate_tree subexpr in [ `Tree, ( <:expr< let scope = $scope_expr$ in $compiled_subexpr$ >> ) ] ) and generate_autoscope loc subnode : (ann * MLast.expr) list = ( let subexpr = generate_for_node_expr true subnode in let compiled_subexpr = generate_tree subexpr in let scope_expr = <:expr< ( let mng = dtd # namespace_manager in new Pxp_dtd.namespace_scope_impl mng None mng#as_declaration ) >> in [ `Tree, ( <:expr< let scope = $scope_expr$ in $compiled_subexpr$ >> ) ] ) and generate_emptyscope loc subnode : (ann * MLast.expr) list = ( let subexpr = generate_for_node_expr true subnode in let compiled_subexpr = generate_tree subexpr in let scope_expr = <:expr< ( let mng = dtd # namespace_manager in new Pxp_dtd.namespace_scope_impl mng None [] ) >> in [ `Tree, ( <:expr< let scope = $scope_expr$ in $compiled_subexpr$ >> ) ] ) and generate_for_string_expr : ast_string -> MLast.expr = ( function (`Literal s, p1, p2) -> let loc = mkloc p1 p2 in let s' = to_rep s in <:expr< $str:s'$ >> | (`Concat l, p1, p2) -> let loc = mkloc p1 p2 in let l' = List.map generate_for_string_expr l in let l'' = generate_list loc l' in <:expr< String.concat "" $l''$ >> | (`Ident name, p1, p2) -> let loc = mkloc p1 p2 in generate_ident loc (to_src name) | (`Anti text, p1, p2) -> Grammar.Entry.parse Pcaml.expr_eoi (Stream.of_string (to_src text)) ) in catch_errors (fun () -> let stream = scan_string s in let ast = call_parser parse_any_expr stream in let ast' = check_any_expr ast in let loc = mkloc (1,0,0) (last_pos stream) in let expr = generate_for_any_expr loc ast' in <:expr< $anti:expr$ >> ) ;; let expand_evlist_expr s = let loc = mkloc (0,0,0) (0,0,0) in (* ??? *) let rec generate_tree annlist = match annlist with (`Single, e) :: annlist' -> let rest = generate_tree annlist' in <:expr< [$e$ :: $rest$] >> | ((`Tree | `Forest), e) :: annlist' -> let rest = generate_tree annlist' in <:expr< $e$ @ $rest$ >> | [] -> <:expr< [] >> in let generate_forest annlist = generate_tree annlist in check_file(); generate_event_generator generate_tree generate_forest s ;; let expand_evpull_expr s = let loc = mkloc (0,0,0) (0,0,0) in (* ??? *) let generate_tree annlist = let rec generate_match k annlist = match annlist with (`Single, e) :: annlist' -> ( <:patt< $int:string_of_int k$ >>, None, <:expr< let ev = $e$ in do { _state.val := $int:string_of_int(k+1)$; Some ev } >> ) :: generate_match (k+1) annlist' | ((`Tree | `Forest), e) :: annlist' -> ( <:patt< $int:string_of_int k$ >>, None, <:expr< match $e$ _arg with [ None -> do { _state.val := $int:string_of_int(k+1)$; _generator _arg } | Some Pxp_types.E_end_of_stream -> _generator _arg | Some ev -> Some ev ] >> ) :: generate_match (k+1) annlist' | [] -> [ <:patt< $int:string_of_int k$ >>, None, <:expr< None >>; <:patt< _ >>, None, <:expr< assert False >> ] in <:expr< let rec _generator = let _state = ref 0 in fun _arg -> match _state.val with [$list:generate_match 0 annlist$] in _generator >> in let generate_forest annlist = generate_tree annlist in check_file(); generate_event_generator generate_tree generate_forest s ;; (**********************************************************************) (* Other expanders *) let expand_charset_expr s = check_file(); catch_errors (fun () -> let stream = scan_string s in let decl = call_parser parse_charset_decl stream in current_decl := decl; let loc = mkloc (1,0,0) (last_pos stream) in <:expr< () >> ) ;; let expand_text_expr s = check_file(); let loc = mkloc (1,0,0) (1,0,String.length s) in <:expr< $str:s$ >> ;; let na_pat _ = failwith "not available as pattern" ;; Quotation.add "pxp_charset" (Quotation.ExAst(expand_charset_expr, na_pat)) ;; Quotation.add "pxp_tree" (Quotation.ExAst(expand_tree_expr false, na_pat)) ;; Quotation.add "pxp_vtree" (Quotation.ExAst(expand_tree_expr true, na_pat)) ;; Quotation.add "pxp_evlist" (Quotation.ExAst(expand_evlist_expr, na_pat)) ;; Quotation.add "pxp_evpull" (Quotation.ExAst(expand_evpull_expr, na_pat)) ;; Quotation.add "pxp_text" (Quotation.ExAst(expand_text_expr, na_pat)) ;;