Plasma GitLab Archive
Projects Blog Knowledge

(*
 * <COPYRIGHT>
 * Copyright 2002 Joachim Schrod Network and Publication Consultance GmbH, Gerd Stolpmann
 *
 * <GPL>
 * This file is part of WDialog.
 *
 * WDialog is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * WDialog is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WDialog; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * </>
 *)

(* $Id: wd_templrep.ml,v 3.6 2005-06-11 14:24:14 stolpmann Exp $
 * ----------------------------------------------------------------------
 *
 *)


open Pxp_types
open Pxp_document
open Pxp_dtd

open Wd_types

module D = Wd_dictionary

(* The data type of a tree with placeholders (parameters). The placeholders
 * may occur in data nodes, or in attribute values.
 *)

type expr =
    Expr_var of string
  | Expr_strconst of string
  | Expr_apply of (string * expr list)
  | Expr_param of (string * string list)

type 'ext tree =
    Element of 'ext tree_element    (* <x> ... </x> *)
  | Data of 'ext tree_data          (* arbitrary text *)
  | Param of (string * string list) (* ${paramname/enc1/enc2/...} *)
  | Expr of (expr * string * string list * 'ext node * (string*int*int))  
      (* for $[expr/enc1/enc2/...]: 
       *   (expr, expr_as_string, enc_list, parent_node, parent_node_pos) 
       *)
constraint 'ext = 'ext node #extension

and 'ext tree_element =
    { el_name : string;           (* The name of the element *)
      el_attributes : (string * attspec list) list;
        (* (name, value) list. The values are sequences of normal text and
	 * parameters.
	 * E.g. att="a $x y" is repesented as
	 *      [ "att", [ Att_verb "a "; Att_param "x"; Att_verb " y" ]]
	 *
	 * Note:
	 * Attributes with an empty string as value are represented as
	 * [ Att_verb "" ]. Attributes with implied value are represented
	 * as [].
	 *)
      el_unsplitted_attributes : (string * att_value) list;
        (* The original attributes. *)
      el_have_params_in_atts : bool;
        (* Whether there are parameters in attribute values or not *)
      el_node : 'ext node;
        (* A reference to the original XML node *)
      el_subtrees : 'ext tree list;    (* The trees within this element *)
    }

and attspec =
    Att_verb of string                   (* normal text *)
  | Att_param of (string * string list)  (* ${paramname/enc1/enc2/...} *)
  | Att_expr of (expr * string * string list)     
      (* for $[expr/enc1/enc2/...]:
       *    (expr, expr_as_string, enc_list)
       *)

and 'ext tree_data =
    { dt_value : string;      (* The string *)
      dt_node : 'ext node;    (* Reference to the original node *)
      dt_clonable : bool;
        (* true if dt_value = dt_node # data, i.e. if there is not any
	 * '$' sequence in the data node, and the node need not to be
	 * splitted up.
	 * There is an important optimization basing on this information.
	 *)
    }
;;


type 'ext t =
    { contents : 'ext tree list;
      app : application_type;
      template_name : string;      (* Only used for error messages *)
      mk_uiencode : unit -> 'ext node;
    }
;;


type 'ext param =
    { param_tree : 'ext Pxp_document.node;
      param_text : string Lazy.t;
    }


(**********************************************************************)

(* Parse expressions: *)

(* Syntax:
 * expr_oe -> expr [ "/" oelist ]
 * oelist -> oe-name [ "/" oelist ]
 * expr -> variable
 *       | number
 *       | function-name "(" [arguments] ")" 
 *       | "$" parameter-name
 *       | "${" parameter-name [ "/" oelist ] "}"
 * arguments -> expr [ "," arguments ]
 *)

module L = Wd_brexpr_lex

let parse_expr_oe enc s =
  (* enc: character encoding of [s]
   * s: the string to parse
   * return value: a pair of the scanned [expr] and a string list containing
   *               the scanned output encodings
   *)
  let scan = Wd_brexpr_lex.scan in
  let ulb = Netulex.ULB.from_string_inplace (enc :> Netconversion.encoding) s in
  let buf = Netulex.Ulexing.from_ulb_lexbuf ulb in

  let raise_bad() =
    raise (Formal_user_error("Bad syntax in expression: $[" ^ s ^ "]")) 
  in

  let rec parse_expr tok =
    match tok with
      | L.Token name ->
	  ( let next = scan buf in
	    match next with
	      | L.LParen ->
		  let args = parse_arguments() in
		  let next' = scan buf in
		  (Expr_apply (name, args), next')
		  
	      | _ ->
		  (Expr_var name, next)
	  )
      | L.Number n ->
	  (Expr_strconst (string_of_int n), scan buf)
      | L.Dollar ->
	  ( let next = scan buf in
	    match next with
	      | L.Token name ->
		  (Expr_param(name,[]), scan buf)
	      | L.LBrace ->
		  let next' = scan buf in
		  ( match next' with
		      | L.Token name ->
			  let next'' = scan buf in
			  let (oelist, next''') = 
			    ( match next'' with
				| L.Slash ->
				    parse_out_enc()
				| _ ->
				    ([], next'')
			    ) in
			  if next''' <> L.RBrace then
			    raise_bad();
			  (Expr_param(name,oelist), scan buf)
		      | _ ->
			  raise_bad();
		  )
	      | _ ->
		  raise_bad()
	  )
      | _ ->
	  raise_bad()


  and parse_arguments () =
    let tok = scan buf in
    match tok with
      | L.RParen ->
	  []
      | _ ->
	  parse_arguments_1 tok

  and parse_arguments_1 tok =
    let (arg,next) = parse_expr tok in
    match next with
      | L.RParen ->
	  [arg]
      | L.Comma ->
	  arg :: (parse_arguments_1 (scan buf))
      | _ ->
	  raise_bad()

  and parse_out_enc() =
    let tok = scan buf in
    match tok with
      | L.Token name ->
	  let tok' = scan buf in
	  ( match tok' with
	      | L.Slash ->
		  let r, tok'' = parse_out_enc() in
		  (name :: r, tok'')
	      | _ ->
		  ([name], tok')
	  )
      | _ ->
	  raise_bad()
  in
   
  let tok = scan buf in
  let expr, next = parse_expr tok in
  ( match next with
      | L.Eof ->
	  (expr, [])
      | L.Slash ->
	  let oelist, next' = parse_out_enc() in
	  if next' <> L.Eof then raise_bad();
	  (expr, oelist)
      | _ ->
	  raise_bad()
  )
;;

(**********************************************************************)


(* Used for splitting text containing parameters: *)
let param_re =
    (Pcre.regexp
       "\\$([a-zA-Z0-9_]+|{([^} \t\r\n]+)}|\\[([^\\] \t\r\n]+)\\]|\\$)")
        (* $name or ${name} or $[name] or $$ *)
;;

let param_slash_re = Pcre.regexp "/";;

let pcre_remove_groups l =
  (* Remove the silly Group and NoGroup markers *)
  List.filter
    (function (Pcre.Delim _|Pcre.Text _) -> true | _ -> false)
    l
;;


let get_parameters t =
  let params = ref D.empty in
  let add p =
    params := D.add p () !params in
  let rec scan_tree =
    function
	Element e ->
	  List.iter
	    (fun (_,al) ->
	       List.iter
		 (function
		      Att_param(p,_) ->
			add p
		    | _ ->
			()
		 )
		 al;
	    )
	    e.el_attributes;
	  List.iter scan_tree e.el_subtrees;
      | Param (p,_) ->
	  add p
      | _ ->
	  ()
  in
  List.iter scan_tree t.contents;
  !params
;;


let split_param s =
  (* s is either ${name/enc1/...} or $[name/enc1/...] *)
  let s = String.sub s 2 (String.length s - 3) in
  match Pcre.split ~rex:param_slash_re s with
      [] -> ("", [])              (* Will hopefully be later caught! *)
    | name :: enc -> (name, enc)
;;


let scan_attributes n =
  (* Transform the attribute list of 'n' into a list like el_attributes. *)
  let enc = n # encoding in
  let pcre_postprocess l =
    let l' =
      List.map
	(function
	     Pcre.Delim "$$" ->
	       Att_verb "$"
	   | Pcre.Delim p ->
	       if String.length p >= 2 then begin
		 if p.[1] = '{' then
		   let name,enclist = split_param p in
		   Att_param (name,enclist)
		 else
		   if p.[1] = '[' then
		     let expr,enclist = 
		       parse_expr_oe enc (String.sub p 2 (String.length p - 3))
		     in
		     Att_expr (expr,p,enclist)
		   else
		     Att_param (String.sub p 1 (String.length p - 1), [])
	       end
	       else
		 Att_param (String.sub p 1 (String.length p - 1), [])
	   | Pcre.Text t ->
	       Att_verb t
	   | (Pcre.Group(_,_)|Pcre.NoGroup) ->
	       assert false
	)
	(pcre_remove_groups l)
    in
    (* As [] is reserved for implied values, the empty string/list is recoded.
     *)
    if l' = [] then [ Att_verb "" ] else l'
  in
  let atts = n # attributes in
  let atts' =
    List.map
      (fun (att_name, att_value) ->
	 match att_value with
	     Value s ->
	       att_name, pcre_postprocess(Pcre.full_split
					    ~rex:param_re ~max:(-1) s)
	   | Valuelist sl ->
	       let s = String.concat " " sl in
	       att_name, pcre_postprocess(Pcre.full_split
					    ~rex:param_re ~max:(-1) s)
	   | Implied_value ->
	       att_name, []
      )
      atts
  in
  atts'
;;


let att_param_exists l =
  (* return 'true' iff there is an Att_param or Att_expr in l *)
  List.exists
    (fun (n,v) ->
       List.exists
	 (function
	      Att_param _ 
	    | Att_expr(_,_,_) -> true
	    | Att_verb _ -> false)
	 v
    )
    l
;;


let rec find_parent_element n =
  match n # node_type with
      T_element _ -> n
    | _ -> find_parent_element (n#parent)
;;

let rec prepare_tree_with_parameters ~mk_uiencode name app nl =

  (* We need the 'treelist', the 'tree' type representation of the
   * prepared XML tree 'nl' (=nodelist).
   * The other components 'app' and 'template_name' are already known
   * and can be entered directly.
   *)
  let treelist = List.flatten (List.map scan_tree nl) in
  { contents = treelist;
    app = app;
    template_name = name;
    mk_uiencode = mk_uiencode;
  }

and scan_tree n =
  (* Transform the node tree 'n' into the corresponding 'tree list' value *)

  let enc = n # encoding in

  match n # node_type with
      T_element element_name ->
	(* An element is simply converted to the corresponding representation.
	 * Only the attributes have to be processed, but that is done
	 * separately in the function scan_attributes.
	 *)
	let scanned_atts = scan_attributes n in
	[ Element
	    { el_name = element_name;
	      el_attributes = scanned_atts;
	      el_unsplitted_attributes = n # attributes;
	      el_have_params_in_atts = att_param_exists scanned_atts;
	      el_node = n;
	      el_subtrees =
		List.flatten
		  (List.map scan_tree (n # sub_nodes));
	    }
	]
    | T_data ->
	(* A data node may contain parameters. So we must apply a regexp
	 * on the data string to split the node up.
	 *)
	let l = Pcre.full_split ~rex:param_re ~max:(-1) (n # data) in
	let l' =
	  List.map
	    (function
		 Pcre.Delim "$$" ->
		   Data
		     { dt_value = "$";
		       dt_node = n;
		       dt_clonable = false;
		     }
	       | Pcre.Delim p ->
		   if String.length p >= 2 then begin
		     if p.[1] = '{' then
		       let (name,enclist) = split_param p in
			 Param(name,enclist)
		     else
		       if p.[1] = '[' then
			 let expr,enclist = 
			   parse_expr_oe 
			     enc (String.sub p 2 (String.length p - 3)) in
			 let pos =
			   try (find_parent_element n) # position with Not_found -> assert false in
			   Expr(expr,p,enclist,n,pos)
 		       else
		     Param (String.sub p 1 (String.length p - 1), [])
		   end
		   else
		     Param (String.sub p 1 (String.length p - 1), [])
	       | Pcre.Text t ->
		   Data
		     { dt_value = t;
		       dt_node = n;
		       dt_clonable = (List.length l = 1);
		     }
	       | (Pcre.Group(_,_)|Pcre.NoGroup) ->
		   assert false
	    )
	    (pcre_remove_groups l)
	in
	l'
    | _ ->
	assert false
;;


let instantiate ?(eval_expr = fun _ -> assert false) r params container =

  let dtd = r.app#dtd in

  let rec lookup key l =
    (* Look up the parameter 'key' in the parameter dict list 'l', or
     * raise Not_found.
     *)
    match l with
	[] -> raise Not_found
      | x :: l' ->
	  try
	    Wd_dictionary.find key x
	  with
	      Not_found -> lookup key l'
  in

  let rec encode s enclist =
    (* Apply the encodings [enclist] to [s], one after the other *)
    match enclist with
	[] -> s
      | enc :: enclist' ->
	  let f = r.app#output_encoding enc in
	  encode (f s) enclist'
  in

  let transform_to_uiencode n enclist =
    (* Wrap the node tree [n] into <ui:encode enc="ENCLIST">...</ui:encode> *)
    if enclist = [] then
      n
    else
      let n' = r.mk_uiencode() in
      n' # set_attributes [ "enc", Valuelist enclist ];
      n' # set_nodes [n];
      n'
  in

  let lookup_text p =
    (* Return the text string for parameter 'p' or raise a Runtime_error,
     * if 'p' does not exist
     *)
    try
      Lazy.force (lookup p params).param_text
    with
	Not_found ->
	  raise (Instantiation_error
		   (r.template_name ^
		    ": Nothing to instantiate for template parameter `$" ^ p ^ "'"))
  in

  let lookup_tree p =
    (* Return the XML tree for parameter 'p' or raise a Runtime_error,
     * if 'p' does not exist
     *)
    try
      (lookup p params).param_tree
    with
	Not_found ->
	  raise (Instantiation_error
		   (r.template_name ^
		    ": Nothing to instantiate for template parameter `$" ^ p ^ "'"))
  in

  let rec subst_expr_params expr =
    match expr with
	Expr_var _ -> expr
      | Expr_strconst _ -> expr
      | Expr_apply(fn_name, args) ->
	  Expr_apply(fn_name, List.map subst_expr_params args)
      | Expr_param(p_name, enclist) ->
	  let p = encode (lookup_text p_name) enclist in
	  Expr_strconst p
  in

  let eval_expr_prot (entity,line,col) expr expr_as_string =
    try
      eval_expr (subst_expr_params expr)
    with
	(Instantiation_error msg as e) ->
	  if line >= 0 then
	    raise
	      (Instantiation_error
		 ("In entity " ^  entity ^ ", near line " ^ string_of_int line ^ ", position " ^
		  string_of_int col ^ ", within the expression " ^ expr_as_string ^ ": " ^ msg))
	  else
	    raise e
  in

  let rec inst_attributes pos l =
    (* Instantiate the attributes in the list 'l', an el_attributes-like
     * list. Returns the attributes in a form that can be passed as
     * argument to the method create_element.
     *)
    match l with
	[] -> []
      | (n, v) :: l' ->
	  if v = [] then
	    (* Implied value: leave this out *)
	    inst_attributes pos l'
	  else
	    (* Normal value *)
	    let v' =
	      String.concat
		""
		(List.map
		   (function
			Att_verb s  ->
			  s
		      | Att_param(p,enclist) ->
			  encode (lookup_text p) enclist
		      | Att_expr(expr,expr_str,enclist) ->
			  encode (eval_expr_prot pos expr expr_str) enclist
		   )
		   v
		)
	    in
	    (n, v') :: inst_attributes pos l'
  in

  let rec inst tree =
    (* Instantiates the 'tree' by descending recursively into the tree,
     * calling the right instantiation function for every node,
     * and creating a new XML tree with the same structure as 'tree'.
     * The XML tree is returned.
     *)
    match tree with
	Element el ->
	  (* If there are no attributes with parameters, some speedup is
	   * possible.
	   *)
	  if el.el_have_params_in_atts then begin
	    (* The slower but general case. *)
	    let pos = el.el_node # position in
	    let inst_atts = inst_attributes pos el.el_attributes in
	    let c = el.el_node # create_element
		      dtd (T_element el.el_name) inst_atts in
	    c # set_nodes (List.map inst (el.el_subtrees));
	    c
	  end
	  else begin
	    (* The simple and fast case *)
	    let c = el.el_node # orphaned_flat_clone in
	    c # quick_set_attributes (el.el_unsplitted_attributes);
	    c # set_nodes (List.map inst (el.el_subtrees));
	    c
	  end

      | Data dt ->
	  if dt.dt_clonable then
	    (* Optimization: We can simply clone dt_node to get the new
	     * node. This is much faster because:
	     * - The clone and the original share the data string; it is
	     *   not copied
	     * - The 'data_node' class (in Parse) does some optimizations, too.
	     *   Especially the HTML-escaped version of the data string is
	     *   precomputed, and this second string can be shared, too.
	     *)
	    dt.dt_node # orphaned_flat_clone
	  else
	    (* The general case: Create a new data node with contents
	     * dt_value.
	     *)
	    dt.dt_node # create_data dtd dt.dt_value

      | Param (p,enclist) ->
	  let replacement = lookup_tree p in
	  transform_to_uiencode replacement#orphaned_clone enclist

      | Expr (expr,expr_str,enclist,dt_node,pos) ->
	  let value = eval_expr_prot pos expr expr_str in
	  dt_node # create_data dtd (encode value enclist)
  in

  let nodes = List.map inst r.contents in
  container # set_nodes nodes
;;

(* ======================================================================
 * History:
 *
 * $Log: wd_templrep.ml,v $
 * Revision 3.6  2005-06-11 14:24:14  stolpmann
 * Extension of bracket expressions: many new functions.
 * Functions in bracket expressions may now lazily evaluate their arguments.
 * ui:if and ui:ifvar may refer to any functions defined for bracket
 * expressions.
 * New: ui:ifexpr
 * Parsing of bracket expressions is now done with ulex. Private interfaces
 * of PXP are no longer used for this purpose.
 * Serialization has been rewritten, and it is now safe.
 * Fix: In ui:context there may now be macro references at any location.
 * Also documented all these changes.
 *
 * Revision 3.5  2004/08/01 18:31:35  stolpmann
 * 	Updated PXP lexer code to SVN version of PXP.
 * Note: This is a temporary solution, not for release!
 *
 * Revision 3.4  2002/10/20 19:39:17  stolpmann
 * 	New feature: The brackets $[...] can contain expressions,
 * not only variables
 *
 * Revision 3.3  2002/02/26 23:56:35  stolpmann
 * 	Fix: $[var] in attributes is no longer optimized away in
 * some cases
 *
 * Revision 3.2  2002/02/14 16:15:21  stolpmann
 * 	Added copyright notice.
 *
 * Revision 3.1  2002/02/12 20:29:20  stolpmann
 * 	Initial release at sourceforge.
 *
 * Revision 1.8  2002/01/30 15:13:33  gerd
 * 	New: The notations ${name/enc1/...} and $[name/enc1/...] are
 * recognized and handled properly.
 *
 * Revision 1.7  2002/01/27 19:13:15  gerd
 * 	New: [get_parameters]
 *
 * Revision 1.6  2002/01/26 22:38:26  gerd
 * 	Changed the type for passing parameters to templates. It is
 * now syntax_tree dict instead of a simple list. Furthermore, the
 * method [instantiate] no longer returns the new context. The new element
 * ui:context creates the new contexts now.
 *
 * Revision 1.5  2002/01/24 23:34:33  gerd
 * 	The function [instantiate] raises [Instantiation_error] on
 * errors.
 *
 * Revision 1.4  2002/01/14 15:03:24  gerd
 * 	Major change: Typing has been completely revised, and almost
 * every tiny thing has now a new type. Also renamed a lot.
 *
 * Revision 1.3  2001/05/21 12:52:27  gerd
 * 	Added a type constraint due to stricter type checking in
 * O'Caml 3.01
 *
 * Revision 1.2  2000/09/21 15:12:34  gerd
 * 	Updated for O'Caml 3 and PXP
 *
 * Revision 1.1  2000/05/08 10:36:37  gerd
 * 	Initial revision.
 *
 *
 *)

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml