(* * <COPYRIGHT> * Copyright 2002 Joachim Schrod Network and Publication Consultance GmbH, Gerd Stolpmann * * <GPL> * This file is part of WDialog. * * WDialog is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * WDialog is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with WDialog; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * </> *) (* $Id: wd_encoding.ml,v 3.6 2004-03-30 22:16:37 stolpmann Exp $ * ---------------------------------------------------------------------- * *) let subst rex f s = let matches = try Pcre.exec_all ~rex s with Not_found -> [||] in let substitutes = Array.map (fun m -> let (pos,pos') = Pcre.get_substring_ofs m 0 in f pos (Pcre.get_substring m 0)) matches in let newlen = ref (String.length s) in Array.iteri (fun i m -> let sb = substitutes.(i) in let p0,p1 = Pcre.get_substring_ofs m 0 in newlen := !newlen - (p1-p0) + String.length sb ) matches; let s' = String.create !newlen in let k = ref 0 in (* position in s *) let k' = ref 0 in (* position in s' *) Array.iteri (fun i m -> let sb = substitutes.(i) in let p0,p1 = Pcre.get_substring_ofs m 0 in String.blit s !k s' !k' (p0 - !k); k' := !k' + (p0 - !k); String.blit sb 0 s' !k' (String.length sb); k' := !k' + (String.length sb); k := p1; ) matches; String.blit s !k s' !k' (String.length s - !k); assert(!k' + String.length s - !k = String.length s'); s' ;; let encode_as_html_re = (Pcre.regexp "[\\<\\>\\&\\\"]");; let encode_as_html = subst encode_as_html_re (fun pos -> function "<" -> "<" | ">" -> ">" | "&" -> "&" | "\"" -> """ | s -> "&#" ^ string_of_int (Char.code s.[0]) ^ ";" (* This case is actually not used! *) ) ;; let repeat n s = (* Repeat the string [s] [n] times *) let l = String.length s in let s' = String.create (l * n) in for k = 0 to n-1 do String.blit s 0 s' (k*l) l done; s' ;; let encode_as_pre_re = Pcre.regexp "[ \r\n\t]";; let encode_as_pre = let last_nl = ref 0 in subst encode_as_pre_re (fun pos -> function " " -> " " | "\r" -> "" (* simply drop *) | "\n" -> last_nl := pos+1; "<br>" | "\t" -> let spaces = 8 - (pos - !last_nl) mod 8 in last_nl := pos+1; repeat spaces " " | _ -> assert false ) ;; let encode_as_para_re = Pcre.regexp "\r?\n(\r?\n)+";; let encode_as_para s = "<p>" ^ subst encode_as_para_re (fun pos _ -> "<p>") s ;; let encode_as_js_string_re = (Pcre.regexp "\\\\|\\'|\\\"|\\<|\\%|\\x00|[\001-\031]|\127");; let encode_as_js_string = subst encode_as_js_string_re (fun pos s -> match s with "\\" -> "\\\\" | "\"" -> "\\\"" | "'" -> "\\'" | _ -> let c = s.[0] in Printf.sprintf "\\x%02x" (Char.code c) ) ;; type pair = Term of string | Pair of pair * pair ;; let encode_as_js_longstring ~(enc : Pxp_types.rep_encoding) s = let utf8_hack = (enc = `Enc_utf8) in (* For UTF8: avoid that multi-byte characters are broken up *) let maximum = 80 in (* maximum line length in bytes *) let rec find_char_boundary k = if utf8_hack && k < String.length s && Char.code s.[k] >= 128 && Char.code s.[k] < 192 then find_char_boundary (k+1) (* The byte at position k is not the first byte of a multi-byte * character. Try the next byte. *) else k in let rec divide startpos endpos = if endpos - startpos <= maximum then begin let t = encode_as_js_string (String.sub s startpos (endpos-startpos)) in if (String.length t) > maximum then begin let m = find_char_boundary ((startpos + endpos) asr 1) in Pair(divide startpos m, divide m endpos) end else Term t end else let m = find_char_boundary ((startpos + endpos) asr 1) in Pair(divide startpos m, divide m endpos) in let rec total_length = function Term t -> String.length t | Pair(p1,p2) -> total_length p1 + total_length p2 + 4 in let rec join s' pos = function Term t -> String.blit t 0 s' !pos (String.length t); pos := !pos + (String.length t) | Pair(p1,p2) -> join s' pos p1; s'.[ !pos ] <- '"'; s'.[ !pos+1 ] <- '+'; s'.[ !pos+2 ] <- '\n'; s'.[ !pos+3 ] <- '"'; pos := !pos + 4; join s' pos p2; in let expr = divide 0 (String.length s) in let l = total_length expr in let s' = String.create l in let pos = ref 0 in join s' pos expr; assert (!pos = l); s' ;; (* ====================================================================== * History: * * $Log: wd_encoding.ml,v $ * Revision 3.6 2004-03-30 22:16:37 stolpmann * Bugfix in encode_as_pre (TAB expansion was wrong) * * Revision 3.5 2003/03/21 12:50:31 stolpmann * Fix: encode_as_js_longstring can cope with UTF8-encoded strings * * Revision 3.4 2003/03/08 17:46:48 stolpmann * Fixed bug with zero byte in a Pcre pattern * * Revision 3.3 2003/02/21 21:21:43 stolpmann * js encoding: Do not escape characters >= 128 because we * do not know the character set. * * Revision 3.2 2002/02/14 16:15:21 stolpmann * Added copyright notice. * * Revision 3.1 2002/02/12 20:29:18 stolpmann * Initial release at sourceforge. * * Revision 2.2 2002/01/30 15:12:49 gerd * HTML encoding: no longer generates entities for characters * >= 128. The advantage is that the function now works for both * `Enc_iso88591 and `Enc_utf8. * * Revision 2.1 2002/01/28 02:12:36 gerd * Initial revision. * * *)