Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: netencoding.ml 1998 2014-08-24 20:41:09Z gerd $
 * ----------------------------------------------------------------------
 *
 *)


module Base64 = struct
  let b64_pattern plus slash =
    [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
       'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z';
       'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
       'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z';
       '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];;


  let rfc_pattern = b64_pattern '+' '/';;
  let url_pattern = b64_pattern '-' '/';;

  let encode_with_options b64 equal s pos len 
                          linelen first_linelen crlf =
  (* encode using "base64".
   * 'b64': The encoding table, created by b64_pattern.
   * 'equal': The character that should be used instead of '=' in the original
   *          encoding scheme. Pass '=' to get the original encoding scheme.
   * s, pos, len, linelen: See the interface description of encode_substring.
   * first_linelen: The length of the first line.
   *
   * Returns: (s,last_linelen) where [s] is the encoded string, and 
   *   [last_linelen] is the length of the last line
   *)
    assert (Array.length b64 = 64);
    if len < 0 || pos < 0 || pos > String.length s || linelen < 0 then
      invalid_arg "Netencoding.Base64.encode";
    if pos + len > String.length s then
      invalid_arg "Netencoding.Base64.encode";

    let linelen = (linelen asr 2) lsl 2 in
    let first_linelen = (first_linelen asr 2) lsl 2 in

    let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
    (* l_t: length of the result without additional line endings *)

    let factor = if crlf then 2 else 1 in
    let l_t' = 
      if linelen < 4 then
	l_t
      else
	if l_t <= first_linelen then 
	  ( if l_t = 0 then 0 else l_t + factor )
	else 
	  let n_lines = ((l_t - first_linelen - 1) / linelen) + 2 in
	  l_t + n_lines * factor
    in
    (* l_t': length of the result with CRLF or LF characters *)
    
    let t = String.make l_t' equal in
    let j = ref 0 in
    let q = ref (linelen - first_linelen) in
    for k = 0 to len / 3 - 1 do
      let p = pos + 3*k in
      (* p >= pos >= 0: this is evident
       * p+2 < pos+len <= String.length s:
       *   Because k <= len/3-1
       *         3*k <= 3*(len/3-1) = len - 3
       *   pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len
       * So it is proved that the following unsafe string accesses always
       * work.
       *)
      let bits = (Char.code (String.unsafe_get s (p))   lsl 16) lor
		 (Char.code (String.unsafe_get s (p+1)) lsl  8) lor
		 (Char.code (String.unsafe_get s (p+2))) in
      (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *)
      assert(!j + 3 < l_t');
      String.unsafe_set t !j     (Array.unsafe_get b64 ( bits lsr 18));
      String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
      String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr  6) land 63));
      String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits         land 63));
      j := !j + 4;
      if linelen > 3 then begin
	q := !q + 4;
	if !q + 4 > linelen then begin
	  (* The next 4 characters won't fit on the current line. So insert
	   * a line ending.
	   *)
	  if crlf then begin
	    t.[ !j ] <- '\013';
	    t.[ !j+1 ] <- '\010';
	    j := !j + 2;
	  end
	  else begin 
	    t.[ !j ] <- '\010';
	    incr j
	  end;
	  q := 0;
	end;
      end;
    done;
    (* padding if needed: *)
    let m = len mod 3 in
    begin
      match m with
	  0 -> ()
	| 1 ->
            let bits = Char.code (s.[pos + len - 1]) in
	    t.[ !j     ] <- b64.( bits lsr 2);
	    t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4);
	    j := !j + 4;
	    q := !q + 4;
	| 2 ->
	    let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor
                       (Char.code (s.[pos + len - 1])) in
	    t.[ !j     ] <- b64.( bits lsr 10);
	    t.[ !j + 1 ] <- b64.((bits lsr  4) land 0x3f);
	    t.[ !j + 2 ] <- b64.((bits lsl  2) land 0x3f);
	    j := !j + 4;
	    q := !q + 4;
	| _ -> assert false
    end;

    (* If required, add another line end: *)

    if linelen > 3 && !q > 0 && len > 0 then begin
      if crlf then begin
	t.[ !j ] <- '\013';
	t.[ !j+1 ] <- '\010';
	j := !j + 2;
      end
      else begin 
	t.[ !j ] <- '\010';
	incr j;
      end;	
    end;

    (t, !q) ;;



  let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
    let l = match len with None -> String.length s - pos | Some x -> x in
    let s,_ = 
      encode_with_options rfc_pattern '=' s pos l linelength linelength crlf in
    s
  ;;


  let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
    let l = match len with None -> String.length s - pos | Some x -> x in
    let s,_ = 
      encode_with_options url_pattern '.' s pos l linelength linelength crlf in
    s
  ;;


  let encoding_pipe_conv ?(linelength = 0) ?(crlf = false) lastlen 
                         incoming incoming_eof outgoing =
    let linelength = (linelength asr 2) lsl 2 in
    let len = Netbuffer.length incoming in
    let len' =
      if incoming_eof then 
	len
      else
	len - (len mod 3)    (* only process a multiple of three characters *)
    in
    let (s,ll) = 
      encode_with_options 
	rfc_pattern '=' (Netbuffer.unsafe_buffer incoming) 0 len' 
	linelength (linelength - !lastlen) crlf
    in
    Netbuffer.delete incoming 0 len';
    (* LF/CRLF: Unless s = "", s ends with a LF/CRLF. This is only right
     * if ll = 0 or at EOF. In the other cases, this additional LF/CRLF
     * must not be added to [outgoing].
     *)
    if linelength < 3  ||  ll=0  ||  s="" then begin
      Netbuffer.add_string outgoing s;
    end
    else begin
      let sl = String.length s in
      assert(s.[sl-1] = '\n');
      let sl' = if crlf then sl-2 else sl-1 in
      Netbuffer.add_sub_string outgoing s 0 sl';
    end;
    lastlen := ll;
    (* Ensure there is a LF/CRLF at the end: *)
    if incoming_eof && linelength > 3 && ll > 0 then
      Netbuffer.add_string outgoing (if crlf then "\r\n" else "\n");

      (* TODO: Can be improved by using Netbuffer.add_inplace
       *)


  class encoding_pipe ?linelength ?crlf () =
    let lastlen = ref 0 in
    Netchannels.pipe ~conv:(encoding_pipe_conv ?linelength ?crlf lastlen) ()
    

  let decode_prefix t pos len p_url p_spaces p_full p_null =
    (* Decodes the prefix of a Base64-encoded string. Returns a triple
     * (s,n,eof) where s is the decoded prefix, and n is the number of 
     * processed characters from t (i.e. the characters pos to pos+n-1 have
     * been processed), and where eof is the boolean flag whether the
     * padding '=' characters at the end of the string have been seen.
     *
     * p_url: accepts strings produced by url_endode
     * p_spaces: accepts spaces in [t] (at the price of reduced speed)
     * p_full: [t] must be a closed encoded string (i.e. no prefix)
     * p_null: [t] must be an encoded null string
     *)

    if len < 0 || pos < 0 || pos > String.length t then
      invalid_arg "Netencoding.Base64.decode";
    if pos + len > String.length t then
      invalid_arg "Netencoding.Base64.decode";

    (* Compute the number of effective characters l_t in 't';
     * pad_chars: number of '=' characters at the end of the string.
     *)
    let l_t, pad_chars =
      if p_spaces then begin
	(* Count all non-whitespace characters: *)
	let c = ref 0 in
	let p = ref 0 in
	for i = pos to pos + len - 1 do
	  match String.unsafe_get t i with
	      (' '|'\t'|'\r'|'\n'|'>') -> ()
	    | ('='|'.') as ch ->
		if ch = '.' && not p_url then
		  invalid_arg "Netencoding.Base64.decode";
		incr c;
		incr p;
		if !p > 2 then
		  invalid_arg "Netencoding.Base64.decode";
		for j = i+1 to pos + len - 1 do
		  match String.unsafe_get t j with
		      (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
		    | _ ->
			(* Only another '=' or spaces allowed *)
			invalid_arg "Netencoding.Base64.decode";
		done
	    | _ -> incr c
	done;
	!c, !p
      end
      else
	len,
	( if len > 0 then (
	    if String.sub t (len - 2) 2 = "==" || 
	       (p_url && String.sub t (len - 2) 2 = "..") then 2
	    else 
	      if String.sub t (len - 1) 1 = "=" || 
		 (p_url && String.sub t (len - 1) 1 = ".") then 1
	      else
		0
	  )
	  else 0 
	)
    in

    if p_null && l_t <> 0 then invalid_arg "Netencoding.Base64.decode";

    (* Compute the number of characters [l_t] that can be processed now
     * (i.e. the effective prefix)
     *)
    let l_t, pad_chars =
      let m = l_t mod 4 in
      if m = 0 then (
	(l_t, pad_chars)         (* a multiple of 4 *)
      ) else (
	if p_full then invalid_arg "Netencoding.Base64.decode";
	(l_t - m, 0)             (* rounded to a multiple of 4 *)
      )
    in

    let l_s = (l_t / 4) * 3 - pad_chars in
    let s = String.create l_s in

    let decode_char c =
      match c with
	  'A' .. 'Z'  -> Char.code(c) - 65     (* 65 = Char.code 'A' *)
	| 'a' .. 'z'  -> Char.code(c) - 71     (* 71 = Char.code 'a' - 26 *)
	| '0' .. '9'  -> Char.code(c) + 4      (* -4 = Char.code '0' - 52 *)
	| '+'         -> 62
	| '-'         -> if not p_url then 
	                   invalid_arg "Netencoding.Base64.decode";
	                 62
	| '/'         -> 63
	| _           -> invalid_arg "Netencoding.Base64.decode";
    in

    (* Decode all but the last quartet: *)

    let cursor = ref pos in
    let rec next_char() = 
      match t.[ !cursor ] with
	  (' '|'\t'|'\r'|'\n'|'>') -> 
	    if p_spaces then (incr cursor; next_char())
	    else invalid_arg "Netencoding.Base64.decode"
	| c ->
	    incr cursor; c
    in

    if p_spaces then begin
      for k = 0 to l_t / 4 - 2 do
	let q = 3*k in
	let c0 = next_char() in
	let c1 = next_char() in
	let c2 = next_char() in
	let c3 = next_char() in
	let n0 = decode_char c0 in
	let n1 = decode_char c1 in
	let n2 = decode_char c2 in
	let n3 = decode_char c3 in
	let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
	let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
	String.unsafe_set s q     (Char.chr x0);
	String.unsafe_set s (q+1) (Char.chr x1);
	String.unsafe_set s (q+2) (Char.chr x2);
      done;
    end
    else begin
      (* Much faster: *)
      for k = 0 to l_t / 4 - 2 do
	let p = pos + 4*k in
	let q = 3*k in
	let c0 = String.unsafe_get t p in
	let c1 = String.unsafe_get t (p + 1) in
	let c2 = String.unsafe_get t (p + 2) in
	let c3 = String.unsafe_get t (p + 3) in
	let n0 = decode_char c0 in
	let n1 = decode_char c1 in
	let n2 = decode_char c2 in
	let n3 = decode_char c3 in
	let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
	let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
	String.unsafe_set s q     (Char.chr x0);
	String.unsafe_set s (q+1) (Char.chr x1);
	String.unsafe_set s (q+2) (Char.chr x2);
      done;
      cursor := pos + l_t - 4;
    end;

    (* Decode the last quartet: *)

    if l_t > 0 then begin
      let q = 3*(l_t / 4 - 1) in
      let c0 = next_char() in
      let c1 = next_char() in
      let c2 = next_char() in
      let c3 = next_char() in

      if (c2 = '=' && c3 = '=') || (p_url && c2 = '.' && c3 = '.') then begin
	let n0 = decode_char c0 in
	let n1 = decode_char c1 in
	let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	s.[ q ]   <- Char.chr x0;
      end
      else
	if (c3 = '=') || (p_url && c3 = '.') then begin
	  let n0 = decode_char c0 in
	  let n1 = decode_char c1 in
	  let n2 = decode_char c2 in
	  let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	  let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
	  s.[ q ]   <- Char.chr x0;
	  s.[ q+1 ] <- Char.chr x1;
	end
	else begin
	  let n0 = decode_char c0 in
	  let n1 = decode_char c1 in
	  let n2 = decode_char c2 in
	  let n3 = decode_char c3 in
	  let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	  let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
	  let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
	  s.[ q ]   <- Char.chr x0;
	  s.[ q+1 ] <- Char.chr x1;
	  s.[ q+2 ] <- Char.chr x2;
	end

    end
    else cursor := 0;

    (s, !cursor - pos, pad_chars > 0) 
  ;;


  let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s =
    let l = match len with None -> String.length s - pos | Some x -> x in
    let (s,_,_) = decode_prefix s pos l url_variant accept_spaces true false in
    s
  ;;


  (* TODO: Use Netbuffer.add_inplace instead of creating an intermediate 
   * string s in [decoding_pipe_conv].
   *)

  let decoding_pipe_conv url_variant accept_spaces padding_seen
                         incoming incoming_eof outgoing =
    let len = Netbuffer.length incoming in
    let t = Netbuffer.unsafe_buffer incoming in
    if !padding_seen then begin
      (* Only accept the null string: *)
      let _,_,_ = decode_prefix t 0 len url_variant accept_spaces false true in
      Netbuffer.clear incoming
    end 
    else begin
      let (s,n,ps) = 
	decode_prefix t 0 len url_variant accept_spaces incoming_eof false in
      padding_seen := ps;
      if incoming_eof then 
	Netbuffer.clear incoming
      else
	Netbuffer.delete incoming 0 n;
      Netbuffer.add_string outgoing s
    end;


  class decoding_pipe ?(url_variant=true) ?(accept_spaces=false) () =
    let padding_seen = ref false in
    Netchannels.pipe 
      ~conv:(decoding_pipe_conv url_variant accept_spaces padding_seen) ()


  module Deprecated = struct

    let encode_substring s ~pos ~len ~linelength ~crlf =
      let s,_ =
	encode_with_options 
	  rfc_pattern '=' s pos len linelength linelength crlf in
      s ;;

    let decode_ignore_spaces s =
      let (s,_,_) = decode_prefix s 0 (String.length s) true true true false in
      s ;;

    let decode_substring s ~pos ~len ~url_variant ~accept_spaces =
      let (s,_,_) = decode_prefix s pos len url_variant accept_spaces true false
      in
      s ;;

  end

end



module QuotedPrintable = struct

  let encode_substring ?(crlf = true) ?(eot = false) ?(line_length = ref 0) s ~pos ~len =
    (* line_length:
     * - on input, the length of the line where the encoding starts
     * - on output, the length of the last written line
     * eot:
     * - false: it is known that the chunk is not at the end of text
     * - true: the chunk may be at the end of the text
     * eot has only an effect on trailing spaces
     *)
    
    if len < 0 or pos < 0 or pos > String.length s then
      invalid_arg "Netencoding.QuotedPrintable.encode";
    if pos + len > String.length s then
      invalid_arg "Netencoding.QuotedPrintable.encode";

    let eol_len = if crlf then 2 else 1 in    (* length of eol *)

    (* Note: The [count] algorithm must strictly correspond to the
     * "for" loop below.
     *)

    let rec count l n i =
      (* l: output line length
       * n: output byte count
       * i: input byte count
       *)
      if i < len then
	match String.unsafe_get s (pos+i) with
	    '\r' ->              (* CR is deleted *)
	      count l n (i+1)
	  | '\n' ->              (* LF may be expanded to CR/LF *)
	      count 0 (n+eol_len) (i+1)
	  | ('\000'..'\031'|'\127'..'\255'|
	     '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
	      if l <= 69 then
		count (l+3) (n+3) (i+1)
	      else
		(* Add soft line break after the encoded char: *)
		count 0 (n+4+eol_len) (i+1)
	  | 'F' when l=0 ->
	      (* Protect 'F' at the beginning of lines *)
	      count (l+3) (n+3) (i+1)
	  | ' ' when (i=len-1 && eot) ||   (* at end of text *)
	             l>69 ||               (* line too long *)
                     (i<len-1 && (s.[pos+i+1]='\r' || s.[pos+i+1]='\n')) 
		        (* end of line *)
		     ->
	      (* Protect spaces only if they occur at the end of a line,
	       * or just before soft line breaks
	       *)
	      if l <= 69 then
		count (l+3) (n+3) (i+1)
	      else
		(* Add soft line after the encoded space: *)
		count 0 (n+4+eol_len) (i+1)
	  | _ ->
	      if l>71 then
		(* Add soft line break after the char: *)
		count 0 (n+2+eol_len) (i+1)
	      else
		count (l+1) (n+1) (i+1)
      else
	n
    in

    let t_len = count !line_length 0 0 in
    let t = String.create t_len in
    
    let hexdigit =
      [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
	 '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in

    let k = ref 0 in

    let add_quoted c =
      t.[ !k ]   <- '=';
      t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
      t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
    in

    let add_soft_break() =
      t.[ !k ]   <- '=';
      if crlf then (
	t.[ !k+1 ] <- '\r';
	t.[ !k+2 ] <- '\n';
      )
      else
	t.[ !k+1 ] <- '\n';
    in

    (* In the following, the soft break criterion is [!l > 72]. Why?
     * We need to be able to add at least an encoded char (3 bytes)
     * plus the "=" sign for the soft break. So we are on the safe side
     * when there are four bytes space on the line. Lines must not be
     * longer than 76 chars (w/o CRLF), so 76-4=72.
     *)

    let l = ref !line_length in
    for i = 0 to len - 1 do
      match String.unsafe_get s i with
	  '\r' ->   (* CR is deleted *)
	    ()
	| '\n' ->   (* LF is expanded to CR/LF *)
	    if crlf then (
	      t.[ !k ] <- '\r';
	      t.[ !k+1 ] <- '\n';
	      k := !k + 2;
	    ) else (
	      t.[ !k ] <- '\n';
	      k := !k + 1;
	    );
	    l := 0
	| ('\000'..'\031'|'\127'..'\255'|
	   '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c ->
	    add_quoted c;
	    k := !k + 3;
	    l := !l + 3;
	    if !l > 72 then (
	      (* Add soft line break: *)
	      add_soft_break();
	      k := !k + 1 + eol_len;
	      l := 0
	    )
	| 'F' when !l = 0 ->
	    (* Protect 'F' at the beginning of lines *)
	    add_quoted 'F';
	    k := !k + 3;
	    l := !l + 3;
	| ' ' when ((i=len-1 && eot) ||
	              !l > 69 ||
                     (i<len-1 && (s.[pos+i+1]='\r' || s.[pos+i+1]='\n'))) ->
	    add_quoted ' ';
	    k := !k + 3;
	    l := !l + 3;
	    if !l > 72 then (
	      add_soft_break();
	      k := !k + 1 + eol_len;
	      l := 0;
	    )
	| c ->
	    String.unsafe_set t !k c;
	    incr k;
	    incr l;
	    if !l > 72 then (
	      add_soft_break();
	      k := !k + 1 + eol_len;
	      l := 0;
	    )
    done;

    assert(!k == t_len);

    line_length := !l;

    t ;;


  let encode ?crlf ?(pos=0) ?len s =
    let l = match len with None -> String.length s - pos | Some x -> x in 
    encode_substring ?crlf ~eot:true s ~pos ~len:l;;


  let encoding_pipe_conv ?crlf line_length incoming incoming_eof outgoing =
    (* Problematic case: the incoming buffer ends with a space, but we are
     * not at EOF. It is possible that a LF immediately follows, and that
     * the space needs to be quoted.
     * Solution: Do not convert such spaces, they remain in the buffer.
     *)
    let s = Netbuffer.unsafe_buffer incoming in
    let len = Netbuffer.length incoming in
    let (len',eot) =
      if not incoming_eof && len > 0 && s.[len-1] = ' ' then
	(len-1, false)
      else
	(len, true)
    in
    let s' = encode_substring ?crlf ~eot ~line_length s ~pos:0 ~len:len' in
    Netbuffer.add_string outgoing s';
    Netbuffer.delete incoming 0 len'
  ;;
    

  class encoding_pipe ?crlf () =
    let line_length = ref 0 in
    Netchannels.pipe ~conv:(encoding_pipe_conv ?crlf line_length) ()


  let decode_substring s ~pos ~len =
    
    if len < 0 || pos < 0 || pos > String.length s then
      invalid_arg "Netencoding.QuotedPrintable.decode";
    if pos + len > String.length s then
      invalid_arg "Netencoding.QuotedPrintable.decode";

    let decode_hex c =
      match c with
	  '0'..'9' -> Char.code c - 48
	| 'A'..'F' -> Char.code c - 55
	| 'a'..'f' -> Char.code c - 87
	| _ ->
	   invalid_arg "Netencoding.QuotedPrintable.decode";
    in 

    let rec count n i =
      if i < len then
	match String.unsafe_get s (pos+i) with
	    '=' ->
	      if i+1 = len then
		(* A '=' at EOF is ignored *)
		count n (i+1)
	      else
		if i+1 < len then
		  match s.[pos+i+1] with
		      '\r' ->
			(* Official soft break *)
			if i+2 < len && s.[pos+i+2] = '\n' then
			  count n (i+3)
			else
			  count n (i+2)
		    | '\n' ->
			(* Inofficial soft break *)
			count n (i+2)
		    | _ ->
			if i+2 >= len then
			  invalid_arg 
			    "Netencoding.QuotedPrintable.decode";
			let _ = decode_hex s.[pos+i+1] in
			let _ = decode_hex s.[pos+i+2] in
			count (n+1) (i+3)
		else
		  invalid_arg "Netencoding.QuotedPrintable.decode"
	  | _ ->
	      count (n+1) (i+1)
      else
	n
    in

    let l = count 0 0 in
    let t = String.create l in
    let k = ref pos in
    let e = pos + len in
    let i = ref 0 in

    while !i < l do
      match String.unsafe_get s !k with
	  '=' ->
	    if !k+1 = e then
	      (* A '=' at EOF is ignored *)
	      ()
	    else
	      if !k+1 < e then
		match s.[!k+1] with
		    '\r' ->
		      (* Official soft break *)
		      if !k+2 < e & s.[!k+2] = '\n' then
			k := !k + 3
		      else
			k := !k + 2
		  | '\n' ->
		      (* Inofficial soft break *)
		      k := !k + 2
		  | _ ->
		      if !k+2 >= e then
			invalid_arg 
			  "Netencoding.QuotedPrintable.decode_substring";
		      let x1 = decode_hex s.[!k+1] in
		      let x2 = decode_hex s.[!k+2] in
		      t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
		      k := !k + 3;
		      incr i
	      else
		invalid_arg "Netencoding.QuotedPrintable.decode_substring"
	| c ->
	    String.unsafe_set t !i c;
	    incr k;
	    incr i
    done;

    t ;;


  let decode ?(pos=0) ?len s =
    let l = match len with None -> String.length s - pos | Some x -> x in 
    decode_substring s pos l;;


  let decoding_pipe_conv incoming incoming_eof outgoing =
    (* Problematic case: The incoming buffer ends with '=' or '=X'. In this
     * case these characters remain in the buffer, because they will be
     * completed to a full hex sequence by the next conversion call.
     *)
    let s = Netbuffer.unsafe_buffer incoming in
    let len = Netbuffer.length incoming in
    let len' =
      if not incoming_eof then begin
	if len > 0 && s.[len-1] = '=' then
	  len - 1  
	else
	  if len > 1 && s.[len-2] = '=' then
	    len - 2
	  else
	    len
      end
      else
	len
    in
    let s' = decode ~len:len' s in
    Netbuffer.add_string outgoing s';
    Netbuffer.delete incoming 0 len'
  ;;

    
  class decoding_pipe () =
    Netchannels.pipe ~conv:decoding_pipe_conv ()


  module Deprecated = struct
    let encode_substring = encode_substring ?line_length:None
    let decode_substring = decode_substring
  end
end

	      
module Q = struct

  let encode_substring s ~pos ~len =
    
    if len < 0 || pos < 0 || pos > String.length s then
      invalid_arg "Netencoding.Q.encode_substring";
    if pos + len > String.length s then
      invalid_arg "Netencoding.Q.encode_substring";

    let rec count n i =
      if i < len then
	match String.unsafe_get s (pos+i) with
	  | ('A'..'Z'|'a'..'z'|'0'..'9') ->
	      count (n+1) (i+1)
	  | _ ->
	      count (n+3) (i+1)
      else
	n
    in

    let l = count 0 0 in
    let t = String.create l in
    
    let hexdigit =
      [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
	 '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in

    let k = ref 0 in

    let add_quoted c =
      t.[ !k ]   <- '=';
      t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
      t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
    in

    for i = 0 to len - 1 do
      match String.unsafe_get s i with
	| ('A'..'Z'|'a'..'z'|'0'..'9') as c ->
	    String.unsafe_set t !k c;
	    incr k
	| c ->
	    add_quoted c;
	    k := !k + 3
    done;

    t ;;


  let encode ?(pos=0) ?len s =
    let l = match len with None -> String.length s - pos | Some x -> x in 
    encode_substring s pos l;;



  let decode_substring s ~pos ~len =
    
    if len < 0 || pos < 0 || pos > String.length s then
      invalid_arg "Netencoding.Q.decode_substring";
    if pos + len > String.length s then
      invalid_arg "Netencoding.Q.decode_substring";

    let decode_hex c =
      match c with
	  '0'..'9' -> Char.code c - 48
	| 'A'..'F' -> Char.code c - 55
	| 'a'..'f' -> Char.code c - 87
	| _ ->
	   invalid_arg "Netencoding.Q.decode_substring";
    in 

    let rec count n i =
      if i < len then
	match String.unsafe_get s (pos+i) with
	    '=' ->
	      if i+2 >= len then
		invalid_arg "Netencoding.Q.decode_substring";
	      let _ = decode_hex s.[pos+i+1] in
	      let _ = decode_hex s.[pos+i+2] in
	      count (n+1) (i+3)
	  | _ ->  (* including '_' *)
	      count (n+1) (i+1)
      else
	n
    in

    let l = count 0 0 in
    let t = String.create l in
    let k = ref pos in
    let e = pos + len in
    let i = ref 0 in

    while !i < l do
      match String.unsafe_get s !k with
	  '=' ->
	    if !k+2 >= e then
	      invalid_arg "Netencoding.Q.decode_substring";
	    let x1 = decode_hex s.[!k+1] in
	    let x2 = decode_hex s.[!k+2] in
	    t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
	    k := !k + 3;
	    incr i
	| '_' ->
	    String.unsafe_set t !i ' ';
	    incr k;
	    incr i
	| c ->
	    String.unsafe_set t !i c;
	    incr k;
	    incr i
    done;

    t ;;


  let decode ?(pos=0) ?len s =
    let l = match len with None -> String.length s - pos | Some x -> x in 
    decode_substring s pos l ;;


  module Deprecated = struct
    let encode_substring = encode_substring
    let decode_substring = decode_substring
  end

end


module Url = struct
  let hex_digits =
    [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
       '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;

  let to_hex2 k =
    (* Converts k to a 2-digit hex string *)
    let s = String.create 2 in
    s.[0] <- hex_digits.( (k lsr 4) land 15 );
    s.[1] <- hex_digits.( k land 15 );
    s ;;


  let of_hex1 c =
    match c with
	('0'..'9') -> Char.code c - Char.code '0'
      | ('A'..'F') -> Char.code c - Char.code 'A' + 10
      | ('a'..'f') -> Char.code c - Char.code 'a' + 10
      | _ ->
	raise Not_found ;;



  let url_encoding_re =
    Netstring_str.regexp "[^A-Za-z0-9_.!*-]";;

  let url_decoding_re =
    Netstring_str.regexp "\\+\\|%..\\|%.\\|%";;


  let encode ?(plus = true) s =
    Netstring_str.global_substitute
      url_encoding_re
      (fun r _ ->
	 match Netstring_str.matched_string r s with
	     " " when plus -> "+"
	   | x ->
	       let k = Char.code(x.[0]) in
	       "%" ^ to_hex2 k
      )
      s ;;


  let decode ?(plus = true) ?(pos=0) ?len s =
    let s_l = String.length s in
    let s1 = 
      if pos = 0 && len=None then s else 
	let len = match len with Some n -> n | None -> s_l in
	String.sub s pos len in
    let l = String.length s1 in
    Netstring_str.global_substitute
      url_decoding_re
      (fun r _ ->
	 match Netstring_str.matched_string r s1 with
	   | "+" -> if plus then " " else "+"
	   | _ ->
	       let i = Netstring_str.match_beginning r in
	       (* Assertion: s1.[i] = '%' *)
	       if i+2 >= l then failwith "Netencoding.Url.decode";
	       let c1 = s1.[i+1] in
	       let c2 = s1.[i+2] in
	       begin
		 try
		   let k1 = of_hex1 c1 in
		   let k2 = of_hex1 c2 in
		   String.make 1 (Char.chr((k1 lsl 4) lor k2))
		 with
		     Not_found ->
		       failwith "Netencoding.Url.decode"
	       end
      )
      s1 ;;

  let url_split_re =
    Netstring_str.regexp "[&=]";;

  let mk_url_encoded_parameters nv_pairs =
    String.concat "&"
      (List.map
	 (fun (name,value) ->
	    let name_encoded = encode name in
	    let value_encoded = encode value in
	    name_encoded ^ "=" ^ value_encoded
	 )
	 nv_pairs
      )
  ;;

  let dest_url_encoded_parameters parstr =
    let rec parse_after_amp tl =
      match tl with
	  Netstring_str.Text name :: Netstring_str.Delim "=" :: Netstring_str.Text value :: tl' ->
	    (decode name, decode value) :: parse_next tl'
	| Netstring_str.Text name :: Netstring_str.Delim "=" :: Netstring_str.Delim "&" :: tl' ->
	    (decode name, "") :: parse_after_amp tl'
	| Netstring_str.Text name :: Netstring_str.Delim "=" :: [] ->
	    [decode name, ""]
	| _ ->
	    failwith "Netencoding.Url.dest_url_encoded_parameters"
    and parse_next tl =
      match tl with
	  [] -> []
	| Netstring_str.Delim "&" :: tl' ->
	    parse_after_amp tl'
	| _ ->
	    failwith "Netencoding.Url.dest_url_encoded_parameters"
    in
    let toklist = Netstring_str.full_split url_split_re parstr in
    match toklist with
	[] -> []
      | _ -> parse_after_amp toklist
  ;;

  let mk_url_encoded_parameters params =
    String.concat "&"
      (List.map (fun (name, value) -> encode name ^ "=" ^ encode value) params)

end


module Html = struct

  let etable =
    [ "lt", 60;
      "gt", 62;
      "amp", 38;
      "quot", 34;     
         (* Note: &quot; is new in HTML-4.0, but it has been widely used
	  * much earlier.
	  *)
      "apos", 39;
         (* Only used if contained in unsafe_chars *)
      (* ISO-8859-1: *)
      "nbsp", 160;
      "iexcl", 161;
      "cent", 162;
      "pound", 163;
      "curren", 164;
      "yen", 165;
      "brvbar", 166;
      "sect", 167;
      "uml", 168;
      "copy", 169;
      "ordf", 170;
      "laquo", 171;
      "not", 172;
      "shy", 173;
      "reg", 174;
      "macr", 175;
      "deg", 176;
      "plusmn", 177;
      "sup2", 178;
      "sup3", 179;
      "acute", 180;
      "micro", 181;
      "para", 182;
      "middot", 183;
      "cedil", 184;
      "sup1", 185;
      "ordm", 186;
      "raquo", 187;
      "frac14", 188;
      "frac12", 189;
      "frac34", 190;
      "iquest", 191;
      "Agrave", 192;
      "Aacute", 193;
      "Acirc", 194;
      "Atilde", 195;
      "Auml", 196;
      "Aring", 197;
      "AElig", 198;
      "Ccedil", 199;
      "Egrave", 200;
      "Eacute", 201;
      "Ecirc", 202;
      "Euml", 203;
      "Igrave", 204;
      "Iacute", 205;
      "Icirc", 206;
      "Iuml", 207;
      "ETH", 208;
      "Ntilde", 209;
      "Ograve", 210;
      "Oacute", 211;
      "Ocirc", 212;
      "Otilde", 213;
      "Ouml", 214;
      "times", 215;
      "Oslash", 216;
      "Ugrave", 217;
      "Uacute", 218;
      "Ucirc", 219;
      "Uuml", 220;
      "Yacute", 221;
      "THORN", 222;
      "szlig", 223;
      "agrave", 224;
      "aacute", 225;
      "acirc", 226;
      "atilde", 227;
      "auml", 228;
      "aring", 229;
      "aelig", 230;
      "ccedil", 231;
      "egrave", 232;
      "eacute", 233;
      "ecirc", 234;
      "euml", 235;
      "igrave", 236;
      "iacute", 237;
      "icirc", 238;
      "iuml", 239;
      "eth", 240;
      "ntilde", 241;
      "ograve", 242;
      "oacute", 243;
      "ocirc", 244;
      "otilde", 245;
      "ouml", 246;
      "divide", 247;
      "oslash", 248;
      "ugrave", 249;
      "uacute", 250;
      "ucirc", 251;
      "uuml", 252;
      "yacute", 253;
      "thorn", 254;
      "yuml", 255;
      (* Other: *)
      "fnof", 402;
      "Alpha", 913;
      "Beta", 914;
      "Gamma", 915;
      "Delta", 916;
      "Epsilon", 917;
      "Zeta", 918;
      "Eta", 919;
      "Theta", 920;
      "Iota", 921;
      "Kappa", 922;
      "Lambda", 923;
      "Mu", 924;
      "Nu", 925;
      "Xi", 926;
      "Omicron", 927;
      "Pi", 928;
      "Rho", 929;
      "Sigma", 931;
      "Tau", 932;
      "Upsilon", 933;
      "Phi", 934;
      "Chi", 935;
      "Psi", 936;
      "Omega", 937;
      "alpha", 945;
      "beta", 946;
      "gamma", 947;
      "delta", 948;
      "epsilon", 949;
      "zeta", 950;
      "eta", 951;
      "theta", 952;
      "iota", 953;
      "kappa", 954;
      "lambda", 955;
      "mu", 956;
      "nu", 957;
      "xi", 958;
      "omicron", 959;
      "pi", 960;
      "rho", 961;
      "sigmaf", 962;
      "sigma", 963;
      "tau", 964;
      "upsilon", 965;
      "phi", 966;
      "chi", 967;
      "psi", 968;
      "omega", 969;
      "thetasym", 977;
      "upsih", 978;
      "piv", 982;
      "bull", 8226;
      "hellip", 8230;
      "prime", 8242;
      "Prime", 8243;
      "oline", 8254;
      "frasl", 8260;
      "weierp", 8472;
      "image", 8465;
      "real", 8476;
      "trade", 8482;
      "alefsym", 8501;
      "larr", 8592;
      "uarr", 8593;
      "rarr", 8594;
      "darr", 8595;
      "harr", 8596;
      "crarr", 8629;
      "lArr", 8656;
      "uArr", 8657;
      "rArr", 8658;
      "dArr", 8659;
      "hArr", 8660;
      "forall", 8704;
      "part", 8706;
      "exist", 8707;
      "empty", 8709;
      "nabla", 8711;
      "isin", 8712;
      "notin", 8713;
      "ni", 8715;
      "prod", 8719;
      "sum", 8721;
      "minus", 8722;
      "lowast", 8727;
      "radic", 8730;
      "prop", 8733;
      "infin", 8734;
      "ang", 8736;
      "and", 8743;
      "or", 8744;
      "cap", 8745;
      "cup", 8746;
      "int", 8747;
      "there4", 8756;
      "sim", 8764;
      "cong", 8773;
      "asymp", 8776;
      "ne", 8800;
      "equiv", 8801;
      "le", 8804;
      "ge", 8805;
      "sub", 8834;
      "sup", 8835;
      "nsub", 8836;
      "sube", 8838;
      "supe", 8839;
      "oplus", 8853;
      "otimes", 8855;
      "perp", 8869;
      "sdot", 8901;
      "lceil", 8968;
      "rceil", 8969;
      "lfloor", 8970;
      "rfloor", 8971;
      "lang", 9001;
      "rang", 9002;
      "loz", 9674;
      "spades", 9824;
      "clubs", 9827;
      "hearts", 9829;
      "diams", 9830;
      "OElig", 338;
      "oelig", 339;
      "Scaron", 352;
      "scaron", 353;
      "Yuml", 376;
      "circ", 710;
      "tilde", 732;
      "ensp", 8194;
      "emsp", 8195;
      "thinsp", 8201;
      "zwnj", 8204;
      "zwj", 8205;
      "lrm", 8206;
      "rlm", 8207;
      "ndash", 8211;
      "mdash", 8212;
      "lsquo", 8216;
      "rsquo", 8217;
      "sbquo", 8218;
      "ldquo", 8220;
      "rdquo", 8221;
      "bdquo", 8222;
      "dagger", 8224;
      "Dagger", 8225;
      "permil", 8240;
      "lsaquo", 8249;
      "rsaquo", 8250;
      "euro", 8364;
    ] ;;

  let quick_etable_html =
    let ht = Hashtbl.create 50 in
    List.iter (fun (name,value) -> 
		 Hashtbl.add ht name value
	      ) 
              etable;
    ht ;;

  let quick_etable_xml =
    let ht = Hashtbl.create 5 in
    List.iter (fun name -> 
		 let value = List.assoc name etable in
		 Hashtbl.add ht name value
	      ) 
              [ "lt"; "gt"; "amp"; "quot"; "apos"];
    ht ;;


  let rev_etable =
    (* Only code points 0 to 255: *)
    let a = Array.make 256 "" in
    List.iter (fun (name,value) -> 
		 if value <= 255 then
		   a.(value) <- "&" ^ name ^ ";"
	      ) etable;
    a ;;

  let rev_etable_rest =
    (* Only code points >= 256: *)
    let ht = Hashtbl.create 150 in
    List.iter (fun (name,value) -> 
		 if value >= 256 then
		   Hashtbl.add ht value ("&" ^ name ^ ";")
	      ) etable;
    ht ;;


  let unsafe_chars_html4 = "<>\"&\000\001\002\003\004\005\006\007\008\011\012\014\015\016\017\018\019\020\021\022\023\024\025\026\027\028\029\030\031\127" ;;


  let regexp_ht = Hashtbl.create 7
  let regexp_ht_mutex = !Netsys_oothr.provider # create_mutex()

  let regexp_set s =
    Netsys_oothr.serialize
      regexp_ht_mutex
      (fun () ->
	 try
	   Hashtbl.find regexp_ht s
	 with
	   | Not_found ->
	       let re = Netstring_str.regexp (Netstring_str.quote_set s) in
	       if Hashtbl.length regexp_ht < 100 then  (* avoid leak *)
		 Hashtbl.replace regexp_ht s re;
	       re
      )
      ()


  (* The functions [encode_quickly] and [encode_ascii] are special cases of 
   * [encode] that can be implemented by regular expressions.
   *)

  let encode_quickly ~prefer_name ~unsafe_chars () =
    (* Preconditions: in_enc = out_enc, and the encoding must be a single-byte,
     * ASCII-compatible encoding.
     *)
    if unsafe_chars = "" then
      (fun s -> s)
    else
      let unsafe_re = 
	regexp_set unsafe_chars in
      Netstring_str.global_substitute
	unsafe_re
	(fun r s ->
	   let t = Netstring_str.matched_string r s in
	   let p = Char.code (t.[0]) in    (* p is an ASCII code point *)
	   let name = rev_etable.(p) in
	   if prefer_name && name <> "" then
	     name
	   else
	     "&#" ^ string_of_int p ^ ";"
	)
  ;;

  let msb_set = (
    let s = String.create 128 in
    for k = 0 to 127 do s.[k] <- Char.chr (128+k) done;
    s
  )

  let encode_ascii ~in_enc ~prefer_name ~unsafe_chars () =
    (* Preconditions: out_enc = `Enc_usascii, and in_enc must be a single-byte,
     * ASCII-compatible encoding.
     *)
    let unsafe_chars1 = unsafe_chars ^ msb_set in
    let unsafe_re = 
      regexp_set unsafe_chars1 in
    (* unicode_of.[q] = p: the code point q+128 of in_enc is the same as the
     * Unicode code point p
     *)
    let unicode_of = Array.make 128 (-1) in
    for i = 0 to 127 do
      try
	let s = String.make 1 (Char.chr (i+128)) in
	let u = Netconversion.uarray_of_ustring in_enc s in
	match u with
	    [| u0 |] -> unicode_of.(i) <- u0
	  | _ -> assert false
      with
	  Netconversion.Malformed_code -> 
	    unicode_of.(i) <- (-1)
    done;
    Netstring_str.global_substitute
      unsafe_re
      (fun r s ->
	 let t = Netstring_str.matched_string r s in
	 (* p is the code point in the encoding ~in_enc; p' is the Unicode
	  * code point:
	  *)
	 let p = Char.code (t.[0]) in
	 let p' = if p < 128 then p else unicode_of.(p - 128) in
	 if p' < 0 then raise Netconversion.Malformed_code;
	 let name = 
	   if prefer_name then begin
	     if p' <= 255 then rev_etable.(p') else
	       try
		 Hashtbl.find rev_etable_rest p'
	       with
		   Not_found -> ""
	   end
	   else "" in
	 if name = "" then
	   "&#" ^ string_of_int p' ^ ";"
	 else
	   name
      )
  ;;


  let encode_from_latin1 =            (* backwards compatible *)
    encode_ascii 
      ~in_enc:`Enc_iso88591 ~prefer_name:true ~unsafe_chars:unsafe_chars_html4
      ()
  ;;


  let encode 
        ~in_enc
        ?(out_enc = `Enc_usascii)
	?(prefer_name = true)
	?(unsafe_chars = unsafe_chars_html4)
	() =
    (* This function implements the general case *)
    (* Check arguments: *)
    if not (Netconversion.is_ascii_compatible out_enc) then
      invalid_arg "Netencoding.Html.encode: out_enc not ASCII-compatible";
    for i = 0 to String.length unsafe_chars - 1 do
      if Char.code(unsafe_chars.[i]) >= 128 then
	invalid_arg "Netencoding.Html.encode: non-ASCII character in unsafe_chars";
    done;
    (* Are there better implementations than the general one? *)
    let in_single = Netconversion.is_single_byte in_enc in
    let in_subset = match in_enc with `Enc_subset(_,_) -> true | _ -> false in
    if not in_subset && in_enc=out_enc && in_single then 
      encode_quickly ~prefer_name ~unsafe_chars ()
    else if not in_subset && out_enc=`Enc_usascii && in_single then
      encode_ascii ~in_enc ~prefer_name ~unsafe_chars ()
    else begin
      (* ... only the general implementation is applicable. *)
      (* Create the domain function: *)
      let dom_array = Array.make 128 true in
      let dom p =  p >= 128 || dom_array.(p) in
      (* Set dom_array from unsafe_chars: *)
      for i = 0 to String.length unsafe_chars - 1 do
	let c = Char.code(unsafe_chars.[i]) in
	dom_array.(c) <- false
      done;
      (* Create the substitution function: *)
      let subst p =
	let name = 
	  if prefer_name then begin
	    if p <= 255 then rev_etable.(p) else
	       try
		 Hashtbl.find rev_etable_rest p
	       with
		   Not_found -> ""
	  end
	  else "" in
	if name = "" then
	  "&#" ^ string_of_int p ^ ";"
	else
	  name
      in
      (* Recode: *)
      Netconversion.recode_string 
        ~in_enc ~out_enc:(`Enc_subset(out_enc,dom)) ~subst
    end
  ;;

  type entity_set = [ `Html | `Xml | `Empty ];;

  let eref_re = 
    Netstring_str.regexp "&\\(\
                            #\\([0-9]+\\);\\|\
                            #[xX]\\([0-9a-fA-F]+\\);\\|\
                            \\([a-zA-Z]+\\);\
                          \\)" ;;

  let total_enc =
    (* every byte must have a corresponding Unicode code point, i.e. the
     * encoding must be "byte-total"
     *)
    function
	`Enc_iso88591
      | `Enc_iso88592
      | `Enc_iso88593
      | `Enc_iso88594
      | `Enc_iso88595
      | `Enc_iso88599
      | `Enc_iso885910
      | `Enc_iso885913
      | `Enc_iso885914
      | `Enc_iso885915
      | `Enc_iso885916  -> true
      | _ -> false
  ;;

  let hex_digit_of_char c =
    match c with
       '0'..'9' -> Char.code c - 48
      | 'A'..'F' -> Char.code c - 55
      | 'a'..'f' -> Char.code c - 87
      | _ -> assert false

  let hex_of_string s =
    let n = ref 0 in
    for i = 0 to String.length s - 1 do
      let d = hex_digit_of_char s.[i] in
      n := (!n lsl 4) lor d
    done;
    !n


  let search_all re s pos =
    let rec search p acc =
      match 
	try Some(Netstring_str.search_forward re s p) with Not_found -> None
      with
	| Some (k,r) ->
	    search (k+1) ( (k,r) :: acc )
	| None ->
	    List.rev acc in
    search pos []
      

  let decode 
        ~in_enc
        ~out_enc
        ?(lookup=fun name -> 
	    failwith ("Netencoding.Html.decode: Unknown entity `" ^ name ^ "'"))
        ?(subst=fun p ->
	    failwith ("Netencoding.Html.decode: Character cannot be represented: " ^ string_of_int p))
	?(entity_base = (`Html : entity_set))
	() =
    (* Argument checks: *)
    if not (Netconversion.is_ascii_compatible in_enc) then
      invalid_arg "Netencoding.Html.decode: in_enc not ASCII-compatible";
    (* makechar: *)
    let raw_makechar = Netconversion.makechar out_enc in
    let makechar p =
      try raw_makechar p
      with Not_found -> subst p
    in
    (* Entity lookup: *)
    let lookup_entity =
      match entity_base with
	  `Html	    
	| `Xml ->
	    let ht = 
	      if entity_base = `Html 
	      then quick_etable_html 
	      else quick_etable_xml in
	    ( fun name ->
		try 
		  makechar(Hashtbl.find ht name)
		with
		    Not_found -> lookup name
	    )
	| `Empty ->
	    lookup
    in
    (* Recode strings: *)
    let recode_str =
      if total_enc in_enc && in_enc = out_enc then
	(fun s -> s)
      else
	Netconversion.recode_string ~in_enc ~out_enc ~subst
    in
    (fun s ->
       (* Find all occurrences of &name; or &#num; or &#xnum; *)
       let occurrences = search_all eref_re s 0  in
       (* Collect the resulting string in a buffer *)
       let buf = Buffer.create 250 in
       let n = ref 0 in
       List.iter
	 (fun (n0,r) ->
	    let n1 = Netstring_str.match_end r in
	    if n0 > !n then
	      Buffer.add_string buf (recode_str (String.sub s !n (n0 - !n)));
	    (* TODO: avoid String.sub *)
	    let replacement =
	      let num = 
		try Netstring_str.matched_group r 2 s with Not_found -> "" in
	      (* Note: Older versions of Pcre return "" when the substring
	       * did not match, newer versions raise Not_found  
	       *)
	      if num <> "" then begin
		let n = int_of_string num in
		makechar n
	      end
	      else begin
		let xnum = 
		  try Netstring_str.matched_group r 3 s with Not_found -> "" in
		(* Note: Older versions of Pcre return "" when the substring
		 * did not match, newer versions raise Not_found  
		 *)
		if xnum <> "" then begin
		  let n = hex_of_string xnum in
		  makechar n
		end
		else begin
		  let name = 
		    try Netstring_str.matched_group r 4 s with Not_found -> "" in
		  (* Note: Older versions of Pcre return "" when the substring
	           * did not match, newer versions raise Not_found  
	           *)
		  assert(name <> "");
		  lookup_entity name
		end
	      end
	    in
	    Buffer.add_string buf replacement;
	    n := n1;
	 )
	 occurrences;
       let n0 = String.length s in
       if n0 > !n then
	 Buffer.add_string buf (recode_str (String.sub s !n (n0 - !n)));
       (* TODO: avoid String.sub *)
       (* Return *)
       Buffer.contents buf
    )
  ;;
     
  let decode_to_latin1 =
    decode ~in_enc:`Enc_iso88591 ~out_enc:`Enc_iso88591 
           ~lookup:(fun s -> "&" ^ s ^ ";")
           ~subst:(fun p -> "&#" ^ string_of_int p ^ ";")
           ()

end

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml