Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: netstring_pcre.mlp 2048 2014-09-16 11:46:38Z gerd $
 * ----------------------------------------------------------------------
 *
 *)

#ifdef HAVE_PCRE

type regexp = Pcre.regexp;;
type split_result = Pcre.split_result =
  | Text of string
  | Delim of string
  | Group of int * string
  | NoGroup
;;

type result = Pcre.substrings;;

let regexp s =
  Pcre.regexp ~flags:[`MULTILINE] s
;;

let regexp_case_fold s =
  Pcre.regexp ~flags:[`MULTILINE; `CASELESS] s
;;

let quote s =
  Pcre.quote s
;;

let regexp_string s =
  regexp (quote s)
;;

let regexp_string_case_fold s =
  regexp_case_fold (quote s)
;;

let string_match pat s pos =
  try
    let result = Pcre.exec ~rex:pat ~flags:[`ANCHORED] ~pos s in
    Some result
  with Not_found -> None
;;

let search_forward pat s pos =
  let result = Pcre.exec ~rex:pat ~pos s in
  fst (Pcre.get_substring_ofs result 0), result
;;

let search_backward pat s pos =
  let rec search p =
    try
      (* `ANCHORED: virtually prepends "^" to the regexp *)
      let result = Pcre.exec ~flags:[`ANCHORED] ~rex:pat ~pos:p s in
      fst (Pcre.get_substring_ofs result 0), result
    with
      Not_found ->
        if p > 0 then search (p-1) else raise Not_found
  in
  search pos
;;

let matched_string result _ =
  (* Unfortunately, Pcre.get_substring will not raise Not_found if there is
   * no matched string. Instead, it returns "", but this value cannot be
   * distinguished from an empty match.
   * The workaround is to call Pcre.get_substring_ofs first. This function
   * will raise Not_found if there is not any matched string.
   *
   * NOTE: Current versions of Pcre do return Not_found!
   *)
  ignore(Pcre.get_substring_ofs result 0);
  Pcre.get_substring result 0
;;

let match_beginning result =
  fst (Pcre.get_substring_ofs result 0)
;;

let match_end result =
  snd (Pcre.get_substring_ofs result 0)
;;

let matched_group result n _ =
  (* See also the comment for [matched_string] *)
  if n < 0 || n >= Pcre.num_of_subs result then raise Not_found;
  ignore(Pcre.get_substring_ofs result n);
  Pcre.get_substring result n
;;

let group_beginning result n =
  fst (Pcre.get_substring_ofs result n)
;;

let group_end result n =
  snd (Pcre.get_substring_ofs result n)
;;

let templ_re = Pcre.regexp "(?:\\\\\\d)|[\\$\\\\]" ;;
  (* matches a backslash and a digit, or a single dollar or a single
   * backslash.
   *)

let tr_templ s =
  (* Convert \n to $n etc. *)
  (* Unfortunately we cannot just replace \ by $. *)
  let rec tr l =
    match l with
	Pcre.Delim "$" :: l' -> "$$" :: tr l'
      | Pcre.Delim "\\" :: Pcre.Delim "$" :: l'  -> "$$" :: tr l'
      | Pcre.Delim "\\" :: Pcre.Delim s :: l' -> s :: tr l'
      | Pcre.Delim "\\" :: Pcre.Text s :: l' -> s :: tr l'
      | [ Pcre.Delim "\\" ] -> failwith "trailing backslash"
      | Pcre.Delim d :: l' ->
	  assert(d.[0] = '\\');
	  let n = Char.code d.[1] - Char.code '0' in
	  if n = 0 then
	    "$&" :: tr l'
	  else
	    ("$" ^ string_of_int n ^ "$!") :: tr l'
      | Pcre.Text t :: l' -> t :: tr l'
      | Pcre.Group(_,_) :: _ -> assert false
      | Pcre.NoGroup :: _ -> assert false
      | [] -> []
  in
  let l = Pcre.full_split ~rex:templ_re ~max:(-1) s in
  String.concat "" (tr l)
;;


let global_replace pat templ s =
  Pcre.replace ~rex:pat ~itempl:(Pcre.subst (tr_templ templ)) s
;;

let replace_first pat templ s =
  Pcre.replace_first ~rex:pat ~itempl:(Pcre.subst (tr_templ templ)) s
;;

let global_substitute pat subst s =
  Pcre.substitute_substrings ~rex:pat ~subst:(fun r -> subst r s) s
;;

let string_before s n =
  String.sub s 0 n
;;

let string_after s n =
  String.sub s n (String.length s - n)
;;

let first_chars s len =
  String.sub s 0 len
;;

let last_chars s len =
  String.sub s (String.length s - len) len
;;

(*
 * Uncomment for next version of Pcre
let substitute_first ?groups ~pat ~subst s =
  Pcre.substitute_substrings_first ~rex:pat ~subst:(fun r -> subst r s) s
;;
*)

let substitute_first pat subst s =
  (* Do it yourself in the meantime *)
  try
    let substrs = Pcre.exec ~rex:pat s in (* or Not_found *)
    let (match_beg,match_end) = Pcre.get_substring_ofs substrs 0 in
    let replacement_text = subst substrs s in
    String.concat "" [string_before s match_beg;
		      replacement_text;
		      string_after s match_end]
  with
      Not_found -> s
;;
  


(* Copied from Str for exact compatibility: *)
let bounded_split expr text num =
  let start =
    try
      let start_substrs = Pcre.exec ~rex:expr ~flags:[`ANCHORED] text in
                          (* or Not_found *)
      let (_,match_end) = Pcre.get_substring_ofs start_substrs 0 in
      match_end
    with
	Not_found -> 0
  in
  let rec split start n =
    if start >= String.length text then [] else
    if n = 1 then [string_after text start] else
      try
	let next_substrs = Pcre.exec ~rex:expr ~pos:start text 
	in (* or Not_found *)
	let (pos,match_end) = Pcre.get_substring_ofs next_substrs 0 in
        String.sub text start (pos-start) :: split match_end (n-1)
      with Not_found ->
        [string_after text start] in
  split start num
;;

let split sep s = bounded_split sep s 0
;;

(* Copied from Str for exact compatibility: *)
let bounded_split_delim expr text num =
  let rec split start n =
    if start > String.length text then [] else
    if n = 1 then [string_after text start] else
      try
	let next_substrs = Pcre.exec ~rex:expr ~pos:start text 
	in (* or Not_found *)
	let (pos,match_end) = Pcre.get_substring_ofs next_substrs 0 in
        String.sub text start (pos-start) :: split match_end (n-1)
      with Not_found ->
        [string_after text start] in
  if text = "" then [] else split 0 num
;;

let split_delim sep text = bounded_split_delim sep text 0 ;;

let full_split sep s =
  Pcre.full_split ~rex:sep ~max:(-1) s
;;

let bounded_full_split sep s max =
  let max' = if max <= 0 then -1 else max in
  Pcre.full_split ~rex:sep ~max:max' s
;;

#else
type regexp
type split_result =
  | Text of string
  | Delim of string
  | Group of int * string
  | NoGroup
type result

let regexp _ = 
  invalid_arg "Netstring_pcre: unavailable"
let regexp_case_fold _ = 
  invalid_arg "Netstring_pcre: unavailable"
let quote _ = 
  invalid_arg "Netstring_pcre: unavailable"
let regexp_string _ = 
  invalid_arg "Netstring_pcre: unavailable"
let regexp_string_case_fold _ = 
  invalid_arg "Netstring_pcre: unavailable"
let string_match _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let search_forward _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let search_backward _ _ _ =
  invalid_arg "Netstring_pcre: unavailable"
let matched_string _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let match_beginning _ = 
  invalid_arg "Netstring_pcre: unavailable"
let match_end _ = 
  invalid_arg "Netstring_pcre: unavailable"
let matched_group _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let group_beginning _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let group_end _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let global_replace _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let replace_first _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let global_substitute _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let substitute_first _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let split _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let bounded_split _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let split_delim _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let bounded_split_delim _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let full_split _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let bounded_full_split _ _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let string_before _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let string_after _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let first_chars _ _ = 
  invalid_arg "Netstring_pcre: unavailable"
let last_chars _ _ = 
  invalid_arg "Netstring_pcre: unavailable"

#endif

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml