(* $Id: netstring_pcre.mlp 1791 2012-07-19 23:01:25Z gerd $
* ----------------------------------------------------------------------
*
*)
IFDEF HAVE_PCRE THEN
type regexp = Pcre.regexp;;
type split_result = Pcre.split_result =
| Text of string
| Delim of string
| Group of int * string
| NoGroup
;;
type result = Pcre.substrings;;
let regexp s =
Pcre.regexp ~flags:[`MULTILINE] s
;;
let regexp_case_fold s =
Pcre.regexp ~flags:[`MULTILINE; `CASELESS] s
;;
let quote s =
Pcre.quote s
;;
let regexp_string s =
regexp (quote s)
;;
let regexp_string_case_fold s =
regexp_case_fold (quote s)
;;
let string_match pat s pos =
try
let result = Pcre.exec ~rex:pat ~flags:[`ANCHORED] ~pos s in
Some result
with Not_found -> None
;;
let search_forward pat s pos =
let result = Pcre.exec ~rex:pat ~pos s in
fst (Pcre.get_substring_ofs result 0), result
;;
let search_backward pat s pos =
let rec search p =
try
(* `ANCHORED: virtually prepends "^" to the regexp *)
let result = Pcre.exec ~flags:[`ANCHORED] ~rex:pat ~pos:p s in
fst (Pcre.get_substring_ofs result 0), result
with
Not_found ->
if p > 0 then search (p-1) else raise Not_found
in
search pos
;;
let matched_string result _ =
(* Unfortunately, Pcre.get_substring will not raise Not_found if there is
* no matched string. Instead, it returns "", but this value cannot be
* distinguished from an empty match.
* The workaround is to call Pcre.get_substring_ofs first. This function
* will raise Not_found if there is not any matched string.
*
* NOTE: Current versions of Pcre do return Not_found!
*)
ignore(Pcre.get_substring_ofs result 0);
Pcre.get_substring result 0
;;
let match_beginning result =
fst (Pcre.get_substring_ofs result 0)
;;
let match_end result =
snd (Pcre.get_substring_ofs result 0)
;;
let matched_group result n _ =
(* See also the comment for [matched_string] *)
if n < 0 || n >= Pcre.num_of_subs result then raise Not_found;
ignore(Pcre.get_substring_ofs result n);
Pcre.get_substring result n
;;
let group_beginning result n =
fst (Pcre.get_substring_ofs result n)
;;
let group_end result n =
snd (Pcre.get_substring_ofs result n)
;;
let templ_re = Pcre.regexp "(?:\\\\\\d)|[\\$\\\\]" ;;
(* matches a backslash and a digit, or a single dollar or a single
* backslash.
*)
let tr_templ s =
(* Convert \n to $n etc. *)
(* Unfortunately we cannot just replace \ by $. *)
let rec tr l =
match l with
Pcre.Delim "$" :: l' -> "$$" :: tr l'
| Pcre.Delim "\\" :: Pcre.Delim "$" :: l' -> "$$" :: tr l'
| Pcre.Delim "\\" :: Pcre.Delim s :: l' -> s :: tr l'
| Pcre.Delim "\\" :: Pcre.Text s :: l' -> s :: tr l'
| [ Pcre.Delim "\\" ] -> failwith "trailing backslash"
| Pcre.Delim d :: l' ->
assert(d.[0] = '\\');
let n = Char.code d.[1] - Char.code '0' in
if n = 0 then
"$&" :: tr l'
else
("$" ^ string_of_int n ^ "$!") :: tr l'
| Pcre.Text t :: l' -> t :: tr l'
| Pcre.Group(_,_) :: _ -> assert false
| Pcre.NoGroup :: _ -> assert false
| [] -> []
in
let l = Pcre.full_split ~rex:templ_re ~max:(-1) s in
String.concat "" (tr l)
;;
let global_replace pat templ s =
Pcre.replace ~rex:pat ~itempl:(Pcre.subst (tr_templ templ)) s
;;
let replace_first pat templ s =
Pcre.replace_first ~rex:pat ~itempl:(Pcre.subst (tr_templ templ)) s
;;
let global_substitute pat subst s =
Pcre.substitute_substrings ~rex:pat ~subst:(fun r -> subst r s) s
;;
let string_before s n =
String.sub s 0 n
;;
let string_after s n =
String.sub s n (String.length s - n)
;;
let first_chars s len =
String.sub s 0 len
;;
let last_chars s len =
String.sub s (String.length s - len) len
;;
(*
* Uncomment for next version of Pcre
let substitute_first ?groups ~pat ~subst s =
Pcre.substitute_substrings_first ~rex:pat ~subst:(fun r -> subst r s) s
;;
*)
let substitute_first pat subst s =
(* Do it yourself in the meantime *)
try
let substrs = Pcre.exec ~rex:pat s in (* or Not_found *)
let (match_beg,match_end) = Pcre.get_substring_ofs substrs 0 in
let replacement_text = subst substrs s in
String.concat "" [string_before s match_beg;
replacement_text;
string_after s match_end]
with
Not_found -> s
;;
(* Copied from Str for exact compatibility: *)
let bounded_split expr text num =
let start =
try
let start_substrs = Pcre.exec ~rex:expr ~flags:[`ANCHORED] text in
(* or Not_found *)
let (_,match_end) = Pcre.get_substring_ofs start_substrs 0 in
match_end
with
Not_found -> 0
in
let rec split start n =
if start >= String.length text then [] else
if n = 1 then [string_after text start] else
try
let next_substrs = Pcre.exec ~rex:expr ~pos:start text
in (* or Not_found *)
let (pos,match_end) = Pcre.get_substring_ofs next_substrs 0 in
String.sub text start (pos-start) :: split match_end (n-1)
with Not_found ->
[string_after text start] in
split start num
;;
let split sep s = bounded_split sep s 0
;;
(* Copied from Str for exact compatibility: *)
let bounded_split_delim expr text num =
let rec split start n =
if start > String.length text then [] else
if n = 1 then [string_after text start] else
try
let next_substrs = Pcre.exec ~rex:expr ~pos:start text
in (* or Not_found *)
let (pos,match_end) = Pcre.get_substring_ofs next_substrs 0 in
String.sub text start (pos-start) :: split match_end (n-1)
with Not_found ->
[string_after text start] in
if text = "" then [] else split 0 num
;;
let split_delim sep text = bounded_split_delim sep text 0 ;;
let full_split sep s =
Pcre.full_split ~rex:sep ~max:(-1) s
;;
let bounded_full_split sep s max =
let max' = if max <= 0 then -1 else max in
Pcre.full_split ~rex:sep ~max:max' s
;;
ELSE
type regexp
type split_result =
| Text of string
| Delim of string
| Group of int * string
| NoGroup
type result
let regexp _ =
invalid_arg "Netstring_pcre: unavailable"
let regexp_case_fold _ =
invalid_arg "Netstring_pcre: unavailable"
let quote _ =
invalid_arg "Netstring_pcre: unavailable"
let regexp_string _ =
invalid_arg "Netstring_pcre: unavailable"
let regexp_string_case_fold _ =
invalid_arg "Netstring_pcre: unavailable"
let string_match _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let search_forward _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let search_backward _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let matched_string _ _ =
invalid_arg "Netstring_pcre: unavailable"
let match_beginning _ =
invalid_arg "Netstring_pcre: unavailable"
let match_end _ =
invalid_arg "Netstring_pcre: unavailable"
let matched_group _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let group_beginning _ _ =
invalid_arg "Netstring_pcre: unavailable"
let group_end _ _ =
invalid_arg "Netstring_pcre: unavailable"
let global_replace _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let replace_first _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let global_substitute _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let substitute_first _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let split _ _ =
invalid_arg "Netstring_pcre: unavailable"
let bounded_split _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let split_delim _ _ =
invalid_arg "Netstring_pcre: unavailable"
let bounded_split_delim _ _ =
invalid_arg "Netstring_pcre: unavailable"
let full_split _ _ =
invalid_arg "Netstring_pcre: unavailable"
let bounded_full_split _ _ _ =
invalid_arg "Netstring_pcre: unavailable"
let string_before _ _ =
invalid_arg "Netstring_pcre: unavailable"
let string_after _ _ =
invalid_arg "Netstring_pcre: unavailable"
let first_chars _ _ =
invalid_arg "Netstring_pcre: unavailable"
let last_chars _ _ =
invalid_arg "Netstring_pcre: unavailable"
ENDIF