(* $Id: netstring_pcre.ml 1003 2006-09-24 15:17:15Z gerd $
* ----------------------------------------------------------------------
*
*)
(* A compiled regular expression. This is exactly [Pcre.regexp], so values
 * built directly with Pcre can be passed to the functions of this module.
 *)
type regexp = Pcre.regexp;;
(* Re-export of [Pcre.split_result] with its constructors, so that the
 * elements returned by [full_split] and [bounded_full_split] can be
 * pattern-matched without qualifying the constructors with [Pcre.].
 *
 * Constructor meanings below follow the Pcre documentation (not visible in
 * this file -- confirm against the installed Pcre version).
 *)
type split_result = Pcre.split_result =
| Text of string
  (* a piece of text between delimiters *)
| Delim of string
  (* the text matched by the delimiter *)
| Group of int * string
  (* (group number, matched text) of a group inside the delimiter *)
| NoGroup
  (* a group of the delimiter that did not take part in the match *)
;;
(* The outcome of a successful match. This is exactly [Pcre.substrings], i.e.
 * the value returned by [Pcre.exec]; the accessor functions of this module
 * ([matched_string], [match_beginning], [matched_group], ...) take it as
 * their first argument.
 *)
type result = Pcre.substrings;;
(* [regexp s] compiles the pattern [s] into a regular expression.
 * The only compile flag passed to Pcre is `MULTILINE.
 *)
let regexp s =
  let flags = [`MULTILINE] in
  Pcre.regexp ~flags s
;;
(* [regexp_case_fold s] compiles the pattern [s] like [regexp] does, but
 * additionally passes the `CASELESS flag to Pcre.
 *)
let regexp_case_fold s =
  let flags = [`MULTILINE; `CASELESS] in
  Pcre.regexp ~flags s
;;
(* [quote s] returns [s] as quoted by [Pcre.quote]; the result is what
 * [regexp_string] compiles in order to match [s] literally.
 *)
let quote s =
  let quoted = Pcre.quote s in
  quoted
;;
(* [regexp_string s] builds a regular expression from the quoted form of
 * [s]: the string is first passed through [quote] and the result is
 * compiled with [regexp].
 *)
let regexp_string s =
  let literal = quote s in
  regexp literal
;;
(* [regexp_string_case_fold s] is the case-insensitive counterpart of
 * [regexp_string]: the string is passed through [quote] and the result is
 * compiled with [regexp_case_fold].
 *)
let regexp_string_case_fold s =
  let literal = quote s in
  regexp_case_fold literal
;;
(* [string_match pat s pos] runs [pat] on [s] starting at offset [pos] with
 * the `ANCHORED flag set.
 *
 * Returns [Some r] with the match result, or [None] if [Pcre.exec] raises
 * [Not_found]. Other exceptions of [Pcre.exec] are not caught.
 *
 * The optional argument [?groups] is accepted but not used here.
 *)
let string_match ?groups pat s pos =
  try Some (Pcre.exec ~rex:pat ~flags:[`ANCHORED] ~pos s)
  with Not_found -> None
;;
(* [search_forward pat s pos] runs [pat] on [s] starting at offset [pos].
 *
 * Returns the pair [(p, r)] where [r] is the match result and [p] is the
 * offset at which the whole match (group 0) begins.
 * Nothing is caught here, so [Not_found] raised by [Pcre.exec] reaches the
 * caller.
 *
 * The optional argument [?groups] is accepted but not used here.
 *)
let search_forward ?groups pat s pos =
  let m = Pcre.exec ~rex:pat ~pos s in
  let (first, _) = Pcre.get_substring_ofs m 0 in
  (first, m)
;;
(* [search_backward pat s pos] tries the offsets [pos], [pos-1], ..., [0] in
 * this order and returns the first one at which [pat] matches.
 *
 * Returns the pair [(p, r)] where [r] is the match result and [p] is the
 * offset at which the whole match (group 0) begins.
 * Raises [Not_found] if no offset down to 0 matches.
 *
 * The optional argument [?groups] is accepted but not used here.
 *)
let search_backward ?groups pat s pos =
  (* One attempt at a fixed offset. `ANCHORED virtually prepends "^" to the
   * regexp, so the match has to begin exactly at [p].
   *)
  let attempt p =
    try
      let m = Pcre.exec ~flags:[`ANCHORED] ~rex:pat ~pos:p s in
      Some (fst (Pcre.get_substring_ofs m 0), m)
    with Not_found -> None
  in
  let rec scan p =
    match attempt p with
    | Some hit -> hit
    | None -> if p > 0 then scan (p - 1) else raise Not_found
  in
  scan pos
;;
(* [matched_string result _] returns the text of the whole match (group 0).
 * The second argument is ignored.
 *
 * Why [Pcre.get_substring_ofs] is called first although its value is thrown
 * away: per the original author's note, [Pcre.get_substring] used to return
 * "" instead of raising [Not_found] when there was no matched string, which
 * cannot be told apart from an empty match. [Pcre.get_substring_ofs] raises
 * [Not_found] in that situation, so calling it first gives the exception.
 *
 * The same note states that current versions of Pcre signal [Not_found]
 * themselves; the extra call is kept nevertheless.
 *)
let matched_string result _ =
  let (_ : int * int) = Pcre.get_substring_ofs result 0 in
  Pcre.get_substring result 0
;;
(* [match_beginning result] is the first component of the offset pair that
 * [Pcre.get_substring_ofs] reports for the whole match (group 0).
 *)
let match_beginning result =
  let (first, _) = Pcre.get_substring_ofs result 0 in
  first
;;
(* [match_end result] is the second component of the offset pair that
 * [Pcre.get_substring_ofs] reports for the whole match (group 0).
 *)
let match_end result =
  let (_, last) = Pcre.get_substring_ofs result 0 in
  last
;;
(* [matched_group result n _] returns the text matched by group [n].
 * The third argument is ignored.
 *
 * Raises [Not_found] if [n] is negative or not smaller than
 * [Pcre.num_of_subs result].
 *
 * As in [matched_string], [Pcre.get_substring_ofs] is called first and its
 * value discarded: it raises [Not_found] for a group without matched string,
 * which [Pcre.get_substring] alone did not always do.
 *)
let matched_group result n _ =
  let in_range = 0 <= n && n < Pcre.num_of_subs result in
  if not in_range then raise Not_found;
  let (_ : int * int) = Pcre.get_substring_ofs result n in
  Pcre.get_substring result n
;;
(* [group_beginning result n] is the first component of the offset pair that
 * [Pcre.get_substring_ofs] reports for group [n]. No range check on [n] is
 * done here; exceptions of [Pcre.get_substring_ofs] reach the caller.
 *)
let group_beginning result n =
  let (first, _) = Pcre.get_substring_ofs result n in
  first
;;
(* [group_end result n] is the second component of the offset pair that
 * [Pcre.get_substring_ofs] reports for group [n]. No range check on [n] is
 * done here; exceptions of [Pcre.get_substring_ofs] reach the caller.
 *)
let group_end result n =
  let (_, last) = Pcre.get_substring_ofs result n in
  last
;;
(* [global_replace pat templ s] calls [Pcre.replace] on [s] with the regular
 * expression [pat] and the template [templ].
 *
 * [templ] is handed to [Pcre.subst] unchanged, so it is interpreted
 * according to Pcre's template syntax.
 *)
let global_replace pat templ s =
  let itempl = Pcre.subst templ in
  Pcre.replace ~rex:pat ~itempl s
;;
(* [replace_first pat templ s] calls [Pcre.replace_first] on [s] with the
 * regular expression [pat] and the template [templ].
 *
 * [templ] is handed to [Pcre.subst] unchanged, so it is interpreted
 * according to Pcre's template syntax.
 *)
let replace_first pat templ s =
  let itempl = Pcre.subst templ in
  Pcre.replace_first ~rex:pat ~itempl s
;;
(* [global_substitute pat subst s] calls [Pcre.substitute_substrings] on [s]
 * with the regular expression [pat]. For every match [r] the replacement
 * text is computed as [subst r s], i.e. the callback gets the match result
 * and the complete subject string.
 *
 * The optional argument [?groups] is accepted but not used here.
 *)
let global_substitute ?groups pat subst s =
  let on_match m = subst m s in
  Pcre.substitute_substrings ~rex:pat ~subst:on_match s
;;
(* [string_before s n] returns the first [n] characters of [s], i.e. the
 * part before offset [n].
 * Raises [Invalid_argument] (from [String.sub]) if [n] is not a valid
 * length for [s].
 *)
let string_before s n =
  let from_start = 0 in
  String.sub s from_start n
;;
(* [string_after s n] returns the part of [s] that starts at offset [n] and
 * extends to the end of the string.
 * Raises [Invalid_argument] (from [String.sub]) if [n] is not a valid
 * offset for [s].
 *)
let string_after s n =
  let remaining = String.length s - n in
  String.sub s n remaining
;;
(* [first_chars s len] returns the first [len] characters of [s].
 * Raises [Invalid_argument] (from [String.sub]) if [len] is not a valid
 * length for [s].
 *)
let first_chars s len =
  let from_start = 0 in
  String.sub s from_start len
;;
(* [last_chars s len] returns the last [len] characters of [s].
 * Raises [Invalid_argument] (from [String.sub]) if [len] is not a valid
 * length for [s].
 *)
let last_chars s len =
  let first = String.length s - len in
  String.sub s first len
;;
(*
* Uncomment for next version of Pcre
let substitute_first ?groups ~pat ~subst s =
Pcre.substitute_substrings_first ~rex:pat ~subst:(fun r -> subst r s) s
;;
*)
(* [substitute_first pat subst s] replaces the first match of [pat] in [s]
 * by the text [subst r s], where [r] is the match result. If [pat] does not
 * match, [s] is returned unchanged.
 *
 * This is a hand-written stand-in until Pcre offers the operation itself.
 *
 * NOTE: the handler below covers the whole computation. A [Not_found]
 * raised by the callback [subst] is therefore caught as well, and [s] is
 * returned unchanged in that case, too.
 *
 * The optional argument [?groups] is accepted but not used here.
 *)
let substitute_first ?groups pat subst s =
  try
    let m = Pcre.exec ~rex:pat s in               (* or Not_found *)
    let (first, last) = Pcre.get_substring_ofs m 0 in
    let replacement = subst m s in
    (* Text in front of the match, and text behind it: *)
    let prefix = String.sub s 0 first in
    let suffix = String.sub s last (String.length s - last) in
    String.concat "" [prefix; replacement; suffix]
  with
    Not_found -> s
;;
(* Modelled on Str.bounded_split.
 *
 * [bounded_split expr text num] cuts [text] at the matches of [expr] and
 * returns the pieces between the matches, in the order of their occurrence.
 * - A match anchored at offset 0 is skipped first, so a delimiter at the
 *   very beginning does not produce a leading empty piece.
 * - Splitting stops as soon as the end of [text] is reached, so a delimiter
 *   at the very end does not produce a trailing empty piece.
 * - If [num] is positive, at most [num] pieces are returned; the last one
 *   contains the unsplit remainder. The counter only stops at 1, so
 *   [num <= 0] means "no limit".
 *
 * Two properties differ from a direct transcription of the old Str code;
 * both only matter for inputs on which that code did not terminate
 * normally:
 * - A delimiter match that is empty and ends at the current position does
 *   not consume anything. Accepting it would make the loop continue at the
 *   same position forever (e.g. for a separator like "x*"). Such a match is
 *   rejected and the search is retried one character further, which is the
 *   forward-progress rule of current Str versions.
 * - The pieces are collected in an accumulator, so the stack depth does not
 *   grow with the number of pieces.
 *
 * NOTE(review): the retry advances by one byte. If [expr] was compiled by
 * the caller in a multi-byte mode, this offset may need to be a character
 * boundary -- confirm if such regexps are passed in.
 *)
let bounded_split expr text num =
  let len = String.length text in
  (* Offsets (begin, end) of the leftmost match at or after [p], if any *)
  let find_from p =
    try Some (Pcre.get_substring_ofs (Pcre.exec ~rex:expr ~pos:p text) 0)
    with Not_found -> None
  in
  (* Like [find_from], but a match that does not end behind [p] is
   * discarded in favour of a search starting at [p+1].
   *)
  let find_with_progress p =
    match find_from p with
    | None -> None
    | Some (_, e) as found ->
        if e > p then found
        else if p < len then find_from (p + 1)
        else None
  in
  (* Skip a delimiter at the very beginning of [text] *)
  let start =
    try
      let start_substrs = Pcre.exec ~rex:expr ~flags:[`ANCHORED] text in
      (* or Not_found *)
      snd (Pcre.get_substring_ofs start_substrs 0)
    with
      Not_found -> 0
  in
  (* [acc] holds the pieces found so far in reverse order *)
  let rec split acc start n =
    if start >= len then acc
    else if n = 1 then string_after text start :: acc
    else
      match find_with_progress start with
      | None -> string_after text start :: acc
      | Some (pos, match_end) ->
          split (String.sub text start (pos - start) :: acc) match_end (n - 1)
  in
  List.rev (split [] start num)
;;
(* [split sep s] is [bounded_split sep s 0]. The piece counter of
 * [bounded_split] only stops at 1, so passing 0 means that the number of
 * pieces is not limited.
 *)
let split sep s =
  let unlimited = 0 in
  bounded_split sep s unlimited
;;
(* Modelled on Str.bounded_split_delim.
 *
 * [bounded_split_delim expr text num] cuts [text] at the matches of [expr]
 * and returns the pieces between the matches, in the order of their
 * occurrence. Unlike [bounded_split], delimiters at the beginning and at
 * the end of [text] are significant: they produce an empty piece at the
 * beginning resp. at the end of the list.
 * - The empty string is special-cased and yields the empty list.
 * - If [num] is positive, at most [num] pieces are returned; the last one
 *   contains the unsplit remainder. The counter only stops at 1, so
 *   [num <= 0] means "no limit".
 *
 * Two properties differ from a direct transcription of the old Str code;
 * both only matter for inputs on which that code did not terminate
 * normally:
 * - A delimiter match that is empty and ends at the current position does
 *   not consume anything. Accepting it would make the loop continue at the
 *   same position forever (e.g. for a separator like "x*"). Such a match is
 *   rejected and the search is retried one character further, which is the
 *   forward-progress rule of current Str versions.
 * - The pieces are collected in an accumulator, so the stack depth does not
 *   grow with the number of pieces.
 *
 * NOTE(review): the retry advances by one byte. If [expr] was compiled by
 * the caller in a multi-byte mode, this offset may need to be a character
 * boundary -- confirm if such regexps are passed in.
 *)
let bounded_split_delim expr text num =
  let len = String.length text in
  (* Offsets (begin, end) of the leftmost match at or after [p], if any *)
  let find_from p =
    try Some (Pcre.get_substring_ofs (Pcre.exec ~rex:expr ~pos:p text) 0)
    with Not_found -> None
  in
  (* Like [find_from], but a match that does not end behind [p] is
   * discarded in favour of a search starting at [p+1]. At the end of
   * [text] there is no further position to try.
   *)
  let find_with_progress p =
    match find_from p with
    | None -> None
    | Some (_, e) as found ->
        if e > p then found
        else if p < len then find_from (p + 1)
        else None
  in
  (* [acc] holds the pieces found so far in reverse order *)
  let rec split acc start n =
    if start > len then acc
    else if n = 1 then string_after text start :: acc
    else
      match find_with_progress start with
      | None -> string_after text start :: acc
      | Some (pos, match_end) ->
          split (String.sub text start (pos - start) :: acc) match_end (n - 1)
  in
  if text = "" then [] else List.rev (split [] 0 num)
;;
(* [split_delim sep text] is [bounded_split_delim sep text 0]. The piece
 * counter of [bounded_split_delim] only stops at 1, so passing 0 means that
 * the number of pieces is not limited.
 *)
let split_delim sep text =
  let unlimited = 0 in
  bounded_split_delim sep text unlimited
;;
(* [full_split sep s] calls [Pcre.full_split] on [s] with the separator
 * [sep] and returns its list of [split_result] elements.
 *
 * [~max] is always passed as -1 (presumably Pcre's way of saying "no
 * limit, keep empty parts" -- see the Pcre documentation of [full_split]).
 *)
let full_split sep s =
  let no_limit = -1 in
  Pcre.full_split ~rex:sep ~max:no_limit s
;;
(* [bounded_full_split sep s max] calls [Pcre.full_split] on [s] with the
 * separator [sep] and returns its list of [split_result] elements.
 *
 * A positive [max] is passed on as it is. Every [max <= 0] is mapped to -1
 * before the call, i.e. to the same value [full_split] uses.
 *)
let bounded_full_split sep s max =
  let limit = if max > 0 then max else -1 in
  Pcre.full_split ~rex:sep ~max:limit s
;;