(* $Id: netstring_pcre.mlp 1588 2011-04-28 13:59:54Z gerd $ * ---------------------------------------------------------------------- * *) IFDEF HAVE_PCRE THEN type regexp = Pcre.regexp;; type split_result = Pcre.split_result = | Text of string | Delim of string | Group of int * string | NoGroup ;; type result = Pcre.substrings;; let regexp s = Pcre.regexp ~flags:[`MULTILINE] s ;; let regexp_case_fold s = Pcre.regexp ~flags:[`MULTILINE; `CASELESS] s ;; let quote s = Pcre.quote s ;; let regexp_string s = regexp (quote s) ;; let regexp_string_case_fold s = regexp_case_fold (quote s) ;; let string_match pat s pos = try let result = Pcre.exec ~rex:pat ~flags:[`ANCHORED] ~pos s in Some result with Not_found -> None ;; let search_forward pat s pos = let result = Pcre.exec ~rex:pat ~pos s in fst (Pcre.get_substring_ofs result 0), result ;; let search_backward pat s pos = let rec search p = try (* `ANCHORED: virtually prepends "^" to the regexp *) let result = Pcre.exec ~flags:[`ANCHORED] ~rex:pat ~pos:p s in fst (Pcre.get_substring_ofs result 0), result with Not_found -> if p > 0 then search (p-1) else raise Not_found in search pos ;; let matched_string result _ = (* Unfortunately, Pcre.get_substring will not raise Not_found if there is * no matched string. Instead, it returns "", but this value cannot be * distinguished from an empty match. * The workaround is to call Pcre.get_substring_ofs first. This function * will raise Not_found if there is not any matched string. * * NOTE: Current versions of Pcre do return Not_found! *) ignore(Pcre.get_substring_ofs result 0); Pcre.get_substring result 0 ;; let match_beginning result = fst (Pcre.get_substring_ofs result 0) ;; let match_end result = snd (Pcre.get_substring_ofs result 0) ;; let matched_group result n _ = (* See also the comment for [matched_string] *) if n < 0 || n >= Pcre.num_of_subs result then raise Not_found; ignore(Pcre.get_substring_ofs result n); Pcre.get_substring result n ;; let group_beginning result n = fst (Pcre.get_substring_ofs result n) ;; let group_end result n = snd (Pcre.get_substring_ofs result n) ;; let templ_re = Pcre.regexp "(?:\\\\\\d)|[\\$\\\\]" ;; (* matches a backslash and a digit, or a single dollar or a single * backslash. *) let tr_templ s = (* Convert \n to $n etc. *) (* Unfortunately we cannot just replace \ by $. *) let rec tr l = match l with Pcre.Delim "$" :: l' -> "$$" :: tr l' | Pcre.Delim "\\" :: Pcre.Delim "$" :: l' -> "$$" :: tr l' | Pcre.Delim "\\" :: Pcre.Delim s :: l' -> s :: tr l' | Pcre.Delim "\\" :: Pcre.Text s :: l' -> s :: tr l' | [ Pcre.Delim "\\" ] -> failwith "trailing backslash" | Pcre.Delim d :: l' -> assert(d.[0] = '\\'); let n = Char.code d.[1] - Char.code '0' in if n = 0 then "$&" :: tr l' else ("$" ^ string_of_int n ^ "$!") :: tr l' | Pcre.Text t :: l' -> t :: tr l' | Pcre.Group(_,_) :: _ -> assert false | Pcre.NoGroup :: _ -> assert false | [] -> [] in let l = Pcre.full_split ~rex:templ_re ~max:(-1) s in String.concat "" (tr l) ;; let global_replace pat templ s = Pcre.replace ~rex:pat ~itempl:(Pcre.subst (tr_templ templ)) s ;; let replace_first pat templ s = Pcre.replace_first ~rex:pat ~itempl:(Pcre.subst (tr_templ templ)) s ;; let global_substitute pat subst s = Pcre.substitute_substrings ~rex:pat ~subst:(fun r -> subst r s) s ;; let string_before s n = String.sub s 0 n ;; let string_after s n = String.sub s n (String.length s - n) ;; let first_chars s len = String.sub s 0 len ;; let last_chars s len = String.sub s (String.length s - len) len ;; (* * Uncomment for next version of Pcre let substitute_first ?groups ~pat ~subst s = Pcre.substitute_substrings_first ~rex:pat ~subst:(fun r -> subst r s) s ;; *) let substitute_first pat subst s = (* Do it yourself in the meantime *) try let substrs = Pcre.exec ~rex:pat s in (* or Not_found *) let (match_beg,match_end) = Pcre.get_substring_ofs substrs 0 in let replacement_text = subst substrs s in String.concat "" [string_before s match_beg; replacement_text; string_after s match_end] with Not_found -> s ;; (* Copied from Str for exact compatibility: *) let bounded_split expr text num = let start = try let start_substrs = Pcre.exec ~rex:expr ~flags:[`ANCHORED] text in (* or Not_found *) let (_,match_end) = Pcre.get_substring_ofs start_substrs 0 in match_end with Not_found -> 0 in let rec split start n = if start >= String.length text then [] else if n = 1 then [string_after text start] else try let next_substrs = Pcre.exec ~rex:expr ~pos:start text in (* or Not_found *) let (pos,match_end) = Pcre.get_substring_ofs next_substrs 0 in String.sub text start (pos-start) :: split match_end (n-1) with Not_found -> [string_after text start] in split start num ;; let split sep s = bounded_split sep s 0 ;; (* Copied from Str for exact compatibility: *) let bounded_split_delim expr text num = let rec split start n = if start > String.length text then [] else if n = 1 then [string_after text start] else try let next_substrs = Pcre.exec ~rex:expr ~pos:start text in (* or Not_found *) let (pos,match_end) = Pcre.get_substring_ofs next_substrs 0 in String.sub text start (pos-start) :: split match_end (n-1) with Not_found -> [string_after text start] in if text = "" then [] else split 0 num ;; let split_delim sep text = bounded_split_delim sep text 0 ;; let full_split sep s = Pcre.full_split ~rex:sep ~max:(-1) s ;; let bounded_full_split sep s max = let max' = if max <= 0 then -1 else max in Pcre.full_split ~rex:sep ~max:max' s ;; ELSE type regexp type split_result = | Text of string | Delim of string | Group of int * string | NoGroup type result let regexp _ = invalid_arg "Netstring_pcre: unavailable" let regexp_case_fold _ = invalid_arg "Netstring_pcre: unavailable" let quote _ = invalid_arg "Netstring_pcre: unavailable" let regexp_string _ = invalid_arg "Netstring_pcre: unavailable" let regexp_string_case_fold _ = invalid_arg "Netstring_pcre: unavailable" let string_match _ _ _ = invalid_arg "Netstring_pcre: unavailable" let search_forward _ _ _ = invalid_arg "Netstring_pcre: unavailable" let search_backward _ _ _ = invalid_arg "Netstring_pcre: unavailable" let matched_string _ _ = invalid_arg "Netstring_pcre: unavailable" let match_beginning _ = invalid_arg "Netstring_pcre: unavailable" let match_end _ = invalid_arg "Netstring_pcre: unavailable" let matched_group _ _ _ = invalid_arg "Netstring_pcre: unavailable" let group_beginning _ _ = invalid_arg "Netstring_pcre: unavailable" let group_end _ _ = invalid_arg "Netstring_pcre: unavailable" let global_replace _ _ _ = invalid_arg "Netstring_pcre: unavailable" let replace_first _ _ _ = invalid_arg "Netstring_pcre: unavailable" let global_substitute _ _ _ = invalid_arg "Netstring_pcre: unavailable" let substitute_first _ _ _ = invalid_arg "Netstring_pcre: unavailable" let split _ _ = invalid_arg "Netstring_pcre: unavailable" let bounded_split _ _ _ = invalid_arg "Netstring_pcre: unavailable" let split_delim _ _ = invalid_arg "Netstring_pcre: unavailable" let bounded_split_delim _ _ = invalid_arg "Netstring_pcre: unavailable" let full_split _ _ = invalid_arg "Netstring_pcre: unavailable" let bounded_full_split _ _ _ = invalid_arg "Netstring_pcre: unavailable" let string_before _ _ = invalid_arg "Netstring_pcre: unavailable" let string_after _ _ = invalid_arg "Netstring_pcre: unavailable" let first_chars _ _ = invalid_arg "Netstring_pcre: unavailable" let last_chars _ _ = invalid_arg "Netstring_pcre: unavailable" ENDIF