Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: netstring_str.mli 1588 2011-04-28 13:59:54Z gerd $
 * ----------------------------------------------------------------------
 *
 *)

(** Wrapper for regexps with [Str] syntax
 *
 * This module was written at a time when we had only the [Str] module
 * for regular expressions. However, [Str] has an interface that does
 * not work for multi-threaded programs, because the state of the module
 * is visible to the outside. The module [Netstring_str] is similar to [Str],
 * but has a thread-compatible interface.
 *
 * Now we have the much better PCRE library for regular expressions.
 * This implementation of [Netstring_str] is based on the PCRE engine but retains
 * the old interface as much as possible. This includes that the [regexp]
 * function parses old-style regular expressions and internally translates 
 * them to PCRE-style expressions.
 *
 * See also {!Netstring_pcre}. The difference is that [Netstring_pcre] expects
 * already PCRE-style regular expressions and does not translate them.
 *)

(** {b Supported regexp syntax}
 *
 * {[
 * .             matches every character but newline
 * e*            matches e zero or more times
 * e+            matches e one or more times
 * e?            matches e optionally
 * e{m,n}        matches e at least m times and at most n times
 * e1\|e2        matches e1 or e2
 * [set]         matches the characters from set
 * [^set]        matches any character not in set
 * \(...\)       grouping parentheses
 * \n            back reference (n is digit)
 * ^             matches at beginning of line
 * $             matches at end of line
 * ]}
 *
 * This is exactly what [Str] supports. Character classes
 * are not implemented.
 *)

type regexp
  (** The abstract type of regular expressions. Values are obtained from
   * [regexp], [regexp_case_fold], [regexp_string] and
   * [regexp_string_case_fold], and are consumed by the matching, searching,
   * replacing and splitting functions below.
   *)

type split_result = Str.split_result = Text of string | Delim of string;;
  (** Element type of the lists returned by [full_split] and
   * [bounded_full_split]. The type is declared equal to [Str.split_result]
   * in order to keep compatibility with [Str], so values of both types are
   * interchangeable.
   *)

type result;;
  (** The abstract type of matching results. A [result] is returned by
   * [string_match], [search_forward] and [search_backward], and is passed
   * to the accessor functions ([matched_string], [match_beginning],
   * [match_end], [matched_group], [group_beginning], [group_end]) to
   * inspect the match.
   *)

val regexp: string -> regexp
  (** [regexp s] parses the string [s] as a regular expression in the
   * old [Str]-style syntax (see the list of supported syntax at the
   * beginning of this interface).
   *)
val regexp_case_fold: string -> regexp
  (** Like [regexp], but the parsed regexp is case-insensitive *)
val quote: string -> string
  (** [quote s] quotes the string [s] such that the returned string can be
   * included in a regexp, i.e. in the string passed to [regexp].
   *)
val regexp_string: string -> regexp
  (** [regexp_string s] returns a regexp that matches exactly the
   * string [s]
   *)
val regexp_string_case_fold: string -> regexp
  (** [regexp_string_case_fold s] returns a case-insensitive regexp that
   * matches exactly the string [s]
   *)

val quote_set : string -> string
  (** [quote_set chars] returns a regexp (as string, not as [regexp] value)
      that matches any of the characters in [chars]. The returned string is
      meant to be included in the string passed to [regexp].
      The argument must be non-empty.

      NOTE(review): what happens for an empty argument is not specified
      here - check the implementation.
   *)

val string_match: 
      regexp -> string -> int -> result option
  (** [string_match re s pos]: Matches the string [s] at the position
   * [pos] with the regexp [re]. Returns [None] if no match is found.
   * Returns [Some r] on success, and [r] describes the match. To inspect
   * [r], pass it together with the same string [s] to the accessor
   * functions such as [matched_string] or [matched_group].
   *)

val search_forward: 
      regexp -> string -> int -> (int * result)
  (** [search_forward re s pos]: Searches a match of the string [s] with
   * the regexp [re], starting at the position [pos] and proceeding in
   * forward direction.
   *
   * @return [(p,r)] when a match at position [p] is found; the match is
   *   described by [r].
   * @raise Not_found if no match could be found.
   *)
val search_backward: 
      regexp -> string -> int -> (int * result)
  (** [search_backward re s pos]: Searches a match of the string [s] with
   * the regexp [re], starting at the position [pos] and proceeding in
   * backward direction.
   *
   * @return [(p,r)] when a match at position [p] is found; the match is
   *   described by [r].
   * @raise Not_found if no match could be found.
   *)

(* NOT SUPPORTED:
 *
 *  val string_partial_match: 
 *        ?groups:int -> pat:regexp -> string -> pos:int -> result option
 *
 * The function [string_partial_match] cannot be emulated using PCRE.
 *)

val matched_string : result -> string -> string
  (** [matched_string r s]: Extracts the matched part from the string [s].
   * The string [s] must be the same string that was passed to
   * [string_match], [search_forward] or [search_backward], and [r] must
   * be the result returned by that very call.
   *
   * NOTE(review): the behaviour for a non-corresponding string/result
   * pair is not specified here.
   *)

val match_beginning : result -> int
  (** [match_beginning r]: Returns the position where the matched part
   * described by [r] begins
   *)

val match_end : result -> int
  (** [match_end r]: Returns the position where the matched part
   * described by [r] ends.
   *
   * NOTE(review): presumably, as for [Str.match_end], this is the position
   * of the character following the last matched character - confirm.
   *)

val matched_group : result -> int -> string -> string
  (** [matched_group r n s]: Extracts the substring the [n]th group
   * (written as [\(...\)] in the regexp) matches from the whole
   * string [s]. The string [s] must be the same string that was passed to
   * [string_match], [search_forward] or [search_backward], and [r] must
   * be the result returned by that very call.
   *
   * NOTE(review): [Str.matched_group] raises [Not_found] when the group
   * did not take part in the match; confirm whether this function behaves
   * the same way.
   *)

val group_beginning : result -> int -> int
  (** [group_beginning r n]: Returns the position where the substring
   * matching the [n]th group begins, for the match described by [r]
   *)

val group_end : result -> int -> int
  (** [group_end r n]: Returns the position where the substring
   * matching the [n]th group ends, for the match described by [r].
   *
   * NOTE(review): presumably, as for [Str.group_end], this is the position
   * of the character following the group - confirm.
   *)

val global_replace: regexp -> (*templ:*) string -> string -> string
  (** [global_replace re templ s]: Replaces all matches of [re] in
   * [s] by [templ], and returns the resulting string.
   *
   * In [templ] one can refer to matched groups by the backslash notation:
   * [\1] refers to the first group, [\2] to the second etc.
   * [\0] is the whole match. [\\ ] is the backslash character.
   *
   * See [replace_first] for a variant that replaces only the first match.
   *)

val replace_first: regexp -> (*templ:*) string -> string -> string
  (** [replace_first re templ s]: Replaces only the first match of [re] in
   * [s] by [templ], and returns the resulting string.
   *
   * In [templ] one can refer to matched groups by the backslash notation:
   * [\1] refers to the first group, [\2] to the second etc.
   * [\0] is the whole match. [\\ ] is the backslash character.
   *
   * See [global_replace] for a variant that replaces all matches.
   *)

val global_substitute:
       regexp -> (result -> string -> string) -> string -> string
  (** [global_substitute re subst s]: Applies the substitution function
   * [subst] to all matches of [re] in [s], and returns the 
   * transformed string. For every match, [subst] is called with the
   * [result] describing this match and with the whole string [s]; the
   * string returned by [subst] is used as the replacement text.
   *)

val substitute_first:
       regexp -> (result -> string -> string) -> string -> string
  (** [substitute_first re subst s]: Applies the substitution function
   * [subst] to the first match of [re] in [s] only, and returns the 
   * transformed string. [subst] is called with the [result] describing
   * the match and with the whole string [s]; the string returned by
   * [subst] is used as the replacement text.
   *)

(* replace_matched: not available *)

val split: regexp -> string -> string list
  (** [split re s]: Splits the string [s] into substrings, where the
   * matches of the regexp [re] are taken as delimiters.
   * Occurrences of the delimiter at the beginning and the end
   * of [s] are ignored (compare with [split_delim], which reports them as
   * empty strings).
   *)

val bounded_split: regexp -> string -> int -> string list
  (** [bounded_split re s n]: Splits [s] into at most [n] substrings,
   * based on [split]
   *)
val split_delim: regexp -> string -> string list
  (** [split_delim re s]: Same as [split], but occurrences of the delimiter
   * at the beginning and the end of [s] are returned as empty strings
   *)
val bounded_split_delim: regexp -> string -> int -> string list
  (** [bounded_split_delim re s n]: Splits [s] into at most [n] substrings,
   * based on [split_delim]
   *)
val full_split: regexp -> string -> split_result list
  (** [full_split re s]: Like [split_delim], but returns the delimiters in
   * the result, too. The elements of the returned list are of type
   * [split_result].
   *)
val bounded_full_split: regexp -> string -> int -> split_result list
  (** [bounded_full_split re s n]: Splits [s] into at most [n] substrings,
   * based on [full_split]
   *)

val string_before: string -> int -> string
  (** [string_before s n]: The first [n] characters of the string [s] *)
val string_after: string -> int -> string
  (** [string_after s n]: Described here as the last [n] characters of
   * the string [s].
   *
   * NOTE(review): [Str.string_after s n] returns the substring of [s]
   * starting at position [n], i.e. everything but the first [n]
   * characters. As this module mirrors [Str], the description
   * above is probably inaccurate - confirm against the implementation.
   *)
val first_chars: string -> int -> string
  (** [first_chars s n]: Same as [string_before] *)
val last_chars: string -> int -> string
  (** [last_chars s n]: Described here as the same as [string_after].
   *
   * NOTE(review): in [Str], [last_chars s n] returns the last [n]
   * characters of [s], which differs from [Str.string_after] - confirm
   * against the implementation.
   *)

(* Private: *)

(* NOT NECESSARY:
 * val init_mt : (unit -> unit) -> (unit -> unit) -> unit
 * In previous versions of Netstring_str it was necessary to initialize the
 * module in a special way when used in a multi-threaded environment. This
 * is no longer necessary.
 *)

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml