Plasma GitLab Archive
Projects Blog Knowledge

(* $Id$
 * ----------------------------------------------------------------------
 *
 *)

open Printf

exception Malformed_URL

type url_syntax_option =
    Url_part_not_recognized
  | Url_part_allowed
  | Url_part_required


type url_syntax =
    { url_enable_scheme    : url_syntax_option;
      url_enable_user      : url_syntax_option;
      url_enable_user_param : url_syntax_option;
      url_enable_password  : url_syntax_option;
      url_enable_host      : url_syntax_option;
      url_enable_port      : url_syntax_option;
      url_enable_path      : url_syntax_option;
      url_enable_param     : url_syntax_option;
      url_enable_query     : url_syntax_option;
      url_enable_fragment  : url_syntax_option;
      url_enable_other     : url_syntax_option;
      url_accepts_8bits    : bool;
      url_is_valid         : url -> bool;
      url_enable_relative  : bool;
    }

and url =
    { 
      url_syntax   : url_syntax;
      mutable url_validity : bool;
      url_scheme   : string option;
      url_user     : string option;
      url_user_param : string list;
      url_password : string option;
      url_host     : string option;
      url_port     : int option;
      url_path     : string list;
      url_param    : string list;
      url_query    : string option;
      url_fragment : string option;
      url_other    : string option;
    }
;;


type char_category =
    Accepted
  | Rejected
  | Separator



let scan_url_part s k_from k_to cats accept_8bits =
  (* Scans the longest word of accepted characters from position 'k_from'
   * in 's' until at most position 'k_to'. The character following the
   * word (if any) must be a separator character.
   * On success, the function returns the position of the last character
   * of the word + 1.
   * If there is any rejected character before the separator or the end
   * of the string (i.e. position 'k_to') is reached, the exception
   * Malformed_URL is raised.
   * Furthermore, if the character '%' is accepted it is checked whether
   * two hexadecimal digits follow (which must be accepted, too). If this
   * is not true, the exception Malformed_URL is raised, too.
   * 'cats': contains for every character code (0 to 255) the category
   * of the character.
   *)
  let check_hex c =
    if cats.( Char.code c ) <> Accepted then raise Malformed_URL;
    match c with
	('0'..'9'|'A'..'F'|'a'..'f') -> ()
      | _ -> raise Malformed_URL
  in

  let rec scan k =
    if k >= k_to then
      k
    else begin
      let c = s.[k] in
      let cat = cats.(Char.code c) in
      match cat with
	  Accepted -> 
	    if c = '%' then begin
	      if k+2 >= k_to then raise Malformed_URL;
	      let c1 = s.[k+1] in
	      let c2 = s.[k+2] in
	      check_hex c1;
	      check_hex c2;
	      scan (k+3)
	    end
	    else
	      scan (k+1)
	| Separator -> k
	| Rejected -> 
	    if accept_8bits && c >= '\128' 
	    then scan (k+1)
	    else raise Malformed_URL
    end
  in

  assert (Array.length cats = 256);
  assert (k_from >= 0);
  assert (k_from <= k_to);
  assert (k_to <= String.length s);
  
  scan k_from
;;

  
(* Create a categorization: *)

let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
	       'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ]

let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
	       'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ]

let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ]

let hex_digit = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f';
		  'A'; 'B'; 'C'; 'D'; 'E'; 'F';
		] @ digit

let safe = [ '$'; '-'; '_'; '.'; '+' ]

let extra = [ '!'; '*'; '\''; '('; ')'; ',' ]

let make_cats accepted separators =
  (* create a categorization:
   * - All characters listed in 'separators' are separators.
   * - All characters listed in 'accepted' and which do not occur in
   *   'separators' are accepted characters.
   * - All other characters are rejected.
   *)
  let cats = Array.make 256 Rejected in
  List.iter
    (fun c ->
       cats.(Char.code c) <- Accepted
    )
    accepted;

  List.iter
    (fun c ->
       cats.(Char.code c) <- Separator
    )
    separators;
  cats
;;


let scheme_cats =
  make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;;

    (* scheme_cats: character categorization to _extract_ the URL scheme *)


let login_cats =
  make_cats 
    (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '&'; '='; '%'])  
    [':'; '@'; '/'; '#'; '?' ]
;;

    (* login_cats: character categorization to _extract_ user name, password,
     * host name, and port.
     *
     * Note: user_params are extracted in a second step.
     *
     * Note: '?' is now a separator, as described in RFC 2396.
     *)

let host_cats =
  make_cats
    (lalpha @ ualpha @ digit @ ['.'; '-'])
    []
;;
    (* host_cats: character categorization to _check_ whether the host name
     * is formed only by legal characters.
     * Especially '%' is not allowed here!
     * IPv6 addresses are checked separately.
     *)

let ipv6_cats =
  make_cats
    (hex_digit @ [ ':' ])
    [ ]

let ipv6_sep_cats =
  make_cats
    (hex_digit @ [ ':' ])
    [ ']' ]

let port_cats =
  make_cats
    digit
    []
;;

    (* port_cats: character categorization to _check_ whether the port number
     * is formed only by legal characters.
     * Especially '%' is not allowed here!
     *)

let path_cats separators =
  make_cats
    (lalpha @ ualpha @ digit @ safe @ extra @ 
              ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'])
    separators
;;


let separators_from_syntax syn =
  let include_if syn_option clist =
    if syn_option <> Url_part_not_recognized then
      clist
    else
      []
  in
  (include_if syn.url_enable_param [';']) @
  (include_if syn.url_enable_query ['?']) @
  (include_if syn.url_enable_fragment ['#'])
;;


let path_cats_from_syntax syn extraseps =
  let separators = separators_from_syntax syn in
  path_cats (separators @ extraseps)
;;

(* path_cats_from_syntax:
 * Computes a character categorization to extract the path from an URL.
 * This depends on the syntax because the list of possible separators
 * contains the characters that may begin the next URL clause.
 *
 * Notes:
 * - The '#' is rejected unless fragments are enabled. 
 * - The '~' is accepted although this violates RFC 1738 (but it is ok
 *   according to RFC 2396)
 *)


let other_cats_from_syntax syn =
  let include_if syn_option clist =
    if syn_option <> Url_part_not_recognized then
      clist
    else
      []
  in
  let separators =
    (include_if syn.url_enable_param [';']) @
    (include_if syn.url_enable_query ['?']) @
    (include_if syn.url_enable_fragment ['#'])
  in

  make_cats
    (lalpha @ ualpha @ digit @ safe @ extra @ 
              (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/']))
    []
;;

    (* other_cats: character categorization to extract or check the
     * "other" part of the URL.
     *)


let extract_url_scheme s = 
  let l = String.length s in
  let k = scan_url_part s 0 l scheme_cats false in
          (* or raise Malformed_URL *)
  if k = l then raise Malformed_URL;
  assert (s.[k] = ':');
  STRING_LOWERCASE (String.sub s 0 k)
;;


let ( => ) a b = not a || b;;   (* implication *)

let ( <=> ) (a:bool) b = ( a = b );;  (* equivalence *)

let url_syntax_is_valid syn =
  let recognized x = x <> Url_part_not_recognized in
  let _not_recognized x = x = Url_part_not_recognized in
  (recognized syn.url_enable_password   => recognized syn.url_enable_user) &&
  (recognized syn.url_enable_user_param => recognized syn.url_enable_user) &&
  (recognized syn.url_enable_port       => recognized syn.url_enable_host) &&
  (recognized syn.url_enable_user       => recognized syn.url_enable_host) &&
  not ( (recognized syn.url_enable_user ||
	 recognized syn.url_enable_password ||
	 recognized syn.url_enable_host ||
	 recognized syn.url_enable_port ||
	 recognized syn.url_enable_path) &&
	(recognized syn.url_enable_other))
;;


let partial_url_syntax syn =
  let weaken =
    function
	Url_part_not_recognized -> Url_part_not_recognized
      | Url_part_allowed        -> Url_part_allowed
      | Url_part_required       -> Url_part_allowed
  in
  if not syn.url_enable_relative then
    failwith "Neturl.partial_url_syntax: This syntax does not support relative URLs";
  { url_enable_scheme    = weaken syn.url_enable_scheme;
    url_enable_user      = weaken syn.url_enable_user;
    url_enable_user_param= weaken syn.url_enable_user_param;
    url_enable_password  = weaken syn.url_enable_password;
    url_enable_host      = weaken syn.url_enable_host;
    url_enable_port      = weaken syn.url_enable_port;
    url_enable_path      = weaken syn.url_enable_path;
    url_enable_param     = weaken syn.url_enable_param;
    url_enable_query     = weaken syn.url_enable_query;
    url_enable_fragment  = weaken syn.url_enable_fragment;
    url_enable_other     = weaken syn.url_enable_other;
    url_accepts_8bits    = syn.url_accepts_8bits;
    url_is_valid         = syn.url_is_valid;
    url_enable_relative  = true;
  }
;;



let file_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_not_recognized;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_allowed;
    url_enable_port      = Url_part_not_recognized;
    url_enable_path      = Url_part_required;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = true;
  }
;;


let ftp_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_allowed;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_allowed;
    url_enable_host      = Url_part_required;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_allowed;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = true;
  }
;;


let http_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_allowed;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_allowed;
    url_enable_host      = Url_part_required;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_allowed;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = true;
  }
;;


let pop_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_allowed;
    url_enable_user_param= Url_part_allowed;
    url_enable_password  = Url_part_allowed;
    url_enable_host      = Url_part_required;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_not_recognized;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = false;
  }
;;


let imap_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_allowed;
    url_enable_user_param= Url_part_allowed;
    url_enable_password  = Url_part_allowed;
    url_enable_host      = Url_part_required;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_allowed;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = true;
  }
;;


let mailto_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_not_recognized;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_not_recognized;
    url_enable_port      = Url_part_not_recognized;
    url_enable_path      = Url_part_not_recognized;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_allowed;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_required;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = false;
  }
;;


let news_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_not_recognized;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_not_recognized;
    url_enable_port      = Url_part_not_recognized;
    url_enable_path      = Url_part_not_recognized;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_required;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = false;
  }
;;
let cid_url_syntax = news_url_syntax;;
let data_url_syntax = news_url_syntax;;

let nntp_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_not_recognized;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_required;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_required;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun url -> 
			      List.length url.url_path = 3
			   );
    url_enable_relative  = true;
  }
;;


let ipp_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_not_recognized;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_required;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_allowed;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = true;
  }
;;


let null_url_syntax =
  { url_enable_scheme    = Url_part_not_recognized;
    url_enable_user      = Url_part_not_recognized;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_not_recognized;
    url_enable_port      = Url_part_not_recognized;
    url_enable_path      = Url_part_not_recognized;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = false;
  }
;;


let ip_url_syntax =
  { url_enable_scheme    = Url_part_allowed;
    url_enable_user      = Url_part_allowed;
    url_enable_user_param= Url_part_not_recognized;
                           (* so user parameters are parsed as part
			    * of the user string! (Most generic.)
			    *)
    url_enable_password  = Url_part_allowed;
    url_enable_host      = Url_part_allowed;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_allowed;
    url_enable_query     = Url_part_allowed;
    url_enable_fragment  = Url_part_allowed;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = true;
  }
;;


let ldap_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_not_recognized;
    url_enable_user_param= Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_allowed;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_allowed;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
    url_enable_relative  = true;
  }
;;


let common_url_syntax =
  let h = Hashtbl.create 10 in
  Hashtbl.add h "file"   file_url_syntax;
  Hashtbl.add h "ftp"    ftp_url_syntax;
  Hashtbl.add h "http"   http_url_syntax;
  Hashtbl.add h "https"  http_url_syntax;
  Hashtbl.add h "pop"    pop_url_syntax;
  Hashtbl.add h "pops"   pop_url_syntax;
  Hashtbl.add h "imap"   imap_url_syntax;
  Hashtbl.add h "imaps"  imap_url_syntax;
  Hashtbl.add h "mailto" mailto_url_syntax;
  Hashtbl.add h "news"   news_url_syntax;
  Hashtbl.add h "nntp"   nntp_url_syntax;
  Hashtbl.add h "nntps"  nntp_url_syntax;
  Hashtbl.add h "cid"    cid_url_syntax;
  Hashtbl.add h "mid"    cid_url_syntax;
  Hashtbl.add h "data"   data_url_syntax;
  Hashtbl.add h "ipp"    ipp_url_syntax;
  Hashtbl.add h "ipps"   ipp_url_syntax;
  Hashtbl.add h "ldap"   ldap_url_syntax;
  Hashtbl.add h "ldaps"  ldap_url_syntax;
  h
;;


let url_conforms_to_syntax url =
  let recognized x = x <> Url_part_not_recognized in
  let required x = x = Url_part_required in
  let present x    = x <> None in
  let syn = url.url_syntax in
  (present url.url_scheme   => recognized syn.url_enable_scheme)   &&
  (present url.url_user     => recognized syn.url_enable_user)     &&
  ((url.url_user_param <> []) => recognized syn.url_enable_user_param) &&
  (present url.url_password => recognized syn.url_enable_password) &&
  (present url.url_host     => recognized syn.url_enable_host)     &&
  (present url.url_port     => recognized syn.url_enable_port)     &&
  ((url.url_path <> [])     => recognized syn.url_enable_path)     &&
  ((url.url_param <> [])    => recognized syn.url_enable_param)    &&
  (present url.url_query    => recognized syn.url_enable_query)    &&
  (present url.url_fragment => recognized syn.url_enable_fragment) &&
  (present url.url_other    => recognized syn.url_enable_other)    &&
  (required syn.url_enable_scheme   => present url.url_scheme)     &&
  (required syn.url_enable_user     => present url.url_user)       &&
  (required syn.url_enable_user_param => (url.url_user_param <> [])) &&
  (required syn.url_enable_password => present url.url_password)   &&
  (required syn.url_enable_host     => present url.url_host)       &&
  (required syn.url_enable_port     => present url.url_port)       &&
  (required syn.url_enable_path     => (url.url_path <> []))       &&
  (required syn.url_enable_param    => (url.url_param <> []))      &&
  (required syn.url_enable_query    => present url.url_query)      &&
  (required syn.url_enable_fragment => present url.url_fragment)   &&
  (required syn.url_enable_other    => present url.url_other)      &&
  (url.url_validity || syn.url_is_valid url)
;;


let url_syntax_of_url url = url.url_syntax
;;


let host_of_addr ip =
  match Netsys.domain_of_inet_addr ip with
    | Unix.PF_INET -> Unix.string_of_inet_addr ip
    | Unix.PF_INET6 -> "[" ^ Unix.string_of_inet_addr ip ^ "]"
    | _ -> assert false


let modify_url
      ?syntax
      ?(encoded = false)
      ?scheme
      ?user
      ?user_param
      ?password
      ?host
      ?addr
      ?port
      ?socksymbol
      ?path
      ?param
      ?query
      ?fragment
      ?other
      url 
  =

  let enc ?(plus=false) x =
    if encoded then
      x
    else
      match x with
	  None -> None
	| Some x' -> Some (Netencoding.Url.encode ~plus x')
  in
  let enc_list ?(plus=false) l = 
    if encoded then
      l
    else
      List.map (Netencoding.Url.encode ~plus) l 
  in

  let new_syntax =
    match syntax with
	None -> url.url_syntax
      | Some syn -> syn
  in

  let check_string s_opt cats =
    match s_opt with
	None   -> ()
      | Some s ->
	  let l = String.length s in
	  let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
	          (* or raise Malformed_URL *)
	  if k <> l then raise Malformed_URL
  in

  let check_string_list p cats sep =
    List.iter
      (fun p_component ->
	 let l = String.length p_component in
	 let k = 
	   scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
	   (* or raise Malformed_URL *)
	 if k <> l then raise Malformed_URL;
	 if String.contains p_component sep then raise Malformed_URL;
      )
      p
  in

  let check_host s =
    let l = String.length s in
    if String.length s >= 2 && s.[0] = '[' then (
      if s.[l-1] <> ']' then raise Malformed_URL;
      let ipv6 = String.sub s 1 (l-2) in
      check_string (Some ipv6) ipv6_cats
    )
    else
      check_string (Some s) host_cats
  in

  (* Create the modified record: *)
  let url_host_0 =
    match addr with
      | Some a -> Some(host_of_addr a)
      | None ->
	  ( match host with
	      | Some h -> Some h
	      | None -> url.url_host
	  ) in
  let url_port_0 =
    match port with
      | Some p -> Some p
      | None -> url.url_port in
  let (url_host, url_port) =
    match (socksymbol : Netsockaddr.socksymbol option) with
      | Some(`Inet(ip,p)) -> (Some(host_of_addr ip), Some p)
      | Some(`Inet_byname(h,p)) -> (Some h, Some p)
      | Some _ -> failwith "Neturl: Unacceptable socksymbol"
      | None -> (url_host_0, url_port_0) in
  let url' =
    { 
      url_syntax   = new_syntax;
      url_validity = false;
      url_scheme   = if scheme   = None then url.url_scheme   else scheme;
      url_user     = if user     = None then url.url_user     else enc user;
      url_user_param = ( match user_param with 
			     None -> url.url_user_param
			   | Some p -> enc_list p);
      url_password = if password = None then url.url_password else enc password;
      url_host     = url_host;
      url_port     = url_port;
      url_path     = (match path with
			  None -> url.url_path
			| Some p -> enc_list p);
      url_param    = (match param with
			  None -> url.url_param
			| Some p -> enc_list p);
      url_query    = if query    = None then url.url_query    else enc ~plus:true query;
      url_fragment = if fragment = None then url.url_fragment else enc fragment;
      url_other    = if other    = None then url.url_other    else enc other;
    }
  in
  (* Check whether the URL conforms to the syntax:
   *)
  if not (url_conforms_to_syntax url') then raise Malformed_URL;
  if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
  if url'.url_user_param <> [] && url'.url_user = None then raise Malformed_URL;
  if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
  if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
  (* Check every part: *)
  check_string url'.url_scheme          scheme_cats;
  check_string url'.url_user            login_cats;
  check_string_list url'.url_user_param login_cats ';';
  check_string url'.url_password        login_cats;
  ( match url'.url_host with
      | None -> ()
      | Some s -> check_host s
  );
  (match url'.url_port with 
       None -> ()
     | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
  );
  let path_cats  = path_cats_from_syntax  new_syntax [] in
  let other_cats = other_cats_from_syntax new_syntax in
  let query_cats =
    let syn = { new_syntax with
                url_enable_param = Url_part_not_recognized;
                url_enable_query = Url_part_not_recognized
              } in
    path_cats_from_syntax syn [] in
  check_string url'.url_query    query_cats;
  check_string url'.url_fragment path_cats;
  check_string url'.url_other    other_cats;
  (* Check the lists: *)
  check_string_list url'.url_param path_cats ';';
  check_string_list url'.url_path  path_cats '/';
  (* Further path checks: *)
  begin match url'.url_path with
      [] ->
	(* The path is empty: There must not be 'param' *)
	if url'.url_host <> None then begin
	  if url'.url_param <> [] then raise Malformed_URL;
	  (* if url'.url_query <> None then raise Malformed_URL; 
	   * Allowed since RFC 2396
	   *)
	end
    | ["";""] ->
	(* This is illegal. *)
	raise Malformed_URL;
    | "" :: p' ->
	(* The path is absolute: ensure there is no double slash with host *)
	( match url'.url_path with
	    | "" :: "" :: _ ->
		(* Double slash at beginning of path: Only allowed we have
                 * a host name!
                 *)
		if host = None then raise Malformed_URL
	    | _ -> ()
	)
    | _ ->
	(* The path is relative: there must not be a host *)
	if url'.url_host <> None then raise Malformed_URL;
  end;
  (* Cache that the URL is valid: *)
  url'.url_validity <- true;

  url'
;;


let null_url =
  { 
    url_syntax   = null_url_syntax;
    url_validity = true;
    url_scheme   = None;
    url_user     = None;
    url_user_param = [];
    url_password = None;
    url_host     = None;
    url_port     = None;
    url_path     = [];
    url_param    = [];
    url_query    = None;
    url_fragment = None;
    url_other    = None;
  }
;;


let make_url
      ?(encoded = false)
      ?scheme
      ?user
      ?user_param
      ?password
      ?host
      ?addr
      ?port
      ?socksymbol
      ?path
      ?param
      ?query
      ?fragment
      ?other
      syntax
  =

  if not (url_syntax_is_valid syntax) then
    invalid_arg "Neturl.make_url";

  modify_url
    ~encoded
    ~syntax
    ?scheme
    ?user
    ?user_param
    ?password
    ?host
    ?addr
    ?port
    ?socksymbol
    ?path
    ?param
    ?query
    ?fragment
    ?other
    null_url
;;


let remove_from_url
      ?(scheme = false)
      ?(user = false)
      ?(user_param = false)
      ?(password = false)
      ?(host = false)
      ?(port = false)
      ?(path = false)
      ?(param = false)
      ?(query = false)
      ?(fragment = false)
      ?(other = false)
      url
  =

  make_url
    ~encoded:   true
    ?scheme:    (if scheme     then None else url.url_scheme)
    ?user:      (if user       then None else url.url_user)
    ?user_param:(if user_param then None else Some url.url_user_param)
    ?password:  (if password   then None else url.url_password)
    ?host:      (if host       then None else url.url_host)
    ?port:      (if port       then None else url.url_port)
    ?path:      (if path       then None else Some url.url_path)
    ?param:     (if param      then None else Some url.url_param)
    ?query:     (if query      then None else url.url_query)
    ?fragment:  (if fragment   then None else url.url_fragment)
    ?other:     (if other      then None else url.url_other)
    url.url_syntax
;;


let default_url
      ?(encoded = false)
      ?scheme
      ?user
      ?(user_param = [])
      ?password
      ?host
      ?port
      ?(path = [])
      ?(param = [])
      ?query
      ?fragment
      ?other
      url
  =

  let encode = Netencoding.Url.encode ~plus:true in

  let enc x =
    if encoded then
      x
    else
      match x with
	  None -> None
	| Some x' -> Some (encode x')
  in

  let enc_list l = 
    if encoded then
      l
    else
      List.map encode l 
  in

  let pass_if_missing current arg =
    match current with
	None -> arg
      | _    -> current
  in

  make_url
    ~encoded:   true
    ?scheme:    (pass_if_missing url.url_scheme   scheme)
    ?user:      (pass_if_missing url.url_user     (enc user))
    ~user_param:(if url.url_user_param = [] then enc_list user_param else url.url_user_param)
    ?password:  (pass_if_missing url.url_password (enc password))
    ?host:      (pass_if_missing url.url_host     host)
    ?port:      (pass_if_missing url.url_port     port)
    ~path:      (if url.url_path  = [] then enc_list path  else url.url_path)
    ~param:     (if url.url_param = [] then enc_list param else url.url_param)
    ?query:     (pass_if_missing url.url_query    (enc query))
    ?fragment:  (pass_if_missing url.url_fragment (enc fragment))
    ?other:     (pass_if_missing url.url_other    (enc other))
    url.url_syntax
;;


let undefault_url
      ?scheme
      ?user
      ?user_param
      ?password
      ?host
      ?port
      ?path
      ?param
      ?query
      ?fragment
      ?other
      url
  =

  let remove_if_matching current arg =
    match current with
	None -> None
      | Some x -> 
	  (match arg with
	       None -> current
	     | Some x' ->
		 if x=x' then
		   None
		 else
		   current)
  in

  let remove_list_if_matching current arg =
    match arg with
	None -> current
      | Some x ->
	  if x = current then [] else current
  in

  make_url
    ~encoded:   true
    ?scheme:    (remove_if_matching url.url_scheme   scheme)
    ?user:      (remove_if_matching url.url_user     user)
    ~user_param:(remove_list_if_matching url.url_user_param user_param)
    ?password:  (remove_if_matching url.url_password password)
    ?host:      (remove_if_matching url.url_host     host)
    ?port:      (remove_if_matching url.url_port     port)
    ~path:      (remove_list_if_matching url.url_path path)
    ~param:     (remove_list_if_matching url.url_param param)
    ?query:     (remove_if_matching url.url_query    query)
    ?fragment:  (remove_if_matching url.url_fragment fragment)
    ?other:     (remove_if_matching url.url_other    other)
    url.url_syntax
;;


let url_provides 
      ?(scheme = false)
      ?(user = false)
      ?(user_param = false)
      ?(password = false)
      ?(host = false)
      ?(port = false)
      ?(path = false)
      ?(param = false)
      ?(query = false)
      ?(fragment = false)
      ?(other = false)
      url
  =
  
  (scheme     => (url.url_scheme   <> None)) &&
  (user       => (url.url_user     <> None)) &&
  (user_param => (url.url_param    <> [])) &&
  (password   => (url.url_password <> None)) &&
  (host       => (url.url_host     <> None)) &&
  (port       => (url.url_port     <> None)) &&
  (path       => (url.url_path     <> [])) &&
  (param      => (url.url_param    <> [])) &&
  (query      => (url.url_query    <> None)) &&
  (fragment   => (url.url_fragment <> None)) &&
  (other      => (url.url_other    <> None))
;;
  

let return_if value =
  match value with
      None -> raise Not_found
    | Some x -> x
;;


let decode_if ?(plus=false) want_encoded value =
  let value' = return_if value in
  if want_encoded then
    value'
  else
    Netencoding.Url.decode ~plus value'
;;


let decode_path_if ?(plus=false) want_encoded value =
  if want_encoded then
    value
  else
    List.map (Netencoding.Url.decode ~plus) value 
;;


let url_scheme                      url = return_if url.url_scheme;;
let url_user       ?(encoded=false) url = decode_if encoded url.url_user;;
let url_user_param ?(encoded=false) url = decode_path_if encoded url.url_user_param;;
let url_password   ?(encoded=false) url = decode_if encoded url.url_password;;
let url_host                        url = return_if url.url_host;;
let url_port                        url = return_if url.url_port;;
let url_path       ?(encoded=false) url = decode_path_if encoded url.url_path;;
let url_param      ?(encoded=false) url = decode_path_if encoded url.url_param;;
let url_query      ?(encoded=false) url = decode_if ~plus:true encoded url.url_query;;
let url_fragment   ?(encoded=false) url = decode_if encoded url.url_fragment;;
let url_other      ?(encoded=false) url = decode_if encoded url.url_other;;


let url_addr url =
  match url.url_host with
    | None -> raise Not_found
    | Some h ->
	let l = String.length h in
	if l >= 2 && h.[0] = '[' && h.[l-1] = ']' then
	  let a = String.sub h 1 (l-2) in
	  try Unix.inet_addr_of_string a
	  with _ -> raise Not_found
	else (
	  try Unix.inet_addr_of_string h
	  with _ -> raise Not_found
	)

let url_socksymbol url dp =
  let p =
    match url.url_port with
      | None -> dp
      | Some p -> p in
  try
    let a = url_addr url in
    `Inet(a, p)
  with
    | Not_found ->
	(match url.url_host with
	   | None -> raise Not_found
	   | Some h -> `Inet_byname(h, p)
	)


let string_of_url url =
  if not (url.url_validity) then
    failwith "Neturl.string_of_url: URL not flagged as valid";
  (match url.url_scheme with
       None -> ""
     | Some s -> s ^ ":") ^ 
  (match url.url_host with
       None -> ""
     | Some host ->
	 "//" ^ 
	 (match url.url_user with
	      None -> "" 
	    | Some user -> 
		user ^ 
		(String.concat ""
		   (List.map
		      (fun p -> ";" ^ p)
		      url.url_user_param)) ^
		(match url.url_password with
		     None -> ""
		   | Some password ->
		       ":" ^ password 
		) ^ 
		"@") ^ 
	 host ^ 
	 (match url.url_port with
	      None -> ""
	    | Some port ->
		":" ^ string_of_int port)) ^ 
  (match url.url_path with
     | [""] ->
	 "/"
     | x :: p  when  url.url_scheme = None &&
                     url.url_host = None &&
	             String.contains x ':' 
	->
	  (* Really a special case: The colon contained in 'x' may cause
	   * that a prefix of 'x' is interpreted as URL scheme. In this
	   * case, "./" is prepended (as recommended in RFC 1808, 5.3).
	   *)
	  "./"
     | _ ->
	 ""
  ) ^
  String.concat "/" url.url_path ^ 
  (match url.url_other with
       None -> ""
     | Some other ->
	 other) ^ 
  String.concat ""  (List.map (fun s -> ";" ^ s) url.url_param) ^ 
  (match url.url_query with
       None -> ""
     | Some query ->
	 "?" ^ query) ^ 
  (match url.url_fragment with
       None -> ""
     | Some fragment ->
	 "#" ^ fragment)
;;


let semi_re = Netstring_str.regexp ";";;


let url_of_string url_syntax s =
  let l = String.length s in
  let recognized x = x <> Url_part_not_recognized in

  let rec collect_words ?(ipv6=false) terminators eof_char cats k =
    (* Collect words as recognized by 'cats', starting at position 'k' in
     * 's'. Collection stops if one the characters listed in 'terminators'
     * is found. If the end of the string is reached, it is treated as
     * 'eof_char'.
     * 
     * if ipv6: words "[ipv6addr]" are also recognized.
     *)
    let word, sep, k_end =
      if ipv6 && k < l && s.[k] = '[' then (
	let k' = scan_url_part s (k+1) l ipv6_sep_cats false in
	if k' >= l then raise Malformed_URL;
	if s.[k'] <> ']' then raise Malformed_URL;
	let word, sep =
	  String.sub s k (k'+1-k), (if k'+1<l then s.[k'+1] else eof_char) in
	if sep <> eof_char then (
	  if cats.(Char.code sep) <> Separator then raise Malformed_URL
	);
	(word, sep, k'+1)
      )
      else (
	let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in  
	(* or raise Malformed_URL *)
	let word, sep =
	  String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
	(word, sep, k')
      ) in
    if List.mem sep terminators then
      [word, sep], k_end
    else
      let word_sep_list', k_end' = 
	collect_words ~ipv6 terminators eof_char cats (k_end+1) in
      ((word, sep) :: word_sep_list'), k_end'
  in

  (* Try to extract the scheme name: *)
  let scheme, k1 =
    if recognized url_syntax.url_enable_scheme then
      try
	let k = scan_url_part s 0 l scheme_cats false in
        (* or raise Malformed_URL *)
	if k = l then raise Malformed_URL;
	assert (s.[k] = ':');
	Some (String.sub s 0 k), (k+1)
      with
	  Malformed_URL -> None, 0
    else
      None, 0
  in

  (* If there is a "//", a host will follow: *)
  let host, port, userinfo, password, k2 =
    if recognized url_syntax.url_enable_host  &&
       k1 + 2 <= l  &&  s.[k1]='/'  && s.[k1+1]='/' then begin

      let word_sep_list, k' = 
	collect_words ~ipv6:true [ '/'; '?'; '#' ] '/' login_cats (k1+2) in
          (* or raise Malformed_URL *)

      let int x =
	try int_of_string x with _ -> raise Malformed_URL in

      match word_sep_list with
	  [ host, ('/'|'?'|'#') ] ->
	    Some host, None, None, None, k'
	| [ host, ':'; port, ('/'|'?'|'#') ] ->
	    Some host, Some (int port), None, None, k'
	| [ user, '@'; host, ('/'|'?'|'#') ] ->
	    Some host, None, Some user, None, k'
	| [ user, '@'; host, ':'; port, ('/'|'?'|'#') ] ->
	    Some host, Some (int port), Some user, None, k'
	| [ user, ':'; password, '@'; host, ('/'|'?'|'#') ] ->
	    Some host, None, Some user, Some password, k'
	| [ user, ':'; password, '@'; host, ':'; port, ('/'|'?'|'#') ] ->
	    Some host, Some (int port), Some user, Some password, k'
	| _ ->
	    raise Malformed_URL
    end
    else
      None, None, None, None, k1
  in

  (* Separate user from user_param: *)
  let user, user_param =
    match userinfo with
	None -> (None, [])
      | Some u ->
	  if recognized url_syntax.url_enable_user_param then (
	    let l = Netstring_str.split_delim semi_re u in
	    match l with
		[] -> (Some "", [])
	      | user :: user_param -> (Some user, user_param)
	  )
	  else (Some u, [])
  in

  let path, k3 =
    if recognized url_syntax.url_enable_path  &&
       k2 < l  (*  &&  s.[k2]='/'  *)
    then begin
      let cats = path_cats_from_syntax url_syntax [ '/' ] in
      let seps = separators_from_syntax url_syntax in

      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
          (* or raise Malformed_URL *)
      match word_sep_list with
	  [ "", '/'; "", _ ] ->
	    [ "" ], k'
	| [ "", _ ] ->
	    [], k'
	| _ ->
	    List.map fst word_sep_list, k'
    end
    else begin
      (* If there is a single '/': skip it *)
      if not (recognized url_syntax.url_enable_other) &&
	 k2 < l  &&  s.[k2]='/'
      then
	[], (k2+1)
      else
	[], k2
    end
  in

  let other, k4 =
    if recognized url_syntax.url_enable_other  &&
       k3 < l 
    then begin
      
      let cats = other_cats_from_syntax url_syntax in

      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
          (* or raise Malformed_URL *)

      match word_sep_list with
	  [ other, _ ] -> Some other, k'
	| _ -> assert false
    end
    else
      None, k3
  in

  let param, k5 =
    if recognized url_syntax.url_enable_param  &&
       k4 < l  &&  s.[k4]=';' 
    then begin
      let cats  = path_cats_from_syntax url_syntax [] in
      let seps  = separators_from_syntax url_syntax in
      let seps' = List.filter (fun c -> c <> ';') seps in

      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
          (* or raise Malformed_URL *)
      
      List.map fst word_sep_list, k'
    end
    else
      [], k4
  in

  let query, k6 =
    if recognized url_syntax.url_enable_query  &&
       k5 < l  &&  s.[k5]='?'
    then begin
      let syn = { url_syntax with
                  url_enable_param = Url_part_not_recognized;
                  url_enable_query = Url_part_not_recognized
                } in
      let cats  = path_cats_from_syntax syn [] in
      let seps  = separators_from_syntax syn in
      
      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
          (* or raise Malformed_URL *)

      match word_sep_list with
	  [ query, _ ] -> Some query, k'
	| _ -> assert false
    end
    else
      None, k5
  in

  let fragment, k7 =
    if recognized url_syntax.url_enable_fragment  &&
       k6 < l  &&  s.[k6]='#'
    then begin
      let cats  = path_cats_from_syntax url_syntax [] in
      let seps  = separators_from_syntax url_syntax in
      
      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
          (* or raise Malformed_URL *)

      match word_sep_list with
	  [ fragment, _ ] -> Some fragment, k'
	| _ -> assert false
    end
    else
      None, k6
  in

  if k7 <> l then raise Malformed_URL;

  make_url
    ~encoded:true
    ?scheme
    ?user
    ~user_param
    ?password
    ?host
    ?port
    ~path
    ~param
    ?query
    ?fragment
    ?other
    url_syntax
;;



let problem_re = Netstring_str.regexp "[ <>\"{}|\\^`]"
let problem_hash_re = Netstring_str.regexp "[ <>\"{}|\\^`#]"

let fixup_url_string ?(escape_hash=false) =
  Netstring_str.global_substitute 
    (if escape_hash then problem_hash_re else problem_re)
    (fun m s ->
       sprintf "%%%02x" (Char.code s.[Netstring_str.match_beginning m]))
;;

  



let parse_url ?(schemes = common_url_syntax)
              ?base_syntax
	      ?(accept_8bits = false)
	      ?(enable_fragment = false)
	      s =
  let scheme =
    try
      Some (extract_url_scheme s)
    with
	Malformed_URL -> None in
  let syntax =
    match scheme with
	None ->
	  ( match base_syntax with
		None -> 
		  raise Malformed_URL
	      | Some syn -> 
		  partial_url_syntax syn
	  )
      | Some sch ->
	  try
	    Hashtbl.find schemes sch
	  with
	      Not_found -> raise Malformed_URL
  in
  let syntax' =
    if accept_8bits then
      { syntax with url_accepts_8bits = true } 
    else
      syntax in
  let syntax'' =
    if enable_fragment && 
       syntax.url_enable_fragment = Url_part_not_recognized then
      { syntax' with url_enable_fragment = Url_part_allowed } 
    else
      syntax' in
  url_of_string syntax'' s
;;


let split_path s =
  let l = String.length s in
  let rec collect_words k =
    let k' = 
      try
	String.index_from s k '/'
      with
	  Not_found -> l
    in
    let word = String.sub s k (k'-k) in
    if k' >= l then
      [word]
    else
      word :: collect_words (k'+1)
  in
  match collect_words 0 with
      [ "" ] -> []
    | [ "";"" ] -> [ "" ]
    | other -> other
;;


let join_path l = 
  match l with
      [ "" ] -> "/"
    | _      -> String.concat "/" l;;


let norm_path l = 

  let rec remove_slash_slash l first =
    match l with
      | [ "" ] ->
	  [ "" ]
      | [ ""; "" ] when first ->
	  [ "" ]
      | "" :: l' when not first ->
	  remove_slash_slash l' false
      | x :: l' ->
	  x :: remove_slash_slash l' false
      | [] ->
	  []
  in

  let rec remove_dot l first =
    match l with
      | ([ "." ] | ["."; ""]) ->
	  if first then [] else [ "" ]
      |	"." :: x :: l' ->
	  remove_dot (x :: l') false
      | x :: l' ->
	  x :: remove_dot l' false
      | [] ->
	  []
  in

  let rec remove_dot_dot_once l first =
    match l with
	x :: ".." :: [] when x <> "" && x <> ".." && not first ->
	  [ "" ]
      |	x :: ".." :: l' when x <> "" && x <> ".." ->
	  l'
      | x :: l' ->
	  x :: remove_dot_dot_once l' false
      | [] ->
	  raise Not_found
  in

  let rec remove_dot_dot l =
    try
      let l' = remove_dot_dot_once l true in
      remove_dot_dot l'
    with
	Not_found -> l
  in

  let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in
  match l' with
      [".."] -> [".."; ""]
    | ["";""] -> [ "" ]
    | _      -> l'
;;


let apply_relative_url baseurl relurl =
  if not (baseurl.url_validity) || not (relurl.url_validity) then
    failwith "Neturl.apply_relative_url: URL not flagged as valid";

  if relurl.url_scheme <> None then
    modify_url 
      ~syntax:baseurl.url_syntax           (* inherit syntax *)
      relurl
  else
    if relurl.url_host <> None then
      modify_url 
	~syntax:baseurl.url_syntax         (* inherit syntax and scheme *)
	?scheme:baseurl.url_scheme
	relurl
    else
      match relurl.url_path with
	  "" :: other ->
	    (* An absolute path *)
	    modify_url 
	      ~syntax:baseurl.url_syntax   (* inherit syntax, scheme, and *)
	      ~encoded:true
	      ?scheme:baseurl.url_scheme   (* login info *)
	      ?host:baseurl.url_host
	      ?port:baseurl.url_port
	      ?user:baseurl.url_user
	      ~user_param:baseurl.url_user_param
	      ?password:baseurl.url_password
	      relurl
	| [] ->
	    (* Empty: Inherit also path, params, query, and fragment *)
	    let new_params, new_query, new_fragment =
	      match relurl.url_param, relurl.url_query, relurl.url_fragment
	      with
		  [], None, None ->
		    (* Inherit all three *)
		    baseurl.url_param, baseurl.url_query, baseurl.url_fragment
		| [], None, f ->
		    (* Inherit params and query *)
		    baseurl.url_param, baseurl.url_query, f
		| [], q, f ->
		    (* Inherit params *)
		    baseurl.url_param, q, f
		| p, q, f ->
		    (* Inherit none of them *)
		    p, q, f
	    in
	    modify_url 
	      ~syntax:baseurl.url_syntax
	      ~encoded:true
	      ?scheme:baseurl.url_scheme
	      ?host:baseurl.url_host
	      ?port:baseurl.url_port
	      ?user:baseurl.url_user
	      ~user_param:baseurl.url_user_param
	      ?password:baseurl.url_password
	      ~path:baseurl.url_path
	      ~param:new_params
	      ?query:new_query
	      ?fragment:new_fragment
	      relurl
	| relpath ->
	    (* A relative path *)
	    let rec change_path basepath =
	      match basepath with
		| [] ->
		    if baseurl.url_host = None then
		      relpath
		    else
		      "" :: relpath
		| [ "" ] ->
		    "" :: relpath
		| [ x ] ->
		    relpath
		| x :: basepath' ->
		    x :: change_path basepath'
	    in
	    let new_path = norm_path (change_path baseurl.url_path) in
	    modify_url 
	      ~syntax:baseurl.url_syntax   (* inherit syntax, scheme, and *)
	      ~encoded:true
	      ?scheme:baseurl.url_scheme   (* login info *)
	      ?host:baseurl.url_host
	      ?port:baseurl.url_port
	      ?user:baseurl.url_user
	      ~user_param:baseurl.url_user_param
	      ?password:baseurl.url_password
	      ~path:new_path               (* and change path *)
	      relurl

;;


let ensure_absolute_url ?base u =
  if u.url_scheme = None then (
    match base with
	None -> raise Malformed_URL
      | Some b -> apply_relative_url b u
  )
  else
    u
;;
  

let print_url url =
  Format.print_string ("<URL:" ^ string_of_url url ^ ">")
;;


let backslash_re = Netstring_str.regexp "\\\\";;
let drive_letter_re = Netstring_str.regexp "^\\([A-Za-z]\\):/";;
let drive_letter_re' = Netstring_str.regexp "^/\\([A-Za-z]\\):/";;
let unc_path_re = Netstring_str.regexp "^//\\([^/]+\\)\\(/|$\\)";;


let os_type = Sys.os_type;;

let classify_path p =
  match os_type with
      "Unix" ->
	if p <> "" && p.[0] = '/' then
	  `Absolute_local p
	else
	  `Relative p
    | "Win32" ->
	let p' = Netstring_str.global_replace backslash_re "/" p in
	( match Netstring_str.string_match drive_letter_re p' 0 with
	      Some m ->
		`Absolute_local ("/" ^ p')
	    | None ->
		( match Netstring_str.string_match unc_path_re p' 0 with
		      Some m ->
			let host = Netstring_str.matched_group m 1 p' in
			let host_e = Netstring_str.group_end m 1 in
			let path = String.sub p' host_e (String.length p' - host_e) in
			let path = if path = "" then "/" else path in
			`Absolute_remote(host,path)
		    | None ->
			if p' <> "" && p'.[0] = '/' then
			  `Relative_drive p'
			else
			  `Relative p'
		)
	)
    | "Cygwin" ->
	let p' = Netstring_str.global_replace backslash_re "/" p in
	( match Netstring_str.string_match drive_letter_re p' 0 with
	      Some m ->
		let letter = Netstring_str.matched_group m 1 p' in
		let rest = String.sub p' 2 (String.length p' - 2) in
		`Absolute_local("/cygdrive/" ^ letter ^ rest)
	    | None ->
		( match Netstring_str.string_match unc_path_re p' 0 with
		      Some m ->
			let host = Netstring_str.matched_group m 1 p' in
			let host_e = Netstring_str.group_end m 1 in
			let path = String.sub p' host_e (String.length p' - host_e) in
			let path = if path = "" then "/" else path in
			`Absolute_remote(host,path)
		    | None ->
			if p' <> "" && p'.[0] = '/' then
			  `Absolute_local p'
			else
			  `Relative p'
		)
	)
    | _ ->
	assert false
;;

let file_url_of_local_path ?(getcwd = Sys.getcwd) p =
  (* Classify p, and make it absolute: *)
  let p_class = classify_path p in
  let p_abs_class =
    match p_class with
	`Relative r ->
	  ( match classify_path (getcwd()) with
		`Absolute_local l ->
		  if l = "/" then 
		    `Absolute_local("/" ^ r)
		  else
		    `Absolute_local(l ^ "/" ^ r)
	      | `Absolute_remote(h,l) ->
		  if l = "/" then 
		    `Absolute_remote(h,"/" ^ r)
		  else
		    `Absolute_remote(h,l ^ "/" ^ r)
	      | _ ->
		  failwith "Neturl.file_url_of_local_path: cwd is not absolute"
	  )
      | `Relative_drive r ->
	  ( match classify_path (getcwd()) with
		`Absolute_local l ->
		  ( match Netstring_str.string_match drive_letter_re' l 0 with
			Some m ->
			  let letter = Netstring_str.matched_group m 1 l in
			  `Absolute_local("/" ^ letter ^ ":" ^ r)
		      | None ->
			  assert false
		  )
	      | `Absolute_remote(h,l) ->
		  `Absolute_remote(h,r)
	      | _ ->
		  failwith "Neturl.file_url_of_local_path: cwd is not absolute"
	  )
      | other -> other
  in
  (* Generate the URL: *)
  let syntax = { file_url_syntax with url_accepts_8bits = true } in
  match p_abs_class with
      `Absolute_local l ->
	let path = split_path l in
	make_url ~scheme:"file" ~host:"localhost" ~path syntax
    | `Absolute_remote(host,l) ->
	let path = split_path l in
	make_url ~scheme:"file" ~host ~path syntax
    | _ ->
	assert false
;;


let drive_letter_comp_re = Netstring_str.regexp "^\\([A-Za-z]\\)\\(:|\\|\\)";;

let local_path_of_file_url u =
  let local_path p =
    if p = [] || List.hd p <> "" then
      failwith "Neturl.local_path_of_file_url: URL is not absolute";
    match os_type with
	"Unix" ->
	  join_path p
      | "Win32" ->
	  (* There must be a drive letter: *)
	  ( match p with
		("" :: drive :: rest) ->
		  (match Netstring_str.string_match drive_letter_comp_re drive 0 with
		       Some m ->
			 let letter = Netstring_str.matched_group m 1 drive in
			 let rest = if rest = [] then [""] else rest in
			 join_path((letter ^ ":") :: rest)
		     | None ->
			 failwith "Neturl.local_path_of_file_url: URL is not absolute"; 
		  )
	      | _ ->
		  failwith "Neturl.local_path_of_file_url: URL is not absolute"; 
	  )
      | "Cygwin" ->
	  (* Recognize drive letters: *)
	  ( match p with
		("" :: drive :: rest) ->
		  (match Netstring_str.string_match drive_letter_comp_re drive 0 with
		       Some m ->
			 let letter = Netstring_str.matched_group m 1 drive in
			 join_path("" :: "cygdrive" :: letter :: rest)
		     | None ->
			 join_path p
		  )
	      | _ ->
		  join_path p
	  )
      | _ ->
	  assert false
  in
  let remote_path host p =
    if p = [] || List.hd p <> "" then
      failwith "Neturl.local_path_of_file_url: URL is not absolute";
    match os_type with
	"Unix" ->
	  failwith "Neturl.local_path_of_file_url: Cannot process non-local file URLs"
      | "Win32" | "Cygwin" ->
	  join_path( "" :: "" :: host :: List.tl p)
      | _ ->
	  assert false
  in
  let opt f =
    try Some(f u) with Not_found -> None
  in
  match (opt url_scheme), (opt url_host), (url_path u) with
      (Some "file", (Some("localhost"|"")|None), []) ->
	local_path [""]
    | (Some "file", (Some("localhost"|"")|None), p) ->
	local_path p
    | (Some "file", Some host, []) ->
	remote_path host [""]
    | (Some "file", Some host, p) ->
	remote_path host p
    | (Some _, _, _) ->
	failwith "Neturl.local_path_of_file_url: Unexpected scheme"
    | (None, _, _) ->
	failwith "Neturl.local_path_of_file_url: Missing scheme (relative URL?)"
;;

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml