(* netcgi_common.mli
Copyright (C) 2005-2006
Christophe Troestler
email: Christophe.Troestler@umh.ac.be
WWW: http://math.umh.ac.be/an/
This library is free software; see the file LICENSE for more information.
This library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
LICENSE for more details.
*)
(** Functions to develop new connectors.
*
* The following module is provided as a set of helper functions to
* define new connectors. As a normal user of [Netcgi], {b you should
* not use this module}. *)
(** The goal of this module is to factor out common routines to easily
set up new connectors. Here is the normal flow of operations:
- You start by reading the request environment properties as well
as the input header. Often both are undistinguished and provided
through some sort of meta-variables. The function
{!Netcgi_common.update_props_inheader} helps you to sort them and
to normalize the input header fields. You also need to set up a
{!Netchannels.out_obj_channel} to the output stream of your
connector. Then, {!Netcgi_common.cgi_environment} can create an
environment object for you. If [stderr] output is not appropriate
(e.g. it is not redirected to the server log), you need to
override [#log_error].
- From the environment object and arguments, {!Netcgi_common.cgi}
creates a CGI object. Often, arguments are read from the
environment [#cgi_query_string] (in case of GET) or from an input
channel (in case of POST). {!Netcgi_common.cgi_with_args} handles
this for you: it requires a {!Netchannels.in_obj_channel} from
which the arguments are read (only used in the case of POST).
- {!Netcgi_common.exn_handler_default} provides a default error
page for uncaught exceptions. It also allows the user to pass their
own exception handler, which takes precedence over the default one.
To see this schema in use, we recommend you have a look at the
implementation of the CGI connector because it is very simple.
*)
(** {2 Arguments} *)
type representation = [ `Simple of Netmime.mime_body
| `MIME of Netmime.mime_message ]
(** Possible representations of an argument, as returned by the
    [#representation] method of {!Netcgi_common.cgi_argument}: either
    [`Simple] carrying a {!Netmime.mime_body}, or [`MIME] carrying a
    whole {!Netmime.mime_message}. *)
type store = [`Memory | `File of string]
(** Type returned by the [#store] method of
    {!Netcgi_common.cgi_argument}.  NOTE(review): the string carried
    by [`File] is presumably the name of the file holding the value --
    confirm against {!Netcgi.cgi_argument}. *)
exception Oversized
(** See {!Netcgi.Argument.Oversized}. *)
exception Too_many_arguments
(** Signals that the [max_arguments] limit (see the field of that
    name in {!Netcgi_common.config}) has been hit. *)
(** Signature of CGI argument objects.  See {!Netcgi.cgi_argument}
    for the description of each method.

    This class type offers no method to modify the argument; such
    methods are only added by {!Netcgi_common.rw_cgi_argument}.  The
    result types of [#store] and [#representation] are
    {!Netcgi_common.store} and {!Netcgi_common.representation}. *)
class type cgi_argument =
object
method name : string
method value : string
method open_value_rd : unit -> Netchannels.in_obj_channel
method store : store
method content_type : unit -> string * (string * Mimestring.s_param) list
method charset : string
method filename : string option
method representation : representation
method finalize : unit -> unit
end
(** See {!Netcgi.rw_cgi_argument}.

    Extends {!Netcgi_common.cgi_argument} with the methods [#ro],
    [#set_value] and [#open_value_wr].

    @deprecated Arguments are read-only. *)
class type rw_cgi_argument =
object
inherit cgi_argument
method ro : bool
method set_value : string -> unit
method open_value_wr : unit -> Netchannels.out_obj_channel
end
(** See {!Netcgi.Argument.simple}.  We reveal more of the object than
    {!Netcgi.Argument.simple} for the backward compatibility layer:
    the result has the deprecated type
    {!Netcgi_common.rw_cgi_argument}.

    NOTE(review): the two [string] arguments are presumably the name
    and the value of the argument, in that order, and [?ro]
    presumably sets the result of [#ro] -- confirm against the
    implementation. *)
class simple_arg : ?ro:bool -> string -> string -> rw_cgi_argument
(** See {!Netcgi.Argument.mime}.  We reveal more of the object than
    {!Netcgi.Argument.mime} for the backward compatibility layer:
    the result has the deprecated type
    {!Netcgi_common.rw_cgi_argument}.

    The argument is built from a {!Netmime.mime_message}.
    NOTE(review): [?work_around_backslash_bug] presumably corresponds
    to the [`Backslash_bug] workaround of {!Netcgi_common.config} --
    confirm against the implementation. *)
class mime_arg : ?work_around_backslash_bug:bool -> ?name:string ->
Netmime.mime_message -> rw_cgi_argument
(** {2 Cookies} *)
(** The cookie implementation has been moved to {!Nethttp.Cookie}.
    New code should directly call the functions defined there.
    The type [t] of this module is equal to {!Nethttp.Cookie.t}, so
    cookies can be passed freely between the two modules.
*)
module Cookie :
sig
type t = Nethttp.Cookie.t
val make :
?max_age:int ->
?domain:string ->
?path:string ->
?secure:bool ->
?comment:string ->
?comment_url:string ->
?ports:int list ->
string -> string -> t
(** Creates a cookie.  The optional arguments correspond to the
    accessors of the same names below.  NOTE(review): the two
    positional strings are presumably the name and the value of the
    cookie, in that order -- confirm in {!Nethttp.Cookie}. *)
val name : t -> string
val value : t -> string
val max_age : t -> int option
(** The expiration time of the cookie, in seconds. [None] means
    that the cookie will be discarded when the browser exits.
    This information is not returned by the browser. *)
val domain : t -> string option
val path : t -> string option
val secure : t -> bool
(** Tells whether the cookie is secure.
    This information is not returned by the browser. *)
val comment : t -> string
(** Returns the comment associated with the cookie or [""] if it
    does not exist. This information is not returned by the
    browser. *)
val comment_url : t -> string
(** Returns the comment URL associated with the cookie or [""] if it
    does not exist. This information is not returned by the
    browser. *)
val ports : t -> int list option
val set_value : t -> string -> unit
val set_max_age : t -> int option -> unit
val set_domain : t -> string option -> unit
val set_path : t -> string option -> unit
val set_secure : t -> bool -> unit
val set_comment : t -> string -> unit
val set_comment_url : t -> string -> unit
val set_ports : t -> int list option -> unit
val set : #Netmime.mime_header -> t list -> unit
(** [set http_header cookies] sets the [cookies] in [http_header]
    using version 0 or version 1 depending on whether version 1
    fields are used. For better browser compatibility, if
    "Set-cookie2" (RFC 2965) is issued, then a "Set-cookie"
    precedes (declaring the same cookie with a limited number of
    options).
    {b Deprecated name.} Use {!Nethttp.Header.set_set_cookie_ct}.
*)
val get : #Netmime.mime_header -> t list
(** Decodes the cookie header, whether the cookies are version 0
    or version 1.
    {b Deprecated name.} Use {!Nethttp.Header.get_cookie_ct}.
*)
val of_record : Nethttp.cookie -> t
(** Conversion from the deprecated style of cookie.
    {b Deprecated name.} Use {!Nethttp.Cookie.of_netscape_cookie}.
*)
val to_record : t -> Nethttp.cookie
(** Conversion to the deprecated style of cookie (some parameters
    are dropped).
    {b Deprecated name.} Use {!Nethttp.Cookie.to_netscape_cookie}.
*)
end
(************************************************************************)
(** {2 Environment} *)
(** See {!Netcgi.config} for the meaning of each field.

    Within this interface, [max_arguments] is the limit referred to
    by {!Netcgi_common.Too_many_arguments}, [workarounds] is consulted
    by {!Netcgi_common.cgi_environment} (for [`MSIE_Content_type_bug])
    and [default_exn_handler] by
    {!Netcgi_common.exn_handler_default}.  The two [`Work_around_...]
    tags of [workarounds] are marked as deprecated. *)
type config = {
tmp_directory : string;
tmp_prefix : string;
permitted_http_methods : [`GET | `HEAD | `POST | `DELETE | `PUT] list;
permitted_input_content_types : string list;
input_content_length_limit : int;
max_arguments : int;
workarounds : [ `MSIE_Content_type_bug | `Backslash_bug
| `Work_around_MSIE_Content_type_bug (* @deprecated *)
| `Work_around_backslash_bug (* @deprecated *)
] list;
default_exn_handler : bool;
}
(** See {!Netcgi.output_type}.

    This is the type of the argument of {!Netcgi_common.cgi} that
    selects how the environment [#out_channel] is wrapped.  The
    function carried by [`Transactional] receives the configuration
    and an output channel and returns a
    {!Netchannels.trans_out_obj_channel}. *)
type output_type =
[ `Direct of string
| `Transactional of config ->
Netchannels.out_obj_channel -> Netchannels.trans_out_obj_channel
]
val fix_MSIE_Content_type_bug : string -> string
(** [fix_MSIE_Content_type_bug ct] transforms the content-type
    string [ct] to fix the MSIE Content-Type bug and returns the
    resulting string.  See also the [`MSIE_Content_type_bug]
    workaround of {!Netcgi_common.config}. *)
val is_MSIE : string -> bool
(** [is_MSIE user_agent] tells whether the [user_agent] string is
    the one of Microsoft Internet Explorer.  Useful to know when to
    apply {!Netcgi_common.fix_MSIE_Content_type_bug}. *)
(* {b Notes about the header}
The fact that the header is stored in variables of the environment
and not sent immediately to the output channel has several advantages:
- It is possible to change header fields at every moment before
the commitment happens. For example, it is possible to set the
content-length field which is normally only known just at the
time of the commit operation.
- The [environment] object can process the header; for example
it can fix header fields.
- It is simpler to connect to environments which transport the
header in non-standard ways. Example: Assume that the
environment is the web server process (e.g. we are an Apache
module). Typically the header must be stored in different
structures than the body of the message. *)
(** [new cgi_environment ~config ~properties ~input_header out_obj]
    generates a {!Netcgi.cgi_environment} object from the arguments.
    The creation of such an object {i does not} raise any exception.
    The method [#out_channel] of the created environment returns
    [out_obj].
    @param config gives the configuration options. Of particular
    interest here is [config.workarounds]. If
    [`MSIE_Content_type_bug] is present, a fix will be applied to
    [input_header].
    @param properties CGI-like properties as (name, value) pairs.
    Examples: [("REQUEST_METHOD", "POST")], [("SERVER_PROTOCOL",
    "HTTP/1.1")]. Note that "CONTENT_TYPE" and "CONTENT_LENGTH" are
    part of the input header. It is highly recommended to use
    {!Netcgi_common.update_props_inheader} to build this list.
    @param input_header is a list of (field, value) pairs of the HTTP
    input request. It is ASSUMED that field names in [input_header]
    are lowercase in order to apply a fix to the MSIE Content-Type
    bug. Also remember that the separator is '-', not '_'. Both
    requirements will be satisfied if you use
    {!Netcgi_common.update_props_inheader} to build [input_header].
    {b Notes:} The header is kept in variables and
    [#send_output_header] sends it directly to [out_obj]. This has
    several advantages:
    - It is possible to change header fields at every moment before
    the commitment happens. For example, it is possible to set the
    content-length field which is normally only known just at the
    time of the commit operation.
    - The [environment] object can process the header; for example
    it can fix header fields.
    - It is simpler to connect to environments which transport the
    header in non-standard ways. Example: Assume that the
    environment is the web server process (e.g. we are an Apache
    module). Typically the header must be stored in different
    structures than the body of the message.
*)
class cgi_environment :
config:config ->
properties:(string * string) list ->
input_header:(string * string) list ->
Netchannels.out_obj_channel ->
object
val mutable header_not_sent : bool
(** [true] iff the output headers have not been sent.
    [#send_output_header] must set it to [false] once it has done
    its job. *)
method cgi_gateway_interface : string
method cgi_server_name : string
method cgi_server_port : int option
method cgi_server_protocol : string
method cgi_server_software : string
method cgi_request_method : string
method cgi_script_name : string
method cgi_path_info : string
method cgi_path_translated : string
method cgi_auth_type : string
method cgi_remote_addr : string
method cgi_remote_host : string
method cgi_remote_user : string
method cgi_remote_ident : string
method cgi_query_string : string
method protocol : Nethttp.protocol
method cgi_property : ?default:string -> string -> string
method cgi_properties : (string * string) list
(** Return the parameter [properties]. *)
method cgi_https : bool
(** @raise HTTP if the HTTPS property is not understood. *)
method input_header : Netmime.mime_header
(** For special header structures, just override this method. *)
method input_header_field : ?default:string -> string -> string
method multiple_input_header_field : string -> string list
method input_header_fields : (string * string) list
method cookie : string -> Cookie.t
method cookies : Cookie.t list
method user_agent : string
method input_content_length : int
method input_content_type_string : string
method input_content_type :
unit -> string * (string * Mimestring.s_param) list
method output_header : Netmime.mime_header
(** For special header structures, override this method and
    maybe [#send_output_header]. *)
method output_header_field : ?default:string -> string -> string
method multiple_output_header_field : string -> string list
method output_header_fields : (string * string) list
method set_output_header_field : string -> string -> unit
method set_multiple_output_header_field : string -> string list -> unit
method set_output_header_fields : (string * string) list -> unit
method set_status : Nethttp.http_status -> unit
method send_output_header : unit -> unit
method output_ch : Netchannels.out_obj_channel
(** @deprecated Use [#out_channel] instead. *)
method out_channel : Netchannels.out_obj_channel
method log_error : string -> unit
(** You may want to override this with your custom logging method.
    By default, [#log_error msg] adds a timestamp to [msg] and
    sends the result to [stderr]. *)
method config : config
end
(************************************************************************)
(** {2 CGI} *)
type other_url_spec = [ `Env | `This of string | `None ]
(** See {!Netcgi.other_url_spec}.  Type of the [?with_authority],
    [?with_script_name] and [?with_path_info] arguments of the
    [#url] method of {!Netcgi_common.cgi}. *)
type query_string_spec = [ `Env | `This of cgi_argument list | `None
| `Args of rw_cgi_argument list ]
(** See {!Netcgi.query_string_spec}.  Type of the
    [?with_query_string] argument of the [#url] method of
    {!Netcgi_common.cgi}. *)
type cache_control = [ `No_cache | `Max_age of int | `Unspecified ]
(** See {!Netcgi.cache_control}.  Type of the [?cache] argument of
    the [#set_header] method of {!Netcgi_common.cgi}. *)
type request_method = [`GET | `HEAD | `POST | `DELETE | `PUT of cgi_argument]
(** Request method of a {!Netcgi_common.cgi} object.  Unlike in
    {!Netcgi_common.http_method}, the [`PUT] tag carries a
    {!Netcgi_common.cgi_argument}. *)
val string_of_request_method : request_method -> string
(** Converts a request method to a string.  NOTE(review): presumably
    the name of the HTTP method, as for
    {!Netcgi_common.string_of_http_method} -- confirm against the
    implementation. *)
type arg_store_type =
[`Memory | `File | `Automatic | `Discard
| `Memory_max of float | `File_max of float | `Automatic_max of float]
(** Result type of an {!Netcgi_common.arg_store} function.
    NOTE(review): the [float] carried by the [`..._max] tags is
    presumably a size limit -- see {!Netcgi.arg_store} for the exact
    meaning and unit. *)
type arg_store = cgi_environment -> string -> Netmime.mime_header_ro ->
arg_store_type
(** See {!Netcgi.arg_store}.  A function of this type receives the
    environment, a string and a read-only MIME header, and returns
    an {!Netcgi_common.arg_store_type}.  It is the last argument of
    {!Netcgi_common.cgi_with_args}, which processes the arguments
    according to it. *)
(** [cgi env op meth args] constructs {!Netcgi.cgi} objects. The
    environment [#out_channel] is wrapped into a transactional channel
    or a discarding channel according to [op] ([`Direct] or
    [`Transactional]) and [meth] ([`HEAD] requests must only
    return a header). For standard cases, when POST and PUT arguments
    are available through a [Netchannels.in_obj_channel], we recommend
    you use {!Netcgi_common.cgi_with_args} that will parse the
    arguments for you and check preconditions.
    Remark: You may think that the argument [meth] is superfluous as
    it can be deduced from [env#cgi_request_method]. While this is
    true for [`DELETE], [`GET], [`HEAD] and [`POST], the [`PUT] tag
    takes a {!Netcgi_common.cgi_argument} parameter. Setting this
    parameter correctly and decoding [env#cgi_request_method] is done
    for you by {!Netcgi_common.cgi_with_args}. *)
class cgi : cgi_environment -> output_type -> request_method ->
cgi_argument list ->
object
method argument : string -> cgi_argument
method argument_value : ?default:string -> string -> string
method argument_exists : string -> bool
method multiple_argument : string -> cgi_argument list
method arguments : cgi_argument list
method environment : cgi_environment
method request_method : request_method
method finalize : unit -> unit
(** The following environment properties must be available for this
    method to work properly (please make sure your connector
    provides them; the CGI spec makes them compulsory anyway):
    - [cgi_server_name]
    - [cgi_server_port]
    - [cgi_script_name]
    - [cgi_path_info] *)
method url : ?protocol:Nethttp.protocol ->
?with_authority:other_url_spec -> (* default: `Env *)
?with_script_name:other_url_spec -> (* default: `Env *)
?with_path_info:other_url_spec -> (* default: `Env *)
?with_query_string:query_string_spec -> (* default: `None *)
unit -> string
method set_header :
?status:Nethttp.http_status ->
?content_type:string ->
?content_length:int ->
?set_cookie:Nethttp.cookie list ->
?set_cookies:Cookie.t list ->
?cache:cache_control ->
?filename:string ->
?language:string ->
?script_type:string ->
?style_type:string ->
?fields:(string * string list) list ->
unit -> unit
method set_redirection_header :
?set_cookies:Cookie.t list ->
?fields:(string * string list) list ->
string -> unit
method output : Netchannels.trans_out_obj_channel
(** @deprecated Use [#out_channel] instead. *)
method out_channel : Netchannels.trans_out_obj_channel
method at_exit : (unit -> unit) -> unit
end
val cgi_with_args :
(cgi_environment -> output_type -> request_method -> cgi_argument list
-> 'a) ->
cgi_environment -> output_type -> Netchannels.in_obj_channel -> arg_store
-> 'a
(** [cgi_with_args (new cgi) env op in_chan arg_store] constructs
    a {!Netcgi.cgi} object. However, [new cgi] can be replaced by
    any function of the same argument types, so it is easy to use
    this to construct extensions of the [cgi] class (as needed by
    some connectors); the result of that function is returned. The
    arguments of the cgi object are taken from the environment [env]
    (for HEAD, GET, DELETE) or from the [in_chan] (for POST, PUT)
    and processed according to [arg_store].
    @raise HTTP if the data does not conform to the standards or is
    not allowed. *)
(** {2 Exceptions} *)
exception HTTP of Nethttp.http_status * string
(** Exception raised by various functions of this library to return
    to the user agent an appropriate error page with the specified
    http-status (this exception must be caught by the connector and
    a default answer sent).
    The string is a description of the cause of the error.
    This exception is for use by connectors only; users should deal
    with the exceptions in their code by generating a response with
    the usual [#set_header] and [#out_channel] of {!Netcgi.cgi}. *)
val exn_handler_default : cgi_environment ->
exn_handler:(cgi_environment -> (unit -> unit) -> unit) ->
finally:(unit -> unit) ->
(unit -> exn option) -> unit
(** [exn_handler_default env ~exn_handler ~finally run_cgi] will
    basically execute [exn_handler env run_cgi]. Provided that the
    environment config [default_exn_handler] is set to [true] (the
    default), any exception [e] not caught by the user provided
    [exn_handler] (or that is raised by it) will be passed to the
    default handler of OCamlNet which will rollback the current
    output, produce a page describing the exception [e], and close
    the output channel of [env]. Note that the default handler
    treats [HTTP] exceptions specially -- for example, the response
    to [HTTP(`Method_not_allowed,...)] includes an "Allow" header
    (as mandated by HTTP/1.1);...
    Note that, regardless of the value of [default_exn_handler], the
    [Exit] exception is always caught and treated like an
    acceptable early termination (thus produces no error page).
    Whether [run_cgi] terminates normally or by an exception,
    [finally()] is executed last.
    Sometimes, you want some "special" exceptions (for example
    exceptions internal to the connector) that CANNOT be caught by
    [exn_handler]. In this case, [run_cgi()] catches the exception,
    say [e], and returns it as [Some e]. The exception [e] will "go
    through" [exn_handler_default]; it will not even be passed to
    the default handler. Therefore, you must take care that it is
    handled by the surrounding code or your connector may die
    without an error message. Of course, [run_cgi] must return
    [None] if no "special" exception is raised.
    REMARK: Strictly speaking, [exn_handler env run_cgi] is obviously
    not possible because the return type of [run_cgi] is not [unit]
    but you can ignore that to understand what this function does. *)
val error_page : cgi_environment -> Nethttp.http_status ->
(string * string list) list ->
string -> string ->
unit
(** [error_page env status fields msg detail]: Logs an error message and
    outputs an error page via [env].
    [status] is the status of the error page, e.g. [`Internal_server_error].
    [fields] are put into the response header of the error page.
    [msg] occurs in the log message and in the error page, and should
    be a concise description without linefeeds. [detail] is only
    printed to the error page (not to the log); it may be longer
    than [msg] and may also include HTML markup.
*)
(************************************************************************)
(** {2 Useful functions}
The following functions are used in several connectors and are
grouped here for convenience.
*)
val update_props_inheader : string * string ->
((string * string) list * (string * string) list as 'a) -> 'a
(** [update_props_inheader (name, value) (props, inheader)] returns
    [(props, inheader)] to which the new parameter [name]-[value]
    has been added -- to [props] or [inheader], depending on [name].
    Unless you know what you are doing, you {b must} use this
    function to classify parameters as it also performs some
    standardisation.  The resulting lists are suitable as the
    [~properties] and [~input_header] arguments of
    {!Netcgi_common.cgi_environment}. *)
val rm_htspace : (char -> bool) -> string -> int -> int -> string
(** [rm_htspace is_space s low up] returns the substring [s.[low
    .. up - 1]] stripped of possible heading and trailing spaces,
    i.e. characters for which the function [is_space] returns
    [true].
    @raise Invalid_argument if [low < 0] or [up > String.length s] *)
val rev_split : (char -> bool) -> string -> string list
(** [rev_split is_delim s] splits [s] at each character for which
    [is_delim] returns [true] and returns the list of substrings in
    reverse order. Several consecutive delimiters are treated as a
    single one. The substrings do not share memory with [s]. *)
val is_prefix : string -> string -> bool
(** [is_prefix pre s] returns [true] if and only if [pre] is a
    prefix of [s]. *)
type http_method =
[`GET | `HEAD | `POST | `DELETE | `PUT]
(** HTTP method tags without any attached data.  This is the same
    set of tags as in the [permitted_http_methods] field of
    {!Netcgi_common.config}; compare with
    {!Netcgi_common.request_method}, whose [`PUT] tag carries an
    argument. *)
val string_of_http_method : http_method -> string
(** Returns the string value of the method. *)