(* $Id: http_client.mli,v 1.4 1999/07/08 03:00:03 gerd Exp $
 * ----------------------------------------------------------------------
 *
 *)

(**********************************************************************)
(* HTTP/1.1 client                                                    *)
(* written by Gerd Stolpmann                                          *)
(**********************************************************************)

(* Implements much of HTTP/1.1.
 * Implemented:
 *  - chunked messages
 *  - persistent connections
 *  - modular authentication methods, currently Basic and Digest
 * Left out:
 *  - multipart messages, including multipart/byterange
 *  - content encoding (compression)    (1)
 *  - content digests specified by RFC 2068 and 2069   (1)
 *  - conditional and partial GET   (1)
 *  - following code 303 redirections automatically    (1)
 *  - client-side caching   (1)
 *  - HTTP/1.0 persistent connections
 *
 * (1) These features can be implemented on top of this module if really needed,
 *     but there is no special support for them.
 *)

(* RESTRICTED THREAD-SAFETY - NEW SINCE RELEASE 0.3:
 *
 * The module can be compiled such that it is thread-safe. In particular,
 * one has to use the netclient_mt.cm[x]a archive, and thread-safety is
 * restricted to the following kinds of usage:
 * - The golden rule is that threads must not share pipeline objects.
 *   If every thread uses its own pipeline, every thread will have its own
 *   set of state variables.
 *   It is not detected if two threads erroneously share a pipeline,
 *   neither by an error message nor by implicit serialization. Strange
 *   things may happen.
 * - The same applies to the other objects: "get", "trace", "options",
 *   "head", "post", "put", "delete", "basic_auth_method", and 
 *   "digest_auth_method". But sharing these objects would make no sense
 *   at all.
 * - Of course, it would be possible to use lots of mutexes to make
 *   the module fully thread-safe. Perhaps in the next release.
 * - The Convenience module serializes; see below.
 *)


(**********************************************************************
 *** NOTE: At the end of this interface specification there is a    ***
 *** SIMPLIFIED interface that is sufficient for many applications. ***
 *** The simplified interface is recommended for beginners.         ***
 **********************************************************************)



exception Header_is_incomplete;;
  (* NOTE(review): not documented in this interface. Judging by the name
   * only, it signals that a message header was received only partially --
   * confirm in the implementation.
   *)
exception Body_is_incomplete;;
  (* NOTE(review): not documented in this interface. Judging by the name
   * only, it signals that a message body was received only partially --
   * confirm in the implementation.
   *)
exception Body_maybe_complete;;
  (* NOTE(review): not documented in this interface. Judging by the name
   * only, it signals that it cannot be decided whether the body is
   * complete -- confirm in the implementation.
   *)
exception Http_error of (int * string);;
  (* Raised by the method "get_resp_body" of class "message" if the
   * response code is not in the range 200 to 299. The argument is the
   * pair (code, body): the response code and the body of the response.
   *)
exception Broken_connection;;   (* Connection closed during request *)
  (* See the description of the method "run" of class "pipeline" for the
   * situations in which this exception is raised.
   *)

type secret;;
  (* Abstract type. You cannot call methods requiring a parameter of type
   * secret: this interface exports no value of that type, so such
   * methods are reserved for the implementation of the module.
   *)

type token;;
  (* Abstract type, internally used. It occurs in the signatures of the
   * methods "www_authenticate" and "update" of the authentication
   * classes, which should not be used from outside.
   *)

(* Categories of debug output. A list of these values is passed to the
 * method "verbose" of class "pipeline" to set the level of verbosity.
 *)
type verbose =
    Verbose_status             (* Inform about connection states *)
  | Verbose_request_header     (* Print all request headers *)
  | Verbose_response_header    (* Print all response headers *)
  | Verbose_request_contents   (* Print the request body *)
  | Verbose_response_contents  (* Print the response body *)
;;


(*************************************************************)
(***** class message: core functionality of all messages *****)
(*************************************************************)

class virtual message :
  object
    (* "message" contains data of a request, and if successful, data of the
     * response.
     * The class is virtual because "prepare" is not implemented here; the
     * classes "get", "trace", "options", "head", "post", "put" and
     * "delete" declare it as a concrete method.
     *)

    method virtual prepare : bool -> unit
        (* NOTE(review): neither the purpose of this method nor the meaning
         * of the boolean argument is documented in this interface; see the
         * implementation.
         *)

    (* public state: *)

    method is_served : bool
        (* true iff the request/response cycle was done *)
    method get_host : unit -> string
        (* host name of the content server *)
    method get_port : unit -> int
        (* port number of the content server *)
    method get_req_body : unit -> string
        (* What has been sent as body in the (last) request *)
    method get_req_header : unit -> (string * string) list
        (* What has been sent as header in the (last) request. Returns
         * (key, value) pairs, where the keys are all in lowercase.
         *
         * NOTE ABOUT THE REQUEST HEADER:
         * The header of a "message" object is initially empty. You can
         * set header entries using "set_req_header" before the request.
         * When sending the request some header entries are automatically
         * added, such as "host", "content-length" and "authorization".
         *)
    method assoc_req_header : string -> string
        (* Query a specific entry of the request header. The name of the
         * entry must be given in lowercase characters.
         * NOTE(review): the behaviour for a missing entry is not stated
         * here; presumably Not_found is raised -- confirm.
         *)
    method set_req_header : string -> string -> unit
        (* set_req_header name value:
         * Set the request header entry with given "name" to "value".
         *)
    method get_req_uri : unit -> string
        (* Get the "request URI". This value is only set after doing the
         * request.
         *)
    method get_req_method : unit -> string
        (* Get the name of the request method. This value is only set after
         * doing the request.
         *)
    method get_resp_header : unit -> (string * string) list
        (* Get the header of the last response as (key, value) pairs. As
         * for the request header, the keys are in lowercase characters.
         *)
    method assoc_resp_header : string -> string
        (* Query a specific header entry of the response. The name of the
         * entry must be given in lowercase characters.
         * NOTE(review): the behaviour for a missing entry is not stated
         * here; presumably Not_found is raised -- confirm.
         *)
    method get_resp_body : unit -> string
        (* Returns the body of the last response if the response status
         * is OK (i.e. the code is in the range 200 to 299).
         * Otherwise, Http_error (code, body) is raised where 'code' is
         * the response code and 'body' is the body of the (erroneous)
         * response.
         *)
    method dest_status : unit -> (string * int * string)
        (* Returns the status line of the last response (but status lines
         * with code 100 are ignored).
         * The returned triple is (http_string, code, text)
         *)


    (* proxy control: *)

    method no_proxy : unit -> unit
        (* Forces this request to be done without a proxy. *)

    method is_proxy_allowed : unit -> bool
        (* Returns whether this object would allow a proxy *)

    (* methods for convenience: *)

    method dump_header : string -> (string * string) list -> unit
        (* Writes the given header list to stderr. Every line is
         * prefixed by the given string.
         *)

    (* private state: *)

    (* All methods below take an argument of the abstract type "secret"
     * and can therefore not be called from outside this module. Their
     * semantics are not documented in this interface.
     *)

    method init_query : secret -> string -> unit

    method set_served : secret -> unit
    method set_unserved : secret -> unit
    method get_request : secret -> string
    method set_response : secret -> string -> unit

    (* private methods: *)

    method decode_header : secret -> unit
    method decode_header_at : secret -> string -> int -> 
                                ( (string * string) list * int )
    method decode_body : secret -> bool -> bool -> unit
    method body_is_complete : secret -> (string * string) list -> 
                                string -> int -> unit
  end
;;


(*****************************************)
(***** message types by http methods *****)
(*****************************************)

class get : string ->            (* The query, "http://server/path" *)
  object
    (* Message type for the http method GET. Inherits everything from
     * "message" and declares the virtual method "prepare" as concrete.
     *)
    inherit message
    method prepare : bool -> unit
  end
;;


class trace : string -> int ->     
  (* (1) The query, "http://server/path" 
   * (2) maximum number of hops
   *)
  object
    (* Message type for the http method TRACE. Inherits everything from
     * "message" and declares the virtual method "prepare" as concrete.
     *)
    inherit message
    method prepare : bool -> unit
  end
;;


class options : string ->        (* The query, "http://server/path" *)
  object
    (* Message type for the http method OPTIONS. Inherits everything from
     * "message" and declares the virtual method "prepare" as concrete.
     *)
    inherit message
    method prepare : bool -> unit
  end
;;


class head : string ->           (* The query, "http://server/path" *)
  object
    (* Message type for the http method HEAD. Inherits everything from
     * "message" and declares the virtual method "prepare" as concrete.
     *)
    inherit message
    method prepare : bool -> unit
  end
;;


class post : string -> (string * string) list ->
  (* (1) The query, "http://server/path"
   * (2) The parameters, given as a list of pairs, that are transferred
   *     using the mime type application/x-www-form-urlencoded
   *)
  object
    (* Message type for the http method POST. Inherits everything from
     * "message" and declares the virtual method "prepare" as concrete.
     *)
    inherit message
    method prepare : bool -> unit
  end
;;


class put : string -> string ->
  (* (1) The query, "http://server/path"
   * (2) The body to be transferred
   *)
  object
    (* Message type for the http method PUT. Inherits everything from
     * "message" and declares the virtual method "prepare" as concrete.
     *)
    inherit message
    method prepare : bool -> unit
  end
;;


class delete : string ->         (* The query, "http://server/path" *)
  object
    (* Message type for the http method DELETE. Inherits everything from
     * "message" and declares the virtual method "prepare" as concrete.
     *)
    inherit message
    method prepare : bool -> unit
  end
;;



(**********************************)
(***** Authentication methods *****)
(**********************************)

class basic_auth_method :
  object
    (* Authentication method object. Objects of this class type are
     * registered with a pipeline by its method
     * "add_authentication_method".
     *)
    val mutable current_realm : string
        (* NOTE(review): presumably the realm that "get_credentials"
         * refers to as "the current realm" -- confirm in the
         * implementation.
         *)
    method name : string 
        (* NOTE(review): not documented here; presumably the name of the
         * authentication scheme -- confirm.
         *)
    method set_realm : string -> string -> string -> unit
        (* set_realm realm user password:
         * adds that (user,password) should be used for the given realm
         *)
    method get_credentials : unit -> (string * string)
        (* get (user,password) for the current realm or raise Not_found.
         * This method may be overridden. For example, an interactive
         * application may open a dialog box to get the credentials of
         * an unknown realm.
         *)
    (* The following methods should not be used from outside *)
    method www_authenticate : message -> token list -> unit
    method set_authorization : message -> string -> unit
    method update : message -> token list -> unit
  end
;;


class digest_auth_method :
  object
    (* Has exactly the interface of "basic_auth_method", so objects of
     * this class can be passed wherever a "basic_auth_method" is
     * expected, e.g. to "add_authentication_method" of class "pipeline".
     *)
    inherit basic_auth_method
  end
;;



(**********************************************)
(***** class pipeline: the http processor *****)
(**********************************************)

class pipeline :
  object
    (* A "pipeline" object is a FIFO queue of messages. 
     * Note that in spite of the name no asynchronous pipelining is implemented.
     * This means that the client waits for the response of the last request
     * before a new request is sent to the same host.
     * Perhaps true pipelining is added some day. (The point is that this
     * interface already specifies true pipelining even if this feature is
     * not yet implemented.)
     *
     * A "pipeline" leaves the connection to the last peer (either the
     * content server or the proxy) open and reuses it the next time.
     * Such connections are called "persistent".
     * Note that Web and proxy servers usually have a small timeout for
     * persistent connections (several seconds) and persistency is only
     * worthwhile if several requests are done immediately in turn.
     *)

    method add_authentication_method : basic_auth_method -> unit
        (* adds an authentication method *)
    method set_proxy : string -> int -> unit
        (* set_proxy name port:
         * sets that a proxy 'name' listening on 'port' should be used
         *)
    method set_proxy_auth : string -> string -> unit
        (* set_proxy_auth user password:
         * sets user and password for the proxy. Only the "basic"
         * authentication method is implemented.
         *)
    method avoid_proxy_for : string list -> unit
        (* sets a list of host names or domain suffixes for which no proxy
         * should be used. 
         * e.g. [ "localhost"; ".our.net" ]
         *)
    method avoid_persistent_connection : unit -> unit
        (* sets that for every request a new connection is opened. 
         * This is not recommended because it decreases performance.
         *)
    method reset : unit -> unit
        (* Empties the pipeline and closes the connection *)
    method add : message -> unit
        (* Adds the message at the end of the pipeline. The state of the
         * message is set to "unserved".
         *)
    method empty : unit -> bool
        (* Is the pipeline empty? *)
    method pipeline : message list
        (* returns the list representing the pipeline *)
    method run : unit -> unit
        (* Runs through the requests in the pipeline. If a request can be
         * fulfilled, i.e. the server sends a response, the state of the
         * request is set and the request is removed from the pipeline.
         * If a request cannot be fulfilled (no response, bad response, 
         * network error), an exception is raised and the request remains in
         * the pipeline (and is even the head of the pipeline). The
         * processing of the pipeline stops in this case.
         *
         * This function handles the following HTTP return codes itself:
         * 100: This is an intermediate return code and simply ignored.
         * 301: If the method is GET or HEAD, the redirection is followed.
         * 302: If the method is GET or HEAD, the redirection is followed.
         *
         * All other return codes remain uninterpreted, it is up to the
         * caller of this function to react on them.
         *
         * Exception Broken_connection:
         *  - The server has closed the connection before the full request
         *    could be sent. It is unclear if something happened or not.
         *    The application should figure out the current state and 
         *    retry the request.
         *  - Also raised if only parts of the response have been received
         *    and the server closed the connection. This is the same problem.
         *    Note that this can only be detected if a "content-length" has
         *    been sent or "chunked encoding" was chosen. Should normally
         *    work for persistent connections.
         *  - NOT raised if the server forces a "broken pipe" (normally
         *    indicates a serious server problem). The intention of
         *    Broken_connection is that retrying the request will probably
         *    succeed.
         *)
    method close_connection : unit -> unit
        (* If there is a pending connection to a server, it is closed. *)
    method abort_connection : unit -> unit
        (* Same as 'close_connection' but does not inform the server about
         * ending the connection. This might lead to timeouts on the server
         * side.
         *)
    method verbose : verbose list -> unit
        (* Sets the level of verbosity: the list names the categories of
         * debug output (see type "verbose").
         *)
    method very_verbose : unit -> unit
        (* sets maximum verbosity *)
  end
;;


(**************************************************)
(***** Convenience module for simple purposes *****)
(**************************************************)

(* Do 'open Http_client.Convenience' for simple applications.
 *
 * - The environment variables "http_proxy" and "no_proxy" determine 
 *   the proxy settings. "http_proxy" must be an http-URL that contains
 *   the proxy's name, its port, and optionally user and password.
 *   E.g. "http://eric:eric'spassword@proxy:8080/".
 *   The variable "no_proxy" is a comma-separated list of hosts and
 *   domains for which no proxy must be used.
 *   E.g. "localhost, sun, moon, .intra.net"
 * - There is a default behaviour to authenticate. Both "basic" and "digest"
 *   methods are enabled. Two global variables, http_user and http_password
 *   set the user and password if the URL does not specify them. In the case
 *   that user and password are included in the URL, these values are always
 *   used.
 * - There is a default error behaviour. If a request fails, it is automatically
 *   repeated. The variable http_trials specifies the number of times a request
 *   is submitted at most.
 *   Requests are not repeated if there is an HTTP return code that indicates
 *   a normal operating condition.
 *   POST and DELETE requests are never repeated.
 *)

(* RESTRICTED THREAD SAFETY - NEW SINCE RELEASE 0.3:
 *
 * The Convenience module is fully thread-safe with the exception of the
 * exported variables (http_trials, http_user, and http_password). Note
 * that all threads share the same pipeline, and access to the pipeline
 * is serialized.
 * The latter simply means that it always works, but that threads may 
 * block each other (i.e. the program slows down if more than one thread
 * wants to open http connections at the same time).
 *)


module Convenience :
    sig
      (* Simplified interface: every function performs one complete
       * request for the given URL. The http_*_message functions return
       * the whole message object, the other functions only the body of
       * the response.
       *)

      val http_trials : int ref
        (* number of times every request is tried. Default: 3 *)

      val http_user : string ref
        (* The default user if authentication is required *)

      val http_password : string ref
        (* The default password if authentication is required *)

      val http_get_message : string -> message
        (* Does a "GET" request with the given URL and returns the message.
         * The URL may contain a user and a password, as in
         * "http://user:password@server.com/path". If authentication is
         * required by the server, the user and password values from the
         * URL are taken, if present. Otherwise, http_user and http_password
         * are used. If http_user is "", authentication fails always.
         *)

      val http_head_message : string -> message
        (* Does a "HEAD" request with the given URL and returns the reply.
         * See also http_get_message.
         *)

      val http_post_message : string -> (string * string) list -> message
        (* Does a "POST" request with the given URL and returns the reply.
         * The list contains the parameters sent with the POST request.
         * See also http_get_message.
         *)

      val http_put_message : string -> string -> message
        (* Does a "PUT" request with the given URL and returns the reply.
         * The second argument contains the contents to be put.
         * See also http_get_message.
         *)

      val http_delete_message : string -> message
        (* Does a "DELETE" request with the given URL and returns the reply.
         * See also http_get_message.
         *)

      val http_get : string -> string
        (* Does a "GET" request with the given URL and returns the message
         * body. See also http_get_message.
         *)

      val http_post : string -> (string * string) list -> string
        (* Does a "POST" request with the given URL and returns the message
         * body. The list contains the parameters sent with the POST
         * request. See also http_get_message.
         *)

      val http_put : string -> string -> string
        (* Does a "PUT" request with the given URL and returns the message
         * body. The second argument contains the contents to be put.
         * See also http_get_message.
         *)

      val http_delete : string -> string
        (* Does a "DELETE" request with the given URL and returns the message
         * body. See also http_get_message.
         *)

      val http_verbose : unit -> unit
        (* Turns on debug messages on stderr. *)

    end


(* ======================================================================
 * History:
 * 
 * $Log: http_client.mli,v $
 * Revision 1.4  1999/07/08 03:00:03  gerd
 * 	Added comments how to use the new MT support.
 *
 * Revision 1.3  1999/06/10 19:28:01  gerd
 * 	Added Message_maybe_complete.
 *
 * Revision 1.2  1999/06/10 00:12:54  gerd
 * 	Change: The HTTP response codes 301 and 302 ("moved permanently",
 * resp. "moved temporarily") are now interpreted by the 'pipeline' class.
 * (But 303 not (yet?) -- perhaps there should be a switch to turn this
 * feature on or off once it gets implemented...)
 * 	Bugfix: The decision whether a proxy should be used or not works
 * now. The weird function 'dest_query' has gone, it is substituted by
 * 'init_query' which can be called in the initializers of the method
 * subclasses. Thus the query is analyzed very early which is very useful
 * in order to decide if a proxy is needed to reach a certain host.
 * 	Added: There is now a Convenience module which has a simplified
 * interface. It is sufficient for many applications that only want to do
 * simple GET and POST operations.
 *
 * Revision 1.1  1999/03/26 01:16:42  gerd
 * 	initial revision
 *
 * 
 *)
