Plasma GitLab Archive
Projects Blog Knowledge

(*
  Copyright 2010 Gerd Stolpmann

  This file is part of Plasma, a distributed filesystem and a
  map/reduce computation framework. Unless you have a written license
  agreement with the copyright holder (Gerd Stolpmann), the following
  terms apply:

  Plasma is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Plasma is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Plasma.  If not, see <http://www.gnu.org/licenses/>.

*)
(* $Id: plasma_filebuf.mli 434 2011-10-07 12:35:20Z gerd $ *)

(** Buffer for [read] and [write] *)

type errno = Plasma_util.errno
  (** Error codes; an alias for [Plasma_util.errno]. Used for recording
      write errors per inode (see [write_erroneous] and [inode_error]).
   *)

type strmem = [`String of string | `Memory of Netsys_mem.memory]
  (** User-side data for the blit functions: either a string or a
      [Netsys_mem.memory] block.
   *)

type buf_state =
    [ `Invalid
    | `Clean
    | `Dirty
    | `Reading of int64 option Uq_engines.engine
    | `Writing of int64 option Uq_engines.engine
    | `Written
    | `Dropped
    ]
  (** Buffer states:
      - an [`Invalid] buffer is reserved for a certain block but it is
        not yet filled with any meaningful data
      - a [`Clean] buffer contains valid, unmodified data of a file
      - a [`Dirty] buffer contains modified data of a file (which still
        needs to be written)
      - a [`Reading] buffer is being filled with data. For the user this is
        very much like [`Invalid] (the data are unusable), but the argument
        engine terminates when the buffer changes state again.
      - a [`Writing] buffer is being written out. For the user this is
        very much like [`Clean] (data can be read but not modified again).
        The argument engine terminates when the buffer changes state again.
      - the [`Written] state is used after [`Writing] as long as it is
        still unclear whether the write is successful or not. On success,
        the buffer can be set to [`Clean] again. On error, it will go back
        to [`Dirty].
      - a [`Dropped] buffer has been given up: the descriptor is no longer
        valid, and the buffer memory is reused for a different purpose
   *)

type buffer =
    { buf_inode : int64;          (* inode of the buffered block *)
      buf_index : int64;          (* index of the block (see lookup_buffer) *)
      buf : Netsys_mem.memory;    (* the buffer memory *)
      buf_ord : int;              (* ordinal number of the buffer *)
      mutable buf_seqno : int64;  (* for cache validation only *)
      mutable buf_state : buf_state;       (* current state, see buf_state *)
      mutable buf_dirty : bool;            (* a modification was recorded *)
      mutable buf_delayed_drop : bool;     (* drop after ongoing read/write *)
      mutable buf_flushing : bool;         (* selected by select_for_flush *)
    }
  (** Buffer descriptor. The user of this API should never modify entries
      of it.

      A buffer descriptor is handed out for the lifetime of a buffer. The
      states are:
       - A freshly designated buffer is in [`Invalid] state
       - While reading the block the buffer is in [`Reading] state
       - If [`Reading] is successful and there is no pressure to reassign
         the buffer immediately, it becomes [`Clean]. A [`Clean] buffer
         can be dropped by the system at any time.
       - When the contents are modified the buffer becomes [`Dirty]
       - Dirty buffers can be written to disk. The buffer enters [`Writing]
         state. Note that it is not allowed to modify the buffer while in
         [`Writing] state - one must wait for the completion of the write
         first
       - A written buffer is set to [`Written]. This is a special state
         meaning that the write is done but not yet committed.
       - The contents of a written buffer can be modified. This is only
         recorded by the [buf_dirty] flag.
       - After the commit of the write, the [`Written] buffer becomes
         [`Clean] or [`Dirty], depending on whether a modification was
         recorded in the meantime (see [buf_dirty]). If the commit is not
         successful the buffer is set to [`Dirty].

      When a clean buffer is dropped, the state in the descriptor is set
      to [`Dropped]. At the same time, the buffer memory is reused for a
      different descriptor.

      The [`Invalid] state must not be kept for longer than a moment.
      If there are several requests for the same block, and the buffer
      is [`Invalid], the other requests can only use busy waiting to
      handle this case.

      The [buf_dirty] flag can be set in some contexts to indicate that
      there was a data modification and [`Dirty] should be entered next.
   *)

type flush_request =
    { flush_inode : int64;
      mutable flush_min_eof : int64;
      mutable flush_min_mtime : Plasma_rpcapi_aux.time;
      mutable flush_index_list : Plasma_util.I64Set.t;
    }
  (** Describes the pending writes for the inode [flush_inode]; this is
      what [select_for_flush] returns.

      NOTE(review): the field meanings are inferred from their names and
      from the comments on [dirty_access] and [select_for_flush], and
      should be confirmed against the implementation:
       - [flush_min_eof] is presumably the EOF position that has to be
         ensured at least
       - [flush_min_mtime] is presumably the corresponding lower bound
         for the modification time
       - [flush_index_list] is presumably the set of indexes of the
         blocks to write
   *)

type buffer_system
  (** The buffer system (abstract). Values are created with
      [create_buffer_system] and passed as first argument to almost all
      functions of this module.
   *)

val create_buffer_system : int -> int -> Unixqueue.event_system -> buffer_system
  (** [create_buffer_system n_bufs blocksize esys]: Creates a new buffer
      system for the event system [esys]. Judging by the argument names,
      [n_bufs] is the number of buffers and [blocksize] the size of a
      block in bytes (to be confirmed against the implementation).
   *)

val mem_size : buffer_system -> int
  (** Size of buffers in bytes. NOTE(review): it is not visible here
      whether this is the size of a single buffer or of all buffers
      together - confirm against the implementation.
   *)

val blit_from_buffer : 
      buffer_system -> buffer -> int -> strmem -> int -> int -> unit
  (** Copies data out of a buffer into a string or memory block.
      NOTE(review): the argument order is presumably
      [blit_from_buffer sys b b_pos dst dst_pos len], following the usual
      blit convention - confirm against the implementation.
   *)

val blit_to_buffer :
      buffer_system -> strmem -> int -> buffer -> int -> int -> unit
  (** Copies data from a string or memory block into a buffer.
      NOTE(review): the argument order is presumably
      [blit_to_buffer sys src src_pos b b_pos len], following the usual
      blit convention - confirm against the implementation.
   *)

val clean_access : buffer_system -> buffer -> unit
  (** Records a read access to a clean buffer. The buffer continues to
      be used as a clean buffer afterwards.
   *)

val dirty_access : buffer_system -> buffer -> int64 -> unit
  (** A clean or invalid buffer is set to dirty. The [int64] argument is
      the EOF position that should now be ensured at least.
   *)

val switch_to_reading : 
      buffer_system -> buffer -> int64 option Uq_engines.engine -> 
      int64 option -> unit
  (** [switch_to_reading sys b e eof_opt]:

      The buffer is switched to [`Reading] state (from either [`Invalid],
      [`Clean] or [`Dirty]). The engine [e] must be in a non-final
      state. When the engine terminates, this is taken as indication that
      the read is finished.

      The result of the engine is [seqno_opt]: [Some seqno] means that
      the read has been successful and that the buffer is filled with
      data of this sequence number of the file. [None] means error.

      This function arranges that the buffer is switched to a follow-up
      state when the engine is finished:
       - if the buffer is scheduled for being dropped, this is done now
       - the buffer is also dropped when the read is non-successful
       - if [eof_opt <> None] the buffer becomes [`Dirty], and the
         EOF value is considered for the flush request
       - if there is pressure for memory the buffer is reassigned
       - otherwise the buffer becomes [`Clean]
   *)

val switch_to_writing :
      buffer_system -> buffer -> int64 option Uq_engines.engine -> unit
  (** The buffer is switched to [`Writing] state (from [`Dirty]).
      The argument engine must be in a non-final
      state. When the engine terminates, this is taken as indication that
      the buffer is written out. This function arranges that the
      buffer state is switched again when this occurs. The follow-up
      state is always [`Written].

      The engine returns the new sequence number.

      A [`Written] buffer should be committed, and then one of three
      functions needs to be called:
       - [write_committed]: if the commit has been successful
       - [write_erroneous]: if an error occurred (note that this function
         takes the inode and the error code, not the buffer)
       - [write_cancelled]: if the block will no longer be written because
         there was an error for another block of the same transaction
   *)

val schedule_drop : buffer_system -> buffer -> unit
  (** If the buffer can be immediately invalidated this is done. Otherwise
      the [buf_delayed_drop] flag is set, and at the end of the ongoing
      read/write the buffer will be set to [`Dropped].
   *)

val schedule_drop_inode : buffer_system -> int64 -> unit
  (** Like [schedule_drop], but for all buffers of the inode passed as
      second argument. It is no error if there is no buffer for this
      inode.
   *)


val lookup_buffer : buffer_system -> int64 -> int64 -> buffer
  (** [lookup_buffer sys inode index]: Looks the buffer up for [inode]
      and [index].

      @raise Not_found if there is no buffer yet, or if the existing
      descriptor is in [`Dropped] state
   *)

val request_buffer_e : 
         buffer_system -> int64 -> int64 -> buffer option Uq_engines.engine
  (** [request_buffer_e sys inode index]: This function is to be used
      when [lookup_buffer] raises [Not_found] to get a new buffer.
      The new buffer is initially in [`Invalid] state. Note that
      competing engine-driven threads can see this state for a
      moment.

      Note that one should immediately set the buffer to a different state
      when the engine is done.

      The function returns [None] via the engine when the buffer already
      exists or when a concurrent request was faster (if there are
      several calls of [request_buffer_e] for the same inode/index pair,
      only one call gets the buffer, and the other calls see [None]).
      The function also returns [None] when an error is recorded
      for this inode (with [write_erroneous]). Because of the latter,
      it is recommended to call [reset_inode_error] just before
      requesting a buffer.
   *)

val select_for_flush : buffer_system -> int64 -> flush_request
  (** [select_for_flush sys inode]: Returns a flush request describing
      the blocks of [inode] that need to be written (NOTE(review):
      presumably the block indexes are in [flush_index_list]).
      Once a block is returned by [select_for_flush]
      it is not again returned in future calls of this function
      unless it is set to [`Dirty] again after leaving the [`Written]
      state.

      The [buf_flushing] flag is used for managing this. While
      [buf_flushing] is set, a call of [dirty_access] does not record
      the block again for flushing. This flag is cleared when entering
      [`Writing].

      @raise Not_found if nothing appropriate is found
   *)

val select_inodes : buffer_system -> int64 list
  (** Returns the inodes that can be flushed. The list is sorted by
      precedence.
   *)

val get_flush_min_eof : buffer_system -> int64 -> int64
val get_flush_min_mtime : buffer_system -> int64 -> Plasma_rpcapi_aux.time
  (** Both functions take the inode as second argument and return the
      respective value from the flush record of this inode.

      @raise Not_found presumably when there is no flush record for the
      inode (to be confirmed against the implementation)
   *)

val write_committed : buffer_system -> buffer -> unit
  (** Records that the write is committed. The buffer must be in
      [`Written] state. If it has not been modified since it was written
      it is reset to [`Clean] (or reassigned for a different purpose).
      If it has been modified in the meantime it is set to [`Dirty].

      NOTE(review): an earlier version of this comment distinguished
      the cases as [`Written false] and [`Written true], but [`Written]
      has no argument in [buf_state]. The modification is presumably
      tracked by the [buf_dirty] flag now - confirm against the
      implementation.

      It is an error to call this function for a different state.
   *)

val write_cancelled : buffer_system -> buffer -> unit
  (** Records that the write of this buffer is cancelled. The buffer
      becomes dirty again.
   *)

val write_erroneous : buffer_system -> int64 -> errno -> unit
  (** [write_erroneous sys inode code]: Records a write error for
      [inode]. This should be called after [write_cancelled], and
      [code] is the error code.

      Any pending [request_buffer_e] for this inode is interrupted,
      so [None] is returned.
   *)

val release_flush_request : buffer_system -> flush_request -> unit
  (** To be called after finishing or aborting a flush request: Checks
      whether the buffers are in the right state. In particular, all
      buffers that are still in [`Written] state are set to [`Dirty].
      Also, the [buf_flushing] flag is cleared.
   *)

val inode_error : buffer_system -> int64 -> errno option
  (** Returns the write error recorded for this inode (see
      [write_erroneous]), or [None] if there is no such error
   *)

val reset_inode_error : buffer_system -> int64 -> unit
  (** Resets the write error recorded for this inode *)

val n_waiting : buffer_system -> int
  (** The number of buffer requests that currently cannot be satisfied *)

val dump_buffers : buffer_system -> unit
  (** Writes the buffer table to the log (at debug level) *)

val max_time : 
      Plasma_rpcapi_aux.time -> Plasma_rpcapi_aux.time -> Plasma_rpcapi_aux.time
  (** Returns the maximum (i.e. the later one) of two time structs *)

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml