Plasma GitLab Archive
Projects Blog Knowledge

(*
  Copyright 2010 Gerd Stolpmann

  This file is part of Plasma, a distributed filesystem and a
  map/reduce computation framework. Unless you have a written license
  agreement with the copyright holder (Gerd Stolpmann), the following
  terms apply:

  Plasma is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Plasma is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Plasma.  If not, see <http://www.gnu.org/licenses/>.

*)
(* $Id: mapred_config.mli 436 2011-10-09 12:38:07Z gerd $ *)

(** Configuration of mapred servers *)

(** The configuration of the map/reduce servers, as a read-only object.
    Objects of this type are returned by [extract_config]; the example
    config given there shows the sections ([namenodes], [mapred]) that
    these values presumably come from.
 *)
class type mapred_config =
object
  (** General *)

  method nn_clustername : string
    (** The name of the cluster *)
  method nn_nodes : string list
    (** The name nodes, each in "host:port" syntax *)
  method mr_task_nodes : string list
    (** The task nodes (hostnames only, without port) *)
  method mr_task_port : int
    (** The port number *)
  method mr_task_tmpdir : string
    (** A directory where executables, logs, etc. are put *)

  (** Resource parameters *)

  (** There are two ways of limiting the resource consumption:
      - by setting parameters to absolute numbers
      - by setting parameters relative to an automatically determined
        maximum

      The first method always takes precedence. The second method is nicer
      because it also works well when the cluster is not homogeneous,
      and the systems differ in the amount of RAM and the number of cores.
      However, getting the available resources is very OS-dependent, and
      there are only routines for a handful of operating systems. Linux,
      BSD, and Solaris should work here.

      Note that the maximum for shared memory is assumed to be 1/8 of
      physical RAM (independent of the real OS settings - these are really
      hard to find out).
   *)

  method mr_task_load_limit : float
    (** Load limit per task server (in number of tasks). Should be set to
        a small multiple of the number of cores of the biggest machine.
        This is a required parameter.

        This is now only used for planning the job execution, especially
        for deciding how fine-grained the job needs to be split into tasks.
        At runtime, the number of cores is determined dynamically to
        drive the execution.
     *)
  method mr_shm_low : int64 option
    (** Low watermark for shared memory. If the shm consumption drops below
        this value, shm is no longer considered a scarce resource.
        Default: [None]
     *)
  method mr_shm_low_factor : float
    (** Alternate way of setting the low watermark, as a fraction of the
        available shared memory. This should be a number between 0 and 1.0.
        The factor is only considered if [mr_shm_low = None].
        Default: 0.25
     *)
  method mr_shm_high : int64 option
    (** High watermark for shared memory. If the shm consumption is above
        this value, shm is considered a scarce resource. Default: [None]
     *)
  method mr_shm_high_factor : float
    (** Alternate way of setting the high watermark, as a fraction of the
        available shared memory. This should be a number between 0 and 1.0.
        The factor is only considered if [mr_shm_high = None].
        Default: 0.5
     *)
  method mr_shm_max : int64 option
    (** Maximum for shared memory. If this amount of shm consumption is
        reached, shm is considered unavailable. Default: [None]
     *)
  method mr_shm_max_factor : float
    (** Alternate way of setting the maximum, as a fraction of the
        available shared memory. This should be a number between 0 and 1.0.
        The factor is only considered if [mr_shm_max = None].
        Default: 0.75
     *)
  method mr_buf_low : int64 option
    (** Low watermark for buffer memory. If the bufmem consumption drops
        below this value, bufmem is no longer considered a scarce resource.
        Default: [None]
     *)
  method mr_buf_low_factor : float
    (** Alternate way of setting the low watermark, as a fraction of the
        available physical RAM. This should be a number between 0 and 1.0.
        The factor is only considered if [mr_buf_low = None].
        Default: 0.25
     *)
  method mr_buf_high : int64 option
    (** High watermark for buffer memory. If the bufmem consumption is above
        this value, bufmem is considered a scarce resource. Default: [None]
     *)
  method mr_buf_high_factor : float
    (** Alternate way of setting the high watermark, as a fraction of the
        available physical RAM. This should be a number between 0 and 1.0.
        The factor is only considered if [mr_buf_high = None].
        Default: 0.5
     *)
  method mr_buf_max : int64 option
    (** Maximum for buffer memory. If this amount of bufmem consumption is
        reached, bufmem is considered unavailable. Default: [None]
     *)
  method mr_buf_max_factor : float
    (** Alternate way of setting the maximum, as a fraction of the
        available physical RAM. This should be a number between 0 and 1.0.
        The factor is only considered if [mr_buf_max = None].
        Default: 0.75
     *)

  (** Buffer parameters *)

  method mr_buffer_size : int
    (** The normal size of I/O buffers. E.g. 64M. The [`Map], [`Emap]
        and [`Sort] tasks use file buffers of this size for each file
        they read from or write to. The [`Shuffle] tasks divide this size
        by the number of files they read or write in parallel.
     *)
  method mr_buffer_size_tight : int
    (** The size of I/O buffers when RAM is tight. E.g. 16M *)
  method mr_sort_size : int
    (** The size of the buffers for sorting. E.g. 128M *)
end


val extract_config : Netplex_types.config_file -> mapred_config
  (** [extract_config cf] returns the map/reduce configuration found in
      the Netplex config file [cf]. The config file must contain sections
      that look like:

      {[
         netplex {
           ...
           namenodes {
             clustername = "cluster1";
             node { addr = "hostname:port" }
             node { addr = "hostname:port" }
             ...
           }
           mapred {
             node { addr = "hostname" }
             node { addr = "hostname" }
             ...
             port = 1234;
             tmpdir = "/somewhere";
             load_limit = 8.1;
           }
         }
      ]}

      Only some of the parameters are shown in this example.
      NOTE(review): the config names of the remaining parameters of
      [mapred_config], and the behavior when a section is missing or
      malformed, are defined by the implementation and should be
      documented here.
   *)


val executable_version : unit -> string
  (** [executable_version ()] returns an MD5 sum of the running executable.
      NOTE(review): the encoding of the returned string (hex or raw
      digest) is not specified here - confirm against the implementation.
   *)

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml