Plasma GitLab Archive
Projects Blog Knowledge

(*
  Copyright 2010 Gerd Stolpmann

  This file is part of Plasma, a distributed filesystem and a
  map/reduce computation framework. Unless you have a written license
  agreement with the copyright holder (Gerd Stolpmann), the following
  terms apply:

  Plasma is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Plasma is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Plasma.  If not, see <http://www.gnu.org/licenses/>.

*)
(* $Id: mapred_job_config.mli 283 2010-10-28 00:19:50Z gerd $ *)

(** Extract job configuration, and marshalling *)

type m_job_config
  (** A job configuration in a marshallable representation. The type is
      abstract: values are produced by {!extract_job_config} and
      {!unmarshal}, and are consumed by {!mapred_job_config} and
      {!marshal}.
   *)


val extract_job_config : Netplex_types.config_file -> 
                         (string * string) list ->
                         string list ->
                           (Mapred_def.mapred_job_config * m_job_config)
  (** [let (jc, mjc) = extract_job_config cf args custom_params]:

      Extracts the job configuration from the config file [cf]. The
      expected layout of the file is described at the end of this
      interface.

      The association list [args] may contain overrides. If the same
      key occurs more than once in [args], the leftmost binding is
      taken.

      NOTE(review): the meaning of [custom_params] is not stated in this
      interface. Presumably it lists the names of additional,
      job-specific parameters to extract; confirm against the
      implementation.

      Returns the configuration twice: as the object [jc], and as [mjc],
      a marshallable representation of the same configuration.
   *)

val mapred_job_config : m_job_config -> Mapred_def.mapred_job_config
  (** [mapred_job_config mjc] returns the configuration stored in the
      marshallable representation [mjc] as an object of type
      {!Mapred_def.mapred_job_config}.
   *)

val marshal : m_job_config -> string
  (** [marshal mjc] encodes the marshallable representation [mjc] as a
      string.

      NOTE(review): the encoding is not specified in this interface.
   *)

val unmarshal : string -> m_job_config
  (** [unmarshal s] decodes a job configuration from the string [s].

      NOTE(review): presumably [s] must have been produced by
      {!marshal}. The behavior for any other input is not stated in
      this interface; confirm against the implementation.
   *)


(** The config file must contain a [mapredjob] section of the following
    form (entries unrelated to the job configuration are permitted):

    {[
       netplex {
         ...
         mapredjob {
           <name> = <value>;
           ...
         }
       }
    ]}

    The possible names are the method names of {!Mapred_def.mapred_job_config}.
    The values should have the right type.

    Example:

    {[
       netplex {
         mapredjob {
            name = "my_job";
            input_dir = "/input";
            output_dir = "/output";
            work_dir = "/work";
            log_dir = "/log";
            bigblock_size = 65536;
            map_tasks = 100;
            merge_limit = 4;
            split_limit = 4;
            partitions = 20;
         }
       }
    ]}

    Some settings have default values:
     - [name] is set to an automatically generated name
     - [bigblock_size] is 16M
     - [map_tasks] is 0 (meaning a good value is computed at runtime)
     - [merge_limit] and [split_limit] are 4
 *)

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml