(*
  Copyright 2010 Gerd Stolpmann

  This file is part of Plasma, a distributed filesystem and a
  map/reduce computation framework. Unless you have a written license
  agreement with the copyright holder (Gerd Stolpmann), the following
  terms apply:

  Plasma is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Plasma is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Foobar. If not, see <http://www.gnu.org/licenses/>.
*)
(* $Id: mapred_job_config.mli 283 2010-10-28 00:19:50Z gerd $ *)

(** Extract job configuration, and marshalling *)

type m_job_config
(** Abstract, marshallable representation of a job configuration.
    Obtain one from {!extract_job_config} or {!unmarshal}; turn it back
    into a configuration object with {!mapred_job_config}. *)

val extract_job_config :
  Netplex_types.config_file ->
  (string * string) list ->
  string list ->
  (Mapred_def.mapred_job_config * m_job_config)
(** [let (jc, mjc) = extract_job_config cf args custom_params]:
    Reads the job configuration out of the config file [cf] (see the
    description of the expected layout at the end of this interface).

    [args] is an association list of overrides for the values found in
    [cf]; the leftmost value is taken.

    NOTE(review): the role of [custom_params] is not described in this
    interface -- presumably the names of additional, user-defined
    parameters; confirm against the implementation.

    The result is the configuration both as object [jc] and in the
    marshallable representation [mjc]. *)

val mapred_job_config : m_job_config -> Mapred_def.mapred_job_config
(** [mapred_job_config mjc] returns the configuration [mjc] as object. *)

val marshal : m_job_config -> string
(** [marshal mjc] converts the configuration [mjc] to a string. *)

val unmarshal : string -> m_job_config
(** [unmarshal s] converts the string [s] to a configuration; the
    counterpart of {!marshal}. *)

(** The config file must look like (it can also contain unrelated
    entries):

    {[
      netplex {
        ...
        mapredjob {
          <name> = <value>;
          ...
        }
      }
    ]}

    The possible names are the method names of
    {!Mapred_def.mapred_job_config}. The values should have the right
    type.

    Example:

    {[
      netplex {
        mapredjob {
          name = "my_job";
          input_dir = "/input";
          output_dir = "/output";
          work_dir = "/work";
          log_dir = "/log";
          bigblock_size = 65536;
          map_tasks = 100;
          merge_limit = 4;
          split_limit = 4;
          partitions = 20;
        }
      }
    ]}

    Some settings have default values:
    - [name] is set to an automatically generated name
    - [bigblock_size] is 16M
    - [map_tasks] is 0 (meaning a good value is computed at runtime)
    - [merge_limit] and [split_limit] are 4
*)