Plasma GitLab Archive
Projects Blog Knowledge

/* $Id: pfs_nn_internal.x 271 2010-10-20 00:09:51Z gerd $ -*- c -*- */

/** Internal interfaces used by the namenodes
 */

#include "pfs_types.x"

#ifndef PFS_NN_INTERNAL_X
#define PFS_NN_INTERNAL_X

/** {1:elect [Elect]} */

/** The election happens at cluster startup. The goal is to determine
    the coordinator. Participants are all namenodes.
*/

program Elect {
    /* Election of the coordinator at cluster startup. The participants
       are all namenodes.
    */
    version V1 {

	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [announce] } */

	bool announce(announcement) = 1;
	/** At cluster start, every namenode keeps calling the [announce]
	   RPC of all other namenodes - until it has got a reply from each
	   of them, or until the end of the startup period is reached.

	   If the call is received within the startup period, the response
	   is [true] if the received announcement is better than that of
	   the server to which it is sent. If the call is received after
	   startup, the response is [false], and the sender must not
	   start up.

	   As all namenodes call [announce] of all other namenodes, the
	   question is whether there is a winner. Assuming a total ordering
	   of the [announcement]s, a best announcement exists if no two
	   namenodes emit equal announcements. So, given that the
	   announcements are all distinct, there is a winner.
	*/

	/** {2 [set_coordinator] } */

	void set_coordinator(longstring, longstring, longstring) = 2;
	/** When the end of the startup period is reached, one of the
	   namenodes sends [set_coordinator] to all other nodes, and
	   becomes the coordinator. The coordinator must be accepted as
	   eligible by all other nodes that actually respond. Also, the
	   coordinator must have the highest revision number, and among
	   all nodes with the highest revision number, the coordinator is
	   the one with the lowest rank.

	   The first arg is the "host:port" name of the coordinator.

	   The second arg is the clustername.

	   The third arg is the revision identifier.
	*/

	/** Right now there is no provision for the case that the
	   coordinator crashes - no other node is automatically elected
	   then. The best course of action is to restart everything.
	*/
	
    } = 1;
} = 0x8000f001;

/** {1:nameslave [Nameslave]} */

/** This RPC program is activated on the non-coordinator namenodes. It
    is called by the coordinator to push updates of the database.
*/

program Nameslave {
    version V1 {
	/* This program is implemented by the non-coordinator namenodes.
	   The coordinator calls it to push updates of the database.
	*/

	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [begin_transaction] } */

	void begin_transaction(longstring, longstring) = 1;
	/** [begin_transaction(clustername, expected_rev)]: Begins a
	   transaction. The 1st arg is the clustername, and the 2nd arg
	   is the expected revision string.
	*/

	/** {2 [prepare_commit] } */

	bool prepare_commit(void) = 2;
	/** The result is [true] if the name database could be updated.
	 */

	/** {2 [commit] } */

	void commit(void) = 3;
	/** The response of [commit] is the ACK in the extended 2-phase
	   commit protocol.
	*/

	/* [abort] is disabled. As a consequence, the procedure numbers
	   4 to 6 are not assigned in this version:
	*/
	/* void abort(void) = 4; */

	/** Note that the names of the following RPCs correspond to
	    function names in {!Nn_db}:
	*/

	/** {2 [push_inode_ins] } */

	void push_inode_ins(hyper, inodeinfo) = 7;
	/** [push_inode_ins(inode, ii)] */

	/** {2 [push_inode_upd] } */

	void push_inode_upd(hyper, inodeinfo) = 8;
	/** [push_inode_upd(inode, ii)] */

	/** {2 [push_inode_upd_time] } */

	/* The procedure number 18 is out of sequence with the listing
	   order here; it is the highest number used in this version.
	*/
	void push_inode_upd_time(hyper, time_opt, time_opt) = 18;
	/** [push_inode_upd_time(inode, mtime, ctime)] */

	/** {2 [push_inode_del] } */

	void push_inode_del(hyper) = 9;
	/** [push_inode_del(inode)] */

	/** {2 [push_blockalloc_upd] } */

	void push_blockalloc_upd(int, hyper, longstring) = 10;
	/** [push_blockalloc_upd(datastore, blkidx, blkmap)] */
      
	/** {2 [push_datastore_upd] } */

	void push_datastore_upd(int, longstring, hyper, bool) = 11;
	/** [push_datastore_upd(id, identity, size, enabled)]: Updates the
	   datastore table. If the record is new, it is added.

	   The blockalloc table is updated, too: For new stores, the
	   rows are added. If the size of an existing store is increased,
	   further rows are added.

	   It is an error to decrease the size.
	*/

	/** {2 [push_datastore_del] } */

	void push_datastore_del(int) = 12;
	/** [push_datastore_del(id)]: Deletes the datastore with this ID
	   and all rows referencing it.
	*/

	/** {2 [push_revision_upd] } */

	void push_revision_upd(longstring) = 13;
	/** Sets the revision id in the db. */

	/** {2 [push_inodeblocks_ins] } */

	void push_inodeblocks_ins(hyper, blocklist) = 14;
	/** [push_inodeblocks_ins(inode, bl)] */

	/** {2 [push_inodeblocks_del] } */

	void push_inodeblocks_del(hyper, hyper, hyper) = 15;
	/** [push_inodeblocks_del(inode, blkidx, len)] */

	/** {2 [push_names_ins] } */

	void push_names_ins(hyper, longstring, hyper) = 16;
	/** [push_names_ins(dir_inode, path, inode)] */

	/** {2 [push_names_del] } */

	void push_names_del(hyper, longstring) = 17;
	/** [push_names_del(dir_inode, path)] */

    } = 1;
} = 0x8000f002;


/** {1:monitor [Monitor]} */

program Monitor {
    version V1 {
	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [start] } */

	void start(void) = 1;
	/** Starts the monitor. This happens in four steps:
	   - First, the state is loaded from the db.
	   - Second, all known datanodes are discovered and enabled.
	   - Third, the newsfeed for monitoring results is started.
	   - Fourth, the Dn_admin interface is enabled.
	*/

    } = 1;
} = 0x8000f003;


/** {1 Inodecache} */

/** {2:request_notifications [Request_notifications]} */

/** The inodecache calls the program [Request_notifications], which is
    available in the coordinator. Once there is something to report, the
    coordinator calls the inodecache back. The callback is defined by
    the program [Notifications].
*/

program Request_notifications {
    /* Request_notifications is available on the coordinator */

    version V1 {
	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [on_inode_update] } */

	bool on_inode_update(hyper, hyper, longstring) = 1;
	/** [on_inode_update(inode, exptime, socket)]: Requests to be
	   notified when the [inode] changes or is deleted. Changes cover
	   both metadata and data changes.

	   [exptime] is the point in time when the notification request
	   expires.

	   The [socket] is either an Internet socket in "host:port" syntax
	   or the path of a Unix Domain socket.

	   The notification is delivered by invoking the RPC call
	   [Notifications.V1.inode_update].

	   [on_inode_update] returns [true] when the request is successful.
	*/
	
    } = 1;
} = 0x8000f004;

/** {2:notifications [Notifications]} */

program Notifications {
    version V1 {

	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [inode_update] } */

	void inode_update(hyper, bool) = 1;
	/** [inode_update(inode, expires)]: If [expires] is true, this call
	   just indicates that the notification request ends. If it is
	   false, the [inode] has been changed or deleted.

	   This RPC may be called more often than necessary.

	   Implementations of this call must be fast! If a transaction
	   changes an inode, the commit cannot be finished before the
	   response to this call has arrived.
	*/

    } = 1;
} = 0x8000f005;

#endif

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml