/* $Id: pfs_nn_internal.x 271 2010-10-20 00:09:51Z gerd $ -*- c -*- */
/** Internal interfaces used by the namenodes
*/
#include "pfs_types.x"
#ifndef PFS_NN_INTERNAL_X
#define PFS_NN_INTERNAL_X
/** {1:elect [Elect]} */
/** The election happens at cluster startup. The goal is to determine
the coordinator. Participants are all namenodes.
*/
program Elect {
version V1 {
/** {2 [null] } */
void null(void) = 0;
/* Takes no argument and returns no result (procedure 0 is the
conventional RPC "null" procedure). */
/** {2 [announce] } */
bool announce(announcement) = 1;
/** At cluster start the namenodes start calling the
[announce] RPC of all other namenodes - until they get a
reply from each, or until the end of the startup period is
reached.
If received within the startup period, the response is [true]
if the announcement is better than that of the server to which it
is sent.
If received after startup, the response is [false], and the
sender must not start up.
As all namenodes call [announce] of all other namenodes, the
question is whether there is a winner. If we assume there
is a total ordering between the [announcement]s, there is
a best announcement if no two namenodes emit equal announcements.
So, given that the announcements are all distinct, there is a winner.
*/
/* NOTE(review): the [announcement] type is not declared in this file;
presumably it comes from pfs_types.x - confirm there. */
/** {2 [set_coordinator] } */
void set_coordinator(longstring, longstring, longstring) = 2;
/** [set_coordinator(coordinator, clustername, revision)]:
When the end of the startup period is reached, one of the name
nodes sends [set_coordinator] to all other nodes, and becomes the
coordinator. The coordinator must be considered eligible by all
other nodes that actually respond. Also, the coordinator must have
the highest revision number, and among all nodes with the highest
revision number, the coordinator has the lowest rank.
The first arg is the "host:port" name of the coordinator.
The second arg is the clustername.
The third arg is the revision identifier.
*/
/** Right now there is no provision for the case that the coordinator
crashes - no other node is then automatically elected. The best
option is to restart everything in that case.
*/
} = 1;
} = 0x8000f001;
/** {1:nameslave [Nameslave]} */
/** This RPC program is activated on the non-coordinator namenodes. It
is called by the coordinator to push updates of the database.
*/
program Nameslave {
version V1 {
/* This is what the non-coordinators implement */
/** {2 [null] } */
void null(void) = 0;
/** {2 [begin_transaction] } */
void begin_transaction(longstring, longstring) = 1;
/** [begin_transaction(clustername, expected_rev)]: Begins a transaction.
The 2nd arg is the expected revision string.
*/
/** {2 [prepare_commit] } */
bool prepare_commit(void) = 2;
/** The result is [true] if the name database could be updated.
*/
/** {2 [commit] } */
void commit(void) = 3;
/** The response of [commit] is the ACK in the extended 2-phase
commit protocol.
*/
/* void abort(void) = 4; */
/* NOTE: [abort] above is commented out, so procedure number 4 is not
assigned in this version. Numbers 5 and 6 are not assigned either;
the next procedure is number 7. */
/** Note that the names of the following RPCs correspond to
function names in {!Nn_db}:
*/
/** {2 [push_inode_ins] } */
void push_inode_ins(hyper, inodeinfo) = 7;
/** [push_inode_ins(inode, ii)] */
/** {2 [push_inode_upd] } */
void push_inode_upd(hyper, inodeinfo) = 8;
/** [push_inode_upd(inode, ii)] */
/** {2 [push_inode_upd_time] } */
void push_inode_upd_time(hyper, time_opt, time_opt) = 18;
/** [push_inode_upd_time(inode, mtime, ctime)] */
/* NOTE: number 18 is out of textual order here. Procedure numbers
identify the procedures on the wire, so do not renumber to "fix"
the ordering. */
/** {2 [push_inode_del] } */
void push_inode_del(hyper) = 9;
/** [push_inode_del(inode)] */
/** {2 [push_blockalloc_upd] } */
void push_blockalloc_upd(int, hyper, longstring) = 10;
/** [push_blockalloc_upd(datastore, blkidx, blkmap)] */
/** {2 [push_datastore_upd] } */
void push_datastore_upd(int, longstring, hyper, bool) = 11;
/** [push_datastore_upd(id, identity, size, enabled)]: Updates the
datastore table. If the record is new, it is added.
The blockalloc table is updated, too: For new stores, the
rows are added. If the size of an existing store is increased,
further rows are added.
It is an error to decrease the size.
*/
/** {2 [push_datastore_del] } */
void push_datastore_del(int) = 12;
/** Deletes the datastore with this ID and all rows referencing it. */
/** {2 [push_revision_upd] } */
void push_revision_upd(longstring) = 13;
/** Sets the revision id in the db. */
/** {2 [push_inodeblocks_ins] } */
void push_inodeblocks_ins(hyper, blocklist) = 14;
/** [push_inodeblocks_ins(inode, bl)] */
/** {2 [push_inodeblocks_del] } */
void push_inodeblocks_del(hyper, hyper, hyper) = 15;
/** [push_inodeblocks_del(inode, blkidx, len)] */
/** {2 [push_names_ins] } */
void push_names_ins(hyper, longstring, hyper) = 16;
/** [push_names_ins(dir_inode, path, inode)] */
/** {2 [push_names_del] } */
void push_names_del(hyper, longstring) = 17;
/** [push_names_del(dir_inode, path)] */
} = 1;
} = 0x8000f002;
/** {1:monitor [Monitor]} */
program Monitor {
version V1 {
/** {2 [null] } */
void null(void) = 0;
/** {2 [start] } */
void start(void) = 1;
/** Starts the monitor. The steps are, in this order:
- First, the state is loaded from the db.
- Second, all known datanodes are discovered and enabled.
- Third, the newsfeed for monitoring results is started.
- Fourth, the Dn_admin interface is enabled.

[start] takes no argument and returns no result.
*/
} = 1;
} = 0x8000f003;
/** {1 Inodecache} */
/** {2:request_notifications [Request_notifications]} */
/** The inodecache calls the program [Request_notifications], which is
available on the coordinator. Once there is something to report, the
coordinator calls the inodecache back. The callback is defined by
the program [Notifications].
*/
program Request_notifications {
/* Request_notifications is available on the coordinator */
version V1 {
/** {2 [null] } */
void null(void) = 0;
/** {2 [on_inode_update] } */
bool on_inode_update(hyper, hyper, longstring) = 1;
/** [on_inode_update(inode, exptime, socket)]: requests to be notified
when the [inode] changes or is deleted. Changes cover both metadata
and data changes. [exptime] is the point in time when the
notification request expires. The [socket] is either an Internet
socket in "host:port" syntax or the path of a Unix Domain socket.
The notification is delivered by invoking the RPC call
[Notifications.V1.inode_update].
[on_inode_update] returns [true] when the request is successful.
*/
/* NOTE(review): the unit and epoch of [exptime] are not stated in this
file - confirm against the implementation. */
} = 1;
} = 0x8000f004;
/** {2:notifications [Notifications]} */
program Notifications {
version V1 {
/** {2 [null] } */
void null(void) = 0;
/** {2 [inode_update] } */
void inode_update(hyper, bool) = 1;
/** [inode_update(inode, expires)]: If [expires] is true, this call
just indicates that the notification request has ended. If it is
false, the [inode] has been changed or deleted.
This RPC may be called more often than necessary, so receivers
must tolerate redundant calls.
Implementations of this call must be fast! If a transaction
changes an inode, the commit cannot be finished before this call
has been answered.
*/
} = 1;
} = 0x8000f005;
#endif