/* $Id: pfs_nn_internal.x 235 2010-06-20 22:23:05Z gerd $ -*- c -*- */

/* Internal interfaces used by the namenodes */

#include "pfs_types.x"

#ifndef PFS_NN_INTERNAL_X
#define PFS_NN_INTERNAL_X
/* Elect: RPCs the namenodes call on each other at cluster start in order
   to determine the coordinator. */
program Elect {
    version V1 {

        /* null */
        void null(void) = 0;

        /* announce */
        bool announce(announcement) = 1;
        /* At cluster start the namenodes start calling the announce RPC
           of all other namenodes - until they get a reply from each, or
           until the end of the startup period is reached.

           If received within the startup period, the response is true
           if the announcement is better than that of the server to which
           it is sent. If received after startup, the response is false,
           and the sender must not start up.

           As all namenodes call announce of all other namenodes, the
           question is whether there is a winner. If we assume there
           is a total ordering between the announcements, there is
           a best announcement if no two namenodes emit equal
           announcements. So given the announcements are all distinct,
           there is a winner.
        */

        /* set_coordinator */
        void set_coordinator(longstring, longstring, longstring) = 2;
        /* When the end of the startup period is reached, one of the
           name nodes sends set_coordinator to all other nodes, and
           becomes the coordinator. The coordinator must be eligible by
           all other nodes that actually respond. Also, the coordinator
           must have the highest revision number, and among all nodes
           with the highest revision number, the coordinator has the
           lowest rank.

           The first arg is the "host:port" name of the coordinator.
           The second arg is the clustername.
           The third arg is the revision identifier.

           There is right now no provision for the case that the
           coordinator crashes - no other node is then automatically
           elected. Best is to restart everything then.
        */

    } = 1;
} = 0x8000f001;
 
/* Nameslave: the transaction and database-push interface of the
   namenodes that are not the coordinator. */
program Nameslave {
    version V1 {
        /* This is what the non-coordinators implement */

        /* null */
        void null(void) = 0;

        /* begin_transaction */
        void begin_transaction(longstring, longstring) = 1;
        /* Begin a transaction: clustername, expected_rev.
           The 2nd arg is the expected revision string.
        */

        /* prepare_commit */
        bool prepare_commit(void) = 2;
        /* Result is true if the name database could be updated. */

        /* commit */
        void commit(void) = 3;
        /* The response of commit is the ACK in the extended 2-phase
           commit protocol.
        */

        /* void abort(void) = 4; */

        /* Note that the names of the following RPCs correspond to
           function names in Nn_db:
        */

        /* push_inode_ins */
        void push_inode_ins(hyper, inodeinfo) = 7;
        /* push_inode_ins(inode, ii) */

        /* push_inode_upd */
        void push_inode_upd(hyper, inodeinfo) = 8;
        /* push_inode_upd(inode, ii) */

        /* push_inode_upd_time */
        void push_inode_upd_time(hyper, time_opt, time_opt) = 18;
        /* push_inode_upd_time(inode, mtime, ctime) */

        /* push_inode_del */
        void push_inode_del(hyper) = 9;
        /* push_inode_del(inode) */

        /* push_blockalloc_upd */
        void push_blockalloc_upd(int, hyper, longstring) = 10;
        /* push_blockalloc_upd(datastore, blkidx, blkmap) */

        /* push_datastore_upd */
        void push_datastore_upd(int, longstring, hyper, bool) = 11;
        /* push_datastore_upd(id, identity, size, enabled): Updates the
           datastore table. If the record is new, it is added.

           The blockalloc table is updated, too: For new stores, the
           rows are added. If the size of the existing store is
           increased, further rows are added.

           It is an error to decrease the size.
        */

        /* push_datastore_del */
        void push_datastore_del(int) = 12;
        /* Deletes the datastore with this ID and all rows referencing it */

        /* push_revision_upd */
        void push_revision_upd(longstring) = 13;
        /* Sets the revision id in the db */

        /* push_inodeblocks_ins */
        void push_inodeblocks_ins(hyper, blocklist) = 14;
        /* push_inodeblocks_ins(inode, bl) */

        /* push_inodeblocks_del */
        void push_inodeblocks_del(hyper, hyper, hyper) = 15;
        /* push_inodeblocks_del(inode, blkidx, len) */

        /* push_names_ins */
        void push_names_ins(longstring, hyper) = 16;
        /* push_names_ins(path, inode) */

        /* push_names_del */
        void push_names_del(longstring) = 17;
        /* push_names_del(path) */
    } = 1;
} = 0x8000f002;
 
/* Monitor: control interface for starting the monitor. */
program Monitor {
    version V1 {

        /* null */
        void null(void) = 0;

        /* start */
        void start(void) = 1;
        /* Starts the monitor:
           First, the state is loaded from the db.
           Second, all known datanodes are discovered and enabled.
           Third, the newsfeed for monitoring results is started.
           Fourth, the Dn_admin interface is enabled.
        */
    } = 1;
} = 0x8000f003;
 
/* Request_notifications

   An inodecache requests notifications through the program
   Request_notifications, which is available in the coordinator. Once
   there is something to report, the coordinator calls the inodecache
   back. The callback is defined by the program Notifications.

   NOTE(review): the opening of the first sentence was lost in this
   copy of the file; its wording is reconstructed - confirm against the
   upstream source.
*/
program Request_notifications {
    /* Request_notifications is available on the coordinator */
    version V1 {

        /* null */
        void null(void) = 0;

        /* on_inode_update */
        bool on_inode_update(hyper, hyper, longstring) = 1;
        /* on_inode_update(inode, exptime, socket): requests to be
           notified when the inode changes or is deleted. Changes cover
           metadata and data changes. exptime is the point in time when
           the notification will expire. The socket is either an
           Internet socket in "host:port" syntax or the path of a Unix
           Domain socket. The notification will be that the RPC call
           Notifications.V1.inode_update is invoked.

           on_inode_update returns true when the request is successful.
        */

    } = 1;
} = 0x8000f004;
 
/* Notifications: the callback interface invoked to deliver the
   notifications requested via Request_notifications.V1.on_inode_update. */
program Notifications {
    version V1 {

        /* null */
        void null(void) = 0;

        /* inode_update */
        void inode_update(hyper, bool) = 1;
        /* inode_update(inode, expires): If expires is true, this call
           just indicates that the notification request ends. If it is
           false, the inode has been changed or is deleted.

           It is allowed that this RPC is called more often than
           necessary.

           Implementations of this call must be fast! If a transaction
           changes an inode, the commit cannot be finished before this
           call is responded.
        */
    } = 1;
} = 0x8000f005;
#endif