/* $Id: pfs_nn_internal.x 271 2010-10-20 00:09:51Z gerd $ -*- c -*- */

/** Internal interfaces used by the namenodes
 */

#include "pfs_types.x"

#ifndef PFS_NN_INTERNAL_X
#define PFS_NN_INTERNAL_X

/** {1:elect [Elect]} */

/** The election happens at cluster startup. The goal is to determine
    the coordinator. Participants are all namenodes.
 */

program Elect {
    version V1 {

	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [announce] } */

	bool announce(announcement) = 1;
	/** At cluster start the namenodes start calling the [announce]
	    RPC of all other namenodes - until they get a reply from
	    each, or until the end of the startup period is reached.

	    If received within the startup period, the response is
	    [true] if the announcement is better than that of the server
	    to which it is sent. If received after startup, the response
	    is [false], and the sender must not start up.

	    As all namenodes call [announce] of all other namenodes,
	    the question is whether there is a winner. If we assume
	    there is a total ordering between the [announcement]s,
	    there is a best announcement if no two namenodes emit
	    equal announcements. So given the announcements are all
	    distinct, there is a winner.
	*/

	/** {2 [set_coordinator] } */

	void set_coordinator(longstring, longstring, longstring) = 2;
	/** When the end of the startup period is reached, one of
	    the name nodes sends [set_coordinator] to all other nodes,
	    and becomes the coordinator. The coordinator must be
	    eligible by all other nodes that actually respond. Also,
	    the coordinator must have the highest revision number, and
	    among all nodes with the highest revision number, the
	    coordinator has the lowest rank.

	    The first arg is the "host:port" name of the coordinator.
	    The second arg is the clustername.
	    The third arg is the revision identifier.
	*/

	/** There is currently no provision for the case that the
	    coordinator crashes - no other node is then automatically
	    elected. The best option is then to restart everything.
	*/

    } = 1;
} = 0x8000f001;

/** {1:nameslave [Nameslave]} */

/** This RPC program is activated on the non-coordinator namenodes.
    It is called by the coordinator to push updates of the database.
 */

program Nameslave {
    version V1 {
	/* This is what the non-coordinators implement */

	/* Note on numbering: the procedure numbers below are not
	   contiguous. Number 4 appears only in the commented-out
	   [abort], 5 and 6 are not assigned in this version, and
	   [push_inode_upd_time] uses 18. The numbers identify the
	   procedures in the RPC protocol, so they must not be
	   "tidied up".
	*/

	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [begin_transaction] } */

	void begin_transaction(longstring, longstring) = 1;
	/** [begin_transaction(clustername, expected_rev)]: Begins a
	    transaction. The first arg is the clustername. The second
	    arg is the expected revision string.
	*/

	/** {2 [prepare_commit] } */

	bool prepare_commit(void) = 2;
	/** The result is [true] if the name database could be updated. */

	/** {2 [commit] } */

	void commit(void) = 3;
	/** The response of [commit] is the ACK in the extended 2-phase
	    commit protocol
	*/

	/*
	void abort(void) = 4;
	*/

	/** Note that the names of the following RPCs correspond to
	    function names in {!Nn_db}:
	*/

	/** {2 [push_inode_ins] } */

	void push_inode_ins(hyper, inodeinfo) = 7;
	/** [push_inode_ins(inode, ii)] */

	/** {2 [push_inode_upd] } */

	void push_inode_upd(hyper, inodeinfo) = 8;
	/** [push_inode_upd(inode, ii)] */

	/** {2 [push_inode_upd_time] } */

	void push_inode_upd_time(hyper, time_opt, time_opt) = 18;
	/** [push_inode_upd_time(inode, mtime, ctime)] */

	/** {2 [push_inode_del] } */

	void push_inode_del(hyper) = 9;
	/** [push_inode_del(inode)] */

	/** {2 [push_blockalloc_upd] } */

	void push_blockalloc_upd(int, hyper, longstring) = 10;
	/** [push_blockalloc_upd(datastore,blkidx,blkmap)] */

	/** {2 [push_datastore_upd] } */

	void push_datastore_upd(int, longstring, hyper, bool) = 11;
	/** [push_datastore_upd(id,identity,size,enabled)]: Updates the
	    datastore table. If the record is new, it is added.

	    The blockalloc table is updated, too: For new stores, the
	    rows are added. If the size of the existing store is
	    increased, further rows are added. It is an error to
	    decrease the size.
	*/

	/** {2 [push_datastore_del] } */

	void push_datastore_del(int) = 12;
	/** [push_datastore_del(id)]: Deletes the datastore with this ID
	    and all rows referencing it
	*/

	/** {2 [push_revision_upd] } */

	void push_revision_upd(longstring) = 13;
	/** Sets the revision id in the db to the passed string */

	/** {2 [push_inodeblocks_ins] } */

	void push_inodeblocks_ins(hyper, blocklist) = 14;
	/** [push_inodeblocks_ins(inode, bl)] */

	/** {2 [push_inodeblocks_del] } */

	void push_inodeblocks_del(hyper, hyper, hyper) = 15;
	/** [push_inodeblocks_del(inode, blkidx, len)] */

	/** {2 [push_names_ins] } */

	void push_names_ins(hyper, longstring, hyper) = 16;
	/** [push_names_ins(dir_inode, path, inode)] */

	/** {2 [push_names_del] } */

	void push_names_del(hyper, longstring) = 17;
	/** [push_names_del(dir_inode,path)] */

    } = 1;
} = 0x8000f002;

/** {1:monitor [Monitor]} */

program Monitor {
    version V1 {
	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [start] } */

	void start(void) = 1;
	/** Starts the monitor:
	    - First, the state is loaded from the db.
	    - Second, all known datanodes are discovered and enabled.
	    - Third, the newsfeed for monitoring results is started.
	    - Fourth, the Dn_admin interface is enabled.
	*/

    } = 1;
} = 0x8000f003;

/** {1 Inodecache} */

/** {2:request_notifications [Request_notifications]} */

/** The inodecache calls the program [Request_notifications], which is
    available in the coordinator. Once there is something to report,
    the coordinator calls the inodecache back. The callback is defined
    by the program [Notifications].
*/

program Request_notifications {
    /* Request_notifications is available on the coordinator */

    version V1 {
	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [on_inode_update] } */

	bool on_inode_update(hyper, hyper, longstring) = 1;
	/** [on_inode_update(inode, exptime, socket)]: requests to be
	    notified when the [inode] changes or is deleted. Changes
	    cover metadata and data changes. [exptime] is the point in
	    time when the notification will expire. The [socket] is
	    either an Internet socket in "host:port" syntax or the path
	    of a Unix Domain socket.

	    The notification consists of an invocation of the RPC call
	    [Notifications.V1.inode_update].

	    [on_inode_update] returns [true] when the request is
	    successful.
	*/

    } = 1;
} = 0x8000f004;

/** {2:notifications [Notifications]} */

program Notifications {
    version V1 {
	/** {2 [null] } */

	void null(void) = 0;

	/** {2 [inode_update] } */

	void inode_update(hyper, bool) = 1;
	/** [inode_update(inode, expires)]: If [expires] is true, this
	    call just indicates that the notification request ends. If
	    it is false, the [inode] has been changed or is deleted.

	    It is allowed that this RPC is called more often than
	    necessary.

	    Implementations of this call must be fast! If a transaction
	    changes an inode, the commit cannot be finished before this
	    call has been answered.
	*/

    } = 1;
} = 0x8000f005;

#endif