/* $Id: pfs_types.x 536 2011-12-11 22:59:10Z gerd $ -*- c -*- */

/* Types for the RPC interfaces
 *
 * For users of the Plasma_client module: The types defined here are
 * mapped to corresponding Ocaml types, and exported via the
 * Plasma_rpcapi_aux module.
 *
 * Within the server, however, the mappings of Pfs_rpcapi_aux are used.
 * (These mappings differ in some minor points from the ones used for
 * the client.)
 */
/* Include guard. The two directives must be on separate lines: anything
   following the macro name on the #ifndef line is not processed as a
   directive, so a fused "#define" would never define PFS_TYPES_X. */
#ifndef PFS_TYPES_X
#define PFS_TYPES_X
/* longstring: A string up to 4G length */
typedef string longstring<>;
/* longstrings: An array of longstrings */
typedef longstring longstrings<>;
/* longstring_opt: A longstring option */
typedef longstring *longstring_opt;
/* mediumstring: a string with a declared maximum length of 4096 */
typedef string mediumstring<4096>;
/* mediumstrings: an array of at most 4096 mediumstring elements */
typedef mediumstring mediumstrings<4096>;
/* hypers: An array of hypers */
typedef hyper hypers<>;
/* trans_id: transaction IDs can be used to run several transactions
   over the same TCP connection */
typedef hyper trans_id;
/* ug: Users and groups are given by name */
struct ug {
    mediumstring user;
    mediumstring group;
};
 /* time
  *
  * tsecs and tnsecs must be non-negative; tnsecs < 1E9. In the filesystem
  * procedure update_inodeinfo a negative tsecs is interpreted as "set the
  * time to the current server time".
  */
struct time {
    hyper tsecs;  /* Seconds since the epoch... */
    int   tnsecs; /* plus these nanoseconds */
};
/* time_opt: an optional time struct */
typedef time *time_opt;
/* ftype_enum: File types */
enum ftype_enum {
    FTYPE_REGULAR = 0,
    FTYPE_DIRECTORY = 1,
    FTYPE_SYMLINK = 2
};
 /* ftype: File types as union. Both the FTYPE_REGULAR arm and the
  * default arm are void, so only the ftype_enum discriminant is carried.
  */
union ftype switch(ftype_enum d) {
  case FTYPE_REGULAR:
    void;
  default:
    void;
};
 /* ticket
  *
  * A ticket is handed out for blocks or block ranges, and permits
  * read or write access on a datanode. The ticket is valid for a
  * single datanode only, and only for the blocks range_start to
  * range_start+range_length-1. The validity of the ticket is further
  * restricted: It is revoked when the current transaction ends. Also,
  * it is revoked when the time safetrans_tmo is reached.
  *
  * The safetrans_vfy is a cryptographically computed signature.
  */
struct ticket {
    hyper      range_start;   /* First block */
    hyper      range_length;  /* number of blocks */
    hyper      safetrans_id;  /* safetrans ticket, first part */
    hyper      safetrans_tmo; /* safetrans ticket, timeout */
    hyper      safetrans_vfy; /* safetrans ticket, second part */
    bool       read_perm;     /* whether read permission is granted */
    bool       write_perm;    /* whether write permission is granted */
};
/* safetrans_id identifies the datanode transaction. safetrans_tmo
 * is the point in time when the access times out. After that
 * the data nodes will not accept access to the blocks any longer.
 * The verifier safetrans_vfy is a hash value built from the
 * other information, and is used by the data
 * node to check that only accessible blocks are written:
 *
 *   safetrans_vfy=extract_64_bits(MD5(safetrans_id ^ "/" ^
 *       safetrans_secret ^ "/" ^ range_start ^ "/" ^ range_length ^ "/" ^
 *       read_perm ^ "/" ^ write_perm))
 *
 * (Numbers converted to string via Int64.to_string, and booleans via
 * string_of_bool.)
 *
 * Usually, the safetrans feature is only used for securing block writes.
 * The protocol would also allow it to use it for reads, though, and
 * compatible clients should assume this.
 */
/* blockinfo
 *
 * blockinfo says where the n-th block of a file is stored on a datanode.
 * The number n is called the block index (starting at 0).
 * The datanode location is given by the identity of the datanode,
 * and the block number of the datanode. Block numbers count from 0
 * to s-1 when s is the number of blocks a datanode stores.
 *
 * In order to get some compression, adjacent blocks can share the
 * same blockinfo. In this case, the index and block number in
 * blockinfo refer to the first block of a range, and the length
 * field denotes how long this range is. This method of compression is
 * only used when all the other fields of the blocks of the range are
 * identical.
 *
 * In blockinfo there is also the information to which machine the
 * identity of the datanode is assigned, and whether the machine is
 * alive. This is purely informational, and is intended to ease the
 * implementation of clients.
 *
 * Checksums are not yet implemented.
 *
 * The sequence number of the inode is increased whenever new data
 * is written. It is also included in blockinfo to simplify the
 * implementation of caches.
 */
struct blockinfo {
    hyper      index;         /* block index */
    mediumstring node;          /* datanode server as "host:port" ("" if not known) */
    mediumstring identity;      /* datanode server as identity string */
    hyper      block;         /* block number on this node */
    hyper      length;        /* for how many blocks this info is valid */
    bool       node_alive;    /* informational: whether the node is alive */
    mediumstring *checksum;     /* optional checksum */
    hyper      inode_seqno;   /* current seqno of the inode */
    bool       inode_committed; /* whether [inode_seqno] is a committed version */
    ticket     ticket;        /* the access ticket */
};
 /* blocklist
  *
  * NOTE(review): part of this description was lost when the file was
  * extracted; it should be restored from the upstream pfs_types.x. The
  * surviving fragments are kept as found:
  *
  *   [...] blockinfo structs for a block index say that the
  *   datanode is down (a broken file) [...]
  *
  *   [...] inode_seqno and inode_committed fields have all the same
  *   values. This is not broken down per block (it would be possible
  *   that these values "remember" the sequence number when the block
  *   was first committed, resulting in finer granularity of the
  *   information.)
  */
typedef blockinfo blocklist<>;
/* inodeinfo
 *
 * inodeinfo is what is stored for an inode. Documentation is
 * inline below. Note that inodeinfo structs may be passed from
 * the server to the client, and from the client to the server.
 * In the latter case, the client may not know all fields, or
 * may use special values in fields.
 *
 * NOTE(review): some of the per-field descriptions below lost their
 * opening words during extraction (marked "[...]"), and a few fields
 * have no surviving description at all. Restore them from the upstream
 * pfs_types.x rather than guessing.
 */
struct inodeinfo {
    ftype filetype;

    ug    usergroup;
    /* user and group can be passed in as empty strings to set the owner
       to the identity of the client. The empty strings are then replaced
       with the real user and group. When reading inodeinfo such strings
       can never be returned. */

    int   mode;

    hyper eof;
    /* [...] eof value is seen as a convention only. The server never
       automatically changes it when blocks are allocated or freed.
       This means eof can be set to a position before the last
       block or after the last block. It is just the interpretation
       of the user to use this number as eof position.
       Conventionally, eof is only meaningful for regular files. */

    time  mtime;
    time  ctime;
    /* [...] mtime of the directory. See the documentation for time how
       clients can request that the server fills in its own current
       time. */

    int   replication;
    /* Replication is only meaningful for regular files. */

    hyper blocklimit;
    /* [...] blocklimit on are not allocated. This
       field cannot be set by clients - the field value is ignored.
       Note that this is totally unrelated to eof which can be set to
       any value independent on how many blocks are allocated.
       Also, there may be holes in the file before blocklimit. */

    mediumstring field1;

    hyper seqno;
    /* [...] seqno values are only valid within the transaction.
       It is generally possible that seqno is set to values that were
       already generated for previous aborted transactions.
       The seqno makes it possible to
       easily check for any file modification. This field is guaranteed
       to change for every data or metadata modification. */

    bool committed;
    /* [...] inodeinfo struct is committed data. If false, the struct
       has been modified by the transaction. This flag gives valuable
       information for deciding whether the struct can be cached or not.
       This field is automatically maintained and cannot be set
       directly. */

    hyper create_verifier;

    bool anonymous;
};
/* entry: one directory entry, pairing a name with an inode */
struct entry {
    mediumstring entry_name;    /* basename of a file in a directory */
    hyper        entry_inode;   /* inode of this file */
};
 /* entries: an array of entry structs */
typedef entry entries<>;
/* fsstat
 *
 * NOTE(review): the per-field descriptions did not survive extraction;
 * see the upstream pfs_types.x for the exact meaning of each counter.
 */
struct fsstat {
    hyper    total_blocks;
    hyper    used_blocks;
    hyper    trans_blocks;

    int      enabled_datanodes;
    int      alive_datanodes;
    mediumstrings dead_datanodes;
};
 /* errno_code: OK plus the error codes; used as the discriminant of
  * the result unions declared with MK_RESULT_TYPE.
  */
enum errno_code {
    OK = 0,
    ENOTRANS = 1,         /* no transaction */
    EFAILEDCOMMIT = 2,    /* general commit error */
    ELONGTRANS = 3,       /* transaction too long */
    EFAILED = 4,          /* general error */
    EPERM = 5,            /* not owner, or op is otherwise not permitted */
    ENOENT = 6,           /* No such file or directory */
    EACCESS = 7,          /* Permission denied */
    EEXIST = 8,           /* File exists */
    EFHIER = 9,           /* File hierarchy violation (e.g. move a directory into its own subdirectory) */
    EINVAL = 10,          /* invalid argument */
    EFBIG = 11,           /* file too big */
    ENOSPC = 12,          /* no space left */
    EROFS = 13,           /* read-only filesystem */
    ENAMETOOLONG = 14,    /* filename too long */
    ECONFLICT = 15,       /* update conflicts with another transaction */
    ECOORD = 16,          /* this is not the coordinator */
    ENONODE = 17,         /* unknown node */
    ETBUSY = 18,          /* transaction is busy (last command not finished) */
    ESTALE = 19,          /* no such inode */
    EIO = 20,             /* datanode error, not enough datanodes */
    ELOOP = 21,           /* looping symlinks */
    ENOTDIR = 22,         /* operation can only be done for directory */
    EISDIR = 23,          /* operation can only be done for non-directory */
    ENOTEMPTY = 24,       /* directory is non-empty but need to be */
    EBADPATH = 25         /* a path component is not a directory (POSIX sees this also as ENOTDIR) */
};
 /* Result types for the Filesystem RPC's. These results are always
  * unions of the possible error codes with the special value OK. For OK,
  * a value of some type is returned as result value, and this type is
  * the second parameter.
  *
  * MK_RESULT_TYPE(name,type) expands to the declaration of a union called
  * "name", switched on an errno_code discriminant: the OK arm carries
  * "type", every other code carries void. The expansion has no trailing
  * semicolon; the caller supplies it.
  */
#define MK_RESULT_TYPE(name,type)        \
  union name switch(errno_code d) {      \
    case OK:                             \
      type;                              \
    default:                             \
      void;                              \
  }
 /* Creates the types: rvoid, rinodeinfo, rblocklist, rfsstat, rint,
  * rhyper, rhypers, rlongstring, rlongstrings, rentries
  */
MK_RESULT_TYPE(rvoid,void);
MK_RESULT_TYPE(rinodeinfo,inodeinfo t);
MK_RESULT_TYPE(rblocklist,blocklist t);
MK_RESULT_TYPE(rfsstat,fsstat t);
MK_RESULT_TYPE(rint,int t);
MK_RESULT_TYPE(rhyper,hyper t);
MK_RESULT_TYPE(rhypers,hypers t);
MK_RESULT_TYPE(rlongstring,longstring t);
MK_RESULT_TYPE(rlongstrings,longstrings t);
MK_RESULT_TYPE(rentries,entries t);
/* ds_info
 *
 * The ds_info struct is the wire representation of
 * Nn_datastores.datastore
 *
 * This is only used for internal purposes!
 */
struct ds_info {
    int        ds_id;
    mediumstring ds_identity;
    hyper      ds_size;
    bool       ds_enabled;
    mediumstring *ds_node;
    bool       ds_alive;
};
/* ds_info_list: an array of ds_info structs */
typedef ds_info ds_info_list<>;
/* dn_info
 *
 * The externally visible information about datanodes. Such structs only
 * describe live datanodes.
 */
struct dn_info {
    mediumstring dn_identity;
    hyper        dn_size;
    mediumstring dn_node;
};
/* dn_info_list: an array of dn_info structs */
typedef dn_info dn_info_list<>;
/* params: an array of param structs, each a name/value pair of
   mediumstrings */
struct param {
    mediumstring name;
    mediumstring value;
};
typedef param params<>;
/* Revision numbers have the format:
   YYYYMMDDHHMMSSUUUUUU:<random hex digits>
   It is meaningful to sort revision numbers.
*/
#ifdef SERVER_CONTEXT
 /* readdata in server context */
typedef string readdata<>;
/* writedata in server context. (A managed string is represented
   differently in Ocamlnet's language mapping layer.) */
typedef _managed string writedata<>;
/* announcement in server context. Each field is described by the
   comment that follows it. */
struct announcement {
    mediumstring     ann_clustername;
    /* clustername */
    mediumstring     ann_sender;
    /* sender host:port */
    mediumstrings    ann_eligible;
    /* list of hosts that are eligible (host:port syntax) */
    mediumstring     ann_revision;
    /* the revision number of the sender */
    mediumstring     ann_rank;
    /* configured rank */
    hyper            ann_random[2];
    /* random numbers for self-identification */
};
/* ann_enum: the three discriminant values used by the ann_result union. */
enum ann_enum {
    ANN_REJECT = 0, ANN_ACCEPT = 1, ANN_SELF = 2
};
/* ann_result: union switched on ann_enum. Both the ANN_REJECT arm and
   the default arm are void, so no payload accompanies the discriminant. */
union ann_result switch(ann_enum d) {
case ANN_REJECT:
    void;
default:
    void;
};
 /* enable_ticket: For Datanode_ctrl.safetrans */
struct enable_ticket {
    hyper st_id;
    hyper st_tmo;
    hyper st_secret;
};
/* enable_tickets: an array of enable_ticket structs */
typedef enable_ticket enable_tickets<>;
#else
 /* readdata in client context */
typedef _managed string readdata<>;
/* writedata in client context */
typedef _managed string writedata<>;
/* The #endif must start its own line to be recognised as a directive;
   it closes the "#ifdef SERVER_CONTEXT ... #else" conditional. */
#endif
/* dn_channel_enum: More methods may be defined in the future. */
enum dn_channel_enum {
    DNCH_RPC = 0,   /* the data is embedded into the RPC channel */
    DNCH_SHM = 1    /* the data is exchanged via a POSIX shm object */
};
 /* dn_channel_shm_obj */
struct dn_channel_shm_obj {
    mediumstring shm_path;     /* must be a path for POSIX shm */
    hyper        shm_offset;   /* the offset to the start in the file. */
    int          shm_length;   /* the length of the object */
};
 /* dn_channel_rd_req: union over dn_channel_enum. DNCH_RPC carries no
  * payload; DNCH_SHM carries a dn_channel_shm_obj.
  */
union dn_channel_rd_req switch (dn_channel_enum d) {
case DNCH_RPC:
    void;
case DNCH_SHM:
    dn_channel_shm_obj ch;
};
 /* dn_channel_rd_data: union over dn_channel_enum. DNCH_RPC carries the
  * readdata payload; DNCH_SHM carries no payload.
  */
union dn_channel_rd_data switch (dn_channel_enum d) {
case DNCH_RPC:
    readdata data;
case DNCH_SHM:
    void;
};
 /* dn_channel_wr_data: union over dn_channel_enum. DNCH_RPC carries the
  * writedata payload; DNCH_SHM carries a dn_channel_shm_obj.
  */
union dn_channel_wr_data switch (dn_channel_enum d) {
case DNCH_RPC:
    writedata data;
case DNCH_SHM:
    dn_channel_shm_obj ch;
};
#endif
