diff options
Diffstat (limited to 'fs/orangefs')
-rw-r--r-- | fs/orangefs/downcall.h | 138 | ||||
-rw-r--r-- | fs/orangefs/protocol.h | 681 | ||||
-rw-r--r-- | fs/orangefs/pvfs2-bufmap.h | 76 | ||||
-rw-r--r-- | fs/orangefs/pvfs2-debug.h | 290 | ||||
-rw-r--r-- | fs/orangefs/pvfs2-debugfs.h | 3 | ||||
-rw-r--r-- | fs/orangefs/pvfs2-dev-proto.h | 102 | ||||
-rw-r--r-- | fs/orangefs/pvfs2-kernel.h | 864 | ||||
-rw-r--r-- | fs/orangefs/pvfs2-sysfs.h | 2 | ||||
-rw-r--r-- | fs/orangefs/upcall.h | 255 |
9 files changed, 2411 insertions, 0 deletions
diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h
new file mode 100644
index 000000000000..a79129f875f3
--- /dev/null
+++ b/fs/orangefs/downcall.h
@@ -0,0 +1,138 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * Definitions of downcalls used in Linux kernel module.
+ *
+ * Each struct below is the payload of one downcall response type.
+ */
+
+#ifndef __DOWNCALL_H
+#define __DOWNCALL_H
+
+/*
+ * Sanitized the device-client core interaction
+ * for clean 32-64 bit usage
+ *
+ * NOTE(review): the explicit __padN members in the structs below
+ * presumably exist to keep the layout identical for 32 and 64 bit
+ * builds -- confirm before adding or reordering fields.
+ */
+
+/* io response: a single 64 bit amount */
+struct pvfs2_io_response {
+	__s64 amt_complete;
+};
+
+/* iox response: same single field as the io response */
+struct pvfs2_iox_response {
+	__s64 amt_complete;
+};
+
+/*
+ * lookup, create, symlink and mkdir responses each carry nothing
+ * but an object reference.
+ */
+struct pvfs2_lookup_response {
+	struct pvfs2_object_kref refn;
+};
+
+struct pvfs2_create_response {
+	struct pvfs2_object_kref refn;
+};
+
+struct pvfs2_symlink_response {
+	struct pvfs2_object_kref refn;
+};
+
+/*
+ * getattr response: the attribute block plus an inline buffer of
+ * PVFS2_NAME_LEN bytes for a link target.
+ */
+struct pvfs2_getattr_response {
+	struct PVFS_sys_attr_s attributes;
+	char link_target[PVFS2_NAME_LEN];
+};
+
+struct pvfs2_mkdir_response {
+	struct pvfs2_object_kref refn;
+};
+
+/*
+ * duplication of some system interface structures so that I don't have
+ * to allocate extra memory
+ */
+struct pvfs2_dirent {
+	char *d_name;
+	int d_length;	/* presumably the length of d_name -- TODO confirm */
+	struct pvfs2_khandle khandle;
+};
+
+/* statfs response: five 64 bit counters */
+struct pvfs2_statfs_response {
+	__s64 block_size;
+	__s64 blocks_total;
+	__s64 blocks_avail;
+	__s64 files_total;
+	__s64 files_avail;
+};
+
+/* fs_mount response: two 32 bit ids followed by the root handle */
+struct pvfs2_fs_mount_response {
+	__s32 fs_id;
+	__s32 id;
+	struct pvfs2_khandle root_khandle;
+};
+
+/* the getxattr response is the attribute value */
+struct pvfs2_getxattr_response {
+	__s32 val_sz;	/* presumably the number of valid bytes in val */
+	__s32 __pad1;
+	char val[PVFS_MAX_XATTR_VALUELEN];
+};
+
+/* the listxattr response is an array of attribute names */
+struct pvfs2_listxattr_response {
+	__s32 returned_count;
+	__s32 __pad1;
+	__u64 token;
+	/* room for PVFS_MAX_XATTR_LISTLEN names of PVFS_MAX_XATTR_NAMELEN bytes */
+	char key[PVFS_MAX_XATTR_LISTLEN * PVFS_MAX_XATTR_NAMELEN];
+	__s32 keylen;
+	__s32 __pad2;
+	/* one entry per possible name in key[] */
+	__s32 lengths[PVFS_MAX_XATTR_LISTLEN];
+};
+
+/* param response: a single 64 bit value */
+struct pvfs2_param_response {
+	__s64 value;
+};
+
+/* size in bytes of the buffer in struct pvfs2_perf_count_response */
+#define PERF_COUNT_BUF_SIZE 4096
+/* perf count response: an opaque text buffer of fixed size */
+struct pvfs2_perf_count_response {
+	char buffer[PERF_COUNT_BUF_SIZE];
+};
+
+/* size in bytes of the fs_key buffer below */
+#define FS_KEY_BUF_SIZE 4096
+struct pvfs2_fs_key_response {
+	__s32 fs_keylen;
+	__s32 __pad1;
+	char fs_key[FS_KEY_BUF_SIZE];
+};
+
+/*
+ * A downcall: a type/status header, an optional trailer and a union
+ * holding the response payload for the given type.
+ */
+struct pvfs2_downcall_s {
+	__s32 type;
+	__s32 status;
+	/* currently trailer is used only by readdir */
+	__s64 trailer_size;
+	char *trailer_buf;
+
+	union {
+		struct pvfs2_io_response io;
+		struct pvfs2_iox_response iox;
+		struct pvfs2_lookup_response lookup;
+		struct pvfs2_create_response create;
+		struct pvfs2_symlink_response sym;
+		struct pvfs2_getattr_response getattr;
+		struct pvfs2_mkdir_response mkdir;
+		struct pvfs2_statfs_response statfs;
+		struct pvfs2_fs_mount_response fs_mount;
+		struct pvfs2_getxattr_response getxattr;
+		struct pvfs2_listxattr_response listxattr;
+		struct pvfs2_param_response param;
+		struct pvfs2_perf_count_response perf_count;
+		struct pvfs2_fs_key_response fs_key;
+	} resp;
+};
+
+/* readdir response: header fields followed by an array of dirents */
+struct pvfs2_readdir_response_s {
+	__u64 token;
+	__u64 directory_version;
+	__u32 __pad2;
+	__u32 pvfs_dirent_outcount;
+	struct pvfs2_dirent *dirent_array;
+};
+
+#endif /* __DOWNCALL_H */
diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h
new file mode 100644
index 000000000000..2fb3a63ae9ab
--- /dev/null
+++ b/fs/orangefs/protocol.h
@@ -0,0 +1,681 @@
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+extern struct client_debug_mask *cdm_array;
+extern char *debug_help_string;
+extern int help_string_initialized;
+extern struct dentry *debug_dir;
+extern struct dentry *help_file_dentry;
+extern struct dentry *client_debug_dentry;
+extern const struct file_operations debug_help_fops;
+extern int client_all_index;
+extern int client_verbose_index;
+extern int cdm_element_count;
+#define DEBUG_HELP_STRING_SIZE 4096
+#define HELP_STRING_UNINITIALIZED \
+	"Client Debug Keywords are unknown until the first time\n" \
+	"the client is started after boot.\n"
+#define ORANGEFS_KMOD_DEBUG_HELP_FILE "debug-help"
+#define ORANGEFS_KMOD_DEBUG_FILE "kernel-debug"
+#define ORANGEFS_CLIENT_DEBUG_FILE "client-debug"
+#define PVFS2_VERBOSE "verbose"
+#define PVFS2_ALL "all"
+
+/* pvfs2-config.h ***********************************************************/
+#define PVFS2_VERSION_MAJOR 2
+#define PVFS2_VERSION_MINOR 9
+#define PVFS2_VERSION_SUB 0
+
+/* khandle stuff ***********************************************************/
+
+/*
+ * The 2.9 core will put 64 bit handles in here like this:
+ *    1234 0000 0000 5678
+ * The 3.0 and beyond cores will put 128 bit handles in here like this:
+ *    1234 5678 90AB CDEF
+ * The kernel module will always use the first four bytes and
+ * the last four bytes as an inum.
+ */
+struct pvfs2_khandle {
+	unsigned char u[16];
+}  __aligned(8);
+
+/*
+ * kernel version of an object ref.
+ */
+struct pvfs2_object_kref {
+	struct pvfs2_khandle khandle;
+	__s32 fs_id;
+	__s32 __pad1;
+};
+
+/*
+ * Compare two khandles byte by byte, starting at the highest address
+ * (u[15]) and working down to u[0]; this ordering assumes the handle
+ * is stored little endian.
+ *
+ * Returns 1 if kh1 > kh2, -1 if kh1 < kh2 and 0 if they are equal.
+ */
+static inline int PVFS_khandle_cmp(const struct pvfs2_khandle *kh1,
+				   const struct pvfs2_khandle *kh2)
+{
+	int i;
+
+	for (i = 15; i >= 0; i--) {
+		if (kh1->u[i] > kh2->u[i])
+			return 1;
+		if (kh1->u[i] < kh2->u[i])
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy a khandle to a field of arbitrary size.
+ *
+ * The destination is zeroed over its full @size, then at most
+ * sizeof(kh->u) bytes of the handle are copied in.  A @size of zero
+ * or less is a no-op: passing a negative int on to memset() would be
+ * converted to a huge length and overrun the destination.
+ */
+static inline void PVFS_khandle_to(const struct pvfs2_khandle *kh,
+				   void *p, int size)
+{
+	size_t len;
+
+	if (size <= 0)
+		return;
+
+	len = (size_t)size < sizeof(kh->u) ? (size_t)size : sizeof(kh->u);
+
+	memset(p, 0, size);
+	memcpy(p, kh->u, len);
+}
+
+/*
+ * Copy a khandle from a field of arbitrary size.
+ *
+ * The handle is always zeroed first; at most sizeof(kh->u) bytes are
+ * then taken from @p.  With a @size of zero or less the handle is
+ * left all zero and @p is not read.
+ */
+static inline void PVFS_khandle_from(struct pvfs2_khandle *kh,
+				     const void *p, int size)
+{
+	size_t len;
+
+	memset(kh, 0, sizeof(*kh));
+
+	if (size <= 0)
+		return;
+
+	len = (size_t)size < sizeof(kh->u) ? (size_t)size : sizeof(kh->u);
+
+	memcpy(kh->u, p, len);
+}
+
+/* pvfs2-types.h ************************************************************/
+typedef __u32 PVFS_uid;
+typedef __u32 PVFS_gid;
+typedef __s32 PVFS_fs_id;
+typedef __u32 PVFS_permissions;
+typedef __u64 PVFS_time;
+typedef __s64 PVFS_size;
+typedef __u64 PVFS_flags;
+typedef __u64 PVFS_ds_position;
+typedef __s32 PVFS_error;
+typedef __s64 PVFS_offset;
+
+#define PVFS2_SUPER_MAGIC 0x20030528
+
+/*
+ * Layout of a PVFS error value:
+ *   bit 30     PVFS_ERROR_BIT, set on every PVFS error
+ *   bit 29     PVFS_NON_ERRNO_ERROR_BIT, set on pvfs2 specific errors
+ *   bits 7-9   error class (PVFS_ERROR_BMI ... PVFS_ERROR_DEV)
+ *   bits 0-6   error code
+ */
+#define PVFS_ERROR_BIT (1 << 30)
+#define PVFS_NON_ERRNO_ERROR_BIT (1 << 29)
+#define IS_PVFS_ERROR(__error) ((__error)&(PVFS_ERROR_BIT))
+#define IS_PVFS_NON_ERRNO_ERROR(__error)  \
+(((__error)&(PVFS_NON_ERRNO_ERROR_BIT)) && IS_PVFS_ERROR(__error))
+#define PVFS_ERROR_TO_ERRNO(__error) PVFS_get_errno_mapping(__error)
+
+/* 7 bits are used for the errno mapped error codes */
+#define PVFS_ERROR_CODE(__error) \
+((__error) & (__s32)(0x7f|PVFS_ERROR_BIT))
+#define PVFS_ERROR_CLASS(__error) \
+((__error) & ~((__s32)(0x7f|PVFS_ERROR_BIT|PVFS_NON_ERRNO_ERROR_BIT)))
+#define PVFS_NON_ERRNO_ERROR_CODE(__error) \
+((__error) & (__s32)(127|PVFS_ERROR_BIT|PVFS_NON_ERRNO_ERROR_BIT))
+
+/* PVFS2 error codes, courtesy of asm/errno.h */
+#define PVFS_EPERM            E(1)	/* Operation not permitted */
+#define PVFS_ENOENT           E(2)	/* No such file or directory */
+#define PVFS_EINTR            E(3)	/* Interrupted system call */
+#define PVFS_EIO              E(4)	/* I/O error */
+#define PVFS_ENXIO            E(5)	/* No such device or address */
+#define PVFS_EBADF            E(6)	/* Bad file number */
+#define PVFS_EAGAIN           E(7)	/* Try again */
+#define PVFS_ENOMEM           E(8)	/* Out of memory */
+#define PVFS_EFAULT           E(9)	/* Bad address */
+#define PVFS_EBUSY           E(10)	/* Device or resource busy */
+#define PVFS_EEXIST          E(11)	/* File exists */
+#define PVFS_ENODEV          E(12)	/* No such device */
+#define PVFS_ENOTDIR         E(13)	/* Not a directory */
+#define PVFS_EISDIR          E(14)	/* Is a directory */
+#define PVFS_EINVAL          E(15)	/* Invalid argument */
+#define PVFS_EMFILE          E(16)	/* Too many open files */
+#define PVFS_EFBIG           E(17)	/* File too large */
+#define PVFS_ENOSPC          E(18)	/* No space left on device */
+#define PVFS_EROFS           E(19)	/* Read-only file system */
+#define PVFS_EMLINK          E(20)	/* Too many links */
+#define PVFS_EPIPE           E(21)	/* Broken pipe */
+#define PVFS_EDEADLK         E(22)	/* Resource deadlock would occur */
+#define PVFS_ENAMETOOLONG    E(23)	/* File name too long */
+#define PVFS_ENOLCK          E(24)	/* No record locks available */
+#define PVFS_ENOSYS          E(25)	/* Function not implemented */
+#define PVFS_ENOTEMPTY       E(26)	/* Directory not empty */
+					/*
+#define PVFS_ELOOP           E(27)	 * Too many symbolic links encountered
+					 */
+#define PVFS_EWOULDBLOCK     E(28)	/* Operation would block */
+#define PVFS_ENOMSG          E(29)	/* No message of desired type */
+#define PVFS_EUNATCH         E(30)	/* Protocol driver not attached */
+#define PVFS_EBADR           E(31)	/* Invalid request descriptor */
+#define PVFS_EDEADLOCK       E(32)
+#define PVFS_ENODATA         E(33)	/* No data available */
+#define PVFS_ETIME           E(34)	/* Timer expired */
+#define PVFS_ENONET          E(35)	/* Machine is not on the network */
+#define PVFS_EREMOTE         E(36)	/* Object is remote */
+#define PVFS_ECOMM           E(37)	/* Communication error on send */
+#define PVFS_EPROTO          E(38)	/* Protocol error */
+#define PVFS_EBADMSG         E(39)	/* Not a data message */
+					/*
+#define PVFS_EOVERFLOW       E(40)	 * Value too large for defined data
+					 * type
+					 */
+					/*
+#define PVFS_ERESTART        E(41)	 * Interrupted system call should be
+					 * restarted
+					 */
+#define PVFS_EMSGSIZE        E(42)	/* Message too long */
+#define PVFS_EPROTOTYPE      E(43)	/* Protocol wrong type for socket */
+#define PVFS_ENOPROTOOPT     E(44)	/* Protocol not available */
+#define PVFS_EPROTONOSUPPORT E(45)	/* Protocol not supported */
+					/*
+#define PVFS_EOPNOTSUPP      E(46)	 * Operation not supported on transport
+					 * endpoint
+					 */
+#define PVFS_EADDRINUSE      E(47)	/* Address already in use */
+#define PVFS_EADDRNOTAVAIL   E(48)	/* Cannot assign requested address */
+#define PVFS_ENETDOWN        E(49)	/* Network is down */
+#define PVFS_ENETUNREACH     E(50)	/* Network is unreachable */
+					/*
+#define PVFS_ENETRESET       E(51)	 * Network dropped connection because
+					 * of reset
+					 */
+#define PVFS_ENOBUFS         E(52)	/* No buffer space available */
+#define PVFS_ETIMEDOUT       E(53)	/* Connection timed out */
+#define PVFS_ECONNREFUSED    E(54)	/* Connection refused */
+#define PVFS_EHOSTDOWN       E(55)	/* Host is down */
+#define PVFS_EHOSTUNREACH    E(56)	/* No route to host */
+#define PVFS_EALREADY        E(57)	/* Operation already in progress */
+#define PVFS_EACCES          E(58)	/* Access not allowed */
+#define PVFS_ECONNRESET      E(59)	/* Connection reset by peer */
+#define PVFS_ERANGE          E(60)	/* Math out of range or buf too small */
+
+/***************** non-errno/pvfs2 specific error codes *****************/
+#define PVFS_ECANCEL    (1|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+#define PVFS_EDEVINIT   (2|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+#define PVFS_EDETAIL    (3|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+#define PVFS_EHOSTNTFD  (4|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+#define PVFS_EADDRNTFD  (5|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+#define PVFS_ENORECVR   (6|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+#define PVFS_ETRYAGAIN  (7|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+#define PVFS_ENOTPVFS   (8|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+#define PVFS_ESECURITY  (9|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
+
+/*
+ * NOTE: PLEASE DO NOT ARBITRARILY ADD NEW ERRNO ERROR CODES!
+ *
+ * IF YOU CHOOSE TO ADD A NEW ERROR CODE (DESPITE OUR PLEA), YOU ALSO
+ * NEED TO INCREMENT PVFS_ERRNO_MAX (BELOW) AND ADD A MAPPING TO A
+ * UNIX ERRNO VALUE IN THE MACROS BELOW (USED IN
+ * src/common/misc/errno-mapping.c and the kernel module)
+ */
+#define PVFS_ERRNO_MAX          61
+
+#define PVFS_ERROR_BMI    (1 << 7)	/* BMI-specific error */
+#define PVFS_ERROR_TROVE  (2 << 7)	/* Trove-specific error */
+#define PVFS_ERROR_FLOW   (3 << 7)
+#define PVFS_ERROR_SM     (4 << 7)	/* state machine specific error */
+#define PVFS_ERROR_SCHED  (5 << 7)
+#define PVFS_ERROR_CLIENT (6 << 7)
+#define PVFS_ERROR_DEV    (7 << 7)	/* device file interaction */
+
+#define PVFS_ERROR_CLASS_BITS	\
+	(PVFS_ERROR_BMI    |	\
+	 PVFS_ERROR_TROVE  |	\
+	 PVFS_ERROR_FLOW   |	\
+	 PVFS_ERROR_SM     |	\
+	 PVFS_ERROR_SCHED  |	\
+	 PVFS_ERROR_CLIENT |	\
+	 PVFS_ERROR_DEV)
+
+/*
+ * Defines the three lookup tables used by the mapping functions:
+ *   PINT_errno_mapping[]              PVFS error code -> unix errno,
+ *                                     PVFS_ERRNO_MAX + 1 entries; the
+ *                                     first and last slots hold 0
+ *   PINT_non_errno_strerror_mapping[] message for each pvfs2 specific
+ *                                     error code
+ *   PINT_non_errno_mapping[]          pvfs2 specific error values
+ * The expansion has no trailing semicolon; the user supplies it.
+ */
+#define DECLARE_ERRNO_MAPPING()                       \
+__s32 PINT_errno_mapping[PVFS_ERRNO_MAX + 1] = { \
+	0,     /* leave this one empty */   \
+	EPERM, /* 1 */                      \
+	ENOENT,                             \
+	EINTR,                              \
+	EIO,                                \
+	ENXIO,                              \
+	EBADF,                              \
+	EAGAIN,                             \
+	ENOMEM,                             \
+	EFAULT,                             \
+	EBUSY, /* 10 */                     \
+	EEXIST,                             \
+	ENODEV,                             \
+	ENOTDIR,                            \
+	EISDIR,                             \
+	EINVAL,                             \
+	EMFILE,                             \
+	EFBIG,                              \
+	ENOSPC,                             \
+	EROFS,                              \
+	EMLINK, /* 20 */                    \
+	EPIPE,                              \
+	EDEADLK,                            \
+	ENAMETOOLONG,                       \
+	ENOLCK,                             \
+	ENOSYS,                             \
+	ENOTEMPTY,                          \
+	ELOOP,                              \
+	EWOULDBLOCK,                        \
+	ENOMSG,                             \
+	EUNATCH, /* 30 */                   \
+	EBADR,                              \
+	EDEADLOCK,                          \
+	ENODATA,                            \
+	ETIME,                              \
+	ENONET,                             \
+	EREMOTE,                            \
+	ECOMM,                              \
+	EPROTO,                             \
+	EBADMSG,                            \
+	EOVERFLOW, /* 40 */                 \
+	ERESTART,                           \
+	EMSGSIZE,                           \
+	EPROTOTYPE,                         \
+	ENOPROTOOPT,                        \
+	EPROTONOSUPPORT,                    \
+	EOPNOTSUPP,                         \
+	EADDRINUSE,                         \
+	EADDRNOTAVAIL,                      \
+	ENETDOWN,                           \
+	ENETUNREACH, /* 50 */               \
+	ENETRESET,                          \
+	ENOBUFS,                            \
+	ETIMEDOUT,                          \
+	ECONNREFUSED,                       \
+	EHOSTDOWN,                          \
+	EHOSTUNREACH,                       \
+	EALREADY,                           \
+	EACCES,                             \
+	ECONNRESET,   /* 59 */              \
+	ERANGE,                             \
+	0         /* PVFS_ERRNO_MAX */      \
+};                                        \
+const char *PINT_non_errno_strerror_mapping[] = {     \
+	"Success",               /* 0 */                    \
+	"Operation cancelled (possibly due to timeout)",    \
+	"Device initialization failed",                     \
+	"Detailed per-server errors are available",         \
+	"Unknown host",                                     \
+	"No address associated with name",                  \
+	"Unknown server error",                             \
+	"Host name lookup failure",                         \
+	"Path contains non-PVFS elements",                  \
+	"Security error",                                   \
+};                                                    \
+__s32 PINT_non_errno_mapping[] = {               \
+	0,     /* leave this one empty */   \
+	PVFS_ECANCEL,   /* 1 */             \
+	PVFS_EDEVINIT,  /* 2 */             \
+	PVFS_EDETAIL,   /* 3 */             \
+	PVFS_EHOSTNTFD, /* 4 */             \
+	PVFS_EADDRNTFD, /* 5 */             \
+	PVFS_ENORECVR,  /* 6 */             \
+	PVFS_ETRYAGAIN, /* 7 */             \
+	PVFS_ENOTPVFS,  /* 8 */             \
+	PVFS_ESECURITY, /* 9 */             \
+}
+
+/*
+ * NOTE: PVFS_get_errno_mapping will convert a PVFS_ERROR_CODE to an
+ * errno value.  If the error code is a pvfs2 specific error code
+ * (i.e. a PVFS_NON_ERRNO_ERROR_CODE), PVFS_get_errno_mapping will
+ * return an index into the PINT_non_errno_strerror_mapping array which
+ * can be used for getting the pvfs2 specific strerror message given
+ * the error code.  if the value is not a recognized error code, the
+ * passed in value will be returned unchanged.
+ *
+ * The low 7 bits of an error can encode 0..127 but PINT_errno_mapping
+ * only has PVFS_ERRNO_MAX + 1 entries, so a code beyond
+ * PVFS_ERRNO_MAX is treated as "not recognized" and returned
+ * unchanged instead of being used as an array index.
+ *
+ * PVFS_errno_to_error is the reverse lookup: it returns the PVFS
+ * error value for a unix errno, or err unchanged when there is no
+ * mapping.  The scan starts at slot 1 because slot 0 holds 0 and
+ * would otherwise turn err == 0 (success) into PVFS_ERROR_BIT.
+ */
+#define DECLARE_ERRNO_MAPPING_AND_FN()					\
+extern __s32 PINT_errno_mapping[];					\
+extern __s32 PINT_non_errno_mapping[];				\
+extern const char *PINT_non_errno_strerror_mapping[];			\
+__s32 PVFS_get_errno_mapping(__s32 error)				\
+{									\
+	__s32 ret = error, mask = 0, code = 0;				\
+	__s32 magnitude = ((error > -1) ? error : -error);		\
+	if (IS_PVFS_NON_ERRNO_ERROR(magnitude)) {			\
+		mask = (PVFS_NON_ERRNO_ERROR_BIT |			\
+			PVFS_ERROR_BIT |				\
+			PVFS_ERROR_CLASS_BITS);				\
+		ret = PVFS_NON_ERRNO_ERROR_CODE(magnitude & ~mask);	\
+	}								\
+	else if (IS_PVFS_ERROR(magnitude)) {				\
+		mask = (PVFS_ERROR_BIT |				\
+			PVFS_ERROR_CLASS_BITS);				\
+		code = PVFS_ERROR_CODE(magnitude & ~mask);		\
+		/* never index past the end of the table */		\
+		if (code <= PVFS_ERRNO_MAX)				\
+			ret = PINT_errno_mapping[code];			\
+	}								\
+	return ret;							\
+}									\
+__s32 PVFS_errno_to_error(int err)					\
+{									\
+	__s32 e = 1;							\
+									\
+	for (; e < PVFS_ERRNO_MAX; ++e)					\
+		if (PINT_errno_mapping[e] == err)			\
+			return e | PVFS_ERROR_BIT;			\
+									\
+	return err;							\
+}									\
+DECLARE_ERRNO_MAPPING()
+
+/* permission bits */
+#define PVFS_O_EXECUTE (1 << 0)
+#define PVFS_O_WRITE   (1 << 1)
+#define PVFS_O_READ    (1 << 2)
+#define PVFS_G_EXECUTE (1 << 3)
+#define PVFS_G_WRITE   (1 << 4)
+#define PVFS_G_READ    (1 << 5)
+#define PVFS_U_EXECUTE (1 << 6)
+#define PVFS_U_WRITE   (1 << 7)
+#define PVFS_U_READ    (1 << 8)
+/* no PVFS_U_VTX (sticky bit) */
+#define PVFS_G_SGID    (1 << 10)
+#define PVFS_U_SUID    (1 << 11)
+
+/* definition taken from stdint.h; skipped if something already provides it */
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#define PVFS_ITERATE_START    (INT32_MAX - 1)
+#define PVFS_ITERATE_END      (INT32_MAX - 2)
+#define PVFS_READDIR_START PVFS_ITERATE_START
+#define PVFS_READDIR_END   PVFS_ITERATE_END
+#define PVFS_IMMUTABLE_FL FS_IMMUTABLE_FL
+#define PVFS_APPEND_FL    FS_APPEND_FL
+#define PVFS_NOATIME_FL   FS_NOATIME_FL
+#define PVFS_MIRROR_FL    0x01000000ULL
+#define PVFS_FS_ID_NULL       ((__s32)0)
+
+/* bits selecting individual attributes in an attribute mask */
+#define PVFS_ATTR_SYS_UID                   (1 << 0)
+#define PVFS_ATTR_SYS_GID                   (1 << 1)
+#define PVFS_ATTR_SYS_PERM                  (1 << 2)
+#define PVFS_ATTR_SYS_ATIME                 (1 << 3)
+#define PVFS_ATTR_SYS_CTIME                 (1 << 4)
+#define PVFS_ATTR_SYS_MTIME                 (1 << 5)
+#define PVFS_ATTR_SYS_TYPE                  (1 << 6)
+#define PVFS_ATTR_SYS_ATIME_SET             (1 << 7)
+#define PVFS_ATTR_SYS_MTIME_SET             (1 << 8)
+#define PVFS_ATTR_SYS_SIZE                  (1 << 20)
+#define PVFS_ATTR_SYS_LNK_TARGET            (1 << 24)
+#define PVFS_ATTR_SYS_DFILE_COUNT           (1 << 25)
+#define PVFS_ATTR_SYS_DIRENT_COUNT          (1 << 26)
+#define PVFS_ATTR_SYS_BLKSIZE               (1 << 28)
+#define PVFS_ATTR_SYS_MIRROR_COPIES_COUNT   (1 << 29)
+#define PVFS_ATTR_SYS_COMMON_ALL	\
+	(PVFS_ATTR_SYS_UID	|	\
+	 PVFS_ATTR_SYS_GID	|	\
+	 PVFS_ATTR_SYS_PERM	|	\
+	 PVFS_ATTR_SYS_ATIME	|	\
+	 PVFS_ATTR_SYS_CTIME	|	\
+	 PVFS_ATTR_SYS_MTIME	|	\
+	 PVFS_ATTR_SYS_TYPE)
+
+#define PVFS_ATTR_SYS_ALL_SETABLE \ +(PVFS_ATTR_SYS_COMMON_ALL-PVFS_ATTR_SYS_TYPE) + +#define PVFS_ATTR_SYS_ALL_NOHINT \ + (PVFS_ATTR_SYS_COMMON_ALL | \ + PVFS_ATTR_SYS_SIZE | \ + PVFS_ATTR_SYS_LNK_TARGET | \ + PVFS_ATTR_SYS_DFILE_COUNT | \ + PVFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ + PVFS_ATTR_SYS_DIRENT_COUNT | \ + PVFS_ATTR_SYS_BLKSIZE) +#define PVFS_XATTR_REPLACE 0x2 +#define PVFS_XATTR_CREATE 0x1 +#define PVFS_MAX_SERVER_ADDR_LEN 256 +#define PVFS_NAME_MAX 256 +/* + * max extended attribute name len as imposed by the VFS and exploited for the + * upcall request types. + * NOTE: Please retain them as multiples of 8 even if you wish to change them + * This is *NECESSARY* for supporting 32 bit user-space binaries on a 64-bit + * kernel. Due to implementation within DBPF, this really needs to be + * PVFS_NAME_MAX, which it was the same value as, but no reason to let it + * break if that changes in the future. + */ +#define PVFS_MAX_XATTR_NAMELEN PVFS_NAME_MAX /* Not the same as + * XATTR_NAME_MAX defined + * by <linux/xattr.h> + */ +#define PVFS_MAX_XATTR_VALUELEN 8192 /* Not the same as XATTR_SIZE_MAX + * defined by <linux/xattr.h> + */ +#define PVFS_MAX_XATTR_LISTLEN 16 /* Not the same as XATTR_LIST_MAX + * defined by <linux/xattr.h> + */ +/* + * PVFS I/O operation types, used in both system and server interfaces. + */ +enum PVFS_io_type { + PVFS_IO_READ = 1, + PVFS_IO_WRITE = 2 +}; + +/* + * If this enum is modified the server parameters related to the precreate pool + * batch and low threshold sizes may need to be modified to reflect this + * change. + */ +enum pvfs2_ds_type { + PVFS_TYPE_NONE = 0, + PVFS_TYPE_METAFILE = (1 << 0), + PVFS_TYPE_DATAFILE = (1 << 1), + PVFS_TYPE_DIRECTORY = (1 << 2), + PVFS_TYPE_SYMLINK = (1 << 3), + PVFS_TYPE_DIRDATA = (1 << 4), + PVFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */ +}; + +/* + * PVFS_certificate simply stores a buffer with the buffer size. 
+ * The buffer can be converted to an OpenSSL X509 struct for use. + */ +struct PVFS_certificate { + __u32 buf_size; + unsigned char *buf; +}; + +/* + * A credential identifies a user and is signed by the client/user + * private key. + */ +struct PVFS_credential { + __u32 userid; /* user id */ + __u32 num_groups; /* length of group_array */ + __u32 *group_array; /* groups for which the user is a member */ + char *issuer; /* alias of the issuing server */ + __u64 timeout; /* seconds after epoch to time out */ + __u32 sig_size; /* length of the signature in bytes */ + unsigned char *signature; /* digital signature */ + struct PVFS_certificate certificate; /* user certificate buffer */ +}; +#define extra_size_PVFS_credential (PVFS_REQ_LIMIT_GROUPS * \ + sizeof(__u32) + \ + PVFS_REQ_LIMIT_ISSUER + \ + PVFS_REQ_LIMIT_SIGNATURE + \ + extra_size_PVFS_certificate) + +/* This structure is used by the VFS-client interaction alone */ +struct PVFS_keyval_pair { + char key[PVFS_MAX_XATTR_NAMELEN]; + __s32 key_sz; /* __s32 for portable, fixed-size structures */ + __s32 val_sz; + char val[PVFS_MAX_XATTR_VALUELEN]; +}; + +/* pvfs2-sysint.h ***********************************************************/ +/* Describes attributes for a file, directory, or symlink. 
*/ +struct PVFS_sys_attr_s { + __u32 owner; + __u32 group; + __u32 perms; + __u64 atime; + __u64 mtime; + __u64 ctime; + __s64 size; + + /* NOTE: caller must free if valid */ + char *link_target; + + /* Changed to __s32 so that size of structure does not change */ + __s32 dfile_count; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_servers_initial; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_servers_max; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_split_size; + + __u32 mirror_copies_count; + + /* NOTE: caller must free if valid */ + char *dist_name; + + /* NOTE: caller must free if valid */ + char *dist_params; + + __s64 dirent_count; + enum pvfs2_ds_type objtype; + __u64 flags; + __u32 mask; + __s64 blksize; +}; + +#define PVFS2_LOOKUP_LINK_NO_FOLLOW 0 +#define PVFS2_LOOKUP_LINK_FOLLOW 1 + +/* pint-dev.h ***************************************************************/ + +/* parameter structure used in PVFS_DEV_DEBUG ioctl command */ +struct dev_mask_info_s { + enum { + KERNEL_MASK, + CLIENT_MASK, + } mask_type; + __u64 mask_value; +}; + +struct dev_mask2_info_s { + __u64 mask1_value; + __u64 mask2_value; +}; + +/* pvfs2-util.h *************************************************************/ +#define PVFS_util_min(x1, x2) (((x1) > (x2)) ? 
(x2) : (x1)) +__s32 PVFS_util_translate_mode(int mode); + +/* pvfs2-debug.h ************************************************************/ +#include "pvfs2-debug.h" + +/* pvfs2-internal.h *********************************************************/ +#define llu(x) (unsigned long long)(x) +#define lld(x) (long long)(x) + +/* pint-dev-shared.h ********************************************************/ +#define PVFS_DEV_MAGIC 'k' + +#define PVFS2_READDIR_DEFAULT_DESC_COUNT 5 + +#define DEV_GET_MAGIC 0x1 +#define DEV_GET_MAX_UPSIZE 0x2 +#define DEV_GET_MAX_DOWNSIZE 0x3 +#define DEV_MAP 0x4 +#define DEV_REMOUNT_ALL 0x5 +#define DEV_DEBUG 0x6 +#define DEV_UPSTREAM 0x7 +#define DEV_CLIENT_MASK 0x8 +#define DEV_CLIENT_STRING 0x9 +#define DEV_MAX_NR 0xa + +/* supported ioctls, codes are with respect to user-space */ +enum { + PVFS_DEV_GET_MAGIC = _IOW(PVFS_DEV_MAGIC, DEV_GET_MAGIC, __s32), + PVFS_DEV_GET_MAX_UPSIZE = + _IOW(PVFS_DEV_MAGIC, DEV_GET_MAX_UPSIZE, __s32), + PVFS_DEV_GET_MAX_DOWNSIZE = + _IOW(PVFS_DEV_MAGIC, DEV_GET_MAX_DOWNSIZE, __s32), + PVFS_DEV_MAP = _IO(PVFS_DEV_MAGIC, DEV_MAP), + PVFS_DEV_REMOUNT_ALL = _IO(PVFS_DEV_MAGIC, DEV_REMOUNT_ALL), + PVFS_DEV_DEBUG = _IOR(PVFS_DEV_MAGIC, DEV_DEBUG, __s32), + PVFS_DEV_UPSTREAM = _IOW(PVFS_DEV_MAGIC, DEV_UPSTREAM, int), + PVFS_DEV_CLIENT_MASK = _IOW(PVFS_DEV_MAGIC, + DEV_CLIENT_MASK, + struct dev_mask2_info_s), + PVFS_DEV_CLIENT_STRING = _IOW(PVFS_DEV_MAGIC, + DEV_CLIENT_STRING, + char *), + PVFS_DEV_MAXNR = DEV_MAX_NR, +}; + +/* + * version number for use in communicating between kernel space and user + * space + */ +/* +#define PVFS_KERNEL_PROTO_VERSION \ + ((PVFS2_VERSION_MAJOR * 10000) + \ + (PVFS2_VERSION_MINOR * 100) + \ + PVFS2_VERSION_SUB) +*/ +#define PVFS_KERNEL_PROTO_VERSION 0 + +/* + * describes memory regions to map in the PVFS_DEV_MAP ioctl. + * NOTE: See devpvfs2-req.c for 32 bit compat structure. 
+ * Since this structure has a variable-sized layout that is different + * on 32 and 64 bit platforms, we need to normalize to a 64 bit layout + * on such systems before servicing ioctl calls from user-space binaries + * that may be 32 bit! + */ +struct PVFS_dev_map_desc { + void *ptr; + __s32 total_size; + __s32 size; + __s32 count; +}; + +/* gossip.h *****************************************************************/ + +#ifdef GOSSIP_DISABLE_DEBUG +#define gossip_debug(mask, format, f...) do {} while (0) +#else +extern __u64 gossip_debug_mask; +extern struct client_debug_mask client_debug_mask; + +/* try to avoid function call overhead by checking masks in macro */ +#define gossip_debug(mask, format, f...) \ +do { \ + if (gossip_debug_mask & mask) \ + printk(format, ##f); \ +} while (0) +#endif /* GOSSIP_DISABLE_DEBUG */ + +/* do file and line number printouts w/ the GNU preprocessor */ +#define gossip_ldebug(mask, format, f...) \ + gossip_debug(mask, "%s: " format, __func__, ##f) + +#define gossip_err printk +#define gossip_lerr(format, f...) \ + gossip_err("%s line %d: " format, \ + __FILE__, \ + __LINE__, \ + ##f) diff --git a/fs/orangefs/pvfs2-bufmap.h b/fs/orangefs/pvfs2-bufmap.h new file mode 100644 index 000000000000..e269deafbb74 --- /dev/null +++ b/fs/orangefs/pvfs2-bufmap.h @@ -0,0 +1,76 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef __PVFS2_BUFMAP_H +#define __PVFS2_BUFMAP_H + +/* used to describe mapped buffers */ +struct pvfs_bufmap_desc { + void *uaddr; /* user space address pointer */ + struct page **page_array; /* array of mapped pages */ + int array_count; /* size of above arrays */ + struct list_head list_link; +}; + +struct pvfs2_bufmap; + +struct pvfs2_bufmap *pvfs2_bufmap_ref(void); +void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap); + +/* + * pvfs_bufmap_size_query is now an inline function because buffer + * sizes are not hardcoded + */ +int pvfs_bufmap_size_query(void); + +int pvfs_bufmap_shift_query(void); + +int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc); + +int get_bufmap_init(void); + +void pvfs_bufmap_finalize(void); + +int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index); + +void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index); + +int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index); + +void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index); + +int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +size_t pvfs_bufmap_copy_to_user_task_iovec(struct task_struct *tsk, + struct iovec *iovec, + unsigned long nr_segs, + struct pvfs2_bufmap *bufmap, + int buffer_index, + size_t bytes_to_be_copied); + +#endif /* __PVFS2_BUFMAP_H */ diff --git a/fs/orangefs/pvfs2-debug.h b/fs/orangefs/pvfs2-debug.h new file mode 100644 index 
000000000000..4c27ad77fa16 --- /dev/null +++ b/fs/orangefs/pvfs2-debug.h @@ -0,0 +1,290 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* This file just defines debugging masks to be used with the gossip + * logging utility. All debugging masks for PVFS2 are kept here to make + * sure we don't have collisions. + */ + +#ifndef __PVFS2_DEBUG_H +#define __PVFS2_DEBUG_H + +#ifdef __KERNEL__ +#include <linux/types.h> +#else +#include <stdint.h> +#endif + +#define GOSSIP_NO_DEBUG (__u64)0 +#define GOSSIP_BMI_DEBUG_TCP ((__u64)1 << 0) +#define GOSSIP_BMI_DEBUG_CONTROL ((__u64)1 << 1) +#define GOSSIP_BMI_DEBUG_OFFSETS ((__u64)1 << 2) +#define GOSSIP_BMI_DEBUG_GM ((__u64)1 << 3) +#define GOSSIP_JOB_DEBUG ((__u64)1 << 4) +#define GOSSIP_SERVER_DEBUG ((__u64)1 << 5) +#define GOSSIP_STO_DEBUG_CTRL ((__u64)1 << 6) +#define GOSSIP_STO_DEBUG_DEFAULT ((__u64)1 << 7) +#define GOSSIP_FLOW_DEBUG ((__u64)1 << 8) +#define GOSSIP_BMI_DEBUG_GM_MEM ((__u64)1 << 9) +#define GOSSIP_REQUEST_DEBUG ((__u64)1 << 10) +#define GOSSIP_FLOW_PROTO_DEBUG ((__u64)1 << 11) +#define GOSSIP_NCACHE_DEBUG ((__u64)1 << 12) +#define GOSSIP_CLIENT_DEBUG ((__u64)1 << 13) +#define GOSSIP_REQ_SCHED_DEBUG ((__u64)1 << 14) +#define GOSSIP_ACACHE_DEBUG ((__u64)1 << 15) +#define GOSSIP_TROVE_DEBUG ((__u64)1 << 16) +#define GOSSIP_TROVE_OP_DEBUG ((__u64)1 << 17) +#define GOSSIP_DIST_DEBUG ((__u64)1 << 18) +#define GOSSIP_BMI_DEBUG_IB ((__u64)1 << 19) +#define GOSSIP_DBPF_ATTRCACHE_DEBUG ((__u64)1 << 20) +#define GOSSIP_MMAP_RCACHE_DEBUG ((__u64)1 << 21) +#define GOSSIP_LOOKUP_DEBUG ((__u64)1 << 22) +#define GOSSIP_REMOVE_DEBUG ((__u64)1 << 23) +#define GOSSIP_GETATTR_DEBUG ((__u64)1 << 24) +#define GOSSIP_READDIR_DEBUG ((__u64)1 << 25) +#define GOSSIP_IO_DEBUG ((__u64)1 << 26) +#define GOSSIP_DBPF_OPEN_CACHE_DEBUG ((__u64)1 << 27) +#define GOSSIP_PERMISSIONS_DEBUG ((__u64)1 << 28) +#define GOSSIP_CANCEL_DEBUG ((__u64)1 << 29) +#define 
GOSSIP_MSGPAIR_DEBUG ((__u64)1 << 30) +#define GOSSIP_CLIENTCORE_DEBUG ((__u64)1 << 31) +#define GOSSIP_CLIENTCORE_TIMING_DEBUG ((__u64)1 << 32) +#define GOSSIP_SETATTR_DEBUG ((__u64)1 << 33) +#define GOSSIP_MKDIR_DEBUG ((__u64)1 << 34) +#define GOSSIP_VARSTRIP_DEBUG ((__u64)1 << 35) +#define GOSSIP_GETEATTR_DEBUG ((__u64)1 << 36) +#define GOSSIP_SETEATTR_DEBUG ((__u64)1 << 37) +#define GOSSIP_ENDECODE_DEBUG ((__u64)1 << 38) +#define GOSSIP_DELEATTR_DEBUG ((__u64)1 << 39) +#define GOSSIP_ACCESS_DEBUG ((__u64)1 << 40) +#define GOSSIP_ACCESS_DETAIL_DEBUG ((__u64)1 << 41) +#define GOSSIP_LISTEATTR_DEBUG ((__u64)1 << 42) +#define GOSSIP_PERFCOUNTER_DEBUG ((__u64)1 << 43) +#define GOSSIP_STATE_MACHINE_DEBUG ((__u64)1 << 44) +#define GOSSIP_DBPF_KEYVAL_DEBUG ((__u64)1 << 45) +#define GOSSIP_LISTATTR_DEBUG ((__u64)1 << 46) +#define GOSSIP_DBPF_COALESCE_DEBUG ((__u64)1 << 47) +#define GOSSIP_ACCESS_HOSTNAMES ((__u64)1 << 48) +#define GOSSIP_FSCK_DEBUG ((__u64)1 << 49) +#define GOSSIP_BMI_DEBUG_MX ((__u64)1 << 50) +#define GOSSIP_BSTREAM_DEBUG ((__u64)1 << 51) +#define GOSSIP_BMI_DEBUG_PORTALS ((__u64)1 << 52) +#define GOSSIP_USER_DEV_DEBUG ((__u64)1 << 53) +#define GOSSIP_DIRECTIO_DEBUG ((__u64)1 << 54) +#define GOSSIP_MGMT_DEBUG ((__u64)1 << 55) +#define GOSSIP_MIRROR_DEBUG ((__u64)1 << 56) +#define GOSSIP_WIN_CLIENT_DEBUG ((__u64)1 << 57) +#define GOSSIP_SECURITY_DEBUG ((__u64)1 << 58) +#define GOSSIP_USRINT_DEBUG ((__u64)1 << 59) +#define GOSSIP_RCACHE_DEBUG ((__u64)1 << 60) +#define GOSSIP_SECCACHE_DEBUG ((__u64)1 << 61) + +#define GOSSIP_BMI_DEBUG_ALL ((__u64) (GOSSIP_BMI_DEBUG_TCP + \ + GOSSIP_BMI_DEBUG_CONTROL + \ + GOSSIP_BMI_DEBUG_GM + \ + GOSSIP_BMI_DEBUG_OFFSETS + \ + GOSSIP_BMI_DEBUG_IB + \ + GOSSIP_BMI_DEBUG_MX + \ + GOSSIP_BMI_DEBUG_PORTALS)) + +const char *PVFS_debug_get_next_debug_keyword(int position); + +#define GOSSIP_SUPER_DEBUG ((__u64)1 << 0) +#define GOSSIP_INODE_DEBUG ((__u64)1 << 1) +#define GOSSIP_FILE_DEBUG ((__u64)1 << 2) +#define 
GOSSIP_DIR_DEBUG ((__u64)1 << 3) +#define GOSSIP_UTILS_DEBUG ((__u64)1 << 4) +#define GOSSIP_WAIT_DEBUG ((__u64)1 << 5) +#define GOSSIP_ACL_DEBUG ((__u64)1 << 6) +#define GOSSIP_DCACHE_DEBUG ((__u64)1 << 7) +#define GOSSIP_DEV_DEBUG ((__u64)1 << 8) +#define GOSSIP_NAME_DEBUG ((__u64)1 << 9) +#define GOSSIP_BUFMAP_DEBUG ((__u64)1 << 10) +#define GOSSIP_CACHE_DEBUG ((__u64)1 << 11) +#define GOSSIP_DEBUGFS_DEBUG ((__u64)1 << 12) +#define GOSSIP_XATTR_DEBUG ((__u64)1 << 13) +#define GOSSIP_INIT_DEBUG ((__u64)1 << 14) +#define GOSSIP_SYSFS_DEBUG ((__u64)1 << 15) + +#define GOSSIP_MAX_NR 16 +#define GOSSIP_MAX_DEBUG (((__u64)1 << GOSSIP_MAX_NR) - 1) + +/*function prototypes*/ +__u64 PVFS_kmod_eventlog_to_mask(const char *event_logging); +__u64 PVFS_debug_eventlog_to_mask(const char *event_logging); +char *PVFS_debug_mask_to_eventlog(__u64 mask); +char *PVFS_kmod_mask_to_eventlog(__u64 mask); + +/* a private internal type */ +struct __keyword_mask_s { + const char *keyword; + __u64 mask_val; +}; + +#define __DEBUG_ALL ((__u64) -1) + +/* map all config keywords to pvfs2 debug masks here */ +static struct __keyword_mask_s s_keyword_mask_map[] = { + /* Log trove debugging info. Same as 'trove'. */ + {"storage", GOSSIP_TROVE_DEBUG}, + /* Log trove debugging info. Same as 'storage'. */ + {"trove", GOSSIP_TROVE_DEBUG}, + /* Log trove operations. */ + {"trove_op", GOSSIP_TROVE_OP_DEBUG}, + /* Log network debug info. */ + {"network", GOSSIP_BMI_DEBUG_ALL}, + /* Log server info, including new operations. */ + {"server", GOSSIP_SERVER_DEBUG}, + /* Log client sysint info. This is only useful for the client. */ + {"client", GOSSIP_CLIENT_DEBUG}, + /* Debug the varstrip distribution */ + {"varstrip", GOSSIP_VARSTRIP_DEBUG}, + /* Log job info */ + {"job", GOSSIP_JOB_DEBUG}, + /* Debug PINT_process_request calls. EXTREMELY verbose! 
*/ + {"request", GOSSIP_REQUEST_DEBUG}, + /* Log request scheduler events */ + {"reqsched", GOSSIP_REQ_SCHED_DEBUG}, + /* Log the flow protocol events, including flowproto_multiqueue */ + {"flowproto", GOSSIP_FLOW_PROTO_DEBUG}, + /* Log flow calls */ + {"flow", GOSSIP_FLOW_DEBUG}, + /* Debug the client name cache. Only useful on the client. */ + {"ncache", GOSSIP_NCACHE_DEBUG}, + /* Debug read-ahead cache events. Only useful on the client. */ + {"mmaprcache", GOSSIP_MMAP_RCACHE_DEBUG}, + /* Debug the attribute cache. Only useful on the client. */ + {"acache", GOSSIP_ACACHE_DEBUG}, + /* Log/Debug distribution calls */ + {"distribution", GOSSIP_DIST_DEBUG}, + /* Debug the server-side dbpf attribute cache */ + {"dbpfattrcache", GOSSIP_DBPF_ATTRCACHE_DEBUG}, + /* Debug the client lookup state machine. */ + {"lookup", GOSSIP_LOOKUP_DEBUG}, + /* Debug the client remove state machine. */ + {"remove", GOSSIP_REMOVE_DEBUG}, + /* Debug the server getattr state machine. */ + {"getattr", GOSSIP_GETATTR_DEBUG}, + /* Debug the server setattr state machine. */ + {"setattr", GOSSIP_SETATTR_DEBUG}, + /* vectored getattr server state machine */ + {"listattr", GOSSIP_LISTATTR_DEBUG}, + /* Debug the client and server get ext attributes SM. */ + {"geteattr", GOSSIP_GETEATTR_DEBUG}, + /* Debug the client and server set ext attributes SM. 
*/ + {"seteattr", GOSSIP_SETEATTR_DEBUG}, + /* Debug the readdir operation (client and server) */ + {"readdir", GOSSIP_READDIR_DEBUG}, + /* Debug the mkdir operation (server only) */ + {"mkdir", GOSSIP_MKDIR_DEBUG}, + /* Debug the io operation (reads and writes) + * for both the client and server */ + {"io", GOSSIP_IO_DEBUG}, + /* Debug the server's open file descriptor cache */ + {"open_cache", GOSSIP_DBPF_OPEN_CACHE_DEBUG}, + /* Debug permissions checking on the server */ + {"permissions", GOSSIP_PERMISSIONS_DEBUG}, + /* Debug the cancel operation */ + {"cancel", GOSSIP_CANCEL_DEBUG}, + /* Debug the msgpair state machine */ + {"msgpair", GOSSIP_MSGPAIR_DEBUG}, + /* Debug the client core app */ + {"clientcore", GOSSIP_CLIENTCORE_DEBUG}, + /* Debug the client timing state machines (job timeout, etc.) */ + {"clientcore_timing", GOSSIP_CLIENTCORE_TIMING_DEBUG}, + /* network encoding */ + {"endecode", GOSSIP_ENDECODE_DEBUG}, + /* Show server file (metadata) accesses (both modify and read-only). 
*/ + {"access", GOSSIP_ACCESS_DEBUG}, + /* Show more detailed server file accesses */ + {"access_detail", GOSSIP_ACCESS_DETAIL_DEBUG}, + /* Debug the listeattr operation */ + {"listeattr", GOSSIP_LISTEATTR_DEBUG}, + /* Debug the state machine management code */ + {"sm", GOSSIP_STATE_MACHINE_DEBUG}, + /* Debug the metadata dbpf keyval functions */ + {"keyval", GOSSIP_DBPF_KEYVAL_DEBUG}, + /* Debug the metadata sync coalescing code */ + {"coalesce", GOSSIP_DBPF_COALESCE_DEBUG}, + /* Display the hostnames instead of IP addrs in debug output */ + {"access_hostnames", GOSSIP_ACCESS_HOSTNAMES}, + /* Show the client device events */ + {"user_dev", GOSSIP_USER_DEV_DEBUG}, + /* Debug the fsck tool */ + {"fsck", GOSSIP_FSCK_DEBUG}, + /* Debug the bstream code */ + {"bstream", GOSSIP_BSTREAM_DEBUG}, + /* Debug trove in direct io mode */ + {"directio", GOSSIP_DIRECTIO_DEBUG}, + /* Debug direct io thread management */ + {"mgmt", GOSSIP_MGMT_DEBUG}, + /* Debug mirroring process */ + {"mirror", GOSSIP_MIRROR_DEBUG}, + /* Windows client */ + {"win_client", GOSSIP_WIN_CLIENT_DEBUG}, + /* Debug robust security code */ + {"security", GOSSIP_SECURITY_DEBUG}, + /* Capability Cache */ + {"seccache", GOSSIP_SECCACHE_DEBUG}, + /* Client User Interface */ + {"usrint", GOSSIP_USRINT_DEBUG}, + /* rcache */ + {"rcache", GOSSIP_RCACHE_DEBUG}, + /* Everything except the periodic events. Useful for debugging */ + {"verbose", + (__DEBUG_ALL & + ~(GOSSIP_PERFCOUNTER_DEBUG | GOSSIP_STATE_MACHINE_DEBUG | + GOSSIP_ENDECODE_DEBUG | GOSSIP_USER_DEV_DEBUG)) + }, + /* No debug output */ + {"none", GOSSIP_NO_DEBUG}, + /* Everything */ + {"all", __DEBUG_ALL} +}; + +#undef __DEBUG_ALL + +/* + * Map all kmod keywords to kmod debug masks here. Keep this + * structure "packed": + * + * "all" is always last... + * + * keyword mask_val index + * foo 1 0 + * bar 2 1 + * baz 4 2 + * qux 8 3 + * . . . 
+ */ +static struct __keyword_mask_s s_kmod_keyword_mask_map[] = { + {"super", GOSSIP_SUPER_DEBUG}, + {"inode", GOSSIP_INODE_DEBUG}, + {"file", GOSSIP_FILE_DEBUG}, + {"dir", GOSSIP_DIR_DEBUG}, + {"utils", GOSSIP_UTILS_DEBUG}, + {"wait", GOSSIP_WAIT_DEBUG}, + {"acl", GOSSIP_ACL_DEBUG}, + {"dcache", GOSSIP_DCACHE_DEBUG}, + {"dev", GOSSIP_DEV_DEBUG}, + {"name", GOSSIP_NAME_DEBUG}, + {"bufmap", GOSSIP_BUFMAP_DEBUG}, + {"cache", GOSSIP_CACHE_DEBUG}, + {"debugfs", GOSSIP_DEBUGFS_DEBUG}, + {"xattr", GOSSIP_XATTR_DEBUG}, + {"init", GOSSIP_INIT_DEBUG}, + {"sysfs", GOSSIP_SYSFS_DEBUG}, + {"none", GOSSIP_NO_DEBUG}, + {"all", GOSSIP_MAX_DEBUG} +}; + +static const int num_kmod_keyword_mask_map = (int) + (sizeof(s_kmod_keyword_mask_map) / sizeof(struct __keyword_mask_s)); + +static const int num_keyword_mask_map = (int) + (sizeof(s_keyword_mask_map) / sizeof(struct __keyword_mask_s)); + +#endif /* __PVFS2_DEBUG_H */ diff --git a/fs/orangefs/pvfs2-debugfs.h b/fs/orangefs/pvfs2-debugfs.h new file mode 100644 index 000000000000..a66b7d08c14d --- /dev/null +++ b/fs/orangefs/pvfs2-debugfs.h @@ -0,0 +1,3 @@ +int pvfs2_debugfs_init(void); +int pvfs2_kernel_debug_init(void); +void pvfs2_debugfs_cleanup(void); diff --git a/fs/orangefs/pvfs2-dev-proto.h b/fs/orangefs/pvfs2-dev-proto.h new file mode 100644 index 000000000000..9c82e6e651f3 --- /dev/null +++ b/fs/orangefs/pvfs2-dev-proto.h @@ -0,0 +1,102 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef _PVFS2_DEV_PROTO_H +#define _PVFS2_DEV_PROTO_H + +/* + * types and constants shared between user space and kernel space for + * device interaction using a common protocol + */ + +/* + * valid pvfs2 kernel operation types + */ +#define PVFS2_VFS_OP_INVALID 0xFF000000 +#define PVFS2_VFS_OP_FILE_IO 0xFF000001 +#define PVFS2_VFS_OP_LOOKUP 0xFF000002 +#define PVFS2_VFS_OP_CREATE 0xFF000003 +#define PVFS2_VFS_OP_GETATTR 0xFF000004 +#define PVFS2_VFS_OP_REMOVE 0xFF000005 +#define PVFS2_VFS_OP_MKDIR 0xFF000006 +#define PVFS2_VFS_OP_READDIR 0xFF000007 +#define PVFS2_VFS_OP_SETATTR 0xFF000008 +#define PVFS2_VFS_OP_SYMLINK 0xFF000009 +#define PVFS2_VFS_OP_RENAME 0xFF00000A +#define PVFS2_VFS_OP_STATFS 0xFF00000B +#define PVFS2_VFS_OP_TRUNCATE 0xFF00000C +#define PVFS2_VFS_OP_MMAP_RA_FLUSH 0xFF00000D +#define PVFS2_VFS_OP_FS_MOUNT 0xFF00000E +#define PVFS2_VFS_OP_FS_UMOUNT 0xFF00000F +#define PVFS2_VFS_OP_GETXATTR 0xFF000010 +#define PVFS2_VFS_OP_SETXATTR 0xFF000011 +#define PVFS2_VFS_OP_LISTXATTR 0xFF000012 +#define PVFS2_VFS_OP_REMOVEXATTR 0xFF000013 +#define PVFS2_VFS_OP_PARAM 0xFF000014 +#define PVFS2_VFS_OP_PERF_COUNT 0xFF000015 +#define PVFS2_VFS_OP_CANCEL 0xFF00EE00 +#define PVFS2_VFS_OP_FSYNC 0xFF00EE01 +#define PVFS2_VFS_OP_FSKEY 0xFF00EE02 +#define PVFS2_VFS_OP_READDIRPLUS 0xFF00EE03 +#define PVFS2_VFS_OP_FILE_IOX 0xFF00EE04 + +/* + * Misc constants. Please retain them as multiples of 8! + * Otherwise 32-64 bit interactions will be messed up :) + */ +#define PVFS2_NAME_LEN 0x00000100 +#define PVFS2_MAX_DEBUG_STRING_LEN 0x00000400 +#define PVFS2_MAX_DEBUG_ARRAY_LEN 0x00000800 + +/* + * MAX_DIRENT_COUNT cannot be larger than PVFS_REQ_LIMIT_LISTATTR. + * The value of PVFS_REQ_LIMIT_LISTATTR has been changed from 113 to 60 + * to accommodate an attribute object with mirrored handles. + * MAX_DIRENT_COUNT is replaced by MAX_DIRENT_COUNT_READDIR and + * MAX_DIRENT_COUNT_READDIRPLUS, since readdir doesn't trigger a listattr + * but readdirplus might. 
+*/ +#define MAX_DIRENT_COUNT_READDIR 0x00000060 +#define MAX_DIRENT_COUNT_READDIRPLUS 0x0000003C + +#include "upcall.h" +#include "downcall.h" + +/* + * These macros differ from proto macros in that they don't do any + * byte-swappings and are used to ensure that kernel-clientcore interactions + * don't cause any unaligned accesses etc on 64 bit machines + */ +#ifndef roundup4 +#define roundup4(x) (((x)+3) & ~3) +#endif + +#ifndef roundup8 +#define roundup8(x) (((x)+7) & ~7) +#endif + +/* + * strings; decoding just points into existing character data + * + * enc_string(pptr, pbuf): writes strlen(*pbuf) as a __u32 at *pptr, copies + * the string including its NUL right after it, then advances *pptr by + * roundup8(4 + length + 1). + * + * dec_string(pptr, pbuf, plen): reads the __u32 length at *pptr, points + * *pbuf at the character data that follows it, advances *pptr by the same + * rounded amount and, when plen is non-NULL, stores the length in *plen. + * + * The length temporaries carry macro-private names so that a caller + * argument spelled "len" (for instance plen == &len) is not captured by + * the expansion, and every argument is parenthesized at each use. + * Arguments are still evaluated more than once, so they must be free of + * side effects. + */ +#define enc_string(pptr, pbuf) do { \ + __u32 __enc_len = strlen(*(pbuf)); \ + *(__u32 *) *(pptr) = (__enc_len); \ + memcpy(*(pptr)+4, *(pbuf), __enc_len+1); \ + *(pptr) += roundup8(4 + __enc_len + 1); \ +} while (0) + +#define dec_string(pptr, pbuf, plen) do { \ + __u32 __dec_len = (*(__u32 *) *(pptr)); \ + *(pbuf) = *(pptr) + 4; \ + *(pptr) += roundup8(4 + __dec_len + 1); \ + if (plen) \ + *(plen) = __dec_len;\ +} while (0) + +struct read_write_x { + __s64 off; + __s64 len; +}; + +#endif diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h new file mode 100644 index 000000000000..6c787c4797d0 --- /dev/null +++ b/fs/orangefs/pvfs2-kernel.h @@ -0,0 +1,864 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * The PVFS2 Linux kernel support allows PVFS2 volumes to be mounted and + * accessed through the Linux VFS (i.e. using standard I/O system calls). + * This support is only needed on clients that wish to mount the file system. + * + */ + +/* + * Declarations and macros for the PVFS2 Linux kernel support. 
+ */ + +#ifndef __PVFS2KERNEL_H +#define __PVFS2KERNEL_H + +#include <linux/kernel.h> +#include <linux/moduleparam.h> +#include <linux/statfs.h> +#include <linux/backing-dev.h> +#include <linux/device.h> +#include <linux/mpage.h> +#include <linux/namei.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/vmalloc.h> + +#include <linux/aio.h> +#include <linux/posix_acl.h> +#include <linux/posix_acl_xattr.h> +#include <linux/compat.h> +#include <linux/mount.h> +#include <linux/uaccess.h> +#include <linux/atomic.h> +#include <linux/uio.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/wait.h> +#include <linux/dcache.h> +#include <linux/pagemap.h> +#include <linux/poll.h> +#include <linux/rwsem.h> +#include <linux/xattr.h> +#include <linux/exportfs.h> + +#include <asm/unaligned.h> + +#include "pvfs2-dev-proto.h" + +#ifdef PVFS2_KERNEL_DEBUG +#define PVFS2_DEFAULT_OP_TIMEOUT_SECS 10 +#else +#define PVFS2_DEFAULT_OP_TIMEOUT_SECS 20 +#endif + +#define PVFS2_BUFMAP_WAIT_TIMEOUT_SECS 30 + +#define PVFS2_DEFAULT_SLOT_TIMEOUT_SECS 900 /* 15 minutes */ + +#define PVFS2_REQDEVICE_NAME "pvfs2-req" + +#define PVFS2_DEVREQ_MAGIC 0x20030529 +#define PVFS2_LINK_MAX 0x000000FF +#define PVFS2_PURGE_RETRY_COUNT 0x00000005 +#define PVFS2_SEEK_END 0x00000002 +#define PVFS2_MAX_NUM_OPTIONS 0x00000004 +#define PVFS2_MAX_MOUNT_OPT_LEN 0x00000080 +#define PVFS2_MAX_FSKEY_LEN 64 + +#define MAX_DEV_REQ_UPSIZE (2*sizeof(__s32) + \ +sizeof(__u64) + sizeof(struct pvfs2_upcall_s)) +#define MAX_DEV_REQ_DOWNSIZE (2*sizeof(__s32) + \ +sizeof(__u64) + sizeof(struct pvfs2_downcall_s)) + +#define BITS_PER_LONG_DIV_8 (BITS_PER_LONG >> 3) + +/* borrowed from irda.h */ +#ifndef MSECS_TO_JIFFIES +#define MSECS_TO_JIFFIES(ms) (((ms)*HZ+999)/1000) +#endif + +#define MAX_ALIGNED_DEV_REQ_UPSIZE \ + (MAX_DEV_REQ_UPSIZE + \ + ((((MAX_DEV_REQ_UPSIZE / \ + (BITS_PER_LONG_DIV_8)) * 
\ + (BITS_PER_LONG_DIV_8)) + \ + (BITS_PER_LONG_DIV_8)) - \ + MAX_DEV_REQ_UPSIZE)) + +#define MAX_ALIGNED_DEV_REQ_DOWNSIZE \ + (MAX_DEV_REQ_DOWNSIZE + \ + ((((MAX_DEV_REQ_DOWNSIZE / \ + (BITS_PER_LONG_DIV_8)) * \ + (BITS_PER_LONG_DIV_8)) + \ + (BITS_PER_LONG_DIV_8)) - \ + MAX_DEV_REQ_DOWNSIZE)) + +/* + * valid pvfs2 kernel operation states + * + * unknown - op was just initialized + * waiting - op is on request_list (upward bound) + * inprogr - op is in progress (waiting for downcall) + * serviced - op has matching downcall; ok + * purged - op has to start a timer since client-core + * exited uncleanly before servicing op + */ +enum pvfs2_vfs_op_states { + OP_VFS_STATE_UNKNOWN = 0, + OP_VFS_STATE_WAITING = 1, + OP_VFS_STATE_INPROGR = 2, + OP_VFS_STATE_SERVICED = 4, + OP_VFS_STATE_PURGED = 8, +}; + +#define set_op_state_waiting(op) ((op)->op_state = OP_VFS_STATE_WAITING) +#define set_op_state_inprogress(op) ((op)->op_state = OP_VFS_STATE_INPROGR) +#define set_op_state_serviced(op) ((op)->op_state = OP_VFS_STATE_SERVICED) +#define set_op_state_purged(op) ((op)->op_state |= OP_VFS_STATE_PURGED) + +#define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) +#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR) +#define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED) +#define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED) + +#define get_op(op) \ + do { \ + atomic_inc(&(op)->aio_ref_count); \ + gossip_debug(GOSSIP_DEV_DEBUG, \ + "(get) Alloced OP (%p:%llu)\n", \ + op, \ + llu((op)->tag)); \ + } while (0) + +#define put_op(op) \ + do { \ + if (atomic_sub_and_test(1, &(op)->aio_ref_count) == 1) { \ + gossip_debug(GOSSIP_DEV_DEBUG, \ + "(put) Releasing OP (%p:%llu)\n", \ + op, \ + llu((op)->tag)); \ + op_release(op); \ + } \ + } while (0) + +#define op_wait(op) (atomic_read(&(op)->aio_ref_count) <= 2 ? 
0 : 1) + +/* + * Defines for controlling whether I/O upcalls are for async or sync operations + */ +enum PVFS_async_io_type { + PVFS_VFS_SYNC_IO = 0, + PVFS_VFS_ASYNC_IO = 1, +}; + +/* + * An array of client_debug_mask will be built to hold debug keyword/mask + * values fetched from userspace. + */ +struct client_debug_mask { + char *keyword; + __u64 mask1; + __u64 mask2; +}; + +/* + * pvfs2 kernel memory related flags + */ + +#if ((defined PVFS2_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) +#define PVFS2_CACHE_CREATE_FLAGS SLAB_RED_ZONE +#else +#define PVFS2_CACHE_CREATE_FLAGS 0 +#endif /* ((defined PVFS2_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ + +#define PVFS2_CACHE_ALLOC_FLAGS (GFP_KERNEL) +#define PVFS2_GFP_FLAGS (GFP_KERNEL) +#define PVFS2_BUFMAP_GFP_FLAGS (GFP_KERNEL) + +#define pvfs2_kmap(page) kmap(page) +#define pvfs2_kunmap(page) kunmap(page) + +/* pvfs2 xattr and acl related defines */ +#define PVFS2_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define PVFS2_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define PVFS2_XATTR_INDEX_TRUSTED 3 +#define PVFS2_XATTR_INDEX_DEFAULT 4 + +#if 0 +#ifndef POSIX_ACL_XATTR_ACCESS +#define POSIX_ACL_XATTR_ACCESS "system.posix_acl_access" +#endif +#ifndef POSIX_ACL_XATTR_DEFAULT +#define POSIX_ACL_XATTR_DEFAULT "system.posix_acl_default" +#endif +#endif + +#define PVFS2_XATTR_NAME_ACL_ACCESS POSIX_ACL_XATTR_ACCESS +#define PVFS2_XATTR_NAME_ACL_DEFAULT POSIX_ACL_XATTR_DEFAULT +#define PVFS2_XATTR_NAME_TRUSTED_PREFIX "trusted." 
+#define PVFS2_XATTR_NAME_DEFAULT_PREFIX "" + +/* these functions are defined in pvfs2-utils.c */ +int orangefs_prepare_cdm_array(char *debug_array_string); +int orangefs_prepare_debugfs_help_string(int); + +/* defined in pvfs2-debugfs.c */ +int pvfs2_client_debug_init(void); + +void debug_string_to_mask(char *, void *, int); +void do_c_mask(int, char *, struct client_debug_mask **); +void do_k_mask(int, char *, __u64 **); + +void debug_mask_to_string(void *, int); +void do_k_string(void *, int); +void do_c_string(void *, int); +int check_amalgam_keyword(void *, int); +int keyword_is_amalgam(char *); + +/*these variables are defined in pvfs2-mod.c */ +extern char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; +extern char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; +extern char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN]; +/* HELLO +extern struct client_debug_mask current_client_mask; +*/ +extern unsigned int kernel_mask_set_mod_init; + +extern int pvfs2_init_acl(struct inode *inode, struct inode *dir); +extern const struct xattr_handler *pvfs2_xattr_handlers[]; + +extern struct posix_acl *pvfs2_get_acl(struct inode *inode, int type); +extern int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); + +int pvfs2_xattr_set_default(struct dentry *dentry, + const char *name, + const void *buffer, + size_t size, + int flags, + int handler_flags); + +int pvfs2_xattr_get_default(struct dentry *dentry, + const char *name, + void *buffer, + size_t size, + int handler_flags); + +/* + * Redefine xtvec structure so that we could move helper functions out of + * the define + */ +struct xtvec { + __kernel_off_t xtv_off; /* must be off_t */ + __kernel_size_t xtv_len; /* must be size_t */ +}; + +/* + * pvfs2 data structures + */ +struct pvfs2_kernel_op_s { + enum pvfs2_vfs_op_states op_state; + __u64 tag; + + /* + * Set uses_shared_memory to 1 if this operation uses shared memory. 
+ * If true, then a retry on the op must also get a new shared memory + * buffer and re-populate it. + */ + int uses_shared_memory; + + struct pvfs2_upcall_s upcall; + struct pvfs2_downcall_s downcall; + + wait_queue_head_t waitq; + spinlock_t lock; + + int io_completed; + wait_queue_head_t io_completion_waitq; + + /* + * upcalls requiring variable length trailers require that this struct + * be in the request list even after client-core does a read() on the + * device to dequeue the upcall. + * if op_linger field goes to 0, we dequeue this op off the list. + * else we let it stay. What gets passed to the read() is + * a) if op_linger field is = 1, pvfs2_kernel_op_s itself + * b) else if = 0, we pass ->upcall.trailer_buf + * We expect to have only a single upcall trailer buffer, + * so we expect callers with trailers + * to set this field to 2 and others to set it to 1. + */ + __s32 op_linger, op_linger_tmp; + /* VFS aio fields */ + + /* used by the async I/O code to stash the pvfs2_kiocb_s structure */ + void *priv; + + /* used again for the async I/O code for deallocation */ + atomic_t aio_ref_count; + + int attempts; + + struct list_head list; +}; + +/* per inode private pvfs2 info */ +struct pvfs2_inode_s { + struct pvfs2_object_kref refn; + char link_target[PVFS_NAME_MAX]; + __s64 blksize; + /* + * Reading/Writing Extended attributes need to acquire the appropriate + * reader/writer semaphore on the pvfs2_inode_s structure. 
+ */ + struct rw_semaphore xattr_sem; + + struct inode vfs_inode; + sector_t last_failed_block_index_read; + + /* + * State of in-memory attributes not yet flushed to disk associated + * with this object + */ + unsigned long pinode_flags; + + /* All allocated pvfs2_inode_s objects are chained to a list */ + struct list_head list; +}; + +#define P_ATIME_FLAG 0 +#define P_MTIME_FLAG 1 +#define P_CTIME_FLAG 2 +#define P_MODE_FLAG 3 + +#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) +#define SetAtimeFlag(pinode) set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) +#define AtimeFlag(pinode) test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) + +#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) +#define SetMtimeFlag(pinode) set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) +#define MtimeFlag(pinode) test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) + +#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) +#define SetCtimeFlag(pinode) set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) +#define CtimeFlag(pinode) test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) + +#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags) +#define SetModeFlag(pinode) set_bit(P_MODE_FLAG, &(pinode)->pinode_flags) +#define ModeFlag(pinode) test_bit(P_MODE_FLAG, &(pinode)->pinode_flags) + +/* per superblock private pvfs2 info */ +struct pvfs2_sb_info_s { + struct pvfs2_khandle root_khandle; + __s32 fs_id; + int id; + int flags; +#define PVFS2_OPT_INTR 0x01 +#define PVFS2_OPT_LOCAL_LOCK 0x02 + char devname[PVFS_MAX_SERVER_ADDR_LEN]; + struct super_block *sb; + int mount_pending; + struct list_head list; +}; + +/* + * a temporary structure used only for sb mount time that groups the + * mount time data provided along with a private superblock structure + * that is allocated before a 'kernel' superblock is allocated. 
+*/ +struct pvfs2_mount_sb_info_s { + void *data; + struct pvfs2_khandle root_khandle; + __s32 fs_id; + int id; +}; + +/* + * structure that holds the state of any async I/O operation issued + * through the VFS. Needed especially to handle cancellation requests + * or even completion notification so that the VFS client-side daemon + * can free up its vfs_request slots. + */ +struct pvfs2_kiocb_s { + /* the pointer to the task that initiated the AIO */ + struct task_struct *tsk; + + /* pointer to the kiocb that kicked this operation */ + struct kiocb *kiocb; + + /* buffer index that was used for the I/O */ + struct pvfs2_bufmap *bufmap; + int buffer_index; + + /* pvfs2 kernel operation type */ + struct pvfs2_kernel_op_s *op; + + /* The user space buffers from/to which I/O is being staged */ + struct iovec *iov; + + /* number of elements in the iovector */ + unsigned long nr_segs; + + /* set to indicate the type of the operation */ + int rw; + + /* file offset */ + loff_t offset; + + /* and the count in bytes */ + size_t bytes_to_be_copied; + + ssize_t bytes_copied; + int needs_cleanup; +}; + +struct pvfs2_stats { + unsigned long cache_hits; + unsigned long cache_misses; + unsigned long reads; + unsigned long writes; +}; + +extern struct pvfs2_stats g_pvfs2_stats; + +/* + NOTE: See Documentation/filesystems/porting for information + on implementing FOO_I and properly accessing fs private data +*/ +static inline struct pvfs2_inode_s *PVFS2_I(struct inode *inode) +{ + return container_of(inode, struct pvfs2_inode_s, vfs_inode); +} + +static inline struct pvfs2_sb_info_s *PVFS2_SB(struct super_block *sb) +{ + return (struct pvfs2_sb_info_s *) sb->s_fs_info; +} + +/* ino_t descends from "unsigned long", 8 bytes, 64 bits. 
*/ +/* + * Folds the sixteen khandle bytes into eight: result bytes 0-3 are khandle + * bytes 0-3 XORed with bytes 4-7, result bytes 4-7 are khandle bytes 12-15 + * XORed with bytes 8-11. The eight bytes are then read back through the + * union as a __u64 and returned as the inode number. + */ +static inline ino_t pvfs2_khandle_to_ino(struct pvfs2_khandle *khandle) +{ + union { + unsigned char u[8]; + __u64 ino; + } ihandle; + + ihandle.u[0] = khandle->u[0] ^ khandle->u[4]; + ihandle.u[1] = khandle->u[1] ^ khandle->u[5]; + ihandle.u[2] = khandle->u[2] ^ khandle->u[6]; + ihandle.u[3] = khandle->u[3] ^ khandle->u[7]; + ihandle.u[4] = khandle->u[12] ^ khandle->u[8]; + ihandle.u[5] = khandle->u[13] ^ khandle->u[9]; + ihandle.u[6] = khandle->u[14] ^ khandle->u[10]; + ihandle.u[7] = khandle->u[15] ^ khandle->u[11]; + + return ihandle.ino; +} + +/* Returns a pointer to the khandle kept in the inode's pvfs2_inode_s refn. */ +static inline struct pvfs2_khandle *get_khandle_from_ino(struct inode *inode) +{ + return &(PVFS2_I(inode)->refn.khandle); +} + +/* Returns the fs_id kept in the inode's pvfs2_inode_s refn. */ +static inline __s32 get_fsid_from_ino(struct inode *inode) +{ + return PVFS2_I(inode)->refn.fs_id; +} + +/* + * Computes an inode number from the khandle stored for @inode; note that, + * despite the name, the argument is the inode, not a khandle. + */ +static inline ino_t get_ino_from_khandle(struct inode *inode) +{ + struct pvfs2_khandle *khandle; + ino_t ino; + + khandle = get_khandle_from_ino(inode); + ino = pvfs2_khandle_to_ino(khandle); + return ino; +} + +/* Inode number computed from the khandle of the dentry's parent inode. */ +static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry) +{ + return get_ino_from_khandle(dentry->d_parent->d_inode); +} + +/* + * Logs both handles under GOSSIP_DCACHE_DEBUG, then returns 1 when + * PVFS_khandle_cmp() yields zero for the superblock's root_khandle and the + * inode's khandle, and 0 otherwise. + */ +static inline int is_root_handle(struct inode *inode) +{ + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: root handle: %pU, this handle: %pU:\n", + __func__, + &PVFS2_SB(inode->i_sb)->root_khandle, + get_khandle_from_ino(inode)); + + if (PVFS_khandle_cmp(&(PVFS2_SB(inode->i_sb)->root_khandle), + get_khandle_from_ino(inode))) + return 0; + else + return 1; +} + +/* + * Logs both handles under GOSSIP_DCACHE_DEBUG, then returns 1 when + * PVFS_khandle_cmp() yields zero for @resp_handle and the inode's khandle, + * and 0 otherwise. @resp_handle is taken by value. + */ +static inline int match_handle(struct pvfs2_khandle resp_handle, + struct inode *inode) +{ + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: one handle: %pU, another handle:%pU:\n", + __func__, + &resp_handle, + get_khandle_from_ino(inode)); + + if (PVFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode))) + return 0; + else + return 1; +} + +/* + * defined in pvfs2-cache.c + */ +int op_cache_initialize(void); +int op_cache_finalize(void); +struct pvfs2_kernel_op_s *op_alloc(__s32 type); +struct 
pvfs2_kernel_op_s *op_alloc_trailer(__s32 type); +char *get_opname_string(struct pvfs2_kernel_op_s *new_op); +void op_release(struct pvfs2_kernel_op_s *op); + +int dev_req_cache_initialize(void); +int dev_req_cache_finalize(void); +void *dev_req_alloc(void); +void dev_req_release(void *); + +int pvfs2_inode_cache_initialize(void); +int pvfs2_inode_cache_finalize(void); + +int kiocb_cache_initialize(void); +int kiocb_cache_finalize(void); +struct pvfs2_kiocb_s *kiocb_alloc(void); +void kiocb_release(struct pvfs2_kiocb_s *ptr); + +/* + * defined in pvfs2-mod.c + */ +void purge_inprogress_ops(void); + +/* + * defined in waitqueue.c + */ +int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op); +int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op); +void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op); +void purge_waiting_ops(void); + +/* + * defined in super.c + */ +struct dentry *pvfs2_mount(struct file_system_type *fst, + int flags, + const char *devname, + void *data); + +void pvfs2_kill_sb(struct super_block *sb); +int pvfs2_remount(struct super_block *sb); + +int fsid_key_table_initialize(void); +void fsid_key_table_finalize(void); + +/* + * defined in inode.c + */ +__u32 convert_to_pvfs2_mask(unsigned long lite_mask); +struct inode *pvfs2_new_inode(struct super_block *sb, + struct inode *dir, + int mode, + dev_t dev, + struct pvfs2_object_kref *ref); + +int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr); + +int pvfs2_getattr(struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *kstat); + +/* + * defined in xattr.c + */ +int pvfs2_setxattr(struct dentry *dentry, + const char *name, + const void *value, + size_t size, + int flags); + +ssize_t pvfs2_getxattr(struct dentry *dentry, + const char *name, + void *buffer, + size_t size); + +ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); + +/* + * defined in namei.c + */ +struct inode *pvfs2_iget(struct super_block *sb, + struct 
pvfs2_object_kref *ref); + +ssize_t pvfs2_inode_read(struct inode *inode, + char *buf, + size_t count, + loff_t *offset, + loff_t readahead_size); + +/* + * defined in devpvfs2-req.c + */ +int pvfs2_dev_init(void); +void pvfs2_dev_cleanup(void); +int is_daemon_in_service(void); +int fs_mount_pending(__s32 fsid); + +/* + * defined in pvfs2-utils.c + */ +__s32 fsid_of_op(struct pvfs2_kernel_op_s *op); + +int pvfs2_flush_inode(struct inode *inode); + +ssize_t pvfs2_inode_getxattr(struct inode *inode, + const char *prefix, + const char *name, + void *buffer, + size_t size); + +int pvfs2_inode_setxattr(struct inode *inode, + const char *prefix, + const char *name, + const void *value, + size_t size, + int flags); + +int pvfs2_inode_getattr(struct inode *inode, __u32 mask); + +int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr); + +void pvfs2_op_initialize(struct pvfs2_kernel_op_s *op); + +void pvfs2_make_bad_inode(struct inode *inode); + +void mask_blocked_signals(sigset_t *orig_sigset); + +void unmask_blocked_signals(sigset_t *orig_sigset); + +int pvfs2_unmount_sb(struct super_block *sb); + +int pvfs2_cancel_op_in_progress(__u64 tag); + +__u64 pvfs2_convert_time_field(void *time_ptr); + +int pvfs2_normalize_to_errno(__s32 error_code); + +extern struct mutex devreq_mutex; +extern struct mutex request_mutex; +extern int debug; +extern int op_timeout_secs; +extern int slot_timeout_secs; +extern struct list_head pvfs2_superblocks; +extern spinlock_t pvfs2_superblocks_lock; +extern struct list_head pvfs2_request_list; +extern spinlock_t pvfs2_request_list_lock; +extern wait_queue_head_t pvfs2_request_list_waitq; +extern struct list_head *htable_ops_in_progress; +extern spinlock_t htable_ops_in_progress_lock; +extern int hash_table_size; + +extern const struct address_space_operations pvfs2_address_operations; +extern struct backing_dev_info pvfs2_backing_dev_info; +extern struct inode_operations pvfs2_file_inode_operations; +extern const struct file_operations 
pvfs2_file_operations; +extern struct inode_operations pvfs2_symlink_inode_operations; +extern struct inode_operations pvfs2_dir_inode_operations; +extern const struct file_operations pvfs2_dir_operations; +extern const struct dentry_operations pvfs2_dentry_operations; +extern const struct file_operations pvfs2_devreq_file_operations; + +extern wait_queue_head_t pvfs2_bufmap_init_waitq; + +/* + * misc convenience macros + * + * The op argument is parenthesized at every use so that any pointer + * expression may be passed, as the op-state macros in this header already do. + */ + +/* + * Marks op as waiting and appends it to the tail of pvfs2_request_list + * while holding pvfs2_request_list_lock and the op's own lock, then wakes + * pvfs2_request_list_waitq. + */ +#define add_op_to_request_list(op) \ +do { \ + spin_lock(&pvfs2_request_list_lock); \ + spin_lock(&(op)->lock); \ + set_op_state_waiting(op); \ + list_add_tail(&(op)->list, &pvfs2_request_list); \ + spin_unlock(&pvfs2_request_list_lock); \ + spin_unlock(&(op)->lock); \ + wake_up_interruptible(&pvfs2_request_list_waitq); \ +} while (0) + +/* Same as add_op_to_request_list(), but uses list_add() instead of list_add_tail(). */ +#define add_priority_op_to_request_list(op) \ + do { \ + spin_lock(&pvfs2_request_list_lock); \ + spin_lock(&(op)->lock); \ + set_op_state_waiting(op); \ + \ + list_add(&(op)->list, &pvfs2_request_list); \ + spin_unlock(&pvfs2_request_list_lock); \ + spin_unlock(&(op)->lock); \ + wake_up_interruptible(&pvfs2_request_list_waitq); \ +} while (0) + +/* + * Walks pvfs2_request_list under pvfs2_request_list_lock and unlinks the + * entry whose address equals op, if it is present. + */ +#define remove_op_from_request_list(op) \ + do { \ + struct list_head *tmp = NULL; \ + struct list_head *tmp_safe = NULL; \ + struct pvfs2_kernel_op_s *tmp_op = NULL; \ + \ + spin_lock(&pvfs2_request_list_lock); \ + list_for_each_safe(tmp, tmp_safe, &pvfs2_request_list) { \ + tmp_op = list_entry(tmp, \ + struct pvfs2_kernel_op_s, \ + list); \ + if (tmp_op && (tmp_op == (op))) { \ + list_del(&tmp_op->list); \ + break; \ + } \ + } \ + spin_unlock(&pvfs2_request_list_lock); \ + } while (0) + +#define PVFS2_OP_INTERRUPTIBLE 1 /* service_operation() is interruptible */ +#define PVFS2_OP_PRIORITY 2 /* service_operation() is high priority */ +#define PVFS2_OP_CANCELLATION 4 /* this is a cancellation */ +#define PVFS2_OP_NO_SEMAPHORE 8 /* don't acquire semaphore */ +#define PVFS2_OP_ASYNC 16 /* Queue it, but don't wait */ + +int service_operation(struct pvfs2_kernel_op_s *op, + const char 
*op_name, + int flags); + +/* + * handles two possible error cases, depending on context. + * + * by design, our vfs i/o errors need to be handled in one of two ways, + * depending on where the error occurred. + * + * if the error happens in the waitqueue code because we either timed + * out or a signal was raised while waiting, we need to cancel the + * userspace i/o operation and free the op manually. this is done to + * avoid having the device start writing application data to our shared + * bufmap pages without us expecting it. + * + * FIXME: POSSIBLE OPTIMIZATION: + * However, if we timed out or if we got a signal AND our upcall was never + * picked off the queue (i.e. we were in OP_VFS_STATE_WAITING), then we don't + * need to send a cancellation upcall. The way we can handle this is + * set error_exit to 2 in such cases and 1 whenever cancellation has to be + * sent and have handle_error + * take care of this situation as well. + * + * if a pvfs2 sysint level error occurred and i/o has been completed, + * there is no need to cancel the operation, as the user has finished + * using the bufmap page and so there is no danger in this case. in + * this case, we wake up the device normally so that it may free the + * op, as normal. + * + * note the only reason this is a macro is because both read and write + * cases need the exact same handling code. + */ +#define handle_io_error() \ +do { \ + if (!op_state_serviced(new_op)) { \ + pvfs2_cancel_op_in_progress(new_op->tag); \ + op_release(new_op); \ + } else { \ + wake_up_daemon_for_return(new_op); \ + } \ + new_op = NULL; \ + pvfs_bufmap_put(bufmap, buffer_index); \ + buffer_index = -1; \ +} while (0) + +#define get_interruptible_flag(inode) \ + ((PVFS2_SB(inode->i_sb)->flags & PVFS2_OPT_INTR) ? 
\ + PVFS2_OP_INTERRUPTIBLE : 0) + +/* + * Logs the superblock's pvfs2_sb_info_s and appends it to the + * pvfs2_superblocks list under pvfs2_superblocks_lock. + */ +#define add_pvfs2_sb(sb) \ +do { \ + gossip_debug(GOSSIP_SUPER_DEBUG, \ + "Adding SB %p to pvfs2 superblocks\n", \ + PVFS2_SB(sb)); \ + spin_lock(&pvfs2_superblocks_lock); \ + list_add_tail(&PVFS2_SB(sb)->list, &pvfs2_superblocks); \ + spin_unlock(&pvfs2_superblocks_lock); \ +} while (0) + +/* + * Walks pvfs2_superblocks under pvfs2_superblocks_lock and unlinks the + * entry whose ->sb member equals the given super_block pointer. + * + * The parameter is deliberately not spelled "sb": the preprocessor replaces + * every token that matches a parameter name, including the ->sb member + * access below, so with a parameter called sb the macro only expanded + * correctly when the caller's argument was itself literally named sb. + */ +#define remove_pvfs2_sb(__sb) \ +do { \ + struct list_head *tmp = NULL; \ + struct list_head *tmp_safe = NULL; \ + struct pvfs2_sb_info_s *pvfs2_sb = NULL; \ + \ + spin_lock(&pvfs2_superblocks_lock); \ + list_for_each_safe(tmp, tmp_safe, &pvfs2_superblocks) { \ + pvfs2_sb = list_entry(tmp, \ + struct pvfs2_sb_info_s, \ + list); \ + if (pvfs2_sb && (pvfs2_sb->sb == (__sb))) { \ + gossip_debug(GOSSIP_SUPER_DEBUG, \ + "Removing SB %p from pvfs2 superblocks\n", \ + pvfs2_sb); \ + list_del(&pvfs2_sb->list); \ + break; \ + } \ + } \ + spin_unlock(&pvfs2_superblocks_lock); \ +} while (0) + +/* The inode argument is parenthesized so any pointer expression may be passed. */ +#define pvfs2_lock_inode(inode) spin_lock(&(inode)->i_lock) +#define pvfs2_unlock_inode(inode) spin_unlock(&(inode)->i_lock) +#define pvfs2_current_signal_lock current->sighand->siglock +#define pvfs2_current_sigaction current->sighand->action + +/* + * Fills sys_attr (a structure lvalue, not a pointer) with the current + * task's fsuid/fsgid, size 0, the translated mode, the given object type + * and the PVFS_ATTR_SYS_ALL_SETABLE mask. + */ +#define fill_default_sys_attrs(sys_attr, type, mode) \ +do { \ + (sys_attr).owner = from_kuid(current_user_ns(), current_fsuid()); \ + (sys_attr).group = from_kgid(current_user_ns(), current_fsgid()); \ + (sys_attr).size = 0; \ + (sys_attr).perms = PVFS_util_translate_mode(mode); \ + (sys_attr).objtype = (type); \ + (sys_attr).mask = PVFS_ATTR_SYS_ALL_SETABLE; \ +} while (0) + +#define pvfs2_inode_lock(__i) mutex_lock(&(__i)->i_mutex) + +#define pvfs2_inode_unlock(__i) mutex_unlock(&(__i)->i_mutex) + +/* + * Wrapper around i_size_write() that takes the inode's i_mutex around the + * update only when built for 32-bit SMP; otherwise it calls i_size_write() + * directly. + */ +static inline void pvfs2_i_size_write(struct inode *inode, loff_t i_size) +{ +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) + pvfs2_inode_lock(inode); +#endif + i_size_write(inode, i_size); +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) + pvfs2_inode_unlock(inode); +#endif +} + +static inline unsigned int diff(struct timeval *end, struct timeval 
*begin) +{ + if (end->tv_usec < begin->tv_usec) { + end->tv_usec += 1000000; + end->tv_sec--; + } + end->tv_sec -= begin->tv_sec; + end->tv_usec -= begin->tv_usec; + return (end->tv_sec * 1000000) + end->tv_usec; +} + +#endif /* __PVFS2KERNEL_H */ diff --git a/fs/orangefs/pvfs2-sysfs.h b/fs/orangefs/pvfs2-sysfs.h new file mode 100644 index 000000000000..f0b76382db02 --- /dev/null +++ b/fs/orangefs/pvfs2-sysfs.h @@ -0,0 +1,2 @@ +extern int orangefs_sysfs_init(void); +extern void orangefs_sysfs_exit(void); diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h new file mode 100644 index 000000000000..1e07f626aac6 --- /dev/null +++ b/fs/orangefs/upcall.h @@ -0,0 +1,255 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef __UPCALL_H +#define __UPCALL_H + +/* + * Sanitized this header file to fix + * 32-64 bit interaction issues between + * client-core and device + */ +struct pvfs2_io_request_s { + __s32 async_vfs_io; + __s32 buf_index; + __s32 count; + __s32 __pad1; + __s64 offset; + struct pvfs2_object_kref refn; + enum PVFS_io_type io_type; + __s32 readahead_size; +}; + +struct pvfs2_iox_request_s { + __s32 buf_index; + __s32 count; + struct pvfs2_object_kref refn; + enum PVFS_io_type io_type; + __s32 __pad1; +}; + +struct pvfs2_lookup_request_s { + __s32 sym_follow; + __s32 __pad1; + struct pvfs2_object_kref parent_refn; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_create_request_s { + struct pvfs2_object_kref parent_refn; + struct PVFS_sys_attr_s attributes; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_symlink_request_s { + struct pvfs2_object_kref parent_refn; + struct PVFS_sys_attr_s attributes; + char entry_name[PVFS2_NAME_LEN]; + char target[PVFS2_NAME_LEN]; +}; + +struct pvfs2_getattr_request_s { + struct pvfs2_object_kref refn; + __u32 mask; + __u32 __pad1; +}; + +struct pvfs2_setattr_request_s { + struct pvfs2_object_kref refn; + struct PVFS_sys_attr_s attributes; +}; + 
+struct pvfs2_remove_request_s {		/* pvfs2_upcall_s.req.remove */
+	struct pvfs2_object_kref parent_refn;
+	char d_name[PVFS2_NAME_LEN];
+};
+
+struct pvfs2_mkdir_request_s {		/* pvfs2_upcall_s.req.mkdir */
+	struct pvfs2_object_kref parent_refn;
+	struct PVFS_sys_attr_s attributes;
+	char d_name[PVFS2_NAME_LEN];
+};
+
+struct pvfs2_readdir_request_s {	/* pvfs2_upcall_s.req.readdir */
+	struct pvfs2_object_kref refn;
+	__u64 token;
+	__s32 max_dirent_count;
+	__s32 buf_index;
+};
+
+struct pvfs2_readdirplus_request_s {	/* pvfs2_upcall_s.req.readdirplus */
+	struct pvfs2_object_kref refn;
+	__u64 token;
+	__s32 max_dirent_count;
+	__u32 mask;
+	__s32 buf_index;
+	__s32 __pad1;	/* pad (presumably for 64-bit alignment) */
+};
+
+struct pvfs2_rename_request_s {		/* pvfs2_upcall_s.req.rename */
+	struct pvfs2_object_kref old_parent_refn;
+	struct pvfs2_object_kref new_parent_refn;
+	char d_old_name[PVFS2_NAME_LEN];
+	char d_new_name[PVFS2_NAME_LEN];
+};
+
+struct pvfs2_statfs_request_s {		/* pvfs2_upcall_s.req.statfs */
+	__s32 fs_id;
+	__s32 __pad1;	/* pad (presumably for 64-bit alignment) */
+};
+
+struct pvfs2_truncate_request_s {	/* pvfs2_upcall_s.req.truncate */
+	struct pvfs2_object_kref refn;
+	__s64 size;
+};
+
+struct pvfs2_mmap_ra_cache_flush_request_s {	/* req.ra_cache_flush */
+	struct pvfs2_object_kref refn;
+};
+
+struct pvfs2_fs_mount_request_s {	/* pvfs2_upcall_s.req.fs_mount */
+	char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN];
+};
+
+struct pvfs2_fs_umount_request_s {	/* pvfs2_upcall_s.req.fs_umount */
+	__s32 id;
+	__s32 fs_id;
+	char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN];
+};
+
+struct pvfs2_getxattr_request_s {	/* pvfs2_upcall_s.req.getxattr */
+	struct pvfs2_object_kref refn;
+	__s32 key_sz;
+	__s32 __pad1;	/* pad (presumably for 64-bit alignment) */
+	char key[PVFS_MAX_XATTR_NAMELEN];
+};
+
+struct pvfs2_setxattr_request_s {	/* pvfs2_upcall_s.req.setxattr */
+	struct pvfs2_object_kref refn;
+	struct PVFS_keyval_pair keyval;
+	__s32 flags;
+	__s32 __pad1;	/* pad (presumably for 64-bit alignment) */
+};
+
+struct pvfs2_listxattr_request_s {	/* pvfs2_upcall_s.req.listxattr */
+	struct pvfs2_object_kref refn;
+	__s32 requested_count;
+	__s32 __pad1;	/* pad (presumably for 64-bit alignment) */
+	__u64 token;
+};
+
+struct pvfs2_removexattr_request_s {	/* pvfs2_upcall_s.req.removexattr */
+	struct pvfs2_object_kref refn;
+	__s32 key_sz;
+	__s32 __pad1;	/* pad (presumably for 64-bit alignment) */
+	char key[PVFS_MAX_XATTR_NAMELEN];
+};
+
+struct pvfs2_op_cancel_s {		/* pvfs2_upcall_s.req.cancel */
+	__u64 op_tag;
+};
+
+struct pvfs2_fsync_request_s {		/* pvfs2_upcall_s.req.fsync */
+	struct pvfs2_object_kref refn;
+};
+
+enum pvfs2_param_request_type {		/* pvfs2_param_request_s.type */
+	PVFS2_PARAM_REQUEST_SET = 1,
+	PVFS2_PARAM_REQUEST_GET = 2
+};
+
+enum pvfs2_param_request_op {		/* pvfs2_param_request_s.op */
+	PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS = 1,
+	PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT = 2,
+	PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT = 3,
+	PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE = 4,
+	PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS = 5,
+	PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE = 6,
+	PVFS2_PARAM_REQUEST_OP_PERF_RESET = 7,
+	PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS = 8,
+	PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT = 9,
+	PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT = 10,
+	PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11,
+	PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS = 12,
+	PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT = 13,
+	PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT = 14,
+	PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE = 15,
+	PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG = 16,
+	PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS = 17,
+	PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT = 18,
+	PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT = 19,
+	PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE = 20,
+	PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS = 21,
+	PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT = 22,
+	PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT = 23,
+	PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE = 24,
+	PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES = 25,
+};
+
+struct pvfs2_param_request_s {		/* pvfs2_upcall_s.req.param */
+	enum pvfs2_param_request_type type;
+	enum pvfs2_param_request_op op;
+	__s64 value;
+	char s_value[PVFS2_MAX_DEBUG_STRING_LEN];
+};
+
+enum pvfs2_perf_count_request_type {	/* pvfs2_perf_count_request_s.type */
+	PVFS2_PERF_COUNT_REQUEST_ACACHE = 1,
+	PVFS2_PERF_COUNT_REQUEST_NCACHE = 2,
+	PVFS2_PERF_COUNT_REQUEST_CAPCACHE = 3,
+};
+
+struct pvfs2_perf_count_request_s {	/* pvfs2_upcall_s.req.perf_count */
+	enum pvfs2_perf_count_request_type type;
+	__s32 __pad1;	/* pad (presumably for 64-bit alignment) */
+};
+
+struct pvfs2_fs_key_request_s {		/* pvfs2_upcall_s.req.fs_key */
+	__s32 fsid;
+	__s32 __pad1;	/* pad (presumably for 64-bit alignment) */
+};
+
+struct pvfs2_upcall_s {	/* fixed fields, then a union of all requests */
+	__s32 type;	/* NOTE(review): presumably selects the req member */
+	__u32 uid;
+	__u32 gid;
+	int pid;
+	int tgid;
+	/* currently trailer is used only by readx/writex (iox) */
+	__s64 trailer_size;
+	char *trailer_buf;	/* NOTE(review): pointer, width is ABI-dependent */
+
+	union {		/* one member per request struct of this header */
+		struct pvfs2_io_request_s io;
+		struct pvfs2_iox_request_s iox;
+		struct pvfs2_lookup_request_s lookup;
+		struct pvfs2_create_request_s create;
+		struct pvfs2_symlink_request_s sym;
+		struct pvfs2_getattr_request_s getattr;
+		struct pvfs2_setattr_request_s setattr;
+		struct pvfs2_remove_request_s remove;
+		struct pvfs2_mkdir_request_s mkdir;
+		struct pvfs2_readdir_request_s readdir;
+		struct pvfs2_readdirplus_request_s readdirplus;
+		struct pvfs2_rename_request_s rename;
+		struct pvfs2_statfs_request_s statfs;
+		struct pvfs2_truncate_request_s truncate;
+		struct pvfs2_mmap_ra_cache_flush_request_s ra_cache_flush;
+		struct pvfs2_fs_mount_request_s fs_mount;
+		struct pvfs2_fs_umount_request_s fs_umount;
+		struct pvfs2_getxattr_request_s getxattr;
+		struct pvfs2_setxattr_request_s setxattr;
+		struct pvfs2_listxattr_request_s listxattr;
+		struct pvfs2_removexattr_request_s removexattr;
+		struct pvfs2_op_cancel_s cancel;
+		struct pvfs2_fsync_request_s fsync;
+		struct pvfs2_param_request_s param;
+		struct pvfs2_perf_count_request_s perf_count;
+		struct pvfs2_fs_key_request_s fs_key;
+	} req;
+};
+
+#endif /* __UPCALL_H */
|