From 91fbc6edfa7086b5fcdb74ea82ab747104541f1f Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 22 Sep 2005 13:26:44 +0100 Subject: NTFS: Fix sparse warnings that have crept in over time. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 4 ++++ fs/ntfs/layout.h | 7 ++++--- fs/ntfs/logfile.h | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index c7e9237379c2..ee8665f62a65 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -22,6 +22,10 @@ ToDo/Notes: - Enable the code for setting the NT4 compatibility flag when we start making NTFS 1.2 specific modifications. +2.1.25-WIP + + - Fix sparse warnings that have crept in over time. + 2.1.24 - Lots of bug fixes and support more clean journal states. - Support journals ($LogFile) which have been modified by chkdsk. This diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 609ad1728ce4..dbf5c2a06dab 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -317,12 +317,13 @@ typedef u64 MFT_REF; typedef le64 leMFT_REF; #define MK_MREF(m, s) ((MFT_REF)(((MFT_REF)(s) << 48) | \ - ((MFT_REF)(m) & MFT_REF_MASK_CPU))) + ((MFT_REF)(m) & (u64)MFT_REF_MASK_CPU))) #define MK_LE_MREF(m, s) cpu_to_le64(MK_MREF(m, s)) -#define MREF(x) ((unsigned long)((x) & MFT_REF_MASK_CPU)) +#define MREF(x) ((unsigned long)((x) & (u64)MFT_REF_MASK_CPU)) #define MSEQNO(x) ((u16)(((x) >> 48) & 0xffff)) -#define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & MFT_REF_MASK_CPU)) +#define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & \ + (u64)MFT_REF_MASK_CPU)) #define MSEQNO_LE(x) ((u16)((le64_to_cpu(x) >> 48) & 0xffff)) #define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? 1 : 0) diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h index 42388f95ea6d..a51f3dd0e9eb 100644 --- a/fs/ntfs/logfile.h +++ b/fs/ntfs/logfile.h @@ -113,7 +113,7 @@ typedef struct { */ enum { RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002), - RESTART_SPACE_FILLER = 0xffff, /* gcc: Force enum bit width to 16. */ + RESTART_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */ } __attribute__ ((__packed__)); typedef le16 RESTART_AREA_FLAGS; -- cgit v1.2.3 From 715dc636b64b57aee7aee7e8b5bf4f5267a6df48 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Fri, 23 Sep 2005 11:24:28 +0100 Subject: NTFS: Change ntfs_cluster_free() to require a write locked runlist on entry since we otherwise get into a lock reversal deadlock if a read locked runlist is passed in. In the process also change it to take an ntfs inode instead of a vfs inode as parameter. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 4 ++++ fs/ntfs/Makefile | 2 +- fs/ntfs/lcnalloc.c | 31 +++++++++++++------------------ fs/ntfs/lcnalloc.h | 27 +++++++++++++-------------- fs/ntfs/mft.c | 2 +- 5 files changed, 32 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index ee8665f62a65..574896f27c36 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -25,6 +25,10 @@ ToDo/Notes: 2.1.25-WIP - Fix sparse warnings that have crept in over time. + - Change ntfs_cluster_free() to require a write locked runlist on entry + since we otherwise get into a lock reversal deadlock if a read locked + runlist is passed in. In the process also change it to take an ntfs + inode instead of a vfs inode as parameter. 2.1.24 - Lots of bug fixes and support more clean journal states. diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 894b2b876d35..a3ce2c0e7dd9 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.25-WIP\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c index 7b5934290685..5af3bf0b7eee 100644 --- a/fs/ntfs/lcnalloc.c +++ b/fs/ntfs/lcnalloc.c @@ -779,14 +779,13 @@ out: /** * __ntfs_cluster_free - free clusters on an ntfs volume - * @vi: vfs inode whose runlist describes the clusters to free - * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters + * @ni: ntfs inode whose runlist describes the clusters to free + * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters * @count: number of clusters to free or -1 for all clusters - * @write_locked: true if the runlist is locked for writing * @is_rollback: true if this is a rollback operation * * Free @count clusters starting at the cluster @start_vcn in the runlist - * described by the vfs inode @vi. + * described by the vfs inode @ni. * * If @count is -1, all clusters from @start_vcn to the end of the runlist are * deallocated. Thus, to completely free all clusters in a runlist, use @@ -801,31 +800,28 @@ out: * Return the number of deallocated clusters (not counting sparse ones) on * success and -errno on error. * - * Locking: - The runlist described by @vi must be locked on entry and is - * locked on return. Note if the runlist is locked for reading the - * lock may be dropped and reacquired. Note the runlist may be - * modified when needed runlist fragments need to be mapped. + * Locking: - The runlist described by @ni must be locked for writing on entry + * and is locked on return. Note the runlist may be modified when + * needed runlist fragments need to be mapped. * - The volume lcn bitmap must be unlocked on entry and is unlocked * on return. * - This function takes the volume lcn bitmap lock for writing and * modifies the bitmap contents. */ -s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, - const BOOL write_locked, const BOOL is_rollback) +s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count, + const BOOL is_rollback) { s64 delta, to_free, total_freed, real_freed; - ntfs_inode *ni; ntfs_volume *vol; struct inode *lcnbmp_vi; runlist_element *rl; int err; - BUG_ON(!vi); + BUG_ON(!ni); ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count " - "0x%llx.%s", vi->i_ino, (unsigned long long)start_vcn, + "0x%llx.%s", ni->mft_no, (unsigned long long)start_vcn, (unsigned long long)count, is_rollback ? " (rollback)" : ""); - ni = NTFS_I(vi); vol = ni->vol; lcnbmp_vi = vol->lcnbmp_ino; BUG_ON(!lcnbmp_vi); @@ -843,7 +839,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, total_freed = real_freed = 0; - rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, write_locked); + rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, TRUE); if (IS_ERR(rl)) { if (!is_rollback) ntfs_error(vol->sb, "Failed to find first runlist " @@ -897,7 +893,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, /* Attempt to map runlist. */ vcn = rl->vcn; - rl = ntfs_attr_find_vcn_nolock(ni, vcn, write_locked); + rl = ntfs_attr_find_vcn_nolock(ni, vcn, TRUE); if (IS_ERR(rl)) { err = PTR_ERR(rl); if (!is_rollback) @@ -965,8 +961,7 @@ err_out: * If rollback fails, set the volume errors flag, emit an error * message, and return the error code. */ - delta = __ntfs_cluster_free(vi, start_vcn, total_freed, write_locked, - TRUE); + delta = __ntfs_cluster_free(ni, start_vcn, total_freed, TRUE); if (delta < 0) { ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " "inconsistent metadata! Unmount and run " diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h index e4d7fb98d685..a6a8827882e7 100644 --- a/fs/ntfs/lcnalloc.h +++ b/fs/ntfs/lcnalloc.h @@ -2,7 +2,7 @@ * lcnalloc.h - Exports for NTFS kernel cluster (de)allocation. Part of the * Linux-NTFS project. * - * Copyright (c) 2004 Anton Altaparmakov + * Copyright (c) 2004-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -28,6 +28,7 @@ #include #include "types.h" +#include "inode.h" #include "runlist.h" #include "volume.h" @@ -42,18 +43,17 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, const s64 count, const LCN start_lcn, const NTFS_CLUSTER_ALLOCATION_ZONES zone); -extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, - s64 count, const BOOL write_locked, const BOOL is_rollback); +extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, + s64 count, const BOOL is_rollback); /** * ntfs_cluster_free - free clusters on an ntfs volume - * @vi: vfs inode whose runlist describes the clusters to free - * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters + * @ni: ntfs inode whose runlist describes the clusters to free + * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters * @count: number of clusters to free or -1 for all clusters - * @write_locked: true if the runlist is locked for writing * * Free @count clusters starting at the cluster @start_vcn in the runlist - * described by the vfs inode @vi. + * described by the ntfs inode @ni. * * If @count is -1, all clusters from @start_vcn to the end of the runlist are * deallocated. Thus, to completely free all clusters in a runlist, use @@ -65,19 +65,18 @@ extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, * Return the number of deallocated clusters (not counting sparse ones) on * success and -errno on error. * - * Locking: - The runlist described by @vi must be locked on entry and is - * locked on return. Note if the runlist is locked for reading the - * lock may be dropped and reacquired. Note the runlist may be - * modified when needed runlist fragments need to be mapped. + * Locking: - The runlist described by @ni must be locked for writing on entry + * and is locked on return. Note the runlist may be modified when + * needed runlist fragments need to be mapped. * - The volume lcn bitmap must be unlocked on entry and is unlocked * on return. * - This function takes the volume lcn bitmap lock for writing and * modifies the bitmap contents. */ -static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn, - s64 count, const BOOL write_locked) +static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, + s64 count) { - return __ntfs_cluster_free(vi, start_vcn, count, write_locked, FALSE); + return __ntfs_cluster_free(ni, start_vcn, count, FALSE); } extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 2c32b84385a8..247586d1d5dc 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1953,7 +1953,7 @@ restore_undo_alloc: a = ctx->attr; a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); undo_alloc: - if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1, TRUE) < 0) { + if (ntfs_cluster_free(mft_ni, old_last_vcn, -1) < 0) { ntfs_error(vol->sb, "Failed to free clusters from mft data " "attribute.%s", es); NVolSetErrors(vol); -- cgit v1.2.3 From ede1327ea4ca8019ec6df24b3e837def091c26b8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 30 Aug 2005 20:10:14 -0700 Subject: [PATCH] cifs: Add support for suspend cifsd had been preventing software suspend from completing. Signed-off-by: pavel@suse.de Signed-off-by: Steve French lightly modified Signed-off-by: Linus Torvalds --- fs/cifs/cifsfs.c | 2 ++ fs/cifs/connect.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8cc23e7d0d5d..1ebf7dafc1d7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -781,6 +781,8 @@ static int cifs_oplock_thread(void * dummyarg) oplockThread = current; do { + if (try_to_freeze()) + continue; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1*HZ); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2335f14a1583..47360156cc54 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -344,6 +344,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) } while (server->tcpStatus != CifsExiting) { + if (try_to_freeze()) + continue; if (bigbuf == NULL) { bigbuf = cifs_buf_get(); if(bigbuf == NULL) { -- cgit v1.2.3 From 838bf9675a3d1ede01408aa105357b9ab43faf1b Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Mon, 26 Sep 2005 10:45:46 +0100 Subject: NTFS: Fix the definition of the CHKD ntfs record magic. It had an off by two error causing it to be CHKB instead of CHKD. Signed-off-by: Anton Altaparmakov --- fs/ntfs/layout.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index dbf5c2a06dab..d5491de6abf4 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -123,7 +123,7 @@ enum { magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */ /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */ - magic_CHKD = const_cpu_to_le32(0x424b4843), /* Modified by chkdsk. */ + magic_CHKD = const_cpu_to_le32(0x444b4843), /* Modified by chkdsk. */ /* Found in all ntfs record containing records. */ magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector -- cgit v1.2.3 From 5a8c0cc32bb6e029cd9c36f655c6b0955b0d9967 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Mon, 26 Sep 2005 10:48:54 +0100 Subject: NTFS: More $LogFile handling fixes: when chkdsk has been run, it can leave the restart pages in the journal without multi sector transfer protection fixups (i.e. the update sequence array is empty and in fact does not exist). Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 18 +++++++++--------- fs/ntfs/Makefile | 2 +- fs/ntfs/logfile.c | 30 +++++++++++++++++++++++++----- 3 files changed, 35 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 574896f27c36..83f3322765cd 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -22,14 +22,6 @@ ToDo/Notes: - Enable the code for setting the NT4 compatibility flag when we start making NTFS 1.2 specific modifications. -2.1.25-WIP - - - Fix sparse warnings that have crept in over time. - - Change ntfs_cluster_free() to require a write locked runlist on entry - since we otherwise get into a lock reversal deadlock if a read locked - runlist is passed in. In the process also change it to take an ntfs - inode instead of a vfs inode as parameter. - 2.1.24 - Lots of bug fixes and support more clean journal states. - Support journals ($LogFile) which have been modified by chkdsk. This @@ -37,7 +29,8 @@ ToDo/Notes: The Windows boot will run chkdsk and then reboot. The user can then immediately boot into Linux rather than having to do a full Windows boot first before rebooting into Linux and we will recognize such a - journal and empty it as it is clean by definition. + journal and empty it as it is clean by definition. Note, this only + works if chkdsk left the journal in an obviously clean state. - Support journals ($LogFile) with only one restart page as well as journals with two different restart pages. We sanity check both and either use the only sane one or the more recent one of the two in the @@ -102,6 +95,13 @@ ToDo/Notes: my ways. - Fix various bugs in the runlist merging code. (Based on libntfs changes by Richard Russon.) + - Fix sparse warnings that have crept in over time. + - Change ntfs_cluster_free() to require a write locked runlist on entry + since we otherwise get into a lock reversal deadlock if a read locked + runlist is passed in. In the process also change it to take an ntfs + inode instead of a vfs inode as parameter. + - Fix the definition of the CHKD ntfs record magic. It had an off by + two error causing it to be CHKB instead of CHKD. 2.1.23 - Implement extension of resident files and make writing safe as well as many bug fixes, cleanups, and enhancements... diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index a3ce2c0e7dd9..894b2b876d35 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.25-WIP\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 0173e95500d9..0fd70295cca6 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -51,7 +51,8 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, RESTART_PAGE_HEADER *rp, s64 pos) { u32 logfile_system_page_size, logfile_log_page_size; - u16 usa_count, usa_ofs, usa_end, ra_ofs; + u16 ra_ofs, usa_count, usa_ofs, usa_end = 0; + BOOL have_usa = TRUE; ntfs_debug("Entering."); /* @@ -86,6 +87,14 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, (int)sle16_to_cpu(rp->minor_ver)); return FALSE; } + /* + * If chkdsk has been run the restart page may not be protected by an + * update sequence array. + */ + if (ntfs_is_chkd_record(rp->magic) && !le16_to_cpu(rp->usa_count)) { + have_usa = FALSE; + goto skip_usa_checks; + } /* Verify the size of the update sequence array. */ usa_count = 1 + (logfile_system_page_size >> NTFS_BLOCK_SIZE_BITS); if (usa_count != le16_to_cpu(rp->usa_count)) { @@ -102,6 +111,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, "inconsistent update sequence array offset."); return FALSE; } +skip_usa_checks: /* * Verify the position of the restart area. It must be: * - aligned to 8-byte boundary, @@ -109,7 +119,8 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, * - within the system page size. */ ra_ofs = le16_to_cpu(rp->restart_area_offset); - if (ra_ofs & 7 || ra_ofs < usa_end || + if (ra_ofs & 7 || (have_usa ? ra_ofs < usa_end : + ra_ofs < sizeof(RESTART_PAGE_HEADER)) || ra_ofs > logfile_system_page_size) { ntfs_error(vi->i_sb, "$LogFile restart page specifies " "inconsistent restart area offset."); @@ -402,8 +413,12 @@ static int ntfs_check_and_load_restart_page(struct inode *vi, idx++; } while (to_read > 0); } - /* Perform the multi sector transfer deprotection on the buffer. */ - if (post_read_mst_fixup((NTFS_RECORD*)trp, + /* + * Perform the multi sector transfer deprotection on the buffer if the + * restart page is protected. + */ + if ((!ntfs_is_chkd_record(trp->magic) || le16_to_cpu(trp->usa_count)) + && post_read_mst_fixup((NTFS_RECORD*)trp, le32_to_cpu(rp->system_page_size))) { /* * A multi sector tranfer error was detected. We only need to @@ -615,11 +630,16 @@ is_empty: * Otherwise just throw it away. */ if (rstr2_lsn > rstr1_lsn) { + ntfs_debug("Using second restart page as it is more " + "recent."); ntfs_free(rstr1_ph); rstr1_ph = rstr2_ph; /* rstr1_lsn = rstr2_lsn; */ - } else + } else { + ntfs_debug("Using first restart page as it is more " + "recent."); ntfs_free(rstr2_ph); + } rstr2_ph = NULL; } /* All consistency checks passed. */ -- cgit v1.2.3 From e2fcc61ef0d654887b651bd99ffcb52f7344b836 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Mon, 26 Sep 2005 17:02:41 +0100 Subject: NTFS: Re-fix sparse warnings in a more correct way, i.e. don't use an enum with different types in it but #define the two constants instead. Signed-off-by: Anton Altaparmakov --- fs/ntfs/layout.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index d5491de6abf4..01f2dfa39cec 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -308,22 +308,19 @@ typedef le16 MFT_RECORD_FLAGS; * The _LE versions are to be applied on little endian MFT_REFs. * Note: The _LE versions will return a CPU endian formatted value! */ -typedef enum { - MFT_REF_MASK_CPU = 0x0000ffffffffffffULL, - MFT_REF_MASK_LE = const_cpu_to_le64(0x0000ffffffffffffULL), -} MFT_REF_CONSTS; +#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL +#define MFT_REF_MASK_LE const_cpu_to_le64(0x0000ffffffffffffULL) typedef u64 MFT_REF; typedef le64 leMFT_REF; #define MK_MREF(m, s) ((MFT_REF)(((MFT_REF)(s) << 48) | \ - ((MFT_REF)(m) & (u64)MFT_REF_MASK_CPU))) + ((MFT_REF)(m) & MFT_REF_MASK_CPU))) #define MK_LE_MREF(m, s) cpu_to_le64(MK_MREF(m, s)) -#define MREF(x) ((unsigned long)((x) & (u64)MFT_REF_MASK_CPU)) +#define MREF(x) ((unsigned long)((x) & MFT_REF_MASK_CPU)) #define MSEQNO(x) ((u16)(((x) >> 48) & 0xffff)) -#define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & \ - (u64)MFT_REF_MASK_CPU)) +#define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & MFT_REF_MASK_CPU)) #define MSEQNO_LE(x) ((u16)((le64_to_cpu(x) >> 48) & 0xffff)) #define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? 1 : 0) -- cgit v1.2.3 From 909021ea7a8f4ef13af54935b87b03a20906e08a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2005 21:45:20 -0700 Subject: [PATCH] fuse: add required version info Add information about required version of the userspace library/utilities to Documentation/Changes. Also add pointer to this and to FUSE documentation from Kconfig. Thanks to Anton Altaparmakov for the reminder. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 068ccea2f184..48f5422cb19a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -472,6 +472,9 @@ config FUSE_FS utilities is available from the FUSE homepage: + See for more information. + See for needed library/utility version. + If you want to develop a userspace FS, or if you want to use a filesystem based on FUSE, answer Y or M. -- cgit v1.2.3 From ee4e52719ce474af339f4b81ece2ce9ecf920dfd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2005 21:45:21 -0700 Subject: [PATCH] fuse: check reserved node ID values This patch checks reserved node ID values returned by lookup and creation operations. In case one of the reserved values is sent, return -EIO. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dir.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e79e49b3eec7..29f1e9f6e85c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -96,6 +96,8 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry, fuse_lookup_init(req, dir, entry, &outarg); request_send(fc, req); err = req->out.h.error; + if (!err && (!outarg.nodeid || outarg.nodeid == FUSE_ROOT_ID)) + err = -EIO; if (!err) { inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr); @@ -152,6 +154,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, fuse_put_request(fc, req); return err; } + if (!outarg.nodeid || outarg.nodeid == FUSE_ROOT_ID) { + fuse_put_request(fc, req); + return -EIO; + } inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr); if (!inode) { -- cgit v1.2.3 From 9ed6c2fb34a1fb493caec8a9644d05bb880a6923 Mon Sep 17 00:00:00 2001 From: Chris Sykes Date: Tue, 27 Sep 2005 21:45:22 -0700 Subject: [PATCH] Fix ext2_new_inode() failure paths Fix failure paths in ext2_new_inode() and clean up duplicated code: - DQUOT_DROP() was not being called if ext2_init_security() failed. Signed-off-by: Chris Sykes Cc: Stephen Smalley Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/ialloc.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index c8d07030c897..e2d6208633a7 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -605,27 +605,28 @@ got: insert_inode_hash(inode); if (DQUOT_ALLOC_INODE(inode)) { - DQUOT_DROP(inode); err = -ENOSPC; - goto fail2; + goto fail_drop; } + err = ext2_init_acl(inode, dir); - if (err) { - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - goto fail2; - } + if (err) + goto fail_free_drop; + err = ext2_init_security(inode,dir); - if (err) { - DQUOT_FREE_INODE(inode); - goto fail2; - } + if (err) + goto fail_free_drop; + mark_inode_dirty(inode); ext2_debug("allocating inode %lu\n", inode->i_ino); ext2_preread_inode(inode); return inode; -fail2: +fail_free_drop: + DQUOT_FREE_INODE(inode); + +fail_drop: + DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); -- cgit v1.2.3 From dc7b5fd6b0d3beb41f9e5e3a94dd1eadf52209f3 Mon Sep 17 00:00:00 2001 From: Chris Sykes Date: Tue, 27 Sep 2005 21:45:23 -0700 Subject: [PATCH] Fix ext3_new_inode() failure paths Fix failure paths in ext3_new_inode() and clean up duplicated code: - DQUOT_DROP() was not being called if ext3_init_security() failed. Signed-off-by: Chris Sykes Cc: Stephen Smalley Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/ialloc.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 96552769d039..6549945f9ac1 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -597,27 +597,22 @@ got: ret = inode; if(DQUOT_ALLOC_INODE(inode)) { - DQUOT_DROP(inode); err = -EDQUOT; - goto fail2; + goto fail_drop; } + err = ext3_init_acl(handle, inode, dir); - if (err) { - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - goto fail2; - } + if (err) + goto fail_free_drop; + err = ext3_init_security(handle,inode, dir); - if (err) { - DQUOT_FREE_INODE(inode); - goto fail2; - } + if (err) + goto fail_free_drop; + err = ext3_mark_inode_dirty(handle, inode); if (err) { ext3_std_error(sb, err); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - goto fail2; + goto fail_free_drop; } ext3_debug("allocating inode %lu\n", inode->i_ino); @@ -631,7 +626,11 @@ really_out: brelse(bitmap_bh); return ret; -fail2: +fail_free_drop: + DQUOT_FREE_INODE(inode); + +fail_drop: + DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); -- cgit v1.2.3 From 0b8dd17762194ec77066d339e0b2866b0c66b715 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Tue, 27 Sep 2005 21:45:24 -0700 Subject: [PATCH] v9fs: fix races in fid allocation Fid management cleanup. The patch attempts to fix the races in dentry's fid management. Dentries don't keep the opened fids anymore, they are moved to the file structs. Ideally there should be no more than one fid with fidcreate equal to zero in the dentry's list of fids. v9fs_fid_create initializes the important fields (fid, fidcreated) before v9fs_fid is added to the list. v9fs_fid_lookup returns only fids that are not created by v9fs_create. v9fs_fid_get_created returns the fid created by the same process by v9fs_create (if any) and removes it from dentry's list Signed-off-by: Latchesar Ionkov Cc: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/fid.c | 176 +++++++++++++++++++++++++++++------------------------ fs/9p/fid.h | 7 ++- fs/9p/vfs_dentry.c | 2 +- fs/9p/vfs_dir.c | 11 ++-- fs/9p/vfs_file.c | 88 +++++++++------------------ fs/9p/vfs_inode.c | 91 +++++++++++++++++---------- fs/9p/vfs_super.c | 21 +++---- 7 files changed, 200 insertions(+), 196 deletions(-) (limited to 'fs') diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 821c9c4d76aa..d95f8626d170 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -71,21 +71,28 @@ static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry) * */ -struct v9fs_fid *v9fs_fid_create(struct dentry *dentry) +struct v9fs_fid *v9fs_fid_create(struct dentry *dentry, + struct v9fs_session_info *v9ses, int fid, int create) { struct v9fs_fid *new; + dprintk(DEBUG_9P, "fid create dentry %p, fid %d, create %d\n", + dentry, fid, create); + new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); if (new == NULL) { dprintk(DEBUG_ERROR, "Out of Memory\n"); return ERR_PTR(-ENOMEM); } - new->fid = -1; + new->fid = fid; + new->v9ses = v9ses; new->fidopen = 0; - new->fidcreate = 0; + new->fidcreate = create; new->fidclunked = 0; new->iounit = 0; + new->rdir_pos = 0; + new->rdir_fcall = NULL; if (v9fs_fid_insert(new, dentry) == 0) return new; @@ -108,6 +115,59 @@ void v9fs_fid_destroy(struct v9fs_fid *fid) kfree(fid); } +/** + * v9fs_fid_walk_up - walks from the process current directory + * up to the specified dentry. + */ +static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry) +{ + int fidnum, cfidnum, err; + struct v9fs_fid *cfid; + struct dentry *cde; + struct v9fs_session_info *v9ses; + + v9ses = v9fs_inode2v9ses(current->fs->pwd->d_inode); + cfid = v9fs_fid_lookup(current->fs->pwd); + if (cfid == NULL) { + dprintk(DEBUG_ERROR, "process cwd doesn't have a fid\n"); + return ERR_PTR(-ENOENT); + } + + cfidnum = cfid->fid; + cde = current->fs->pwd; + /* TODO: take advantage of multiwalk */ + + fidnum = v9fs_get_idpool(&v9ses->fidpool); + if (fidnum < 0) { + dprintk(DEBUG_ERROR, "could not get a new fid num\n"); + err = -ENOENT; + goto clunk_fid; + } + + while (cde != dentry) { + if (cde == cde->d_parent) { + dprintk(DEBUG_ERROR, "can't find dentry\n"); + err = -ENOENT; + goto clunk_fid; + } + + err = v9fs_t_walk(v9ses, cfidnum, fidnum, "..", NULL); + if (err < 0) { + dprintk(DEBUG_ERROR, "problem walking to parent\n"); + goto clunk_fid; + } + + cfidnum = fidnum; + cde = cde->d_parent; + } + + return v9fs_fid_create(dentry, v9ses, fidnum, 0); + +clunk_fid: + v9fs_t_clunk(v9ses, fidnum, NULL); + return ERR_PTR(err); +} + /** * v9fs_fid_lookup - retrieve the right fid from a particular dentry * @dentry: dentry to look for fid in @@ -119,49 +179,25 @@ void v9fs_fid_destroy(struct v9fs_fid *fid) * */ -struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type) +struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry) { struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; struct v9fs_fid *current_fid = NULL; struct v9fs_fid *temp = NULL; struct v9fs_fid *return_fid = NULL; - int found_parent = 0; - int found_user = 0; - dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry, - type); + dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry); - if (fid_list && !list_empty(fid_list)) { + if (fid_list) { list_for_each_entry_safe(current_fid, temp, fid_list, list) { - if (current_fid->uid == current->uid) { - if (return_fid == NULL) { - if ((type == FID_OP) - || (!current_fid->fidopen)) { - return_fid = current_fid; - found_user = 1; - } - } - } - if (current_fid->pid == current->real_parent->pid) { - if ((return_fid == NULL) || (found_parent) - || (found_user)) { - if ((type == FID_OP) - || (!current_fid->fidopen)) { - return_fid = current_fid; - found_parent = 1; - found_user = 0; - } - } - } - if (current_fid->pid == current->pid) { - if ((type == FID_OP) || - (!current_fid->fidopen)) { - return_fid = current_fid; - found_parent = 0; - found_user = 0; - } + if (!current_fid->fidcreate) { + return_fid = current_fid; + break; } } + + if (!return_fid) + return_fid = current_fid; } /* we are at the root but didn't match */ @@ -187,55 +223,33 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type) /* XXX - there may be some duplication we can get rid of */ if (par == dentry) { - /* we need to fid_lookup the starting point */ - int fidnum = -1; - int oldfid = -1; - int result = -1; - struct v9fs_session_info *v9ses = - v9fs_inode2v9ses(current->fs->pwd->d_inode); - - current_fid = - v9fs_fid_lookup(current->fs->pwd, FID_WALK); - if (current_fid == NULL) { - dprintk(DEBUG_ERROR, - "process cwd doesn't have a fid\n"); - return return_fid; - } - oldfid = current_fid->fid; - par = current->fs->pwd; - /* TODO: take advantage of multiwalk */ + return_fid = v9fs_fid_walk_up(dentry); + if (IS_ERR(return_fid)) + return_fid = NULL; + } + } - fidnum = v9fs_get_idpool(&v9ses->fidpool); - if (fidnum < 0) { - dprintk(DEBUG_ERROR, - "could not get a new fid num\n"); - return return_fid; - } + return return_fid; +} - while (par != dentry) { - result = - v9fs_t_walk(v9ses, oldfid, fidnum, "..", - NULL); - if (result < 0) { - dprintk(DEBUG_ERROR, - "problem walking to parent\n"); - - break; - } - oldfid = fidnum; - if (par == par->d_parent) { - dprintk(DEBUG_ERROR, - "can't find dentry\n"); - break; - } - par = par->d_parent; - } - if (par == dentry) { - return_fid = v9fs_fid_create(dentry); - return_fid->fid = fidnum; +struct v9fs_fid *v9fs_fid_get_created(struct dentry *dentry) +{ + struct list_head *fid_list; + struct v9fs_fid *fid, *ftmp, *ret; + + dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry); + fid_list = (struct list_head *)dentry->d_fsdata; + ret = NULL; + if (fid_list) { + list_for_each_entry_safe(fid, ftmp, fid_list, list) { + if (fid->fidcreate && fid->pid == current->pid) { + list_del(&fid->list); + ret = fid; + break; } } } - return return_fid; + dprintk(DEBUG_9P, "return %p\n", ret); + return ret; } diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 7db478ccca36..84c673a44c83 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -25,6 +25,7 @@ #define FID_OP 0 #define FID_WALK 1 +#define FID_CREATE 2 struct v9fs_fid { struct list_head list; /* list of fids associated with a dentry */ @@ -52,6 +53,8 @@ struct v9fs_fid { struct v9fs_session_info *v9ses; /* session info for this FID */ }; -struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type); +struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry); +struct v9fs_fid *v9fs_fid_get_created(struct dentry *); void v9fs_fid_destroy(struct v9fs_fid *fid); -struct v9fs_fid *v9fs_fid_create(struct dentry *); +struct v9fs_fid *v9fs_fid_create(struct dentry *, + struct v9fs_session_info *v9ses, int fid, int create); diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 306c96741f81..a6aa947de0f9 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -67,7 +67,7 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd) struct dentry *dc = current->fs->pwd; dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry); - if (v9fs_fid_lookup(dentry, FID_OP)) { + if (v9fs_fid_lookup(dentry)) { dprintk(DEBUG_VFS, "VALID\n"); return 1; } diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index c478a7384186..57a43b8feef5 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -197,21 +197,18 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) filemap_fdatawait(inode->i_mapping); if (fidnum >= 0) { - fid->fidopen--; dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen, fid->fid); - if (fid->fidopen == 0) { - if (v9fs_t_clunk(v9ses, fidnum, NULL)) - dprintk(DEBUG_ERROR, "clunk failed\n"); + if (v9fs_t_clunk(v9ses, fidnum, NULL)) + dprintk(DEBUG_ERROR, "clunk failed\n"); - v9fs_put_idpool(fid->fid, &v9ses->fidpool); - } + v9fs_put_idpool(fid->fid, &v9ses->fidpool); kfree(fid->rdir_fcall); + kfree(fid); filp->private_data = NULL; - v9fs_fid_destroy(fid); } d_drop(filp->f_dentry); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 1f8ae7d580ab..a4799e971d1c 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -53,30 +53,36 @@ int v9fs_file_open(struct inode *inode, struct file *file) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK); - struct v9fs_fid *v9newfid = NULL; + struct v9fs_fid *v9fid, *fid; struct v9fs_fcall *fcall = NULL; int open_mode = 0; unsigned int iounit = 0; int newfid = -1; long result = -1; - dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file, - v9fid); + dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file); + + v9fid = v9fs_fid_get_created(file->f_dentry); + if (!v9fid) + v9fid = v9fs_fid_lookup(file->f_dentry); if (!v9fid) { - struct dentry *dentry = file->f_dentry; dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n"); + return -EBADF; + } - /* XXX - some duplication from lookup, generalize later */ - /* basically vfs_lookup is too heavy weight */ - v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP); - if (!v9fid) - return -EBADF; + if (!v9fid->fidcreate) { + fid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); + if (fid == NULL) { + dprintk(DEBUG_ERROR, "Out of Memory\n"); + return -ENOMEM; + } - v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK); - if (!v9fid) - return -EBADF; + fid->fidopen = 0; + fid->fidcreate = 0; + fid->fidclunked = 0; + fid->iounit = 0; + fid->v9ses = v9ses; newfid = v9fs_get_idpool(&v9ses->fidpool); if (newfid < 0) { @@ -85,58 +91,16 @@ int v9fs_file_open(struct inode *inode, struct file *file) } result = - v9fs_t_walk(v9ses, v9fid->fid, newfid, - (char *)file->f_dentry->d_name.name, NULL); + v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL, NULL); + if (result < 0) { v9fs_put_idpool(newfid, &v9ses->fidpool); dprintk(DEBUG_ERROR, "rewalk didn't work\n"); return -EBADF; } - v9fid = v9fs_fid_create(dentry); - if (v9fid == NULL) { - dprintk(DEBUG_ERROR, "couldn't insert\n"); - return -ENOMEM; - } - v9fid->fid = newfid; - } - - if (v9fid->fidcreate) { - /* create case */ - newfid = v9fid->fid; - iounit = v9fid->iounit; - v9fid->fidcreate = 0; - } else { - if (!S_ISDIR(inode->i_mode)) - newfid = v9fid->fid; - else { - newfid = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { - eprintk(KERN_WARNING, "allocation failed\n"); - return -ENOSPC; - } - /* This would be a somewhat critical clone */ - result = - v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL, - &fcall); - if (result < 0) { - dprintk(DEBUG_ERROR, "clone error: %s\n", - FCALL_ERROR(fcall)); - kfree(fcall); - return result; - } - - v9newfid = v9fs_fid_create(file->f_dentry); - v9newfid->fid = newfid; - v9newfid->qid = v9fid->qid; - v9newfid->iounit = v9fid->iounit; - v9newfid->fidopen = 0; - v9newfid->fidclunked = 0; - v9newfid->v9ses = v9ses; - v9fid = v9newfid; - kfree(fcall); - } - + fid->fid = newfid; + v9fid = fid; /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ /* translate open mode appropriately */ open_mode = file->f_flags & 0x3; @@ -163,9 +127,13 @@ int v9fs_file_open(struct inode *inode, struct file *file) iounit = fcall->params.ropen.iounit; kfree(fcall); + } else { + /* create case */ + newfid = v9fid->fid; + iounit = v9fid->iounit; + v9fid->fidcreate = 0; } - file->private_data = v9fid; v9fid->rdir_pos = 0; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index b16322db5ce6..2b696ae6655a 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -307,7 +307,7 @@ v9fs_create(struct inode *dir, struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); struct super_block *sb = dir->i_sb; struct v9fs_fid *dirfid = - v9fs_fid_lookup(file_dentry->d_parent, FID_WALK); + v9fs_fid_lookup(file_dentry->d_parent); struct v9fs_fid *fid = NULL; struct inode *file_inode = NULL; struct v9fs_fcall *fcall = NULL; @@ -317,6 +317,7 @@ v9fs_create(struct inode *dir, long newfid = -1; int result = 0; unsigned int iounit = 0; + int wfidno = -1; perm = unixmode2p9mode(v9ses, perm); @@ -350,7 +351,7 @@ v9fs_create(struct inode *dir, if (result < 0) { dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); v9fs_put_idpool(newfid, &v9ses->fidpool); - newfid = 0; + newfid = -1; goto CleanUpFid; } @@ -369,20 +370,39 @@ v9fs_create(struct inode *dir, qid = fcall->params.rcreate.qid; kfree(fcall); - fid = v9fs_fid_create(file_dentry); + fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1); + dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate); if (!fid) { result = -ENOMEM; goto CleanUpFid; } - fid->fid = newfid; - fid->fidopen = 0; - fid->fidcreate = 1; fid->qid = qid; fid->iounit = iounit; - fid->rdir_pos = 0; - fid->rdir_fcall = NULL; - fid->v9ses = v9ses; + + /* walk to the newly created file and put the fid in the dentry */ + wfidno = v9fs_get_idpool(&v9ses->fidpool); + if (newfid < 0) { + eprintk(KERN_WARNING, "no free fids available\n"); + return -ENOSPC; + } + + result = v9fs_t_walk(v9ses, dirfidnum, wfidno, + (char *) file_dentry->d_name.name, NULL); + if (result < 0) { + dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); + v9fs_put_idpool(wfidno, &v9ses->fidpool); + wfidno = -1; + goto CleanUpFid; + } + + if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) { + if (!v9fs_t_clunk(v9ses, newfid, &fcall)) { + v9fs_put_idpool(wfidno, &v9ses->fidpool); + } + + goto CleanUpFid; + } if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) || (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) || @@ -410,11 +430,11 @@ v9fs_create(struct inode *dir, d_instantiate(file_dentry, file_inode); if (perm & V9FS_DMDIR) { - if (v9fs_t_clunk(v9ses, newfid, &fcall)) + if (!v9fs_t_clunk(v9ses, newfid, &fcall)) + v9fs_put_idpool(newfid, &v9ses->fidpool); + else dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n", FCALL_ERROR(fcall)); - - v9fs_put_idpool(newfid, &v9ses->fidpool); kfree(fcall); fid->fidopen = 0; fid->fidcreate = 0; @@ -426,12 +446,22 @@ v9fs_create(struct inode *dir, CleanUpFid: kfree(fcall); - if (newfid) { - if (v9fs_t_clunk(v9ses, newfid, &fcall)) + if (newfid >= 0) { + if (!v9fs_t_clunk(v9ses, newfid, &fcall)) + v9fs_put_idpool(newfid, &v9ses->fidpool); + else + dprintk(DEBUG_ERROR, "clunk failed: %s\n", + FCALL_ERROR(fcall)); + + kfree(fcall); + } + if (wfidno >= 0) { + if (!v9fs_t_clunk(v9ses, wfidno, &fcall)) + v9fs_put_idpool(wfidno, &v9ses->fidpool); + else dprintk(DEBUG_ERROR, "clunk failed: %s\n", FCALL_ERROR(fcall)); - v9fs_put_idpool(newfid, &v9ses->fidpool); kfree(fcall); } return result; @@ -461,7 +491,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) file_inode = file->d_inode; sb = file_inode->i_sb; v9ses = v9fs_inode2v9ses(file_inode); - v9fid = v9fs_fid_lookup(file, FID_OP); + v9fid = v9fs_fid_lookup(file); if (!v9fid) { dprintk(DEBUG_ERROR, @@ -545,7 +575,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; v9ses = v9fs_inode2v9ses(dir); - dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK); + dirfid = v9fs_fid_lookup(dentry->d_parent); if (!dirfid) { dprintk(DEBUG_ERROR, "no dirfid\n"); @@ -573,7 +603,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, v9fs_put_idpool(newfid, &v9ses->fidpool); if (result == -ENOENT) { d_add(dentry, NULL); - dprintk(DEBUG_ERROR, + dprintk(DEBUG_VFS, "Return negative dentry %p count %d\n", dentry, atomic_read(&dentry->d_count)); return NULL; @@ -601,16 +631,13 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid); - fid = v9fs_fid_create(dentry); + fid = v9fs_fid_create(dentry, v9ses, newfid, 0); if (fid == NULL) { dprintk(DEBUG_ERROR, "couldn't insert\n"); result = -ENOMEM; goto FreeFcall; } - fid->fid = newfid; - fid->fidopen = 0; - fid->v9ses = v9ses; fid->qid = fcall->params.rstat.stat->qid; dentry->d_op = &v9fs_dentry_operations; @@ -665,11 +692,11 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct inode *old_inode = old_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode); - struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK); + struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); struct v9fs_fid *olddirfid = - v9fs_fid_lookup(old_dentry->d_parent, FID_WALK); + v9fs_fid_lookup(old_dentry->d_parent); struct v9fs_fid *newdirfid = - v9fs_fid_lookup(new_dentry->d_parent, FID_WALK); + v9fs_fid_lookup(new_dentry->d_parent); struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); struct v9fs_fcall *fcall = NULL; int fid = -1; @@ -744,7 +771,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, { struct v9fs_fcall *fcall = NULL; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); + struct v9fs_fid *fid = v9fs_fid_lookup(dentry); int err = -EPERM; dprintk(DEBUG_VFS, "dentry: %p\n", dentry); @@ -778,7 +805,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); + struct v9fs_fid *fid = v9fs_fid_lookup(dentry); struct v9fs_fcall *fcall = NULL; struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); int res = -EPERM; @@ -960,7 +987,7 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) if (retval != 0) goto FreeFcall; - newfid = v9fs_fid_lookup(dentry, FID_OP); + newfid = v9fs_fid_lookup(dentry); /* issue a twstat */ v9fs_blank_mistat(v9ses, mistat); @@ -1004,7 +1031,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) struct v9fs_fcall *fcall = NULL; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); + struct v9fs_fid *fid = v9fs_fid_lookup(dentry); if (!fid) { dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n"); @@ -1148,7 +1175,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); struct v9fs_fcall *fcall = NULL; struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); - struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP); + struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); struct v9fs_fid *newfid = NULL; char *symname = __getname(); @@ -1168,7 +1195,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, if (retval != 0) goto FreeMem; - newfid = v9fs_fid_lookup(dentry, FID_OP); + newfid = v9fs_fid_lookup(dentry); if (!newfid) { dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n"); goto FreeMem; @@ -1246,7 +1273,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) if (retval != 0) goto FreeMem; - newfid = v9fs_fid_lookup(dentry, FID_OP); + newfid = v9fs_fid_lookup(dentry); if (!newfid) { dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n"); retval = -EINVAL; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 1e2b2b54d300..0957f4da91d4 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -129,8 +129,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); - kfree(v9ses); - return ERR_PTR(newfid); + return newfid; } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); @@ -155,23 +154,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type sb->s_root = root; - /* Setup the Root Inode */ - root_fid = v9fs_fid_create(root); - if (root_fid == NULL) { - retval = -ENOMEM; - goto put_back_sb; - } - - root_fid->fidopen = 0; - root_fid->v9ses = v9ses; - stat_result = v9fs_t_stat(v9ses, newfid, &fcall); if (stat_result < 0) { dprintk(DEBUG_ERROR, "stat error\n"); v9fs_t_clunk(v9ses, newfid, NULL); v9fs_put_idpool(newfid, &v9ses->fidpool); } else { - root_fid->fid = newfid; + /* Setup the Root Inode */ + root_fid = v9fs_fid_create(root, v9ses, newfid, 0); + if (root_fid == NULL) { + retval = -ENOMEM; + goto put_back_sb; + } + root_fid->qid = fcall->params.rstat.stat->qid; root->d_inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid); -- cgit v1.2.3 From e3306dd5f7eb2e699f36a4a313fca4b48b18d5e1 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 27 Sep 2005 21:45:33 -0700 Subject: [PATCH] epoll: handle timeout overflow Handle the timeout upper boundary for epoll. Signed-off-by: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 403b90a1213d..4284cd31eba6 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -101,6 +101,10 @@ /* Maximum number of poll wake up nests we are allowing */ #define EP_MAX_POLLWAKE_NESTS 4 +/* Maximum msec timeout value storeable in a long int */ +#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) + + struct epoll_filefd { struct file *file; int fd; @@ -1506,8 +1510,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, * and the overflow condition. The passed timeout is in milliseconds, * that why (t * HZ) / 1000. */ - jtimeout = timeout == -1 || timeout > (MAX_SCHEDULE_TIMEOUT - 1000) / HZ ? - MAX_SCHEDULE_TIMEOUT: (timeout * HZ + 999) / 1000; + jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? + MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; retry: write_lock_irqsave(&ep->lock, flags); -- cgit v1.2.3 From 411b67b4b6a4dd1e0292a6a58dd753978179d173 Mon Sep 17 00:00:00 2001 From: Kostik Belousov Date: Wed, 28 Sep 2005 18:21:28 +0300 Subject: [PATCH] readv/writev syscalls are not checked by lsm it seems that readv(2)/writev(2) syscalls do not call file_permission callback. Looks like this is overlook. I have filled the issue into redhat bugzilla as https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=169433 and got the recommendation to post this on lsm mailing list. The following trivial patch solves the problem. Signed-off-by: Kostik Belousov Signed-off-by: Chris Wright --- fs/read_write.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index b60324aaa2b6..a091ee4f430d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -497,6 +497,9 @@ static ssize_t do_readv_writev(int type, struct file *file, } ret = rw_verify_area(type, file, pos, tot_len); + if (ret) + goto out; + ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); if (ret) goto out; -- cgit v1.2.3 From 192eaa28ba7b44485e521df7ba7a2ccbc4cc4d13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Sep 2005 03:15:08 +0100 Subject: [PATCH] missing ERR_PTR in 9fs Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/9p/vfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 0957f4da91d4..82c5b0084079 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -129,7 +129,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); - return newfid; + return ERR_PTR(newfid); } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); -- cgit v1.2.3 From 998765e5588b197737d457e16f72832d8036190f Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:54 -0700 Subject: [PATCH] aio: lock around kiocbTryKick() Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it than one of them can end up referencing a freed iocb. The kick patch could set the Kicked bit before acquiring the ctx_lock and putting the iocb on the run list. The run path, while holding the ctx_lock, could see this partial kick and mistake it for a kick that was deferred while it was doing work with the run_list NULLed out. It would then race with the kick thread to add the iocb to the run list. This patch moves the kick setting under the ctx_lock so that only one of the kick or run path queues the iocb on the run list, as intended. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 0e11e31dbb77..b8f296999c04 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -899,16 +899,24 @@ static void aio_kick_handler(void *data) * and if required activate the aio work queue to process * it */ -static void queue_kicked_iocb(struct kiocb *iocb) +static void try_queue_kicked_iocb(struct kiocb *iocb) { struct kioctx *ctx = iocb->ki_ctx; unsigned long flags; int run = 0; - WARN_ON((!list_empty(&iocb->ki_wait.task_list))); + /* We're supposed to be the only path putting the iocb back on the run + * list. If we find that the iocb is *back* on a wait queue already + * than retry has happened before we could queue the iocb. This also + * means that the retry could have completed and freed our iocb, no + * good. */ + BUG_ON((!list_empty(&iocb->ki_wait.task_list))); spin_lock_irqsave(&ctx->ctx_lock, flags); - run = __queue_kicked_iocb(iocb); + /* set this inside the lock so that we can't race with aio_run_iocb() + * testing it and putting the iocb on the run list under the lock */ + if (!kiocbTryKick(iocb)) + run = __queue_kicked_iocb(iocb); spin_unlock_irqrestore(&ctx->ctx_lock, flags); if (run) aio_queue_work(ctx); @@ -931,10 +939,7 @@ void fastcall kick_iocb(struct kiocb *iocb) return; } - /* If its already kicked we shouldn't queue it again */ - if (!kiocbTryKick(iocb)) { - queue_kicked_iocb(iocb); - } + try_queue_kicked_iocb(iocb); } EXPORT_SYMBOL(kick_iocb); -- cgit v1.2.3 From 897f15fb587fd2772b9e7ff6ec0265057f3c3975 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:55 -0700 Subject: [PATCH] aio: remove unlocked task_list test and resulting race Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it than one of them can end up referencing a freed iocb. The kick path could delete the task_list item from the wait queue before getting the ctx_lock and putting the iocb on the run list. The run path was testing the task_list item outside the lock so that it could catch ki_retry methods that return -EIOCBRETRY *without* putting the iocb on a wait queue and promising to call kick_iocb. This unlocked check could then race with the kick path to cause both to try and put the iocb on the run list. The patch stops the run path from testing task_list by requring that any ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY users, are updated. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 79 +++++++++++++++++++++++++++------------------------------------- 1 file changed, 33 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index b8f296999c04..9edc0e4a1219 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -741,19 +741,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) ret = retry(iocb); current->io_wait = NULL; - if (-EIOCBRETRY != ret) { - if (-EIOCBQUEUED != ret) { - BUG_ON(!list_empty(&iocb->ki_wait.task_list)); - aio_complete(iocb, ret, 0); - /* must not access the iocb after this */ - } - } else { - /* - * Issue an additional retry to avoid waiting forever if - * no waits were queued (e.g. in case of a short read). - */ - if (list_empty(&iocb->ki_wait.task_list)) - kiocbSetKicked(iocb); + if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { + BUG_ON(!list_empty(&iocb->ki_wait.task_list)); + aio_complete(iocb, ret, 0); } out: spin_lock_irq(&ctx->ctx_lock); @@ -1327,8 +1317,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx) } /* - * Default retry method for aio_read (also used for first time submit) - * Responsible for updating iocb state as retries progress + * aio_p{read,write} are the default ki_retry methods for + * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially + * multiple calls to f_op->aio_read(). They loop around partial progress + * instead of returning -EIOCBRETRY because they don't have the means to call + * kick_iocb(). */ static ssize_t aio_pread(struct kiocb *iocb) { @@ -1337,25 +1330,25 @@ static ssize_t aio_pread(struct kiocb *iocb) struct inode *inode = mapping->host; ssize_t ret = 0; - ret = file->f_op->aio_read(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + do { + ret = file->f_op->aio_read(iocb, iocb->ki_buf, + iocb->ki_left, iocb->ki_pos); + /* + * Can't just depend on iocb->ki_left to determine + * whether we are done. This may have been a short read. + */ + if (ret > 0) { + iocb->ki_buf += ret; + iocb->ki_left -= ret; + } - /* - * Can't just depend on iocb->ki_left to determine - * whether we are done. This may have been a short read. - */ - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; /* - * For pipes and sockets we return once we have - * some data; for regular files we retry till we - * complete the entire read or find that we can't - * read any more data (e.g short reads). + * For pipes and sockets we return once we have some data; for + * regular files we retry till we complete the entire read or + * find that we can't read any more data (e.g short reads). */ - if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)) - ret = -EIOCBRETRY; - } + } while (ret > 0 && + !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); /* This means we must have transferred all that we could */ /* No need to retry anymore */ @@ -1365,27 +1358,21 @@ static ssize_t aio_pread(struct kiocb *iocb) return ret; } -/* - * Default retry method for aio_write (also used for first time submit) - * Responsible for updating iocb state as retries progress - */ +/* see aio_pread() */ static ssize_t aio_pwrite(struct kiocb *iocb) { struct file *file = iocb->ki_filp; ssize_t ret = 0; - ret = file->f_op->aio_write(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); - - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; - - ret = -EIOCBRETRY; - } + do { + ret = file->f_op->aio_write(iocb, iocb->ki_buf, + iocb->ki_left, iocb->ki_pos); + if (ret > 0) { + iocb->ki_buf += ret; + iocb->ki_left -= ret; + } + } while (ret > 0); - /* This means we must have transferred all that we could */ - /* No need to retry anymore */ if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; -- cgit v1.2.3 From 353fb07e2043d2df12dddf4e2c39552d0ab9b026 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:56 -0700 Subject: [PATCH] aio: avoid extra aio_{read,write} call when ki_left == 0 Recently aio_p{read,write} changed to perform retries internally rather than returning -EIOCBRETRY. This inadvertantly resulted in always calling aio_{read,write} with ki_left at 0 which would in turn immediately return 0. Harmless, but we can avoid this call by checking in the caller. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 9edc0e4a1219..d6b1551342b7 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1347,7 +1347,7 @@ static ssize_t aio_pread(struct kiocb *iocb) * regular files we retry till we complete the entire read or * find that we can't read any more data (e.g short reads). */ - } while (ret > 0 && + } while (ret > 0 && iocb->ki_left > 0 && !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); /* This means we must have transferred all that we could */ @@ -1371,7 +1371,7 @@ static ssize_t aio_pwrite(struct kiocb *iocb) iocb->ki_buf += ret; iocb->ki_left -= ret; } - } while (ret > 0); + } while (ret > 0 && iocb->ki_left > 0); if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; -- cgit v1.2.3 From daa35edc0a967d1f77c2e2c1346f57d04371487a Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Fri, 30 Sep 2005 11:59:01 -0700 Subject: [PATCH] uml: remove empty hostfs_truncate method Calling truncate() on hostfs spits a kernel warning "Something isn't implemented here", but it still works fine. Indeed, hostfs i_op->truncate doesn't do anything. But hostfs_setattr() -> set_attr() correctly detects ATTR_SIZE and calls truncate() on the host. So we should be safe (using ftruncate() may be better, in case the file is unlinked on the host, but we aren't sure to have the file open for writing, and reopening it would cause the same races; plus nobody should expect UML to be so careful). So, the warning is wrong, because the current implementation is working. Al, am I correct, and can the warning be therefore dropped? CC: Al Viro Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hostfs/hostfs_kern.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 59c5062cd63f..dd7113106269 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -793,11 +793,6 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, return(err); } -void hostfs_truncate(struct inode *ino) -{ - not_implemented(); -} - int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) { char *name; @@ -894,7 +889,6 @@ static struct inode_operations hostfs_iops = { .rmdir = hostfs_rmdir, .mknod = hostfs_mknod, .rename = hostfs_rename, - .truncate = hostfs_truncate, .permission = hostfs_permission, .setattr = hostfs_setattr, .getattr = hostfs_getattr, @@ -910,7 +904,6 @@ static struct inode_operations hostfs_dir_iops = { .rmdir = hostfs_rmdir, .mknod = hostfs_mknod, .rename = hostfs_rename, - .truncate = hostfs_truncate, .permission = hostfs_permission, .setattr = hostfs_setattr, .getattr = hostfs_getattr, -- cgit v1.2.3 From dd190d066b7ded8c44b2b67dd0a14bed01525d3c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 30 Sep 2005 11:59:02 -0700 Subject: [PATCH] fuse: check O_DIRECT Check O_DIRECT and return -EINVAL error in open. dentry_open() also checks this but only after the open method is called. This patch optimizes away the unnecessary upcalls in this case. It could be a correctness issue too: if filesystem has open() with side effect, then it should fail before doing the open, not after. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6454022b0536..657ab11c173b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -23,6 +23,10 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) struct fuse_file *ff; int err; + /* VFS checks this, but only _after_ ->open() */ + if (file->f_flags & O_DIRECT) + return -EINVAL; + err = generic_file_open(inode, file); if (err) return err; -- cgit v1.2.3 From 18efefa9355119b4f6d9b73b074ebbf9882c37c3 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Tue, 4 Oct 2005 13:06:00 +0100 Subject: NTFS: Fix a stupid bug in __ntfs_bitmap_set_bits_in_run() which caused the count to become negative and hence we had a wild memset() scribbling all over the system's ram. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 3 +++ fs/ntfs/bitmap.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 83f3322765cd..de58579a1d0e 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -102,6 +102,9 @@ ToDo/Notes: inode instead of a vfs inode as parameter. - Fix the definition of the CHKD ntfs record magic. It had an off by two error causing it to be CHKB instead of CHKD. + - Fix a stupid bug in __ntfs_bitmap_set_bits_in_run() which caused the + count to become negative and hence we had a wild memset() scribbling + all over the system's ram. 2.1.23 - Implement extension of resident files and make writing safe as well as many bug fixes, cleanups, and enhancements... diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c index 12cf2e30c7dd..7a190cdc60e2 100644 --- a/fs/ntfs/bitmap.c +++ b/fs/ntfs/bitmap.c @@ -1,7 +1,7 @@ /* * bitmap.c - NTFS kernel bitmap handling. Part of the Linux-NTFS project. * - * Copyright (c) 2004 Anton Altaparmakov + * Copyright (c) 2004-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -90,7 +90,8 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, /* If the first byte is partial, modify the appropriate bits in it. */ if (bit) { u8 *byte = kaddr + pos; - while ((bit & 7) && cnt--) { + while ((bit & 7) && cnt) { + cnt--; if (value) *byte |= 1 << bit++; else -- cgit v1.2.3 From c394e458b69632902d65f9e2f39df79314f72908 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Tue, 4 Oct 2005 13:08:53 +0100 Subject: NTFS: Fix a 64-bitness bug where a left-shift could overflow a 32-bit variable which we now cast to 64-bit first (fs/ntfs/mft.c::map_mft_record_page(). Signed-off-by: Anton Altaparmakov --- fs/ntfs/layout.h | 2 +- fs/ntfs/mft.c | 3 ++- fs/ntfs/unistr.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 01f2dfa39cec..5c248d404f05 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -309,7 +309,7 @@ typedef le16 MFT_RECORD_FLAGS; * Note: The _LE versions will return a CPU endian formatted value! */ #define MFT_REF_MASK_CPU 0x0000ffffffffffffULL -#define MFT_REF_MASK_LE const_cpu_to_le64(0x0000ffffffffffffULL) +#define MFT_REF_MASK_LE const_cpu_to_le64(MFT_REF_MASK_CPU) typedef u64 MFT_REF; typedef le64 leMFT_REF; diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 247586d1d5dc..b011369b5956 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -58,7 +58,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) * overflowing the unsigned long, but I don't think we would ever get * here if the volume was that big... */ - index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; + index = (u64)ni->mft_no << vol->mft_record_size_bits >> + PAGE_CACHE_SHIFT; ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; i_size = i_size_read(mft_vi); diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index a389a5a16c84..0ea887fc859c 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -1,7 +1,7 @@ /* * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published -- cgit v1.2.3 From ce0fe7e70a0ad11097a3773e9f3f0de3d859edf0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Oct 2005 17:43:06 +0100 Subject: [PATCH] bfs endianness annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/bfs/dir.c | 2 +- fs/bfs/inode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index e240c335eb23..5af928fa0449 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -108,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, inode->i_mapping->a_ops = &bfs_aops; inode->i_mode = mode; inode->i_ino = ino; - BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino); + BFS_I(inode)->i_dsk_ino = ino; BFS_I(inode)->i_sblock = 0; BFS_I(inode)->i_eblock = 0; insert_inode_hash(inode); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index c7b39aa279d7..868af0f224fb 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -357,7 +357,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) } info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ - info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS; + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start))>>BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; info->si_lf_sblk = 0; -- cgit v1.2.3 From c2b513dfbb04d7c94cca145172cfeb91f7683e54 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Oct 2005 17:48:44 +0100 Subject: [PATCH] bfs iget() abuses bfs_fill_super() walks the inode table to get the bitmap of free inodes and collect stats. It has no business using iget() for that - it's a lot of extra work, extra icache pollution and more complex code. Switched to walking the damn thing directly. Note: that also allows to kill ->i_dsk_ino in there - separate patch if Tigran can confirm that this field can be zero only for deleted inodes (i.e. something that could only be found during that scan and not by normal lookups). Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/bfs/inode.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 868af0f224fb..3af6c73c5b5a 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -362,23 +362,41 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) info->si_lf_eblk = 0; info->si_lf_sblk = 0; info->si_lf_ioff = 0; + bh = NULL; for (i=BFS_ROOT_INO; i<=info->si_lasti; i++) { - inode = iget(s,i); - if (BFS_I(inode)->i_dsk_ino == 0) + struct bfs_inode *di; + int block = (i - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; + unsigned long sblock, eblock; + + if (!off) { + brelse(bh); + bh = sb_bread(s, block); + } + + if (!bh) + continue; + + di = (struct bfs_inode *)bh->b_data + off; + + if (!di->i_ino) { info->si_freei++; - else { - set_bit(i, info->si_imap); - info->si_freeb -= inode->i_blocks; - if (BFS_I(inode)->i_eblock > info->si_lf_eblk) { - info->si_lf_eblk = BFS_I(inode)->i_eblock; - info->si_lf_sblk = BFS_I(inode)->i_sblock; - info->si_lf_ioff = BFS_INO2OFF(i); - } + continue; + } + set_bit(i, info->si_imap); + info->si_freeb -= BFS_FILEBLOCKS(di); + + sblock = le32_to_cpu(di->i_sblock); + eblock = le32_to_cpu(di->i_eblock); + if (eblock > info->si_lf_eblk) { + info->si_lf_eblk = eblock; + info->si_lf_sblk = sblock; + info->si_lf_ioff = BFS_INO2OFF(i); } - iput(inode); } + brelse(bh); if (!(s->s_flags & MS_RDONLY)) { - mark_buffer_dirty(bh); + mark_buffer_dirty(info->si_sbh); s->s_dirt = 1; } dump_imap("read_super", s); -- cgit v1.2.3 From 829841146878e082613a49581ae252c071057c23 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Oct 2005 21:54:21 -0700 Subject: Avoid 'names_cache' memory leak with CONFIG_AUDITSYSCALL The nameidata "last.name" is always allocated with "__getname()", and should always be free'd with "__putname()". Using "putname()" without the underscores will leak memory, because the allocation will have been hidden from the AUDITSYSCALL code. Arguably the real bug is that the AUDITSYSCALL code is really broken, but in the meantime this fixes the problem people see. Reported by Robert Derr, patch by Rick Lindsley. Acked-by: Al Viro Signed-off-by: Linus Torvalds --- fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 043d587216b5..aa62dbda93ac 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1551,19 +1551,19 @@ do_link: if (nd->last_type != LAST_NORM) goto exit; if (nd->last.name[nd->last.len]) { - putname(nd->last.name); + __putname(nd->last.name); goto exit; } error = -ELOOP; if (count++==32) { - putname(nd->last.name); + __putname(nd->last.name); goto exit; } dir = nd->dentry; down(&dir->d_inode->i_sem); path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); path.mnt = nd->mnt; - putname(nd->last.name); + __putname(nd->last.name); goto do_last; } -- cgit v1.2.3 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents what's going on far better. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/bio.c | 10 +++++----- fs/buffer.c | 2 +- fs/mpage.c | 2 +- fs/ntfs/malloc.h | 2 +- fs/posix_acl.c | 6 +++--- fs/xfs/linux-2.6/kmem.c | 10 +++++----- fs/xfs/linux-2.6/kmem.h | 13 ++++++------- 7 files changed, 22 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 83a349574567..7d81a93afd48 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -75,7 +75,7 @@ struct bio_set { */ static struct bio_set *fs_bio_set; -static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) +static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) { struct bio_vec *bvl; struct biovec_slab *bp; @@ -155,7 +155,7 @@ inline void bio_init(struct bio *bio) * allocate bio and iovecs from the memory pools specified by the * bio_set structure. **/ -struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, struct bio_set *bs) +struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); @@ -181,7 +181,7 @@ out: return bio; } -struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) +struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) { struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); @@ -277,7 +277,7 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src) * * Like __bio_clone, only also allocates the returned bio */ -struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask) +struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) { struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); @@ -1078,7 +1078,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) return bp; } -static void *bio_pair_alloc(unsigned int __nocast gfp_flags, void *data) +static void *bio_pair_alloc(gfp_t gfp_flags, void *data) { return kmalloc(sizeof(struct bio_pair), gfp_flags); } diff --git a/fs/buffer.c b/fs/buffer.c index 6cbfceabd95d..1216c0d3c8ce 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3045,7 +3045,7 @@ static void recalc_bh_state(void) buffer_heads_over_limit = (tot > max_buffer_heads); } -struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags) +struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); if (ret) { diff --git a/fs/mpage.c b/fs/mpage.c index bb9aebe93862..c5adcdddf3cc 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -102,7 +102,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio) static struct bio * mpage_alloc(struct block_device *bdev, sector_t first_sector, int nr_vecs, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct bio *bio; diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 006946efca8c..590887b943f5 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -40,7 +40,7 @@ * Depending on @gfp_mask the allocation may be guaranteed to succeed. */ static inline void *__ntfs_malloc(unsigned long size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { if (likely(size <= PAGE_SIZE)) { BUG_ON(!size); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 296480e96dd5..6c8dcf7613fd 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL(posix_acl_permission); * Allocate a new ACL with the specified number of entries. */ struct posix_acl * -posix_acl_alloc(int count, unsigned int __nocast flags) +posix_acl_alloc(int count, gfp_t flags) { const size_t size = sizeof(struct posix_acl) + count * sizeof(struct posix_acl_entry); @@ -51,7 +51,7 @@ posix_acl_alloc(int count, unsigned int __nocast flags) * Clone an ACL. */ struct posix_acl * -posix_acl_clone(const struct posix_acl *acl, unsigned int __nocast flags) +posix_acl_clone(const struct posix_acl *acl, gfp_t flags) { struct posix_acl *clone = NULL; @@ -185,7 +185,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) * Create an ACL representing the file mode permission bits of an inode. */ struct posix_acl * -posix_acl_from_mode(mode_t mode, unsigned int __nocast flags) +posix_acl_from_mode(mode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 4b184559f231..d2653b589b1c 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -45,7 +45,7 @@ void * -kmem_alloc(size_t size, unsigned int __nocast flags) +kmem_alloc(size_t size, gfp_t flags) { int retries = 0; unsigned int lflags = kmem_flags_convert(flags); @@ -67,7 +67,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) } void * -kmem_zalloc(size_t size, unsigned int __nocast flags) +kmem_zalloc(size_t size, gfp_t flags) { void *ptr; @@ -90,7 +90,7 @@ kmem_free(void *ptr, size_t size) void * kmem_realloc(void *ptr, size_t newsize, size_t oldsize, - unsigned int __nocast flags) + gfp_t flags) { void *new; @@ -105,7 +105,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, } void * -kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags) { int retries = 0; unsigned int lflags = kmem_flags_convert(flags); @@ -124,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_zalloc(kmem_zone_t *zone, gfp_t flags) { void *ptr; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 109fcf27e256..ee7010f085bc 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -81,7 +81,7 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags) +static __inline unsigned int kmem_flags_convert(gfp_t flags) { unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ @@ -125,13 +125,12 @@ kmem_zone_destroy(kmem_zone_t *zone) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_zalloc(kmem_zone_t *, gfp_t); +extern void *kmem_zone_alloc(kmem_zone_t *, gfp_t); -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(void *, size_t, size_t, - unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void *kmem_alloc(size_t, gfp_t); +extern void *kmem_realloc(void *, size_t, size_t, gfp_t); +extern void *kmem_zalloc(size_t, gfp_t); extern void kmem_free(void *, size_t); typedef struct shrinker *kmem_shaker_t; -- cgit v1.2.3 From 1cc956e12aedfdc6baf6312bc36a6b5a71af3c9d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 9 Oct 2005 10:41:32 -0500 Subject: [PATCH] relayfs: fix bogus param value in call to vmap The third param in this call to vmap shouldn't be GFP_KERNEL, which makes no sense, but rather VM_MAP. Thanks to Al Viro for spotting this. Signed-off-by: Tom Zanussi Signed-off-by: Linus Torvalds --- fs/relayfs/buffers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c index 2aa8e2719999..84e21ffa5ca8 100644 --- a/fs/relayfs/buffers.c +++ b/fs/relayfs/buffers.c @@ -109,7 +109,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size) if (unlikely(!buf->page_array[i])) goto depopulate; } - mem = vmap(buf->page_array, n_pages, GFP_KERNEL, PAGE_KERNEL); + mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL); if (!mem) goto depopulate; -- cgit v1.2.3 From 19cba8abd6ca09527c194864ae651db65cbacfe1 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Tue, 11 Oct 2005 08:29:03 -0700 Subject: [PATCH] v9fs: remove additional buffer allocation from v9fs_file_read and v9fs_file_write v9fs_file_read and v9fs_file_write use kmalloc to allocate buffers as big as the data buffer received as parameter. kmalloc cannot be used to allocate buffers bigger than 128K, so reading/writing data in chunks bigger than 128k fails. This patch reorganizes v9fs_file_read and v9fs_file_write to allocate only buffers as big as the maximum data that can be sent in one 9P message. Signed-off-by: Latchesar Ionkov Cc: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/vfs_file.c | 114 ++++++++++++++++--------------------------------------- 1 file changed, 33 insertions(+), 81 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a4799e971d1c..bbc3cc63854f 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -175,16 +175,16 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) } /** - * v9fs_read - read from a file (internal) + * v9fs_file_read - read from a file * @filep: file pointer to read * @data: data buffer to read data into * @count: size of buffer * @offset: offset at which to read data * */ - static ssize_t -v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) +v9fs_file_read(struct file *filp, char __user * data, size_t count, + loff_t * offset) { struct inode *inode = filp->f_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); @@ -194,6 +194,7 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) int rsize = 0; int result = 0; int total = 0; + int n; dprintk(DEBUG_VFS, "\n"); @@ -216,10 +217,15 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) } else *offset += result; - /* XXX - extra copy */ - memcpy(buffer, fcall->params.rread.data, result); + n = copy_to_user(data, fcall->params.rread.data, result); + if (n) { + dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n); + kfree(fcall); + return -EFAULT; + } + count -= result; - buffer += result; + data += result; total += result; kfree(fcall); @@ -232,42 +238,7 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) } /** - * v9fs_file_read - read from a file - * @filep: file pointer to read - * @data: data buffer to read data into - * @count: size of buffer - * @offset: offset at which to read data - * - */ - -static ssize_t -v9fs_file_read(struct file *filp, char __user * data, size_t count, - loff_t * offset) -{ - int retval = -1; - int ret = 0; - char *buffer; - - buffer = kmalloc(count, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - retval = v9fs_read(filp, buffer, count, offset); - if (retval > 0) { - if ((ret = copy_to_user(data, buffer, retval)) != 0) { - dprintk(DEBUG_ERROR, "Problem copying to user %d\n", - ret); - retval = ret; - } - } - - kfree(buffer); - - return retval; -} - -/** - * v9fs_write - write to a file + * v9fs_file_write - write to a file * @filep: file pointer to write * @data: data buffer to write data from * @count: size of buffer @@ -276,7 +247,8 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count, */ static ssize_t -v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) +v9fs_file_write(struct file *filp, const char __user * data, + size_t count, loff_t * offset) { struct inode *inode = filp->f_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); @@ -286,30 +258,42 @@ v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) int result = -EIO; int rsize = 0; int total = 0; + char *buf; - dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count, + dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, (int)*offset); rsize = v9ses->maxdata - V9FS_IOHDRSZ; if (v9fid->iounit != 0 && rsize > v9fid->iounit) rsize = v9fid->iounit; - dump_data(buffer, count); + buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL); + if (!buf) + return -ENOMEM; do { if (count < rsize) rsize = count; - result = - v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall); + result = copy_from_user(buf, data, rsize); + if (result) { + dprintk(DEBUG_ERROR, "Problem copying from user\n"); + kfree(buf); + return -EFAULT; + } + + dump_data(buf, rsize); + result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall); if (result < 0) { eprintk(KERN_ERR, "error while writing: %s(%d)\n", FCALL_ERROR(fcall), result); kfree(fcall); + kfree(buf); return result; } else *offset += result; kfree(fcall); + fcall = NULL; if (result != rsize) { eprintk(KERN_ERR, @@ -319,46 +303,14 @@ v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) } count -= result; - buffer += result; + data += result; total += result; } while (count); + kfree(buf); return total; } -/** - * v9fs_file_write - write to a file - * @filep: file pointer to write - * @data: data buffer to write data from - * @count: size of buffer - * @offset: offset at which to write data - * - */ - -static ssize_t -v9fs_file_write(struct file *filp, const char __user * data, - size_t count, loff_t * offset) -{ - int ret = -1; - char *buffer; - - buffer = kmalloc(count, GFP_KERNEL); - if (buffer == NULL) - return -ENOMEM; - - ret = copy_from_user(buffer, data, count); - if (ret) { - dprintk(DEBUG_ERROR, "Problem copying from user\n"); - ret = -EFAULT; - } else { - ret = v9fs_write(filp, buffer, count, offset); - } - - kfree(buffer); - - return ret; -} - struct file_operations v9fs_file_operations = { .llseek = generic_file_llseek, .read = v9fs_file_read, -- cgit v1.2.3 From 22c1ea44f0d33eda532883858b6cdabc5f265b66 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Oct 2005 08:29:05 -0700 Subject: [PATCH] nfsacl: Solaris VxFS compatibility fix Here is a compatibility fix between Linux and Solaris when used with VxFS filesystems: Solaris usually accepts acl entries in any order, but with VxFS it replies with NFSERR_INVAL when it sees a four-entry acl that is not in canonical form. It may also fail with other non-canonical acls -- I can't tell, because that case never triggers: We only send non-canonical acls when we fake up an ACL_MASK entry. Instead of adding fake ACL_MASK entries at the end, inserting them in the correct position makes Solaris+VxFS happy. The Linux client and server sides don't care about entry order. The three-entry-acl special case in which we need a fake ACL_MASK entry was handled in xdr_nfsace_encode. The patch moves this into nfsacl_encode. Signed-off-by: Andreas Gruenbacher Acked-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs_common/nfsacl.c | 70 +++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 251e5a1bb1c4..0c2be8c0307d 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -48,43 +48,26 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) (struct nfsacl_encode_desc *) desc; u32 *p = (u32 *) elem; - if (nfsacl_desc->count < nfsacl_desc->acl->a_count) { - struct posix_acl_entry *entry = - &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + struct posix_acl_entry *entry = + &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; - *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); - switch(entry->e_tag) { - case ACL_USER_OBJ: - *p++ = htonl(nfsacl_desc->uid); - break; - case ACL_GROUP_OBJ: - *p++ = htonl(nfsacl_desc->gid); - break; - case ACL_USER: - case ACL_GROUP: - *p++ = htonl(entry->e_id); - break; - default: /* Solaris depends on that! */ - *p++ = 0; - break; - } - *p++ = htonl(entry->e_perm & S_IRWXO); - } else { - const struct posix_acl_entry *pa, *pe; - int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE; - - FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) { - if (pa->e_tag == ACL_GROUP_OBJ) { - group_obj_perm = pa->e_perm & S_IRWXO; - break; - } - } - /* fake up ACL_MASK entry */ - *p++ = htonl(ACL_MASK | nfsacl_desc->typeflag); - *p++ = htonl(0); - *p++ = htonl(group_obj_perm); + *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); + switch(entry->e_tag) { + case ACL_USER_OBJ: + *p++ = htonl(nfsacl_desc->uid); + break; + case ACL_GROUP_OBJ: + *p++ = htonl(nfsacl_desc->gid); + break; + case ACL_USER: + case ACL_GROUP: + *p++ = htonl(entry->e_id); + break; + default: /* Solaris depends on that! */ + *p++ = 0; + break; } - + *p++ = htonl(entry->e_perm & S_IRWXO); return 0; } @@ -105,11 +88,28 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, .gid = inode->i_gid, }; int err; + struct posix_acl *acl2 = NULL; if (entries > NFS_ACL_MAX_ENTRIES || xdr_encode_word(buf, base, entries)) return -EINVAL; + if (encode_entries && acl && acl->a_count == 3) { + /* Fake up an ACL_MASK entry. */ + acl2 = posix_acl_alloc(4, GFP_KERNEL); + if (!acl2) + return -ENOMEM; + /* Insert entries in canonical order: other orders seem + to confuse Solaris VxFS. */ + acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ + acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */ + acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */ + acl2->a_entries[2].e_tag = ACL_MASK; + acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */ + nfsacl_desc.acl = acl2; + } err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); + if (acl2) + posix_acl_release(acl2); if (!err) err = 8 + nfsacl_desc.desc.elem_size * nfsacl_desc.desc.array_len; -- cgit v1.2.3 From 6de505173e24e76bb33a2595312e0c2b44d49e58 Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Tue, 11 Oct 2005 08:29:08 -0700 Subject: [PATCH] binfmt_elf bss padding fix Nir Tzachar points out that if an ELF file specifies a zero-length bss at a whacky address, we cannot load that binary because padzero() tries to zero out the end of the page at the whacky address, and that may not be writeable. See also http://bugzilla.kernel.org/show_bug.cgi?id=5411 So teach load_elf_binary() to skip the bss settng altogether if the elf file has a zero-length bss segment. Cc: Roland McGrath Cc: Daniel Jacobowitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7976a238f0a3..d4b15576e584 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -905,7 +905,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) send_sig(SIGKILL, current, 0); goto out_free_dentry; } - if (padzero(elf_bss)) { + if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { send_sig(SIGSEGV, current, 0); retval = -EFAULT; /* Nobody gets to see this, but.. */ goto out_free_dentry; -- cgit v1.2.3 From 63c6764ce4c650245a41a95a2235207d25ca4fde Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Fri, 14 Oct 2005 15:59:11 -0700 Subject: [PATCH] nommu build error fix "proc_smaps_operations" is not defined in case of "CONFIG_MMU=n". Signed-off-by: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b33f94020db..a170450aadb1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -103,7 +103,9 @@ enum pid_directory_inos { PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, PROC_TGID_WCHAN, +#ifdef CONFIG_MMU PROC_TGID_SMAPS, +#endif #ifdef CONFIG_SCHEDSTATS PROC_TGID_SCHEDSTAT, #endif @@ -141,7 +143,9 @@ enum pid_directory_inos { PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, PROC_TID_WCHAN, +#ifdef CONFIG_MMU PROC_TID_SMAPS, +#endif #ifdef CONFIG_SCHEDSTATS PROC_TID_SCHEDSTAT, #endif @@ -195,7 +199,9 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_MMU E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -235,7 +241,9 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_MMU E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -630,6 +638,7 @@ static struct file_operations proc_numa_maps_operations = { }; #endif +#ifdef CONFIG_MMU extern struct seq_operations proc_pid_smaps_op; static int smaps_open(struct inode *inode, struct file *file) { @@ -648,6 +657,7 @@ static struct file_operations proc_smaps_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif extern struct seq_operations mounts_op; static int mounts_open(struct inode *inode, struct file *file) @@ -1681,10 +1691,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MOUNTS: inode->i_fop = &proc_mounts_operations; break; +#ifdef CONFIG_MMU case PROC_TID_SMAPS: case PROC_TGID_SMAPS: inode->i_fop = &proc_smaps_operations; break; +#endif #ifdef CONFIG_SECURITY case PROC_TID_ATTR: inode->i_nlink = 2; -- cgit v1.2.3 From b3c52da33ce95747b1bff86cce716d4f1397f14a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2005 06:02:00 -0400 Subject: [PATCH] NFS: Fix cache consistency races If the data cache has been marked as potentially invalid by nfs_refresh_inode, we should invalidate it rather than assume that changes are due to our own activity. Also ensure that we always start with a valid cache before declaring it to be protected by a delegation. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/delegation.c | 4 ++++ fs/nfs/file.c | 3 ++- fs/nfs/inode.c | 7 ++----- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d7f7eb669d03..4a36839f0bbd 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -85,6 +85,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct struct nfs_delegation *delegation; int status = 0; + /* Ensure we first revalidate the attributes and page cache! */ + if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + delegation = nfs_alloc_delegation(); if (delegation == NULL) return -ENOMEM; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f6b9eda925c5..6bdcfa95de94 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -137,7 +137,8 @@ static int nfs_revalidate_file(struct inode *inode, struct file *filp) struct nfs_inode *nfsi = NFS_I(inode); int retval = 0; - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)) + || nfs_attribute_timeout(inode)) retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); nfs_revalidate_mapping(inode, filp->f_mapping); return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6922469d6fc5..6be46d21c01e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1226,10 +1226,6 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) loff_t cur_size, new_isize; int data_unstable; - /* Do we hold a delegation? */ - if (nfs_have_delegation(inode, FMODE_READ)) - return 0; - spin_lock(&inode->i_lock); /* Are we in the process of updating data on the server? */ @@ -1350,7 +1346,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign nfsi->read_cache_jiffies = fattr->timestamp; /* Are we racing with known updates of the metadata on the server? */ - data_unstable = ! nfs_verify_change_attribute(inode, verifier); + data_unstable = ! (nfs_verify_change_attribute(inode, verifier) || + (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)); /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); -- cgit v1.2.3 From 6ce969171d5187f7621be68c0ebbc7fb02ec53f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2005 06:03:23 -0400 Subject: [PATCH] NFS: Fix Oopsable/unnecessary i_count manipulations in nfs_wait_on_inode() Oopsable since nfs_wait_on_inode() can get called as part of iput_final(). Unnecessary since the caller had better be damned sure that the inode won't disappear from underneath it anyway. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6be46d21c01e..d4eadeea128e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -877,12 +877,10 @@ static int nfs_wait_on_inode(struct inode *inode) sigset_t oldmask; int error; - atomic_inc(&inode->i_count); rpc_clnt_sigmask(clnt, &oldmask); error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, nfs_wait_schedule, TASK_INTERRUPTIBLE); rpc_clnt_sigunmask(clnt, &oldmask); - iput(inode); return error; } -- cgit v1.2.3 From b65574fec5db1211bce7fc8bec7a2b32486e0670 Mon Sep 17 00:00:00 2001 From: David McCullough Date: Mon, 17 Oct 2005 16:43:29 -0700 Subject: [PATCH] output of /proc/maps on nommu systems is incomplete Currently you do not get all the map entries on nommu systems because the start function doesn't index into the list using the value of "pos". Signed-off-by: David McCullough Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/nommu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index f3bf016d5ee3..cff10ab1af63 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -91,6 +91,7 @@ static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) next = _rb; break; } + pos--; } return next; -- cgit v1.2.3 From 4faa5285283fad081443e3612ca426a311bb6c7e Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2005 16:43:33 -0700 Subject: [PATCH] aio: revert lock_kiocb() lock_kiocb() was introduced to serialize retrying and cancellation. In the process of doing so it tried to sleep waiting for KIF_LOCKED while holding the ctx_lock spinlock. Recent fixes have ensured that multiple concurrent retries won't be attempted for a given iocb. Cancel has other problems and has no significant in-tree users that have been complaining about it. So for the immediate future we'll revert sleeping with the lock held and will address proper cancellation and retry serialization in the future. Signed-off-by: Zach Brown Acked-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index d6b1551342b7..9fe7216457d8 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -398,7 +398,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) if (unlikely(!req)) return NULL; - req->ki_flags = 1 << KIF_LOCKED; + req->ki_flags = 0; req->ki_users = 2; req->ki_key = 0; req->ki_ctx = ctx; @@ -547,25 +547,6 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id) return ioctx; } -static int lock_kiocb_action(void *param) -{ - schedule(); - return 0; -} - -static inline void lock_kiocb(struct kiocb *iocb) -{ - wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action, - TASK_UNINTERRUPTIBLE); -} - -static inline void unlock_kiocb(struct kiocb *iocb) -{ - kiocbClearLocked(iocb); - smp_mb__after_clear_bit(); - wake_up_bit(&iocb->ki_flags, KIF_LOCKED); -} - /* * use_mm * Makes the calling kernel thread take on the specified @@ -796,9 +777,7 @@ static int __aio_run_iocbs(struct kioctx *ctx) * Hold an extra reference while retrying i/o. */ iocb->ki_users++; /* grab extra reference */ - lock_kiocb(iocb); aio_run_iocb(iocb); - unlock_kiocb(iocb); if (__aio_put_req(ctx, iocb)) /* drop extra ref */ put_ioctx(ctx); } @@ -1542,7 +1521,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, spin_lock_irq(&ctx->ctx_lock); aio_run_iocb(req); - unlock_kiocb(req); if (!list_empty(&ctx->run_list)) { /* drain the run list */ while (__aio_run_iocbs(ctx)) @@ -1674,7 +1652,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (NULL != cancel) { struct io_event tmp; pr_debug("calling cancel\n"); - lock_kiocb(kiocb); memset(&tmp, 0, sizeof(tmp)); tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; tmp.data = kiocb->ki_user_data; @@ -1686,7 +1663,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (copy_to_user(result, &tmp, sizeof(tmp))) ret = -EFAULT; } - unlock_kiocb(kiocb); } else ret = -EINVAL; -- cgit v1.2.3