diff options
Diffstat (limited to 'fs')
101 files changed, 5944 insertions, 2167 deletions
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c index 71742ba150c4..6f2617820a4e 100644 --- a/fs/9p/fcall.c +++ b/fs/9p/fcall.c @@ -98,23 +98,20 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err) { - int fid; + int fid, id; struct v9fs_session_info *v9ses; - if (err) - return; - + id = 0; fid = tc->params.tclunk.fid; - kfree(tc); - - if (!rc) - return; - - v9ses = a; - if (rc->id == RCLUNK) - v9fs_put_idpool(fid, &v9ses->fidpool); + if (rc) + id = rc->id; + kfree(tc); kfree(rc); + if (id == RCLUNK) { + v9ses = a; + v9fs_put_idpool(fid, &v9ses->fidpool); + } } /** diff --git a/fs/9p/mux.c b/fs/9p/mux.c index 3e5b124a7212..f4407eb276c7 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -50,15 +50,23 @@ enum { Wpending = 8, /* can write */ }; +enum { + None, + Flushing, + Flushed, +}; + struct v9fs_mux_poll_task; struct v9fs_req { + spinlock_t lock; int tag; struct v9fs_fcall *tcall; struct v9fs_fcall *rcall; int err; v9fs_mux_req_callback cb; void *cba; + int flush; struct list_head req_list; }; @@ -96,8 +104,8 @@ struct v9fs_mux_poll_task { struct v9fs_mux_rpc { struct v9fs_mux_data *m; - struct v9fs_req *req; int err; + struct v9fs_fcall *tcall; struct v9fs_fcall *rcall; wait_queue_head_t wqueue; }; @@ -524,10 +532,9 @@ again: static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) { - int ecode, tag; + int ecode; struct v9fs_str *ename; - tag = req->tag; if (!req->err && req->rcall->id == RERROR) { ecode = req->rcall->params.rerror.errno; ename = &req->rcall->params.rerror.error; @@ -553,23 +560,6 @@ static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) if (!req->err) req->err = -EIO; } - - if (req->err == ERREQFLUSH) - return; - - if (req->cb) { - dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n", - req->tcall, req->rcall); - - (*req->cb) (req->cba, req->tcall, req->rcall, req->err); - req->cb = NULL; - } else - kfree(req->rcall); - - v9fs_mux_put_tag(m, tag); - - wake_up(&m->equeue); - kfree(req); } /** @@ -669,17 +659,26 @@ static void v9fs_read_work(void *a) list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { if (rreq->tag == rcall->tag) { req = rreq; - req->rcall = rcall; - list_del(&req->req_list); - spin_unlock(&m->lock); - process_request(m, req); + if (req->flush != Flushing) + list_del(&req->req_list); break; } - } + spin_unlock(&m->lock); - if (!req) { - spin_unlock(&m->lock); + if (req) { + req->rcall = rcall; + process_request(m, req); + + if (req->flush != Flushing) { + if (req->cb) + (*req->cb) (req, req->cba); + else + kfree(req->rcall); + + wake_up(&m->equeue); + } + } else { if (err >= 0 && rcall->id != RFLUSH) dprintk(DEBUG_ERROR, "unexpected response mux %p id %d tag %d\n", @@ -746,7 +745,6 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, return ERR_PTR(-ENOMEM); v9fs_set_tag(tc, n); - if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { char buf[150]; @@ -754,12 +752,14 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, printk(KERN_NOTICE "<<< %p %s\n", m, buf); } + spin_lock_init(&req->lock); req->tag = n; req->tcall = tc; req->rcall = NULL; req->err = 0; req->cb = cb; req->cba = cba; + req->flush = None; spin_lock(&m->lock); list_add_tail(&req->req_list, &m->unsent_req_list); @@ -776,72 +776,108 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, return req; } -static void v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, - struct v9fs_fcall *rc, int err) +static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req) +{ + v9fs_mux_put_tag(m, req->tag); + kfree(req); +} + +static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a) { v9fs_mux_req_callback cb; int tag; struct v9fs_mux_data *m; - struct v9fs_req *req, *rptr; + struct v9fs_req *req, *rreq, *rptr; m = a; - dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc, - rc, err, tc->params.tflush.oldtag); + dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, + freq->tcall, freq->rcall, freq->err, + freq->tcall->params.tflush.oldtag); spin_lock(&m->lock); cb = NULL; - tag = tc->params.tflush.oldtag; - list_for_each_entry_safe(req, rptr, &m->req_list, req_list) { - if (req->tag == tag) { + tag = freq->tcall->params.tflush.oldtag; + req = NULL; + list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { + if (rreq->tag == tag) { + req = rreq; list_del(&req->req_list); - if (req->cb) { - cb = req->cb; - req->cb = NULL; - spin_unlock(&m->lock); - (*cb) (req->cba, req->tcall, req->rcall, - req->err); - } - kfree(req); - wake_up(&m->equeue); break; } } + spin_unlock(&m->lock); - if (!cb) - spin_unlock(&m->lock); + if (req) { + spin_lock(&req->lock); + req->flush = Flushed; + spin_unlock(&req->lock); + + if (req->cb) + (*req->cb) (req, req->cba); + else + kfree(req->rcall); + + wake_up(&m->equeue); + } - v9fs_mux_put_tag(m, tag); - kfree(tc); - kfree(rc); + kfree(freq->tcall); + kfree(freq->rcall); + v9fs_mux_free_request(m, freq); } -static void +static int v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req) { struct v9fs_fcall *fc; + struct v9fs_req *rreq, *rptr; dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); + /* if a response was received for a request, do nothing */ + spin_lock(&req->lock); + if (req->rcall || req->err) { + spin_unlock(&req->lock); + dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req); + return 0; + } + + req->flush = Flushing; + spin_unlock(&req->lock); + + spin_lock(&m->lock); + /* if the request is not sent yet, just remove it from the list */ + list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { + if (rreq->tag == req->tag) { + dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req); + list_del(&rreq->req_list); + req->flush = Flushed; + spin_unlock(&m->lock); + if (req->cb) + (*req->cb) (req, req->cba); + return 0; + } + } + spin_unlock(&m->lock); + + clear_thread_flag(TIF_SIGPENDING); fc = v9fs_create_tflush(req->tag); v9fs_send_request(m, fc, v9fs_mux_flush_cb, m); + return 1; } static void -v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err) +v9fs_mux_rpc_cb(struct v9fs_req *req, void *a) { struct v9fs_mux_rpc *r; - if (err == ERREQFLUSH) { - kfree(rc); - dprintk(DEBUG_MUX, "err req flush\n"); - return; - } - + dprintk(DEBUG_MUX, "req %p r %p\n", req, a); r = a; - dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req, - tc, rc, err); - r->rcall = rc; - r->err = err; + r->rcall = req->rcall; + r->err = req->err; + + if (req->flush!=None && !req->err) + r->err = -ERESTARTSYS; + wake_up(&r->wqueue); } @@ -856,12 +892,13 @@ int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc) { - int err; + int err, sigpending; unsigned long flags; struct v9fs_req *req; struct v9fs_mux_rpc r; r.err = 0; + r.tcall = tc; r.rcall = NULL; r.m = m; init_waitqueue_head(&r.wqueue); @@ -869,48 +906,50 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, if (rc) *rc = NULL; + sigpending = 0; + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } + req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r); if (IS_ERR(req)) { err = PTR_ERR(req); dprintk(DEBUG_MUX, "error %d\n", err); - return PTR_ERR(req); + return err; } - r.req = req; - dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc, - req->tag, &r, req); err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); if (r.err < 0) err = r.err; if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) { - spin_lock(&m->lock); - req->tcall = NULL; - req->err = ERREQFLUSH; - spin_unlock(&m->lock); + if (v9fs_mux_flush_request(m, req)) { + /* wait until we get response of the flush message */ + do { + clear_thread_flag(TIF_SIGPENDING); + err = wait_event_interruptible(r.wqueue, + r.rcall || r.err); + } while (!r.rcall && !r.err && err==-ERESTARTSYS && + m->trans->status==Connected && !m->err); + } + sigpending = 1; + } - clear_thread_flag(TIF_SIGPENDING); - v9fs_mux_flush_request(m, req); + if (sigpending) { spin_lock_irqsave(¤t->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - if (!err) { - if (r.rcall) - dprintk(DEBUG_MUX, "got response id %d tag %d\n", - r.rcall->id, r.rcall->tag); - - if (rc) - *rc = r.rcall; - else - kfree(r.rcall); - } else { + if (rc) + *rc = r.rcall; + else kfree(r.rcall); - dprintk(DEBUG_MUX, "got error %d\n", err); - if (err > 0) - err = -EIO; - } + + v9fs_mux_free_request(m, req); + if (err > 0) + err = -EIO; return err; } @@ -951,12 +990,15 @@ void v9fs_mux_cancel(struct v9fs_mux_data *m, int err) struct v9fs_req *req, *rtmp; LIST_HEAD(cancel_list); - dprintk(DEBUG_MUX, "mux %p err %d\n", m, err); + dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err); m->err = err; spin_lock(&m->lock); list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + list_move(&req->req_list, &cancel_list); + } spin_unlock(&m->lock); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { @@ -965,11 +1007,9 @@ void v9fs_mux_cancel(struct v9fs_mux_data *m, int err) req->err = err; if (req->cb) - (*req->cb) (req->cba, req->tcall, req->rcall, req->err); + (*req->cb) (req, req->cba); else kfree(req->rcall); - - kfree(req); } wake_up(&m->equeue); diff --git a/fs/9p/mux.h b/fs/9p/mux.h index e90bfd32ea42..fb10c50186a1 100644 --- a/fs/9p/mux.h +++ b/fs/9p/mux.h @@ -24,6 +24,7 @@ */ struct v9fs_mux_data; +struct v9fs_req; /** * v9fs_mux_req_callback - callback function that is called when the @@ -36,8 +37,7 @@ struct v9fs_mux_data; * @rc - response call * @err - error code (non-zero if error occured) */ -typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc, - struct v9fs_fcall *rc, int err); +typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a); int v9fs_mux_global_init(void); void v9fs_mux_global_exit(void); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 083dcfcd158e..1a8e46084f0e 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -72,11 +72,17 @@ int v9fs_file_open(struct inode *inode, struct file *file) return -ENOSPC; } - err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, NULL); + err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, &fcall); if (err < 0) { dprintk(DEBUG_ERROR, "rewalk didn't work\n"); - goto put_fid; + if (fcall && fcall->id == RWALK) + goto clunk_fid; + else { + v9fs_put_idpool(fid, &v9ses->fidpool); + goto free_fcall; + } } + kfree(fcall); /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ /* translate open mode appropriately */ @@ -109,8 +115,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) clunk_fid: v9fs_t_clunk(v9ses, fid); -put_fid: - v9fs_put_idpool(fid, &v9ses->fidpool); +free_fcall: kfree(fcall); return err; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 133db366d306..2cb87ba4b1c1 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -270,7 +270,10 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm, err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall); if (err < 0) { PRINT_FCALL_ERROR("clone error", fcall); - goto put_fid; + if (fcall && fcall->id == RWALK) + goto clunk_fid; + else + goto put_fid; } kfree(fcall); @@ -322,6 +325,9 @@ v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry) &fcall); if (err < 0) { + if (fcall && fcall->id == RWALK) + goto clunk_fid; + PRINT_FCALL_ERROR("walk error", fcall); v9fs_put_idpool(nfid, &v9ses->fidpool); goto error; @@ -640,19 +646,26 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, } result = v9fs_t_walk(v9ses, dirfidnum, newfid, - (char *)dentry->d_name.name, NULL); + (char *)dentry->d_name.name, &fcall); + if (result < 0) { - v9fs_put_idpool(newfid, &v9ses->fidpool); + if (fcall && fcall->id == RWALK) + v9fs_t_clunk(v9ses, newfid); + else + v9fs_put_idpool(newfid, &v9ses->fidpool); + if (result == -ENOENT) { d_add(dentry, NULL); dprintk(DEBUG_VFS, "Return negative dentry %p count %d\n", dentry, atomic_read(&dentry->d_count)); + kfree(fcall); return NULL; } dprintk(DEBUG_ERROR, "walk error:%d\n", result); goto FreeFcall; } + kfree(fcall); result = v9fs_t_stat(v9ses, newfid, &fcall); if (result < 0) { diff --git a/fs/Kconfig b/fs/Kconfig index f9b5842c8d2d..572cc435a1bb 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1101,6 +1101,44 @@ config JFFS2_SUMMARY If unsure, say 'N'. +config JFFS2_FS_XATTR + bool "JFFS2 XATTR support (EXPERIMENTAL)" + depends on JFFS2_FS && EXPERIMENTAL && !JFFS2_FS_WRITEBUFFER + default n + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + <http://acl.bestbits.at/> for details). + + If unsure, say N. + +config JFFS2_FS_POSIX_ACL + bool "JFFS2 POSIX Access Control Lists" + depends on JFFS2_FS_XATTR + default y + select FS_POSIX_ACL + help + Posix Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the Posix ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N + +config JFFS2_FS_SECURITY + bool "JFFS2 Security Labels" + depends on JFFS2_FS_XATTR + default y + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute handler for file security + labels in the jffs2 filesystem. + + If you are not using a security module that requires using + extended attributes for file security labels, say N. + config JFFS2_COMPRESSION_OPTIONS bool "Advanced compression options for JFFS2" depends on JFFS2_FS diff --git a/fs/Makefile b/fs/Makefile index 83bf478e786b..078d3d1191a5 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_DNOTIFY) += dnotify.o obj-$(CONFIG_PROC_FS) += proc/ obj-y += partitions/ obj-$(CONFIG_SYSFS) += sysfs/ +obj-$(CONFIG_CONFIGFS_FS) += configfs/ obj-y += devpts/ obj-$(CONFIG_PROFILING) += dcookies.o @@ -100,5 +101,4 @@ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_DEBUG_FS) += debugfs/ -obj-$(CONFIG_CONFIGFS_FS) += configfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ diff --git a/fs/affs/namei.c b/fs/affs/namei.c index d4c2d636c479..a42143ca0169 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -416,10 +416,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, return retval; } - retval = -EIO; bh = affs_bread(sb, old_dentry->d_inode->i_ino); if (!bh) - goto done; + return -EIO; /* Remove header from its parent directory. */ affs_lock_dir(old_dir); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 57c4903614e5..d6603d02304c 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -74,8 +74,8 @@ struct autofs_wait_queue { struct autofs_wait_queue *next; autofs_wqt_t wait_queue_token; /* We use the following to see what we are waiting for */ - int hash; - int len; + unsigned int hash; + unsigned int len; char *name; u32 dev; u64 ino; @@ -85,7 +85,6 @@ struct autofs_wait_queue { pid_t tgid; /* This is for status reporting upon return */ int status; - atomic_t notify; atomic_t wait_ctr; }; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 84e030c8ddd0..5100f984783f 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -327,6 +327,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); int oz_mode = autofs4_oz_mode(sbi); unsigned int lookup_type; int status; @@ -340,13 +341,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (oz_mode || !lookup_type) goto done; - /* - * If a request is pending wait for it. - * If it's a mount then it won't be expired till at least - * a liitle later and if it's an expire then we might need - * to mount it again. - */ - if (autofs4_ispending(dentry)) { + /* If an expire request is pending wait for it. */ + if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { DPRINTK("waiting for active request %p name=%.*s", dentry, dentry->d_name.len, dentry->d_name.name); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 142ab6aa2aa1..ce103e7b0bc3 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -189,14 +189,30 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, return len; } +static struct autofs_wait_queue * +autofs4_find_wait(struct autofs_sb_info *sbi, + char *name, unsigned int hash, unsigned int len) +{ + struct autofs_wait_queue *wq; + + for (wq = sbi->queues; wq; wq = wq->next) { + if (wq->hash == hash && + wq->len == len && + wq->name && !memcmp(wq->name, name, len)) + break; + } + return wq; +} + int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, enum autofs_notify notify) { + struct autofs_info *ino; struct autofs_wait_queue *wq; char *name; unsigned int len = 0; unsigned int hash = 0; - int status; + int status, type; /* In catatonic mode, we don't wait for nobody */ if (sbi->catatonic) @@ -223,21 +239,41 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, return -EINTR; } - for (wq = sbi->queues ; wq ; wq = wq->next) { - if (wq->hash == dentry->d_name.hash && - wq->len == len && - wq->name && !memcmp(wq->name, name, len)) - break; - } + wq = autofs4_find_wait(sbi, name, hash, len); + ino = autofs4_dentry_ino(dentry); + if (!wq && ino && notify == NFY_NONE) { + /* + * Either we've betean the pending expire to post it's + * wait or it finished while we waited on the mutex. + * So we need to wait till either, the wait appears + * or the expire finishes. + */ + + while (ino->flags & AUTOFS_INF_EXPIRING) { + mutex_unlock(&sbi->wq_mutex); + schedule_timeout_interruptible(HZ/10); + if (mutex_lock_interruptible(&sbi->wq_mutex)) { + kfree(name); + return -EINTR; + } + wq = autofs4_find_wait(sbi, name, hash, len); + if (wq) + break; + } - if (!wq) { - /* Can't wait for an expire if there's no mount */ - if (notify == NFY_NONE && !d_mountpoint(dentry)) { + /* + * Not ideal but the status has already gone. Of the two + * cases where we wait on NFY_NONE neither depend on the + * return status of the wait. + */ + if (!wq) { kfree(name); mutex_unlock(&sbi->wq_mutex); - return -ENOENT; + return 0; } + } + if (!wq) { /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); if (!wq) { @@ -263,20 +299,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); - atomic_set(&wq->notify, 1); - mutex_unlock(&sbi->wq_mutex); - } else { - atomic_inc(&wq->wait_ctr); mutex_unlock(&sbi->wq_mutex); - kfree(name); - DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", - (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); - } - - if (notify != NFY_NONE && atomic_read(&wq->notify)) { - int type; - - atomic_dec(&wq->notify); if (sbi->version < 5) { if (notify == NFY_MOUNT) @@ -299,6 +322,12 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, /* autofs4_notify_daemon() may block */ autofs4_notify_daemon(sbi, wq, type); + } else { + atomic_inc(&wq->wait_ctr); + mutex_unlock(&sbi->wq_mutex); + kfree(name); + DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", + (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); } /* wq->name is NULL if and only if the lock is already released */ diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 69f44dcdb0b4..b1c902e319c1 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -428,7 +428,6 @@ static int load_flat_file(struct linux_binprm * bprm, loff_t fpos; unsigned long start_code, end_code; int ret; - int exec_fileno; hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ inode = bprm->file->f_dentry->d_inode; @@ -502,21 +501,12 @@ static int load_flat_file(struct linux_binprm * bprm, goto err; } - /* check file descriptor */ - exec_fileno = get_unused_fd(); - if (exec_fileno < 0) { - ret = -EMFILE; - goto err; - } - get_file(bprm->file); - fd_install(exec_fileno, bprm->file); - /* Flush all traces of the currently running executable */ if (id == 0) { result = flush_old_exec(bprm); if (result) { ret = result; - goto err_close; + goto err; } /* OK, This is the point of no return */ @@ -548,7 +538,7 @@ static int load_flat_file(struct linux_binprm * bprm, textpos = (unsigned long) -ENOMEM; printk("Unable to mmap process text, errno %d\n", (int)-textpos); ret = textpos; - goto err_close; + goto err; } down_write(¤t->mm->mmap_sem); @@ -564,7 +554,7 @@ static int load_flat_file(struct linux_binprm * bprm, (int)-datapos); do_munmap(current->mm, textpos, text_len); ret = realdatastart; - goto err_close; + goto err; } datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); @@ -587,7 +577,7 @@ static int load_flat_file(struct linux_binprm * bprm, do_munmap(current->mm, textpos, text_len); do_munmap(current->mm, realdatastart, data_len + extra); ret = result; - goto err_close; + goto err; } reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len)); @@ -606,7 +596,7 @@ static int load_flat_file(struct linux_binprm * bprm, printk("Unable to allocate RAM for process text/data, errno %d\n", (int)-textpos); ret = textpos; - goto err_close; + goto err; } realdatastart = textpos + ntohl(hdr->data_start); @@ -652,7 +642,7 @@ static int load_flat_file(struct linux_binprm * bprm, do_munmap(current->mm, textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); ret = result; - goto err_close; + goto err; } } @@ -717,7 +707,7 @@ static int load_flat_file(struct linux_binprm * bprm, addr = calc_reloc(*rp, libinfo, id, 0); if (addr == RELOC_FAILED) { ret = -ENOEXEC; - goto err_close; + goto err; } *rp = addr; } @@ -747,7 +737,7 @@ static int load_flat_file(struct linux_binprm * bprm, rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1); if (rp == (unsigned long *)RELOC_FAILED) { ret = -ENOEXEC; - goto err_close; + goto err; } /* Get the pointer's value. */ @@ -762,7 +752,7 @@ static int load_flat_file(struct linux_binprm * bprm, addr = calc_reloc(addr, libinfo, id, 0); if (addr == RELOC_FAILED) { ret = -ENOEXEC; - goto err_close; + goto err; } /* Write back the relocated pointer. */ @@ -783,8 +773,6 @@ static int load_flat_file(struct linux_binprm * bprm, stack_len); return 0; -err_close: - sys_close(exec_fileno); err: return ret; } @@ -654,9 +654,10 @@ static struct bio *__bio_map_user_iov(request_queue_t *q, write_to_vm, 0, &pages[cur_page], NULL); up_read(¤t->mm->mmap_sem); - if (ret < local_nr_pages) + if (ret < local_nr_pages) { + ret = -EFAULT; goto out_unmap; - + } offset = uaddr & ~PAGE_MASK; for (j = cur_page; j < page_limit; j++) { @@ -1116,6 +1117,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) bp->bio1.bi_io_vec = &bp->bv1; bp->bio2.bi_io_vec = &bp->bv2; + bp->bio1.bi_max_vecs = 1; + bp->bio2.bi_max_vecs = 1; + bp->bio1.bi_end_io = bio_pair_end_1; bp->bio2.bi_end_io = bio_pair_end_2; diff --git a/fs/block_dev.c b/fs/block_dev.c index af88c43043d5..f5958f413bd1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1104,6 +1104,8 @@ const struct file_operations def_blk_fops = { .readv = generic_file_readv, .writev = generic_file_write_nolock, .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 8a2de038882e..7271bb0257f6 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,7 +1,18 @@ +Version 1.43 +------------ +POSIX locking to servers which support CIFS POSIX Extensions +(disabled by default controlled by proc/fs/cifs/Experimental). +Handle conversion of long share names (especially Asian languages) +to Unicode during mount. + Version 1.42 ------------ Fix slow oplock break when mounted to different servers at the same time and -the tids match and we try to find matching fid on wrong server. +the tids match and we try to find matching fid on wrong server. Fix read +looping when signing required by server (2.6.16 kernel only). Fix readdir +vs. rename race which could cause each to hang. Return . and .. even +if server does not. Allow searches to skip first three entries and +begin at any location. Fix oops in find_writeable_file. Version 1.41 ------------ diff --git a/fs/cifs/README b/fs/cifs/README index b2b4d0803761..0355003f4f0a 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -511,6 +511,14 @@ LinuxExtensionsEnabled If set to one then the client will attempt to support and want to map the uid and gid fields to values supplied at mount (rather than the actual values, then set this to zero. (default 1) +Experimental When set to 1 used to enable certain experimental + features (currently enables multipage writes + when signing is enabled, the multipage write + performance enhancement was disabled when + signing turned on in case buffer was modified + just before it was sent, also this flag will + be used to use the new experimental sessionsetup + code). These experimental features and tracing can be enabled by changing flags in /proc/fs/cifs (after the cifs module has been installed or built into the diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d4b713e5affb..c262d8874ce9 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -33,6 +33,7 @@ #include <linux/vfs.h> #include <linux/mempool.h> #include <linux/delay.h> +#include <linux/kthread.h> #include "cifsfs.h" #include "cifspdu.h" #define DECLARE_GLOBALS_HERE @@ -75,9 +76,6 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ; module_param(cifs_max_pending, int, 0); MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256"); -static DECLARE_COMPLETION(cifs_oplock_exited); -static DECLARE_COMPLETION(cifs_dnotify_exited); - extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; @@ -841,10 +839,6 @@ static int cifs_oplock_thread(void * dummyarg) __u16 netfid; int rc; - daemonize("cifsoplockd"); - allow_signal(SIGTERM); - - oplockThread = current; do { if (try_to_freeze()) continue; @@ -900,9 +894,9 @@ static int cifs_oplock_thread(void * dummyarg) set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); /* yield in case q were corrupt */ } - } while(!signal_pending(current)); - oplockThread = NULL; - complete_and_exit (&cifs_oplock_exited, 0); + } while (!kthread_should_stop()); + + return 0; } static int cifs_dnotify_thread(void * dummyarg) @@ -910,10 +904,6 @@ static int cifs_dnotify_thread(void * dummyarg) struct list_head *tmp; struct cifsSesInfo *ses; - daemonize("cifsdnotifyd"); - allow_signal(SIGTERM); - - dnotifyThread = current; do { if(try_to_freeze()) continue; @@ -931,8 +921,9 @@ static int cifs_dnotify_thread(void * dummyarg) wake_up_all(&ses->server->response_q); } read_unlock(&GlobalSMBSeslock); - } while(!signal_pending(current)); - complete_and_exit (&cifs_dnotify_exited, 0); + } while (!kthread_should_stop()); + + return 0; } static int __init @@ -982,32 +973,48 @@ init_cifs(void) } rc = cifs_init_inodecache(); - if (!rc) { - rc = cifs_init_mids(); - if (!rc) { - rc = cifs_init_request_bufs(); - if (!rc) { - rc = register_filesystem(&cifs_fs_type); - if (!rc) { - rc = (int)kernel_thread(cifs_oplock_thread, NULL, - CLONE_FS | CLONE_FILES | CLONE_VM); - if(rc > 0) { - rc = (int)kernel_thread(cifs_dnotify_thread, NULL, - CLONE_FS | CLONE_FILES | CLONE_VM); - if(rc > 0) - return 0; - else - cERROR(1,("error %d create dnotify thread", rc)); - } else { - cERROR(1,("error %d create oplock thread",rc)); - } - } - cifs_destroy_request_bufs(); - } - cifs_destroy_mids(); - } - cifs_destroy_inodecache(); + if (rc) + goto out_clean_proc; + + rc = cifs_init_mids(); + if (rc) + goto out_destroy_inodecache; + + rc = cifs_init_request_bufs(); + if (rc) + goto out_destroy_mids; + + rc = register_filesystem(&cifs_fs_type); + if (rc) + goto out_destroy_request_bufs; + + oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd"); + if (IS_ERR(oplockThread)) { + rc = PTR_ERR(oplockThread); + cERROR(1,("error %d create oplock thread", rc)); + goto out_unregister_filesystem; } + + dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd"); + if (IS_ERR(dnotifyThread)) { + rc = PTR_ERR(dnotifyThread); + cERROR(1,("error %d create dnotify thread", rc)); + goto out_stop_oplock_thread; + } + + return 0; + + out_stop_oplock_thread: + kthread_stop(oplockThread); + out_unregister_filesystem: + unregister_filesystem(&cifs_fs_type); + out_destroy_request_bufs: + cifs_destroy_request_bufs(); + out_destroy_mids: + cifs_destroy_mids(); + out_destroy_inodecache: + cifs_destroy_inodecache(); + out_clean_proc: #ifdef CONFIG_PROC_FS cifs_proc_clean(); #endif @@ -1025,14 +1032,8 @@ exit_cifs(void) cifs_destroy_inodecache(); cifs_destroy_mids(); cifs_destroy_request_bufs(); - if(oplockThread) { - send_sig(SIGTERM, oplockThread, 1); - wait_for_completion(&cifs_oplock_exited); - } - if(dnotifyThread) { - send_sig(SIGTERM, dnotifyThread, 1); - wait_for_completion(&cifs_dnotify_exited); - } + kthread_stop(oplockThread); + kthread_stop(dnotifyThread); } MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 4e829dc672a6..c98755dca868 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -99,5 +99,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg); -#define CIFS_VERSION "1.42" +#define CIFS_VERSION "1.43" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 2879ba343ca7..310ea2f0e0bf 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -267,7 +267,7 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const int waitFlag); extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, - const __u64 len, const __u64 offset, + const __u64 len, struct file_lock *, const __u16 lock_type, const int waitFlag); extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index d705500aa283..925881e00ff2 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1355,7 +1355,8 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, const __u64 len, - const __u64 lkoffset, const __u16 lock_type, const int waitFlag) + struct file_lock *pLockData, const __u16 lock_type, + const int waitFlag) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; @@ -1366,6 +1367,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, __u16 params, param_offset, offset, byte_count, count; cFYI(1, ("Posix Lock")); + + if(pLockData == NULL) + return EINVAL; + rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); if (rc) @@ -1404,10 +1409,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, parm_data->lock_type = cpu_to_le16(lock_type); if(waitFlag) - parm_data->lock_flags = 1; + parm_data->lock_flags = cpu_to_le16(1); parm_data->pid = cpu_to_le32(current->tgid); - parm_data->start = lkoffset; - parm_data->length = len; /* normalize negative numbers */ + parm_data->start = cpu_to_le64(pLockData->fl_start); + parm_data->length = cpu_to_le64(len); /* normalize negative numbers */ pSMB->DataOffset = cpu_to_le16(offset); pSMB->Fid = smb_file_id; @@ -1419,8 +1424,33 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cFYI(1, ("Send error in Posix Lock = %d", rc)); - } + } else if (get_flag) { + /* lock structure can be returned on get */ + __u16 data_offset; + __u16 data_count; + rc = validate_t2((struct smb_t2_rsp *)pSMBr); + if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) { + rc = -EIO; /* bad smb */ + goto plk_err_exit; + } + if(pLockData == NULL) { + rc = -EINVAL; + goto plk_err_exit; + } + data_offset = le16_to_cpu(pSMBr->t2.DataOffset); + data_count = le16_to_cpu(pSMBr->t2.DataCount); + if(data_count < sizeof(struct cifs_posix_lock)) { + rc = -EIO; + goto plk_err_exit; + } + parm_data = (struct cifs_posix_lock *) + ((char *)&pSMBr->hdr.Protocol + data_offset); + if(parm_data->lock_type == cpu_to_le16(CIFS_UNLCK)) + pLockData->fl_type = F_UNLCK; + } + +plk_err_exit: if (pSMB) cifs_small_buf_release(pSMB); @@ -3119,7 +3149,7 @@ findFirstRetry: psrch_inf->endOfSearch = FALSE; psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); - psrch_inf->index_of_last_entry = + psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + psrch_inf->entries_in_buffer; *pnetfid = parms->SearchHandle; } else { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0b86d5ca9014..bae1479318d1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2148,6 +2148,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ + if(ses->serverOS) + kfree(ses->serverOS); ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); if(ses->serverOS == NULL) goto sesssetup_nomem; @@ -2160,6 +2162,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (remaining_words > 0) { len = UniStrnlen((wchar_t *)bcc_ptr, remaining_words-1); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL); if(ses->serverNOS == NULL) goto sesssetup_nomem; @@ -2177,6 +2181,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (remaining_words > 0) { len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL); if(ses->serverDomain == NULL) @@ -2187,15 +2193,22 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, ses->serverDomain[2*len] = 0; ses->serverDomain[1+(2*len)] = 0; } /* else no more room so create dummy domain string */ - else + else { + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2, GFP_KERNEL); + } } else { /* no room so create dummy domain and NOS string */ /* if these kcallocs fail not much we can do, but better to not fail the sesssetup itself */ + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2, GFP_KERNEL); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(2, GFP_KERNEL); } @@ -2204,6 +2217,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { + if(ses->serverOS) + kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1,GFP_KERNEL); if(ses->serverOS == NULL) goto sesssetup_nomem; @@ -2214,6 +2229,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); if(ses->serverNOS == NULL) goto sesssetup_nomem; @@ -2223,6 +2240,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(len + 1,GFP_KERNEL); if(ses->serverDomain == NULL) goto sesssetup_nomem; @@ -2427,6 +2446,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ + if(ses->serverOS) + kfree(ses->serverOS); ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, @@ -2441,6 +2462,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, len = UniStrnlen((wchar_t *)bcc_ptr, remaining_words - 1); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(2 * (len + 1), GFP_KERNEL); @@ -2454,7 +2477,9 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, remaining_words -= len + 1; if (remaining_words > 0) { len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); - /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ + /* last string not null terminated (e.g.Windows XP/2000) */ + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL); cifs_strfromUCS_le(ses->serverDomain, (__le16 *)bcc_ptr, @@ -2463,11 +2488,18 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, ses->serverDomain[2*len] = 0; ses->serverDomain[1+(2*len)] = 0; } /* else no more room so create dummy domain string */ - else + else { + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2,GFP_KERNEL); - } else { /* no room so create dummy domain and NOS string */ + } + } else {/* no room use dummy domain&NOS */ + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2, GFP_KERNEL); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(2, GFP_KERNEL); } } else { /* ASCII */ @@ -2476,6 +2508,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { + if(ses->serverOS) + kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1, GFP_KERNEL); strncpy(ses->serverOS, bcc_ptr, len); @@ -2484,6 +2518,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; @@ -2491,6 +2527,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(len + 1, GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; @@ -2728,6 +2766,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ + if(ses->serverOS) + kfree(ses->serverOS); ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, @@ -2743,6 +2783,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, bcc_ptr, remaining_words - 1); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(2 * (len + 1), GFP_KERNEL); @@ -2760,6 +2802,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, if (remaining_words > 0) { len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2 * (len + @@ -2777,13 +2821,20 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, [1 + (2 * len)] = 0; } /* else no more room so create dummy domain string */ - else + else { + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2, GFP_KERNEL); + } } else { /* no room so create dummy domain and NOS string */ + if(ses->serverDomain); + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2, GFP_KERNEL); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(2, GFP_KERNEL); } @@ -2792,6 +2843,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { + if(ses->serverOS) + kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1, GFP_KERNEL); @@ -2803,6 +2856,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, bcc_ptr++; len = strnlen(bcc_ptr, 1024); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(len + 1, GFP_KERNEL); @@ -2812,6 +2867,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, bcc_ptr++; len = strnlen(bcc_ptr, 1024); + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(len + 1, GFP_KERNEL); @@ -3116,6 +3173,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ + if(ses->serverOS) + kfree(ses->serverOS); ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, @@ -3131,6 +3190,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr, remaining_words - 1); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(2 * (len + 1), GFP_KERNEL); @@ -3147,6 +3208,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (remaining_words > 0) { len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string not always null terminated (e.g. for Windows XP & 2000) */ + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2 * (len + @@ -3172,10 +3235,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, len)] = 0; } /* else no more room so create dummy domain string */ - else + else { + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2,GFP_KERNEL); + } } else { /* no room so create dummy domain and NOS string */ + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(2, GFP_KERNEL); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(2, GFP_KERNEL); } } else { /* ASCII */ @@ -3183,6 +3253,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { + if(ses->serverOS) + kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1,GFP_KERNEL); strncpy(ses->serverOS,bcc_ptr, len); @@ -3191,6 +3263,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); + if(ses->serverNOS) + kfree(ses->serverNOS); ses->serverNOS = kzalloc(len+1,GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; @@ -3198,6 +3272,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); + if(ses->serverDomain) + kfree(ses->serverDomain); ses->serverDomain = kzalloc(len+1,GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; @@ -3282,7 +3358,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; /* align */ } - if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if(ses->server->secMode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; if (ses->capabilities & CAP_STATUS32) { @@ -3294,8 +3371,10 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, if (ses->capabilities & CAP_UNICODE) { smb_buffer->Flags2 |= SMBFLG2_UNICODE; length = - cifs_strtoUCS((__le16 *) bcc_ptr, tree, 100, nls_codepage); - bcc_ptr += 2 * length; /* convert num of 16 bit words to bytes */ + cifs_strtoUCS((__le16 *) bcc_ptr, tree, + 6 /* max utf8 char length in bytes */ * + (/* server len*/ + 256 /* share len */), nls_codepage); + bcc_ptr += 2 * length; /* convert num 16 bit words to bytes */ bcc_ptr += 2; /* skip trailing null */ } else { /* ASCII */ strcpy(bcc_ptr, tree); @@ -3447,6 +3526,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, pSesInfo->server->secMode, pSesInfo->server->capabilities, pSesInfo->server->timeZone)); +#ifdef CONFIG_CIFS_EXPERIMENTAL + if(experimEnabled > 1) + rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */, + &ntlmv2_flag, nls_info); + else +#endif if (extended_security && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) && (pSesInfo->server->secType == NTLMSSP)) { diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 1d0ca3eaaca5..82315edc77d7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -139,9 +139,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - mutex_lock(&direntry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -316,9 +314,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - mutex_lock(&direntry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex); if(full_path == NULL) rc = -ENOMEM; else if (pTcon->ses->capabilities & CAP_UNIX) { @@ -440,6 +436,20 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name cifs_sb = CIFS_SB(parent_dir_inode->i_sb); pTcon = cifs_sb->tcon; + /* + * Don't allow the separator character in a path component. + * The VFS will not allow "/", but "\" is allowed by posix. + */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { + int i; + for (i = 0; i < direntry->d_name.len; i++) + if (direntry->d_name.name[i] == '\\') { + cFYI(1, ("Invalid file name")); + FreeXid(xid); + return ERR_PTR(-EINVAL); + } + } + /* can not grab the rename sem here since it would deadlock in the cases (beginning of sys_rename itself) in which we already have the sb rename sem */ diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c index ec4dfe9bf5ef..633a93811328 100644 --- a/fs/cifs/fcntl.c +++ b/fs/cifs/fcntl.c @@ -86,9 +86,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg) cifs_sb = CIFS_SB(file->f_dentry->d_sb); pTcon = cifs_sb->tcon; - mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(file->f_dentry); - mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex); if(full_path == NULL) { rc = -ENOMEM; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5c497c529772..e2b4ce1dad66 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -84,6 +84,8 @@ static inline int cifs_get_disposition(unsigned int flags) return FILE_OVERWRITE_IF; else if ((flags & O_CREAT) == O_CREAT) return FILE_OPEN_IF; + else if ((flags & O_TRUNC) == O_TRUNC) + return FILE_OVERWRITE; else return FILE_OPEN; } @@ -203,9 +205,7 @@ int cifs_open(struct inode *inode, struct file *file) } } - mutex_lock(&inode->i_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(file->f_dentry); - mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if (full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -658,7 +658,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) else posix_lock_type = CIFS_WRLCK; rc = CIFSSMBPosixLock(xid, pTcon, netfid, 1 /* get */, - length, pfLock->fl_start, + length, pfLock, posix_lock_type, wait_flag); FreeXid(xid); return rc; @@ -706,7 +706,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) return -EOPNOTSUPP; } rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, - length, pfLock->fl_start, + length, pfLock, posix_lock_type, wait_flag); } else rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, @@ -906,9 +906,10 @@ static ssize_t cifs_write(struct file *file, const char *write_data, if (rc != 0) break; } - /* BB FIXME We can not sign across two buffers yet */ - if((pTcon->ses->server->secMode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0) { + if(experimEnabled || (pTcon->ses->server && + ((pTcon->ses->server->secMode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + == 0))) { struct kvec iov[2]; unsigned int len; @@ -923,13 +924,13 @@ static ssize_t cifs_write(struct file *file, const char *write_data, *poffset, &bytes_written, iov, 1, long_op); } else - /* BB FIXME fixup indentation of line below */ - rc = CIFSSMBWrite(xid, pTcon, - open_file->netfid, - min_t(const int, cifs_sb->wsize, - write_size - total_written), - *poffset, &bytes_written, - write_data + total_written, NULL, long_op); + rc = CIFSSMBWrite(xid, pTcon, + open_file->netfid, + min_t(const int, cifs_sb->wsize, + write_size - total_written), + *poffset, &bytes_written, + write_data + total_written, + NULL, long_op); } if (rc || (bytes_written == 0)) { if (total_written) @@ -968,6 +969,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) struct cifsFileInfo *open_file; int rc; + /* Having a null inode here (because mapping->host was set to zero by + the VFS or MM) should not happen but we had reports of on oops (due to + it being zero) during stress testcases so we need to check for it */ + + if(cifs_inode == NULL) { + cERROR(1,("Null inode passed to cifs_writeable_file")); + dump_stack(); + return NULL; + } + read_lock(&GlobalSMBSeslock); list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { if (open_file->closePend) @@ -1093,12 +1104,11 @@ static int cifs_writepages(struct address_space *mapping, if (cifs_sb->wsize < PAGE_CACHE_SIZE) return generic_writepages(mapping, wbc); - /* BB FIXME we do not have code to sign across multiple buffers yet, - so go to older writepage style write which we can sign if needed */ if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) if(cifs_sb->tcon->ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - return generic_writepages(mapping, wbc); + if(!experimEnabled) + return generic_writepages(mapping, wbc); /* * BB: Is this meaningful for a non-block-device file system? diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 957ddd1571c6..4093764ef461 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -722,9 +722,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - mutex_lock(&inode->i_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if (full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -807,9 +805,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - mutex_lock(&inode->i_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if (full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -1141,9 +1137,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) rc = 0; } - mutex_lock(&direntry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex); if (full_path == NULL) { FreeXid(xid); return -ENOMEM; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 9562f5bba65c..2ec99f833142 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -48,10 +48,8 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, /* No need to check for cross device links since server will do that BB note DFS case in future though (when we may have to check) */ - mutex_lock(&inode->i_sb->s_vfs_rename_mutex); fromName = build_path_from_dentry(old_file); toName = build_path_from_dentry(direntry); - mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if((fromName == NULL) || (toName == NULL)) { rc = -ENOMEM; goto cifs_hl_exit; @@ -103,9 +101,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) xid = GetXid(); - mutex_lock(&direntry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex); if (!full_path) goto out_no_free; @@ -164,9 +160,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - mutex_lock(&inode->i_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c index 78866f925747..115359cc7a32 100644 --- a/fs/cifs/ntlmssp.c +++ b/fs/cifs/ntlmssp.c @@ -121,6 +121,20 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type, } + /* copy session key */ + + /* if Unicode, align strings to two byte boundary */ + + /* copy user name */ /* BB Do we need to special case null user name? */ + + /* copy domain name */ + + /* copy Linux version */ + + /* copy network operating system name */ + + /* update bcc and smb buffer length */ + /* rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */ /* SMB request buf freed in SendReceive2 */ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 2f6e2825571e..b689c5035124 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -404,9 +404,7 @@ static int initiate_cifs_search(const int xid, struct file *file) if(pTcon == NULL) return -EINVAL; - mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(file->f_dentry); - mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex); if(full_path == NULL) { return -ENOMEM; @@ -592,6 +590,13 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry - cifsFile->srch_inf.entries_in_buffer; + + /* if first entry in buf is zero then is first buffer + in search response data which means it is likely . and .. + will be in this buffer, although some servers do not return + . and .. for the root of a drive and for those we need + to start two entries earlier */ + /* dump_cifs_file_struct(file, "In fce ");*/ if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) && is_dir_changed(file)) || @@ -634,23 +639,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + smbCalcSize((struct smb_hdr *) cifsFile->srch_inf.ntwrk_buf_start); + + current_entry = cifsFile->srch_inf.srch_entries_start; first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry - cifsFile->srch_inf.entries_in_buffer; pos_in_buf = index_to_find - first_entry_in_buffer; cFYI(1,("found entry - pos_in_buf %d",pos_in_buf)); - current_entry = cifsFile->srch_inf.srch_entries_start; for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) { /* go entry by entry figuring out which is first */ - /* if( . or ..) - skip */ - rc = cifs_entry_is_dot(current_entry,cifsFile); - if(rc == 1) /* is . or .. so skip */ { - cFYI(1,("Entry is .")); /* BB removeme BB */ - /* continue; */ - } else if (rc == 2 ) { - cFYI(1,("Entry is ..")); /* BB removeme BB */ - /* continue; */ - } current_entry = nxt_dir_entry(current_entry,end_of_smb); } if((current_entry == NULL) && (i < pos_in_buf)) { @@ -770,6 +766,11 @@ static int cifs_filldir(char *pfindEntry, struct file *file, if(file->f_dentry == NULL) return -ENOENT; + rc = cifs_entry_is_dot(pfindEntry,pCifsF); + /* skip . and .. since we added them first */ + if(rc != 0) + return 0; + cifs_sb = CIFS_SB(file->f_dentry->d_sb); qstring.name = scratch_buf; @@ -898,22 +899,22 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) switch ((int) file->f_pos) { case 0: - /*if (filldir(direntry, ".", 1, file->f_pos, + if (filldir(direntry, ".", 1, file->f_pos, file->f_dentry->d_inode->i_ino, DT_DIR) < 0) { - cERROR(1, ("Filldir for current dir failed ")); + cERROR(1, ("Filldir for current dir failed")); rc = -ENOMEM; break; } - file->f_pos++; */ + file->f_pos++; case 1: - /* if (filldir(direntry, "..", 2, file->f_pos, + if (filldir(direntry, "..", 2, file->f_pos, file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { cERROR(1, ("Filldir for parent dir failed ")); rc = -ENOMEM; break; } - file->f_pos++; */ - case 2: + file->f_pos++; + default: /* 1) If search is active, is in current search buffer? if it before then restart search @@ -927,7 +928,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) return rc; } } - default: if(file->private_data == NULL) { rc = -EINVAL; FreeXid(xid); @@ -947,8 +947,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) kfree(cifsFile->search_resume_name); cifsFile->search_resume_name = NULL; */ - /* BB account for . and .. in f_pos as special case */ - rc = find_cifs_entry(xid,pTcon, file, ¤t_entry,&num_to_fill); if(rc) { @@ -977,7 +975,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) num_to_fill, i)); break; } - + /* if buggy server returns . and .. late do + we want to check for that here? */ rc = cifs_filldir(current_entry, file, filldir, direntry,tmp_buf); file->f_pos++; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 3938444d87b2..7754d641775e 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -62,9 +62,7 @@ int cifs_removexattr(struct dentry * direntry, const char * ea_name) cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - mutex_lock(&sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -116,9 +114,7 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name, cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - mutex_lock(&sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -223,9 +219,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name, cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - mutex_lock(&sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -341,9 +335,7 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size) cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - mutex_lock(&sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - mutex_unlock(&sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; diff --git a/fs/compat.c b/fs/compat.c index 7f8e26ea427c..b1f64786a613 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1217,6 +1217,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (ret < 0) goto out; + ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE); + if (ret) + goto out; + fnv = NULL; if (type == READ) { fn = file->f_op->read; @@ -1313,6 +1317,26 @@ out: return ret; } +asmlinkage long +compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32, + unsigned int nr_segs, unsigned int flags) +{ + unsigned i; + struct iovec *iov; + if (nr_segs > UIO_MAXIOV) + return -EINVAL; + iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec)); + for (i = 0; i < nr_segs; i++) { + struct compat_iovec v; + if (get_user(v.iov_base, &iov32[i].iov_base) || + get_user(v.iov_len, &iov32[i].iov_len) || + put_user(compat_ptr(v.iov_base), &iov[i].iov_base) || + put_user(v.iov_len, &iov[i].iov_len)) + return -EFAULT; + } + return sys_vmsplice(fd, iov, nr_segs, flags); +} + /* * Exactly like fs/open.c:sys_open(), except that it doesn't set the * O_LARGEFILE flag. @@ -1889,7 +1913,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } if (sigmask) { - if (sigsetsize |= sizeof(compat_sigset_t)) + if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (copy_from_user(&ss32, sigmask, sizeof(ss32))) return -EFAULT; @@ -2006,109 +2030,115 @@ union compat_nfsctl_res { struct knfsd_fh cr32_getfs; }; -static int compat_nfs_svc_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) +static int compat_nfs_svc_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) { - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port); - err |= __get_user(karg->ca_svc.svc_nthreads, &arg->ca32_svc.svc32_nthreads); - return (err) ? -EFAULT : 0; + if (!access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)) || + get_user(karg->ca_version, &arg->ca32_version) || + __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port) || + __get_user(karg->ca_svc.svc_nthreads, + &arg->ca32_svc.svc32_nthreads)) + return -EFAULT; + return 0; } -static int compat_nfs_clnt_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) +static int compat_nfs_clnt_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) { - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_client, sizeof(arg->ca32_client)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __copy_from_user(&karg->ca_client.cl_ident[0], - &arg->ca32_client.cl32_ident[0], - NFSCLNT_IDMAX); - err |= __get_user(karg->ca_client.cl_naddr, &arg->ca32_client.cl32_naddr); - err |= __copy_from_user(&karg->ca_client.cl_addrlist[0], - &arg->ca32_client.cl32_addrlist[0], - (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); - err |= __get_user(karg->ca_client.cl_fhkeytype, - &arg->ca32_client.cl32_fhkeytype); - err |= __get_user(karg->ca_client.cl_fhkeylen, - &arg->ca32_client.cl32_fhkeylen); - err |= __copy_from_user(&karg->ca_client.cl_fhkey[0], - &arg->ca32_client.cl32_fhkey[0], - NFSCLNT_KEYMAX); + if (!access_ok(VERIFY_READ, &arg->ca32_client, + sizeof(arg->ca32_client)) || + get_user(karg->ca_version, &arg->ca32_version) || + __copy_from_user(&karg->ca_client.cl_ident[0], + &arg->ca32_client.cl32_ident[0], + NFSCLNT_IDMAX) || + __get_user(karg->ca_client.cl_naddr, + &arg->ca32_client.cl32_naddr) || + __copy_from_user(&karg->ca_client.cl_addrlist[0], + &arg->ca32_client.cl32_addrlist[0], + (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)) || + __get_user(karg->ca_client.cl_fhkeytype, + &arg->ca32_client.cl32_fhkeytype) || + __get_user(karg->ca_client.cl_fhkeylen, + &arg->ca32_client.cl32_fhkeylen) || + __copy_from_user(&karg->ca_client.cl_fhkey[0], + &arg->ca32_client.cl32_fhkey[0], + NFSCLNT_KEYMAX)) + return -EFAULT; - return (err) ? -EFAULT : 0; + return 0; } -static int compat_nfs_exp_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) +static int compat_nfs_exp_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) { - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_export, sizeof(arg->ca32_export)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __copy_from_user(&karg->ca_export.ex_client[0], - &arg->ca32_export.ex32_client[0], - NFSCLNT_IDMAX); - err |= __copy_from_user(&karg->ca_export.ex_path[0], - &arg->ca32_export.ex32_path[0], - NFS_MAXPATHLEN); - err |= __get_user(karg->ca_export.ex_dev, - &arg->ca32_export.ex32_dev); - err |= __get_user(karg->ca_export.ex_ino, - &arg->ca32_export.ex32_ino); - err |= __get_user(karg->ca_export.ex_flags, - &arg->ca32_export.ex32_flags); - err |= __get_user(karg->ca_export.ex_anon_uid, - &arg->ca32_export.ex32_anon_uid); - err |= __get_user(karg->ca_export.ex_anon_gid, - &arg->ca32_export.ex32_anon_gid); + if (!access_ok(VERIFY_READ, &arg->ca32_export, + sizeof(arg->ca32_export)) || + get_user(karg->ca_version, &arg->ca32_version) || + __copy_from_user(&karg->ca_export.ex_client[0], + &arg->ca32_export.ex32_client[0], + NFSCLNT_IDMAX) || + __copy_from_user(&karg->ca_export.ex_path[0], + &arg->ca32_export.ex32_path[0], + NFS_MAXPATHLEN) || + __get_user(karg->ca_export.ex_dev, + &arg->ca32_export.ex32_dev) || + __get_user(karg->ca_export.ex_ino, + &arg->ca32_export.ex32_ino) || + __get_user(karg->ca_export.ex_flags, + &arg->ca32_export.ex32_flags) || + __get_user(karg->ca_export.ex_anon_uid, + &arg->ca32_export.ex32_anon_uid) || + __get_user(karg->ca_export.ex_anon_gid, + &arg->ca32_export.ex32_anon_gid)) + return -EFAULT; SET_UID(karg->ca_export.ex_anon_uid, karg->ca_export.ex_anon_uid); SET_GID(karg->ca_export.ex_anon_gid, karg->ca_export.ex_anon_gid); - return (err) ? -EFAULT : 0; + return 0; } -static int compat_nfs_getfd_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) +static int compat_nfs_getfd_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) { - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_getfd, sizeof(arg->ca32_getfd)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __copy_from_user(&karg->ca_getfd.gd_addr, - &arg->ca32_getfd.gd32_addr, - (sizeof(struct sockaddr))); - err |= __copy_from_user(&karg->ca_getfd.gd_path, - &arg->ca32_getfd.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfd.gd_version, - &arg->ca32_getfd.gd32_version); + if (!access_ok(VERIFY_READ, &arg->ca32_getfd, + sizeof(arg->ca32_getfd)) || + get_user(karg->ca_version, &arg->ca32_version) || + __copy_from_user(&karg->ca_getfd.gd_addr, + &arg->ca32_getfd.gd32_addr, + (sizeof(struct sockaddr))) || + __copy_from_user(&karg->ca_getfd.gd_path, + &arg->ca32_getfd.gd32_path, + (NFS_MAXPATHLEN+1)) || + __get_user(karg->ca_getfd.gd_version, + &arg->ca32_getfd.gd32_version)) + return -EFAULT; - return (err) ? -EFAULT : 0; + return 0; } -static int compat_nfs_getfs_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) +static int compat_nfs_getfs_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) { - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_getfs, sizeof(arg->ca32_getfs)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __copy_from_user(&karg->ca_getfs.gd_addr, - &arg->ca32_getfs.gd32_addr, - (sizeof(struct sockaddr))); - err |= __copy_from_user(&karg->ca_getfs.gd_path, - &arg->ca32_getfs.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfs.gd_maxlen, - &arg->ca32_getfs.gd32_maxlen); + if (!access_ok(VERIFY_READ,&arg->ca32_getfs,sizeof(arg->ca32_getfs)) || + get_user(karg->ca_version, &arg->ca32_version) || + __copy_from_user(&karg->ca_getfs.gd_addr, + &arg->ca32_getfs.gd32_addr, + (sizeof(struct sockaddr))) || + __copy_from_user(&karg->ca_getfs.gd_path, + &arg->ca32_getfs.gd32_path, + (NFS_MAXPATHLEN+1)) || + __get_user(karg->ca_getfs.gd_maxlen, + &arg->ca32_getfs.gd32_maxlen)) + return -EFAULT; - return (err) ? -EFAULT : 0; + return 0; } /* This really doesn't need translations, we are only passing * back a union which contains opaque nfs file handle data. */ -static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsctl_res __user *res) +static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, + union compat_nfsctl_res __user *res) { int err; @@ -2117,8 +2147,9 @@ static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsct return (err) ? -EFAULT : 0; } -asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user *arg, - union compat_nfsctl_res __user *res) +asmlinkage long compat_sys_nfsservctl(int cmd, + struct compat_nfsctl_arg __user *arg, + union compat_nfsctl_res __user *res) { struct nfsctl_arg *karg; union nfsctl_res *kres; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5638c8f9362f..5f952187fc53 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -505,13 +505,15 @@ static int populate_groups(struct config_group *group) int i; if (group->default_groups) { - /* FYI, we're faking mkdir here + /* + * FYI, we're faking mkdir here * I'm not sure we need this semaphore, as we're called * from our parent's mkdir. That holds our parent's * i_mutex, so afaik lookup cannot continue through our * parent to find us, let alone mess with our tree. * That said, taking our i_mutex is closer to mkdir - * emulation, and shouldn't hurt. */ + * emulation, and shouldn't hurt. + */ mutex_lock(&dentry->d_inode->i_mutex); for (i = 0; group->default_groups[i]; i++) { @@ -546,20 +548,34 @@ static void unlink_obj(struct config_item *item) item->ci_group = NULL; item->ci_parent = NULL; + + /* Drop the reference for ci_entry */ config_item_put(item); + /* Drop the reference for ci_parent */ config_group_put(group); } } static void link_obj(struct config_item *parent_item, struct config_item *item) { - /* Parent seems redundant with group, but it makes certain - * traversals much nicer. */ + /* + * Parent seems redundant with group, but it makes certain + * traversals much nicer. + */ item->ci_parent = parent_item; + + /* + * We hold a reference on the parent for the child's ci_parent + * link. + */ item->ci_group = config_group_get(to_config_group(parent_item)); list_add_tail(&item->ci_entry, &item->ci_group->cg_children); + /* + * We hold a reference on the child for ci_entry on the parent's + * cg_children + */ config_item_get(item); } @@ -684,6 +700,10 @@ static void client_drop_item(struct config_item *parent_item, type = parent_item->ci_type; BUG_ON(!type); + /* + * If ->drop_item() exists, it is responsible for the + * config_item_put(). + */ if (type->ct_group_ops && type->ct_group_ops->drop_item) type->ct_group_ops->drop_item(to_config_group(parent_item), item); @@ -694,23 +714,28 @@ static void client_drop_item(struct config_item *parent_item, static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - int ret; + int ret, module_got = 0; struct config_group *group; struct config_item *item; struct config_item *parent_item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; struct config_item_type *type; - struct module *owner; + struct module *owner = NULL; char *name; - if (dentry->d_parent == configfs_sb->s_root) - return -EPERM; + if (dentry->d_parent == configfs_sb->s_root) { + ret = -EPERM; + goto out; + } sd = dentry->d_parent->d_fsdata; - if (!(sd->s_type & CONFIGFS_USET_DIR)) - return -EPERM; + if (!(sd->s_type & CONFIGFS_USET_DIR)) { + ret = -EPERM; + goto out; + } + /* Get a working ref for the duration of this function */ parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; subsys = to_config_group(parent_item)->cg_subsys; @@ -719,15 +744,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (!type || !type->ct_group_ops || (!type->ct_group_ops->make_group && !type->ct_group_ops->make_item)) { - config_item_put(parent_item); - return -EPERM; /* What lack-of-mkdir returns */ + ret = -EPERM; /* Lack-of-mkdir returns -EPERM */ + goto out_put; } name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); if (!name) { - config_item_put(parent_item); - return -ENOMEM; + ret = -ENOMEM; + goto out_put; } + snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); down(&subsys->su_sem); @@ -748,40 +774,67 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) kfree(name); if (!item) { - config_item_put(parent_item); - return -ENOMEM; + /* + * If item == NULL, then link_obj() was never called. + * There are no extra references to clean up. + */ + ret = -ENOMEM; + goto out_put; } - ret = -EINVAL; + /* + * link_obj() has been called (via link_group() for groups). + * From here on out, errors must clean that up. + */ + type = item->ci_type; - if (type) { - owner = type->ct_owner; - if (try_module_get(owner)) { - if (group) { - ret = configfs_attach_group(parent_item, - item, - dentry); - } else { - ret = configfs_attach_item(parent_item, - item, - dentry); - } + if (!type) { + ret = -EINVAL; + goto out_unlink; + } - if (ret) { - down(&subsys->su_sem); - if (group) - unlink_group(group); - else - unlink_obj(item); - client_drop_item(parent_item, item); - up(&subsys->su_sem); + owner = type->ct_owner; + if (!try_module_get(owner)) { + ret = -EINVAL; + goto out_unlink; + } - config_item_put(parent_item); - module_put(owner); - } - } + /* + * I hate doing it this way, but if there is + * an error, module_put() probably should + * happen after any cleanup. + */ + module_got = 1; + + if (group) + ret = configfs_attach_group(parent_item, item, dentry); + else + ret = configfs_attach_item(parent_item, item, dentry); + +out_unlink: + if (ret) { + /* Tear down everything we built up */ + down(&subsys->su_sem); + if (group) + unlink_group(group); + else + unlink_obj(item); + client_drop_item(parent_item, item); + up(&subsys->su_sem); + + if (module_got) + module_put(owner); } +out_put: + /* + * link_obj()/link_group() took a reference from child->parent, + * so the parent is safely pinned. We can drop our working + * reference. + */ + config_item_put(parent_item); + +out: return ret; } @@ -801,6 +854,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; + /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; BUG_ON(!subsys); @@ -817,6 +871,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) return ret; } + /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); /* Drop reference from above, item already holds one. */ diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 85d166cdcae4..b55b4ea9a676 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -67,12 +67,13 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d static int debugfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { - struct inode *inode = debugfs_get_inode(dir->i_sb, mode, dev); + struct inode *inode; int error = -EPERM; if (dentry->d_inode) return -EEXIST; + inode = debugfs_get_inode(dir->i_sb, mode, dev); if (inode) { d_instantiate(dentry, inode); dget(dentry); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b06b54f1bbbb..4c39009350f3 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -102,7 +102,7 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, if (acceptable(context, result)) return result; if (S_ISDIR(result->d_inode->i_mode)) { - /* there is no other dentry, so fail */ + err = -EACCES; goto err_result; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 48ae0339af17..2edd7eec88fd 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -711,7 +711,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, * direct blocks blocks */ if (num == 0 && blks > 1) { - current_block = le32_to_cpu(where->key + 1); + current_block = le32_to_cpu(where->key) + 1; for (i = 1; i < blks; i++) *(where->p + i ) = cpu_to_le32(current_block++); } @@ -724,7 +724,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, if (block_i) { block_i->last_alloc_logical_block = block + blks - 1; block_i->last_alloc_physical_block = - le32_to_cpu(where[num].key + blks - 1); + le32_to_cpu(where[num].key) + blks - 1; } /* We are done with atomic stuff, now do the rest of housekeeping */ @@ -814,11 +814,13 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, /* Simplest case - block found, no allocation needed */ if (!partial) { - first_block = chain[depth - 1].key; + first_block = le32_to_cpu(chain[depth - 1].key); clear_buffer_new(bh_result); count++; /*map more blocks*/ while (count < maxblocks && count <= blocks_to_boundary) { + unsigned long blk; + if (!verify_chain(chain, partial)) { /* * Indirect block might be removed by @@ -831,8 +833,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, count = 0; break; } - if (le32_to_cpu(*(chain[depth-1].p+count) == - (first_block + count))) + blk = le32_to_cpu(*(chain[depth-1].p + count)); + + if (blk == first_block + count) count++; else break; diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index aaf1da17b6d4..8c22aa9a7fbb 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -48,6 +48,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, if (!S_ISDIR(inode->i_mode)) flags &= ~EXT3_DIRSYNC_FL; + mutex_lock(&inode->i_mutex); oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ @@ -60,8 +61,10 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * This test looks nicer. Thanks to Pauline Middelink */ if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); return -EPERM; + } } /* @@ -69,14 +72,18 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * the relevant capability. */ if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { - if (!capable(CAP_SYS_RESOURCE)) + if (!capable(CAP_SYS_RESOURCE)) { + mutex_unlock(&inode->i_mutex); return -EPERM; + } } handle = ext3_journal_start(inode, 1); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + mutex_unlock(&inode->i_mutex); return PTR_ERR(handle); + } if (IS_SYNC(inode)) handle->h_sync = 1; err = ext3_reserve_inode_write(handle, inode, &iloc); @@ -93,11 +100,14 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, err = ext3_mark_iloc_dirty(handle, inode, &iloc); flags_err: ext3_journal_stop(handle); - if (err) + if (err) { + mutex_unlock(&inode->i_mutex); return err; + } if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) err = ext3_change_inode_journal_flag(inode, jflag); + mutex_unlock(&inode->i_mutex); return err; } case EXT3_IOC_GETVERSION: diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index c5ffa8523968..34b39e9a1e5a 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -213,7 +213,7 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_bh; } lock_buffer(bh); - memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size); + memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); set_buffer_uptodate(gdb); unlock_buffer(bh); ext3_journal_dirty_metadata(handle, gdb); @@ -767,7 +767,6 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) if (input->group != sbi->s_groups_count) { ext3_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); - unlock_super(sb); err = -EBUSY; goto exit_journal; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cc750c68fe70..104a62dadb94 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -128,14 +128,24 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) } } -void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req) +/* + * Called with sbput_sem held for read (request_end) or write + * (fuse_put_super). By the time fuse_put_super() is finished, all + * inodes belonging to background requests must be released, so the + * iputs have to be done within the locked region. + */ +void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req) { - list_del_init(&req->bg_entry); + iput(req->inode); + iput(req->inode2); + spin_lock(&fc->lock); + list_del(&req->bg_entry); if (fc->num_background == FUSE_MAX_BACKGROUND) { fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } fc->num_background--; + spin_unlock(&fc->lock); } /* @@ -165,27 +175,22 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); fuse_put_request(fc, req); } else { - struct inode *inode = req->inode; - struct inode *inode2 = req->inode2; - struct file *file = req->file; void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; - req->inode = NULL; - req->inode2 = NULL; - req->file = NULL; - if (!list_empty(&req->bg_entry)) - fuse_remove_background(fc, req); spin_unlock(&fc->lock); + down_read(&fc->sbput_sem); + if (fc->mounted) + fuse_release_background(fc, req); + up_read(&fc->sbput_sem); + + /* fput must go outside sbput_sem, otherwise it can deadlock */ + if (req->file) + fput(req->file); if (end) end(fc, req); else fuse_put_request(fc, req); - - if (file) - fput(file); - iput(inode); - iput(inode2); } } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 59661c481d9d..0474202cb5dc 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -258,9 +258,15 @@ struct fuse_conn { /** waitq for blocked connection */ wait_queue_head_t blocked_waitq; + /** RW semaphore for exclusion with fuse_put_super() */ + struct rw_semaphore sbput_sem; + /** The next unique request id */ u64 reqctr; + /** Mount is active */ + unsigned mounted; + /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; @@ -471,11 +477,11 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); void request_send_background(struct fuse_conn *fc, struct fuse_req *req); /** - * Remove request from the the background list + * Release inodes and file associated with background request */ -void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req); +void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req); -/** Abort all requests */ +/* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); /** diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 43a6fc0db8a7..7627022446b2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -204,26 +204,17 @@ static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); + down_write(&fc->sbput_sem); + while (!list_empty(&fc->background)) + fuse_release_background(fc, + list_entry(fc->background.next, + struct fuse_req, bg_entry)); + spin_lock(&fc->lock); + fc->mounted = 0; fc->connected = 0; - while (!list_empty(&fc->background)) { - struct fuse_req *req = list_entry(fc->background.next, - struct fuse_req, bg_entry); - struct inode *inode = req->inode; - struct inode *inode2 = req->inode2; - - /* File would hold a reference to vfsmount */ - BUG_ON(req->file); - req->inode = NULL; - req->inode2 = NULL; - fuse_remove_background(fc, req); - - spin_unlock(&fc->lock); - iput(inode); - iput(inode2); - spin_lock(&fc->lock); - } spin_unlock(&fc->lock); + up_write(&fc->sbput_sem); /* Flush all readers on this fs */ kill_fasync(&fc->fasync, SIGIO, POLL_IN); wake_up_all(&fc->waitq); @@ -395,6 +386,7 @@ static struct fuse_conn *new_conn(void) INIT_LIST_HEAD(&fc->processing); INIT_LIST_HEAD(&fc->io); INIT_LIST_HEAD(&fc->background); + init_rwsem(&fc->sbput_sem); kobj_set_kset_s(fc, connections_subsys); kobject_init(&fc->kobj); atomic_set(&fc->num_waiting, 0); @@ -508,11 +500,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (file->f_op != &fuse_dev_operations) return -EINVAL; - /* Setting file->private_data can't race with other mount() - instances, since BKL is held for ->get_sb() */ - if (file->private_data) - return -EINVAL; - fc = new_conn(); if (!fc) return -ENOMEM; @@ -548,7 +535,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_free_req; + /* Setting file->private_data can't race with other mount() + instances, since BKL is held for ->get_sb() */ + err = -EINVAL; + if (file->private_data) + goto err_kobject_del; + sb->s_root = root_dentry; + fc->mounted = 1; fc->connected = 1; kobject_get(&fc->kobj); file->private_data = fc; @@ -563,6 +557,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) return 0; + err_kobject_del: + kobject_del(&fc->kobj); err_free_req: fuse_request_free(init_req); err_put_root: diff --git a/fs/inotify.c b/fs/inotify.c index 1f50302849c5..732ec4bd5774 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -848,7 +848,11 @@ static int inotify_release(struct inode *ignored, struct file *file) inode = watch->inode; mutex_lock(&inode->inotify_mutex); mutex_lock(&dev->mutex); - remove_watch_no_event(watch, dev); + + /* make sure we didn't race with another list removal */ + if (likely(idr_find(&dev->idr, watch->wd))) + remove_watch_no_event(watch, dev); + mutex_unlock(&dev->mutex); mutex_unlock(&inode->inotify_mutex); put_inotify_watch(watch); @@ -890,8 +894,7 @@ static int inotify_ignore(struct inotify_device *dev, s32 wd) mutex_lock(&dev->mutex); /* make sure that we did not race */ - watch = idr_find(&dev->idr, wd); - if (likely(watch)) + if (likely(idr_find(&dev->idr, wd) == watch)) remove_watch(watch, dev); mutex_unlock(&dev->mutex); diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index 0ef207dfaf6f..5371a403130a 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -247,7 +247,7 @@ flash_safe_read(struct mtd_info *mtd, loff_t from, D3(printk(KERN_NOTICE "flash_safe_read(%p, %08x, %p, %08x)\n", mtd, (unsigned int) from, buf, count)); - res = MTD_READ(mtd, from, count, &retlen, buf); + res = mtd->read(mtd, from, count, &retlen, buf); if (retlen != count) { panic("Didn't read all bytes in flash_safe_read(). Returned %d\n", res); } @@ -262,7 +262,7 @@ flash_read_u32(struct mtd_info *mtd, loff_t from) __u32 ret; int res; - res = MTD_READ(mtd, from, 4, &retlen, (unsigned char *)&ret); + res = mtd->read(mtd, from, 4, &retlen, (unsigned char *)&ret); if (retlen != 4) { printk("Didn't read all bytes in flash_read_u32(). Returned %d\n", res); return 0; @@ -282,7 +282,7 @@ flash_safe_write(struct mtd_info *mtd, loff_t to, D3(printk(KERN_NOTICE "flash_safe_write(%p, %08x, %p, %08x)\n", mtd, (unsigned int) to, buf, count)); - res = MTD_WRITE(mtd, to, count, &retlen, buf); + res = mtd->write(mtd, to, count, &retlen, buf); if (retlen != count) { printk("Didn't write all bytes in flash_safe_write(). Returned %d\n", res); } @@ -300,9 +300,9 @@ flash_safe_writev(struct mtd_info *mtd, const struct kvec *vecs, D3(printk(KERN_NOTICE "flash_safe_writev(%p, %08x, %p)\n", mtd, (unsigned int) to, vecs)); - + if (mtd->writev) { - res = MTD_WRITEV(mtd, vecs, iovec_cnt, to, &retlen); + res = mtd->writev(mtd, vecs, iovec_cnt, to, &retlen); return res ? res : retlen; } /* Not implemented writev. Repeatedly use write - on the not so @@ -312,7 +312,8 @@ flash_safe_writev(struct mtd_info *mtd, const struct kvec *vecs, retlen=0; for (i=0; !res && i<iovec_cnt; i++) { - res = MTD_WRITE(mtd, to, vecs[i].iov_len, &retlen_a, vecs[i].iov_base); + res = mtd->write(mtd, to, vecs[i].iov_len, &retlen_a, + vecs[i].iov_base); if (retlen_a != vecs[i].iov_len) { printk("Didn't write all bytes in flash_safe_writev(). Returned %d\n", res); if (i != iovec_cnt-1) @@ -393,7 +394,7 @@ flash_erase_region(struct mtd_info *mtd, loff_t start, set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&wait_q, &wait); - if (MTD_ERASE(mtd, erase) < 0) { + if (mtd->erase(mtd, erase) < 0) { set_current_state(TASK_RUNNING); remove_wait_queue(&wait_q, &wait); kfree(erase); diff --git a/fs/jffs2/Makefile b/fs/jffs2/Makefile index 77dc5561a04e..7f28ee0bd132 100644 --- a/fs/jffs2/Makefile +++ b/fs/jffs2/Makefile @@ -12,6 +12,9 @@ jffs2-y += symlink.o build.o erase.o background.o fs.o writev.o jffs2-y += super.o debug.o jffs2-$(CONFIG_JFFS2_FS_WRITEBUFFER) += wbuf.o +jffs2-$(CONFIG_JFFS2_FS_XATTR) += xattr.o xattr_trusted.o xattr_user.o +jffs2-$(CONFIG_JFFS2_FS_SECURITY) += security.o +jffs2-$(CONFIG_JFFS2_FS_POSIX_ACL) += acl.o jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking index b7943439b6ec..c8f0bd64e53e 100644 --- a/fs/jffs2/README.Locking +++ b/fs/jffs2/README.Locking @@ -150,3 +150,24 @@ the buffer. Ordering constraints: Lock wbuf_sem last, after the alloc_sem or and f->sem. + + + c->xattr_sem + ------------ + +This read/write semaphore protects against concurrent access to the +xattr related objects which include stuff in superblock and ic->xref. +In read-only path, write-semaphore is too much exclusion. It's enough +by read-semaphore. But you must hold write-semaphore when updating, +creating or deleting any xattr related object. + +Once xattr_sem released, there would be no assurance for the existence +of those objects. Thus, a series of processes is often required to retry, +when updating such a object is necessary under holding read semaphore. +For example, do_jffs2_getxattr() holds read-semaphore to scan xref and +xdatum at first. But it retries this process with holding write-semaphore +after release read-semaphore, if it's necessary to load name/value pair +from medium. + +Ordering constraints: + Lock xattr_sem last, after the alloc_sem. diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c new file mode 100644 index 000000000000..320dd48b834e --- /dev/null +++ b/fs/jffs2/acl.c @@ -0,0 +1,485 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2006 NEC Corporation + * + * Created by KaiGai Kohei <kaigai@ak.jp.nec.com> + * + * For licensing information, see the file 'LICENCE' in this directory. + * + */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/crc32.h> +#include <linux/jffs2.h> +#include <linux/xattr.h> +#include <linux/posix_acl_xattr.h> +#include <linux/mtd/mtd.h> +#include "nodelist.h" + +static size_t jffs2_acl_size(int count) +{ + if (count <= 4) { + return sizeof(struct jffs2_acl_header) + + count * sizeof(struct jffs2_acl_entry_short); + } else { + return sizeof(struct jffs2_acl_header) + + 4 * sizeof(struct jffs2_acl_entry_short) + + (count - 4) * sizeof(struct jffs2_acl_entry); + } +} + +static int jffs2_acl_count(size_t size) +{ + size_t s; + + size -= sizeof(struct jffs2_acl_header); + s = size - 4 * sizeof(struct jffs2_acl_entry_short); + if (s < 0) { + if (size % sizeof(struct jffs2_acl_entry_short)) + return -1; + return size / sizeof(struct jffs2_acl_entry_short); + } else { + if (s % sizeof(struct jffs2_acl_entry)) + return -1; + return s / sizeof(struct jffs2_acl_entry) + 4; + } +} + +static struct posix_acl *jffs2_acl_from_medium(void *value, size_t size) +{ + void *end = value + size; + struct jffs2_acl_header *header = value; + struct jffs2_acl_entry *entry; + struct posix_acl *acl; + uint32_t ver; + int i, count; + + if (!value) + return NULL; + if (size < sizeof(struct jffs2_acl_header)) + return ERR_PTR(-EINVAL); + ver = je32_to_cpu(header->a_version); + if (ver != JFFS2_ACL_VERSION) { + JFFS2_WARNING("Invalid ACL version. (=%u)\n", ver); + return ERR_PTR(-EINVAL); + } + + value += sizeof(struct jffs2_acl_header); + count = jffs2_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i=0; i < count; i++) { + entry = value; + if (value + sizeof(struct jffs2_acl_entry_short) > end) + goto fail; + acl->a_entries[i].e_tag = je16_to_cpu(entry->e_tag); + acl->a_entries[i].e_perm = je16_to_cpu(entry->e_perm); + switch (acl->a_entries[i].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value += sizeof(struct jffs2_acl_entry_short); + acl->a_entries[i].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value += sizeof(struct jffs2_acl_entry); + if (value > end) + goto fail; + acl->a_entries[i].e_id = je32_to_cpu(entry->e_id); + break; + + default: + goto fail; + } + } + if (value != end) + goto fail; + return acl; + fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size) +{ + struct jffs2_acl_header *header; + struct jffs2_acl_entry *entry; + void *e; + size_t i; + + *size = jffs2_acl_size(acl->a_count); + header = kmalloc(sizeof(*header) + acl->a_count * sizeof(*entry), GFP_KERNEL); + if (!header) + return ERR_PTR(-ENOMEM); + header->a_version = cpu_to_je32(JFFS2_ACL_VERSION); + e = header + 1; + for (i=0; i < acl->a_count; i++) { + entry = e; + entry->e_tag = cpu_to_je16(acl->a_entries[i].e_tag); + entry->e_perm = cpu_to_je16(acl->a_entries[i].e_perm); + switch(acl->a_entries[i].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = cpu_to_je32(acl->a_entries[i].e_id); + e += sizeof(struct jffs2_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(struct jffs2_acl_entry_short); + break; + + default: + goto fail; + } + } + return header; + fail: + kfree(header); + return ERR_PTR(-EINVAL); +} + +static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = JFFS2_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != JFFS2_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + return acl; +} + +static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != JFFS2_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) +{ + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct posix_acl *acl; + char *value = NULL; + int rc, xprefix; + + switch (type) { + case ACL_TYPE_ACCESS: + acl = jffs2_iget_acl(inode, &f->i_acl_access); + if (acl != JFFS2_ACL_NOT_CACHED) + return acl; + xprefix = JFFS2_XPREFIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + acl = jffs2_iget_acl(inode, &f->i_acl_default); + if (acl != JFFS2_ACL_NOT_CACHED) + return acl; + xprefix = JFFS2_XPREFIX_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0); + if (rc > 0) { + value = kmalloc(rc, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + rc = do_jffs2_getxattr(inode, xprefix, "", value, rc); + } + if (rc > 0) { + acl = jffs2_acl_from_medium(value, rc); + } else if (rc == -ENODATA || rc == -ENOSYS) { + acl = NULL; + } else { + acl = ERR_PTR(rc); + } + if (value) + kfree(value); + if (!IS_ERR(acl)) { + switch (type) { + case ACL_TYPE_ACCESS: + jffs2_iset_acl(inode, &f->i_acl_access, acl); + break; + case ACL_TYPE_DEFAULT: + jffs2_iset_acl(inode, &f->i_acl_default, acl); + break; + } + } + return acl; +} + +static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + size_t size = 0; + char *value = NULL; + int rc, xprefix; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + xprefix = JFFS2_XPREFIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + rc = posix_acl_equiv_mode(acl, &mode); + if (rc < 0) + return rc; + if (inode->i_mode != mode) { + inode->i_mode = mode; + jffs2_dirty_inode(inode); + } + if (rc == 0) + acl = NULL; + } + break; + case ACL_TYPE_DEFAULT: + xprefix = JFFS2_XPREFIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + default: + return -EINVAL; + } + if (acl) { + value = jffs2_acl_to_medium(acl, &size); + if (IS_ERR(value)) + return PTR_ERR(value); + } + + rc = do_jffs2_setxattr(inode, xprefix, "", value, size, 0); + if (value) + kfree(value); + if (!rc) { + switch(type) { + case ACL_TYPE_ACCESS: + jffs2_iset_acl(inode, &f->i_acl_access, acl); + break; + case ACL_TYPE_DEFAULT: + jffs2_iset_acl(inode, &f->i_acl_default, acl); + break; + } + } + return rc; +} + +static int jffs2_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl; + int rc; + + acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + rc = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return rc; + } + return -EAGAIN; +} + +int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, jffs2_check_acl); +} + +int jffs2_init_acl(struct inode *inode, struct inode *dir) +{ + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct posix_acl *acl = NULL, *clone; + mode_t mode; + int rc = 0; + + f->i_acl_access = JFFS2_ACL_NOT_CACHED; + f->i_acl_default = JFFS2_ACL_NOT_CACHED; + if (!S_ISLNK(inode->i_mode)) { + acl = jffs2_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + if (acl) { + if (S_ISDIR(inode->i_mode)) { + rc = jffs2_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (rc) + goto cleanup; + } + clone = posix_acl_clone(acl, GFP_KERNEL); + rc = -ENOMEM; + if (!clone) + goto cleanup; + mode = inode->i_mode; + rc = posix_acl_create_masq(clone, &mode); + if (rc >= 0) { + inode->i_mode = mode; + if (rc > 0) + rc = jffs2_set_acl(inode, ACL_TYPE_ACCESS, clone); + } + posix_acl_release(clone); + } + cleanup: + posix_acl_release(acl); + return rc; +} + +void jffs2_clear_acl(struct inode *inode) +{ + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + + if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) { + posix_acl_release(f->i_acl_access); + f->i_acl_access = JFFS2_ACL_NOT_CACHED; + } + if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) { + posix_acl_release(f->i_acl_default); + f->i_acl_default = JFFS2_ACL_NOT_CACHED; + } +} + +int jffs2_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int rc; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + rc = posix_acl_chmod_masq(clone, inode->i_mode); + if (!rc) + rc = jffs2_set_acl(inode, ACL_TYPE_ACCESS, clone); + posix_acl_release(clone); + return rc; +} + +static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS); + + if (list && retlen <= list_size) + strcpy(list, POSIX_ACL_XATTR_ACCESS); + return retlen; +} + +static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT); + + if (list && retlen <= list_size) + strcpy(list, POSIX_ACL_XATTR_DEFAULT); + return retlen; +} + +static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int rc; + + acl = jffs2_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) + return -ENODATA; + rc = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return rc; +} + +static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) +{ + if (name[0] != '\0') + return -EINVAL; + return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) +{ + if (name[0] != '\0') + return -EINVAL; + return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size) +{ + struct posix_acl *acl; + int rc; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + rc = posix_acl_valid(acl); + if (rc) + goto out; + } + } else { + acl = NULL; + } + rc = jffs2_set_acl(inode, type, acl); + out: + posix_acl_release(acl); + return rc; +} + +static int jffs2_acl_access_setxattr(struct inode *inode, const char *name, + const void *buffer, size_t size, int flags) +{ + if (name[0] != '\0') + return -EINVAL; + return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int jffs2_acl_default_setxattr(struct inode *inode, const char *name, + const void *buffer, size_t size, int flags) +{ + if (name[0] != '\0') + return -EINVAL; + return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +struct xattr_handler jffs2_acl_access_xattr_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = jffs2_acl_access_listxattr, + .get = jffs2_acl_access_getxattr, + .set = jffs2_acl_access_setxattr, +}; + +struct xattr_handler jffs2_acl_default_xattr_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = jffs2_acl_default_listxattr, + .get = jffs2_acl_default_getxattr, + .set = jffs2_acl_default_setxattr, +}; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h new file mode 100644 index 000000000000..8893bd1a6ba7 --- /dev/null +++ b/fs/jffs2/acl.h @@ -0,0 +1,45 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2006 NEC Corporation + * + * Created by KaiGai Kohei <kaigai@ak.jp.nec.com> + * + * For licensing information, see the file 'LICENCE' in this directory. + * + */ +struct jffs2_acl_entry { + jint16_t e_tag; + jint16_t e_perm; + jint32_t e_id; +}; + +struct jffs2_acl_entry_short { + jint16_t e_tag; + jint16_t e_perm; +}; + +struct jffs2_acl_header { + jint32_t a_version; +}; + +#ifdef CONFIG_JFFS2_FS_POSIX_ACL + +#define JFFS2_ACL_NOT_CACHED ((void *)-1) + +extern int jffs2_permission(struct inode *, int, struct nameidata *); +extern int jffs2_acl_chmod(struct inode *); +extern int jffs2_init_acl(struct inode *, struct inode *); +extern void jffs2_clear_acl(struct inode *); + +extern struct xattr_handler jffs2_acl_access_xattr_handler; +extern struct xattr_handler jffs2_acl_default_xattr_handler; + +#else + +#define jffs2_permission NULL +#define jffs2_acl_chmod(inode) (0) +#define jffs2_init_acl(inode,dir) (0) +#define jffs2_clear_acl(inode) + +#endif /* CONFIG_JFFS2_FS_POSIX_ACL */ diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 70f7a896c04a..02826967ab58 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -160,6 +160,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) ic->scan_dents = NULL; cond_resched(); } + jffs2_build_xattr_subsystem(c); c->flags &= ~JFFS2_SB_FLAG_BUILDING; dbg_fsbuild("FS build complete\n"); @@ -178,6 +179,7 @@ exit: jffs2_free_full_dirent(fd); } } + jffs2_clear_xattr_subsystem(c); } return ret; diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c index e7944e665b9f..7001ba26c067 100644 --- a/fs/jffs2/compr.c +++ b/fs/jffs2/compr.c @@ -412,7 +412,7 @@ void jffs2_free_comprbuf(unsigned char *comprbuf, unsigned char *orig) kfree(comprbuf); } -int jffs2_compressors_init(void) +int __init jffs2_compressors_init(void) { /* Registering compressors */ #ifdef CONFIG_JFFS2_ZLIB diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h index a77e830d85c5..509b8b1c0811 100644 --- a/fs/jffs2/compr.h +++ b/fs/jffs2/compr.h @@ -23,8 +23,8 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/jffs2.h> -#include <linux/jffs2_fs_i.h> -#include <linux/jffs2_fs_sb.h> +#include "jffs2_fs_i.h" +#include "jffs2_fs_sb.h" #include "nodelist.h" #define JFFS2_RUBINMIPS_PRIORITY 10 diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c index 1fe17de713e8..72b4fc13a106 100644 --- a/fs/jffs2/debug.c +++ b/fs/jffs2/debug.c @@ -192,13 +192,13 @@ __jffs2_dbg_acct_paranoia_check_nolock(struct jffs2_sb_info *c, else my_dirty_size += totlen; - if ((!ref2->next_phys) != (ref2 == jeb->last_node)) { - JFFS2_ERROR("node_ref for node at %#08x (mem %p) has next_phys at %#08x (mem %p), last_node is at %#08x (mem %p).\n", - ref_offset(ref2), ref2, ref_offset(ref2->next_phys), ref2->next_phys, - ref_offset(jeb->last_node), jeb->last_node); + if ((!ref_next(ref2)) != (ref2 == jeb->last_node)) { + JFFS2_ERROR("node_ref for node at %#08x (mem %p) has next at %#08x (mem %p), last_node is at %#08x (mem %p).\n", + ref_offset(ref2), ref2, ref_offset(ref_next(ref2)), ref_next(ref2), + ref_offset(jeb->last_node), jeb->last_node); goto error; } - ref2 = ref2->next_phys; + ref2 = ref_next(ref2); } if (my_used_size != jeb->used_size) { @@ -268,9 +268,9 @@ __jffs2_dbg_dump_node_refs_nolock(struct jffs2_sb_info *c, } printk(JFFS2_DBG); - for (ref = jeb->first_node; ; ref = ref->next_phys) { + for (ref = jeb->first_node; ; ref = ref_next(ref)) { printk("%#08x(%#x)", ref_offset(ref), ref->__totlen); - if (ref->next_phys) + if (ref_next(ref)) printk("->"); else break; diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index 162af6dfe292..5fa494a792b2 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h @@ -171,6 +171,12 @@ #define dbg_memalloc(fmt, ...) #endif +/* Watch the XATTR subsystem */ +#ifdef JFFS2_DBG_XATTR_MESSAGES +#define dbg_xattr(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) +#else +#define dbg_xattr(fmt, ...) +#endif /* "Sanity" checks */ void diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 8bc7a5018e40..edd8371fc6a5 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -17,8 +17,8 @@ #include <linux/fs.h> #include <linux/crc32.h> #include <linux/jffs2.h> -#include <linux/jffs2_fs_i.h> -#include <linux/jffs2_fs_sb.h> +#include "jffs2_fs_i.h" +#include "jffs2_fs_sb.h" #include <linux/time.h> #include "nodelist.h" @@ -57,7 +57,12 @@ struct inode_operations jffs2_dir_inode_operations = .rmdir = jffs2_rmdir, .mknod = jffs2_mknod, .rename = jffs2_rename, + .permission = jffs2_permission, .setattr = jffs2_setattr, + .setxattr = jffs2_setxattr, + .getxattr = jffs2_getxattr, + .listxattr = jffs2_listxattr, + .removexattr = jffs2_removexattr }; /***********************************************************************/ @@ -78,6 +83,9 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, D1(printk(KERN_DEBUG "jffs2_lookup()\n")); + if (target->d_name.len > JFFS2_MAX_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + dir_f = JFFS2_INODE_INFO(dir_i); c = JFFS2_SB_INFO(dir_i->i_sb); @@ -206,12 +214,15 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, ret = jffs2_do_create(c, dir_f, f, ri, dentry->d_name.name, dentry->d_name.len); - if (ret) { - make_bad_inode(inode); - iput(inode); - jffs2_free_raw_inode(ri); - return ret; - } + if (ret) + goto fail; + + ret = jffs2_init_security(inode, dir_i); + if (ret) + goto fail; + ret = jffs2_init_acl(inode, dir_i); + if (ret) + goto fail; dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime)); @@ -221,6 +232,12 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); return 0; + + fail: + make_bad_inode(inode); + iput(inode); + jffs2_free_raw_inode(ri); + return ret; } /***********************************************************************/ @@ -291,7 +308,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char struct jffs2_full_dnode *fn; struct jffs2_full_dirent *fd; int namelen; - uint32_t alloclen, phys_ofs; + uint32_t alloclen; int ret, targetlen = strlen(target); /* FIXME: If you care. We'd need to use frags for the target @@ -310,8 +327,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char * Just the node will do for now, though */ namelen = dentry->d_name.len; - ret = jffs2_reserve_space(c, sizeof(*ri) + targetlen, &phys_ofs, &alloclen, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + ret = jffs2_reserve_space(c, sizeof(*ri) + targetlen, &alloclen, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) { jffs2_free_raw_inode(ri); @@ -339,7 +356,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char ri->data_crc = cpu_to_je32(crc32(0, target, targetlen)); ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); - fn = jffs2_write_dnode(c, f, ri, target, targetlen, phys_ofs, ALLOC_NORMAL); + fn = jffs2_write_dnode(c, f, ri, target, targetlen, ALLOC_NORMAL); jffs2_free_raw_inode(ri); @@ -371,8 +388,20 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char up(&f->sem); jffs2_complete_reservation(c); - ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, - ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); + + ret = jffs2_init_security(inode, dir_i); + if (ret) { + jffs2_clear_inode(inode); + return ret; + } + ret = jffs2_init_acl(inode, dir_i); + if (ret) { + jffs2_clear_inode(inode); + return ret; + } + + ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, + ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); if (ret) { /* Eep. */ jffs2_clear_inode(inode); @@ -404,7 +433,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen)); - fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, phys_ofs, ALLOC_NORMAL); + fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, ALLOC_NORMAL); if (IS_ERR(fd)) { /* dirent failed to write. Delete the inode normally @@ -442,7 +471,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) struct jffs2_full_dnode *fn; struct jffs2_full_dirent *fd; int namelen; - uint32_t alloclen, phys_ofs; + uint32_t alloclen; int ret; mode |= S_IFDIR; @@ -457,8 +486,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) * Just the node will do for now, though */ namelen = dentry->d_name.len; - ret = jffs2_reserve_space(c, sizeof(*ri), &phys_ofs, &alloclen, ALLOC_NORMAL, - JFFS2_SUMMARY_INODE_SIZE); + ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, + JFFS2_SUMMARY_INODE_SIZE); if (ret) { jffs2_free_raw_inode(ri); @@ -483,7 +512,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) ri->data_crc = cpu_to_je32(0); ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); - fn = jffs2_write_dnode(c, f, ri, NULL, 0, phys_ofs, ALLOC_NORMAL); + fn = jffs2_write_dnode(c, f, ri, NULL, 0, ALLOC_NORMAL); jffs2_free_raw_inode(ri); @@ -501,8 +530,20 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) up(&f->sem); jffs2_complete_reservation(c); - ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, - ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); + + ret = jffs2_init_security(inode, dir_i); + if (ret) { + jffs2_clear_inode(inode); + return ret; + } + ret = jffs2_init_acl(inode, dir_i); + if (ret) { + jffs2_clear_inode(inode); + return ret; + } + + ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, + ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); if (ret) { /* Eep. */ jffs2_clear_inode(inode); @@ -534,7 +575,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen)); - fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, phys_ofs, ALLOC_NORMAL); + fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, ALLOC_NORMAL); if (IS_ERR(fd)) { /* dirent failed to write. Delete the inode normally @@ -588,12 +629,12 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de struct jffs2_full_dnode *fn; struct jffs2_full_dirent *fd; int namelen; - jint16_t dev; + union jffs2_device_node dev; int devlen = 0; - uint32_t alloclen, phys_ofs; + uint32_t alloclen; int ret; - if (!old_valid_dev(rdev)) + if (!new_valid_dev(rdev)) return -EINVAL; ri = jffs2_alloc_raw_inode(); @@ -602,17 +643,15 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de c = JFFS2_SB_INFO(dir_i->i_sb); - if (S_ISBLK(mode) || S_ISCHR(mode)) { - dev = cpu_to_je16(old_encode_dev(rdev)); - devlen = sizeof(dev); - } + if (S_ISBLK(mode) || S_ISCHR(mode)) + devlen = jffs2_encode_dev(&dev, rdev); /* Try to reserve enough space for both node and dirent. * Just the node will do for now, though */ namelen = dentry->d_name.len; - ret = jffs2_reserve_space(c, sizeof(*ri) + devlen, &phys_ofs, &alloclen, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + ret = jffs2_reserve_space(c, sizeof(*ri) + devlen, &alloclen, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) { jffs2_free_raw_inode(ri); @@ -639,7 +678,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de ri->data_crc = cpu_to_je32(crc32(0, &dev, devlen)); ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); - fn = jffs2_write_dnode(c, f, ri, (char *)&dev, devlen, phys_ofs, ALLOC_NORMAL); + fn = jffs2_write_dnode(c, f, ri, (char *)&dev, devlen, ALLOC_NORMAL); jffs2_free_raw_inode(ri); @@ -657,8 +696,20 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de up(&f->sem); jffs2_complete_reservation(c); - ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, - ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); + + ret = jffs2_init_security(inode, dir_i); + if (ret) { + jffs2_clear_inode(inode); + return ret; + } + ret = jffs2_init_acl(inode, dir_i); + if (ret) { + jffs2_clear_inode(inode); + return ret; + } + + ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, + ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); if (ret) { /* Eep. */ jffs2_clear_inode(inode); @@ -693,7 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); rd->name_crc = cpu_to_je32(crc32(0, dentry->d_name.name, namelen)); - fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, phys_ofs, ALLOC_NORMAL); + fd = jffs2_write_dirent(c, dir_f, rd, dentry->d_name.name, namelen, ALLOC_NORMAL); if (IS_ERR(fd)) { /* dirent failed to write. Delete the inode normally diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index dad68fdffe9e..1862e8bc101d 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -30,7 +30,6 @@ static void jffs2_erase_callback(struct erase_info *); #endif static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset); static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); -static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); static void jffs2_erase_block(struct jffs2_sb_info *c, @@ -136,7 +135,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count) c->used_size -= jeb->used_size; c->dirty_size -= jeb->dirty_size; jeb->wasted_size = jeb->used_size = jeb->dirty_size = jeb->free_size = 0; - jffs2_free_all_node_refs(c, jeb); + jffs2_free_jeb_node_refs(c, jeb); list_add(&jeb->list, &c->erasing_list); spin_unlock(&c->erase_completion_lock); @@ -231,6 +230,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, at the end of the linked list. Stash it and continue from the beginning of the list */ ic = (struct jffs2_inode_cache *)(*prev); + BUG_ON(ic->class != RAWNODE_CLASS_INODE_CACHE); prev = &ic->nodes; continue; } @@ -283,22 +283,27 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, jffs2_del_ino_cache(c, ic); } -static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) +void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) { - struct jffs2_raw_node_ref *ref; + struct jffs2_raw_node_ref *block, *ref; D1(printk(KERN_DEBUG "Freeing all node refs for eraseblock offset 0x%08x\n", jeb->offset)); - while(jeb->first_node) { - ref = jeb->first_node; - jeb->first_node = ref->next_phys; - /* Remove from the inode-list */ - if (ref->next_in_ino) + block = ref = jeb->first_node; + + while (ref) { + if (ref->flash_offset == REF_LINK_NODE) { + ref = ref->next_in_ino; + jffs2_free_refblock(block); + block = ref; + continue; + } + if (ref->flash_offset != REF_EMPTY_NODE && ref->next_in_ino) jffs2_remove_node_refs_from_ino_list(c, ref, jeb); /* else it was a non-inode node or already removed, so don't bother */ - jffs2_free_raw_node_ref(ref); + ref++; } - jeb->last_node = NULL; + jeb->first_node = jeb->last_node = NULL; } static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t *bad_offset) @@ -351,7 +356,6 @@ fail: static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) { - struct jffs2_raw_node_ref *marker_ref = NULL; size_t retlen; int ret; uint32_t bad_offset; @@ -373,12 +377,8 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb goto filebad; } - jeb->first_node = jeb->last_node = NULL; + /* Everything else got zeroed before the erase */ jeb->free_size = c->sector_size; - jeb->used_size = 0; - jeb->dirty_size = 0; - jeb->wasted_size = 0; - } else { struct kvec vecs[1]; @@ -388,11 +388,7 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb .totlen = cpu_to_je32(c->cleanmarker_size) }; - marker_ref = jffs2_alloc_raw_node_ref(); - if (!marker_ref) { - printk(KERN_WARNING "Failed to allocate raw node ref for clean marker. Refiling\n"); - goto refile; - } + jffs2_prealloc_raw_node_refs(c, jeb, 1); marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4)); @@ -408,21 +404,13 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n", jeb->offset, sizeof(marker), retlen); - jffs2_free_raw_node_ref(marker_ref); goto filebad; } - marker_ref->next_in_ino = NULL; - marker_ref->next_phys = NULL; - marker_ref->flash_offset = jeb->offset | REF_NORMAL; - marker_ref->__totlen = c->cleanmarker_size; - - jeb->first_node = jeb->last_node = marker_ref; - - jeb->free_size = c->sector_size - c->cleanmarker_size; - jeb->used_size = c->cleanmarker_size; - jeb->dirty_size = 0; - jeb->wasted_size = 0; + /* Everything else got zeroed before the erase */ + jeb->free_size = c->sector_size; + /* FIXME Special case for cleanmarker in empty block */ + jffs2_link_node_ref(c, jeb, jeb->offset | REF_NORMAL, c->cleanmarker_size, NULL); } spin_lock(&c->erase_completion_lock); diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 9f4171213e58..bb8844f40e48 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -54,7 +54,12 @@ const struct file_operations jffs2_file_operations = struct inode_operations jffs2_file_inode_operations = { - .setattr = jffs2_setattr + .permission = jffs2_permission, + .setattr = jffs2_setattr, + .setxattr = jffs2_setxattr, + .getxattr = jffs2_getxattr, + .listxattr = jffs2_listxattr, + .removexattr = jffs2_removexattr }; struct address_space_operations jffs2_file_address_operations = @@ -129,13 +134,13 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg, struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; - uint32_t phys_ofs, alloc_len; + uint32_t alloc_len; D1(printk(KERN_DEBUG "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", (unsigned int)inode->i_size, pageofs)); - ret = jffs2_reserve_space(c, sizeof(ri), &phys_ofs, &alloc_len, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) return ret; @@ -161,7 +166,7 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg, ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); ri.data_crc = cpu_to_je32(0); - fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_NORMAL); + fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_NORMAL); if (IS_ERR(fn)) { ret = PTR_ERR(fn); @@ -215,12 +220,20 @@ static int jffs2_commit_write (struct file *filp, struct page *pg, D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags)); - if (!start && end == PAGE_CACHE_SIZE) { - /* We need to avoid deadlock with page_cache_read() in - jffs2_garbage_collect_pass(). So we have to mark the - page up to date, to prevent page_cache_read() from - trying to re-lock it. */ - SetPageUptodate(pg); + if (end == PAGE_CACHE_SIZE) { + if (!start) { + /* We need to avoid deadlock with page_cache_read() in + jffs2_garbage_collect_pass(). So we have to mark the + page up to date, to prevent page_cache_read() from + trying to re-lock it. */ + SetPageUptodate(pg); + } else { + /* When writing out the end of a page, write out the + _whole_ page. This helps to reduce the number of + nodes in files which have many short writes, like + syslog files. */ + start = aligned_start = 0; + } } ri = jffs2_alloc_raw_inode(); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 09e5d10b8840..7b6c24b14f85 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -33,11 +33,11 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_raw_inode *ri; - unsigned short dev; + union jffs2_device_node dev; unsigned char *mdata = NULL; int mdatalen = 0; unsigned int ivalid; - uint32_t phys_ofs, alloclen; + uint32_t alloclen; int ret; D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino)); ret = inode_change_ok(inode, iattr); @@ -51,20 +51,24 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) it out again with the appropriate data attached */ if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { /* For these, we don't actually need to read the old node */ - dev = old_encode_dev(inode->i_rdev); + mdatalen = jffs2_encode_dev(&dev, inode->i_rdev); mdata = (char *)&dev; - mdatalen = sizeof(dev); D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of kdev_t\n", mdatalen)); } else if (S_ISLNK(inode->i_mode)) { + down(&f->sem); mdatalen = f->metadata->size; mdata = kmalloc(f->metadata->size, GFP_USER); - if (!mdata) + if (!mdata) { + up(&f->sem); return -ENOMEM; + } ret = jffs2_read_dnode(c, f, f->metadata, mdata, 0, mdatalen); if (ret) { + up(&f->sem); kfree(mdata); return ret; } + up(&f->sem); D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of symlink target\n", mdatalen)); } @@ -75,8 +79,8 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) return -ENOMEM; } - ret = jffs2_reserve_space(c, sizeof(*ri) + mdatalen, &phys_ofs, &alloclen, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + ret = jffs2_reserve_space(c, sizeof(*ri) + mdatalen, &alloclen, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) { jffs2_free_raw_inode(ri); if (S_ISLNK(inode->i_mode & S_IFMT)) @@ -127,7 +131,7 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) else ri->data_crc = cpu_to_je32(0); - new_metadata = jffs2_write_dnode(c, f, ri, mdata, mdatalen, phys_ofs, ALLOC_NORMAL); + new_metadata = jffs2_write_dnode(c, f, ri, mdata, mdatalen, ALLOC_NORMAL); if (S_ISLNK(inode->i_mode)) kfree(mdata); @@ -180,7 +184,12 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) int jffs2_setattr(struct dentry *dentry, struct iattr *iattr) { - return jffs2_do_setattr(dentry->d_inode, iattr); + int rc; + + rc = jffs2_do_setattr(dentry->d_inode, iattr); + if (!rc && (iattr->ia_valid & ATTR_MODE)) + rc = jffs2_acl_chmod(dentry->d_inode); + return rc; } int jffs2_statfs(struct super_block *sb, struct kstatfs *buf) @@ -219,6 +228,7 @@ void jffs2_clear_inode (struct inode *inode) D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); + jffs2_xattr_delete_inode(c, f->inocache); jffs2_do_clear_inode(c, f); } @@ -227,6 +237,8 @@ void jffs2_read_inode (struct inode *inode) struct jffs2_inode_info *f; struct jffs2_sb_info *c; struct jffs2_raw_inode latest_node; + union jffs2_device_node jdev; + dev_t rdev = 0; int ret; D1(printk(KERN_DEBUG "jffs2_read_inode(): inode->i_ino == %lu\n", inode->i_ino)); @@ -258,7 +270,6 @@ void jffs2_read_inode (struct inode *inode) inode->i_blocks = (inode->i_size + 511) >> 9; switch (inode->i_mode & S_IFMT) { - jint16_t rdev; case S_IFLNK: inode->i_op = &jffs2_symlink_inode_operations; @@ -292,8 +303,16 @@ void jffs2_read_inode (struct inode *inode) case S_IFBLK: case S_IFCHR: /* Read the device numbers from the media */ + if (f->metadata->size != sizeof(jdev.old) && + f->metadata->size != sizeof(jdev.new)) { + printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size); + up(&f->sem); + jffs2_do_clear_inode(c, f); + make_bad_inode(inode); + return; + } D1(printk(KERN_DEBUG "Reading device numbers from flash\n")); - if (jffs2_read_dnode(c, f, f->metadata, (char *)&rdev, 0, sizeof(rdev)) < 0) { + if (jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size) < 0) { /* Eep */ printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino); up(&f->sem); @@ -301,12 +320,15 @@ void jffs2_read_inode (struct inode *inode) make_bad_inode(inode); return; } + if (f->metadata->size == sizeof(jdev.old)) + rdev = old_decode_dev(je16_to_cpu(jdev.old)); + else + rdev = new_decode_dev(je32_to_cpu(jdev.new)); case S_IFSOCK: case S_IFIFO: inode->i_op = &jffs2_file_inode_operations; - init_special_inode(inode, inode->i_mode, - old_decode_dev((je16_to_cpu(rdev)))); + init_special_inode(inode, inode->i_mode, rdev); break; default: @@ -492,6 +514,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) } memset(c->inocache_list, 0, INOCACHE_HASHSIZE * sizeof(struct jffs2_inode_cache *)); + jffs2_init_xattr_subsystem(c); + if ((ret = jffs2_do_mount_fs(c))) goto out_inohash; @@ -526,6 +550,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) else kfree(c->blocks); out_inohash: + jffs2_clear_xattr_subsystem(c); kfree(c->inocache_list); out_wbuf: jffs2_flash_cleanup(c); @@ -639,13 +664,6 @@ static int jffs2_flash_setup(struct jffs2_sb_info *c) { return ret; } - /* add setups for other bizarre flashes here... */ - if (jffs2_nor_ecc(c)) { - ret = jffs2_nor_ecc_flash_setup(c); - if (ret) - return ret; - } - /* and Dataflash */ if (jffs2_dataflash(c)) { ret = jffs2_dataflash_setup(c); @@ -669,11 +687,6 @@ void jffs2_flash_cleanup(struct jffs2_sb_info *c) { jffs2_nand_flash_cleanup(c); } - /* add cleanups for other bizarre flashes here... */ - if (jffs2_nor_ecc(c)) { - jffs2_nor_ecc_flash_cleanup(c); - } - /* and DataFlash */ if (jffs2_dataflash(c)) { jffs2_dataflash_cleanup(c); diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index f9ffece453a3..477c526d638b 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -125,6 +125,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) struct jffs2_eraseblock *jeb; struct jffs2_raw_node_ref *raw; int ret = 0, inum, nlink; + int xattr = 0; if (down_interruptible(&c->alloc_sem)) return -EINTR; @@ -138,7 +139,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) the node CRCs etc. Do it now. */ /* checked_ino is protected by the alloc_sem */ - if (c->checked_ino > c->highest_ino) { + if (c->checked_ino > c->highest_ino && xattr) { printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n", c->unchecked_size); jffs2_dbg_dump_block_lists_nolock(c); @@ -148,6 +149,9 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) spin_unlock(&c->erase_completion_lock); + if (!xattr) + xattr = jffs2_verify_xattr(c); + spin_lock(&c->inocache_lock); ic = jffs2_get_ino_cache(c, c->checked_ino++); @@ -181,6 +185,10 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) and trigger the BUG() above while we haven't yet finished checking all its nodes */ D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino)); + /* We need to come back again for the _same_ inode. We've + made no progress in this case, but that should be OK */ + c->checked_ino--; + up(&c->alloc_sem); sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); return 0; @@ -231,7 +239,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) while(ref_obsolete(raw)) { D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw))); - raw = raw->next_phys; + raw = ref_next(raw); if (unlikely(!raw)) { printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n"); printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", @@ -248,16 +256,37 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) if (!raw->next_in_ino) { /* Inode-less node. Clean marker, snapshot or something like that */ - /* FIXME: If it's something that needs to be copied, including something - we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */ spin_unlock(&c->erase_completion_lock); - jffs2_mark_node_obsolete(c, raw); + if (ref_flags(raw) == REF_PRISTINE) { + /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */ + jffs2_garbage_collect_pristine(c, NULL, raw); + } else { + /* Just mark it obsolete */ + jffs2_mark_node_obsolete(c, raw); + } up(&c->alloc_sem); goto eraseit_lock; } ic = jffs2_raw_ref_to_ic(raw); +#ifdef CONFIG_JFFS2_FS_XATTR + /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr. + * We can decide whether this node is inode or xattr by ic->class. */ + if (ic->class == RAWNODE_CLASS_XATTR_DATUM + || ic->class == RAWNODE_CLASS_XATTR_REF) { + BUG_ON(raw->next_in_ino != (void *)ic); + spin_unlock(&c->erase_completion_lock); + + if (ic->class == RAWNODE_CLASS_XATTR_DATUM) { + ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic); + } else { + ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic); + } + goto release_sem; + } +#endif + /* We need to hold the inocache. Either the erase_completion_lock or the inocache_lock are sufficient; we trade down since the inocache_lock causes less contention. */ @@ -499,7 +528,6 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *raw) { union jffs2_node_union *node; - struct jffs2_raw_node_ref *nraw; size_t retlen; int ret; uint32_t phys_ofs, alloclen; @@ -508,15 +536,16 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw))); - rawlen = ref_totlen(c, c->gcblock, raw); + alloclen = rawlen = ref_totlen(c, c->gcblock, raw); /* Ask for a small amount of space (or the totlen if smaller) because we don't want to force wastage of the end of a block if splitting would work. */ - ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) + - JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen); - /* this is not the exact summary size of it, - it is only an upper estimation */ + if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN) + alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN; + + ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen); + /* 'rawlen' is not the exact summary size; it is only an upper estimation */ if (ret) return ret; @@ -580,22 +609,17 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, } break; default: - printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n", - ref_offset(raw), je16_to_cpu(node->u.nodetype)); - goto bail; - } - - nraw = jffs2_alloc_raw_node_ref(); - if (!nraw) { - ret = -ENOMEM; - goto out_node; + /* If it's inode-less, we don't _know_ what it is. Just copy it intact */ + if (ic) { + printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n", + ref_offset(raw), je16_to_cpu(node->u.nodetype)); + goto bail; + } } /* OK, all the CRCs are good; this node can just be copied as-is. */ retry: - nraw->flash_offset = phys_ofs; - nraw->__totlen = rawlen; - nraw->next_phys = NULL; + phys_ofs = write_ofs(c); ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node); @@ -603,17 +627,11 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n", rawlen, phys_ofs, ret, retlen); if (retlen) { - /* Doesn't belong to any inode */ - nraw->next_in_ino = NULL; - - nraw->flash_offset |= REF_OBSOLETE; - jffs2_add_physical_node_ref(c, nraw); - jffs2_mark_node_obsolete(c, nraw); + jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL); } else { - printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset); - jffs2_free_raw_node_ref(nraw); + printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs); } - if (!retried && (nraw = jffs2_alloc_raw_node_ref())) { + if (!retried) { /* Try to reallocate space and retry */ uint32_t dummy; struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size]; @@ -625,7 +643,7 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, jffs2_dbg_acct_sanity_check(c,jeb); jffs2_dbg_acct_paranoia_check(c, jeb); - ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen); + ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen); /* this is not the exact summary size of it, it is only an upper estimation */ @@ -638,25 +656,13 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, goto retry; } D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); - jffs2_free_raw_node_ref(nraw); } - jffs2_free_raw_node_ref(nraw); if (!ret) ret = -EIO; goto out_node; } - nraw->flash_offset |= REF_PRISTINE; - jffs2_add_physical_node_ref(c, nraw); - - /* Link into per-inode list. This is safe because of the ic - state being INO_STATE_GC. Note that if we're doing this - for an inode which is in-core, the 'nraw' pointer is then - going to be fetched from ic->nodes by our caller. */ - spin_lock(&c->erase_completion_lock); - nraw->next_in_ino = ic->nodes; - ic->nodes = nraw; - spin_unlock(&c->erase_completion_lock); + jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic); jffs2_mark_node_obsolete(c, raw); D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw))); @@ -675,19 +681,16 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_ struct jffs2_full_dnode *new_fn; struct jffs2_raw_inode ri; struct jffs2_node_frag *last_frag; - jint16_t dev; + union jffs2_device_node dev; char *mdata = NULL, mdatalen = 0; - uint32_t alloclen, phys_ofs, ilen; + uint32_t alloclen, ilen; int ret; if (S_ISBLK(JFFS2_F_I_MODE(f)) || S_ISCHR(JFFS2_F_I_MODE(f)) ) { /* For these, we don't actually need to read the old node */ - /* FIXME: for minor or major > 255. */ - dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) | - JFFS2_F_I_RDEV_MIN(f))); + mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f)); mdata = (char *)&dev; - mdatalen = sizeof(dev); D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen)); } else if (S_ISLNK(JFFS2_F_I_MODE(f))) { mdatalen = fn->size; @@ -706,7 +709,7 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_ } - ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen, + ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen, JFFS2_SUMMARY_INODE_SIZE); if (ret) { printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n", @@ -744,7 +747,7 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_ ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen)); - new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC); + new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC); if (IS_ERR(new_fn)) { printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn)); @@ -765,7 +768,7 @@ static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_er { struct jffs2_full_dirent *new_fd; struct jffs2_raw_dirent rd; - uint32_t alloclen, phys_ofs; + uint32_t alloclen; int ret; rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -787,14 +790,14 @@ static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_er rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8)); rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize)); - ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen, + ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen, JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize)); if (ret) { printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n", sizeof(rd)+rd.nsize, ret); return ret; } - new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC); + new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC); if (IS_ERR(new_fd)) { printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd)); @@ -922,7 +925,7 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras struct jffs2_raw_inode ri; struct jffs2_node_frag *frag; struct jffs2_full_dnode *new_fn; - uint32_t alloclen, phys_ofs, ilen; + uint32_t alloclen, ilen; int ret; D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n", @@ -1001,14 +1004,14 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras ri.data_crc = cpu_to_je32(0); ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); - ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen, - JFFS2_SUMMARY_INODE_SIZE); + ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen, + JFFS2_SUMMARY_INODE_SIZE); if (ret) { printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n", sizeof(ri), ret); return ret; } - new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC); + new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC); if (IS_ERR(new_fn)) { printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn)); @@ -1070,7 +1073,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era { struct jffs2_full_dnode *new_fn; struct jffs2_raw_inode ri; - uint32_t alloclen, phys_ofs, offset, orig_end, orig_start; + uint32_t alloclen, offset, orig_end, orig_start; int ret = 0; unsigned char *comprbuf = NULL, *writebuf; unsigned long pg; @@ -1227,7 +1230,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era uint32_t cdatalen; uint16_t comprtype = JFFS2_COMPR_NONE; - ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, + ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &alloclen, JFFS2_SUMMARY_INODE_SIZE); if (ret) { @@ -1264,7 +1267,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen)); - new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC); + new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC); jffs2_free_comprbuf(comprbuf, writebuf); diff --git a/fs/jffs2/histo.h b/fs/jffs2/histo.h deleted file mode 100644 index 22a93a08210c..000000000000 --- a/fs/jffs2/histo.h +++ /dev/null @@ -1,3 +0,0 @@ -/* This file provides the bit-probabilities for the input file */ -#define BIT_DIVIDER 629 -static int bits[9] = { 179,167,183,165,159,198,178,119,}; /* ia32 .so files */ diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h new file mode 100644 index 000000000000..2e0cc8e00b85 --- /dev/null +++ b/fs/jffs2/jffs2_fs_i.h @@ -0,0 +1,55 @@ +/* $Id: jffs2_fs_i.h,v 1.19 2005/11/07 11:14:52 gleixner Exp $ */ + +#ifndef _JFFS2_FS_I +#define _JFFS2_FS_I + +#include <linux/version.h> +#include <linux/rbtree.h> +#include <linux/posix_acl.h> +#include <asm/semaphore.h> + +struct jffs2_inode_info { + /* We need an internal mutex similar to inode->i_mutex. + Unfortunately, we can't used the existing one, because + either the GC would deadlock, or we'd have to release it + before letting GC proceed. Or we'd have to put ugliness + into the GC code so it didn't attempt to obtain the i_mutex + for the inode(s) which are already locked */ + struct semaphore sem; + + /* The highest (datanode) version number used for this ino */ + uint32_t highest_version; + + /* List of data fragments which make up the file */ + struct rb_root fragtree; + + /* There may be one datanode which isn't referenced by any of the + above fragments, if it contains a metadata update but no actual + data - or if this is a directory inode */ + /* This also holds the _only_ dnode for symlinks/device nodes, + etc. */ + struct jffs2_full_dnode *metadata; + + /* Directory entries */ + struct jffs2_full_dirent *dents; + + /* The target path if this is the inode of a symlink */ + unsigned char *target; + + /* Some stuff we just have to keep in-core at all times, for each inode. */ + struct jffs2_inode_cache *inocache; + + uint16_t flags; + uint8_t usercompr; +#if !defined (__ECOS) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2) + struct inode vfs_inode; +#endif +#endif +#ifdef CONFIG_JFFS2_FS_POSIX_ACL + struct posix_acl *i_acl_access; + struct posix_acl *i_acl_default; +#endif +}; + +#endif /* _JFFS2_FS_I */ diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h new file mode 100644 index 000000000000..935fec1b1201 --- /dev/null +++ b/fs/jffs2/jffs2_fs_sb.h @@ -0,0 +1,133 @@ +/* $Id: jffs2_fs_sb.h,v 1.54 2005/09/21 13:37:34 dedekind Exp $ */ + +#ifndef _JFFS2_FS_SB +#define _JFFS2_FS_SB + +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/completion.h> +#include <asm/semaphore.h> +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/list.h> +#include <linux/rwsem.h> + +#define JFFS2_SB_FLAG_RO 1 +#define JFFS2_SB_FLAG_SCANNING 2 /* Flash scanning is in progress */ +#define JFFS2_SB_FLAG_BUILDING 4 /* File system building is in progress */ + +struct jffs2_inodirty; + +/* A struct for the overall file system control. Pointers to + jffs2_sb_info structs are named `c' in the source code. + Nee jffs_control +*/ +struct jffs2_sb_info { + struct mtd_info *mtd; + + uint32_t highest_ino; + uint32_t checked_ino; + + unsigned int flags; + + struct task_struct *gc_task; /* GC task struct */ + struct completion gc_thread_start; /* GC thread start completion */ + struct completion gc_thread_exit; /* GC thread exit completion port */ + + struct semaphore alloc_sem; /* Used to protect all the following + fields, and also to protect against + out-of-order writing of nodes. And GC. */ + uint32_t cleanmarker_size; /* Size of an _inline_ CLEANMARKER + (i.e. zero for OOB CLEANMARKER */ + + uint32_t flash_size; + uint32_t used_size; + uint32_t dirty_size; + uint32_t wasted_size; + uint32_t free_size; + uint32_t erasing_size; + uint32_t bad_size; + uint32_t sector_size; + uint32_t unchecked_size; + + uint32_t nr_free_blocks; + uint32_t nr_erasing_blocks; + + /* Number of free blocks there must be before we... */ + uint8_t resv_blocks_write; /* ... allow a normal filesystem write */ + uint8_t resv_blocks_deletion; /* ... allow a normal filesystem deletion */ + uint8_t resv_blocks_gctrigger; /* ... wake up the GC thread */ + uint8_t resv_blocks_gcbad; /* ... pick a block from the bad_list to GC */ + uint8_t resv_blocks_gcmerge; /* ... merge pages when garbage collecting */ + + uint32_t nospc_dirty_size; + + uint32_t nr_blocks; + struct jffs2_eraseblock *blocks; /* The whole array of blocks. Used for getting blocks + * from the offset (blocks[ofs / sector_size]) */ + struct jffs2_eraseblock *nextblock; /* The block we're currently filling */ + + struct jffs2_eraseblock *gcblock; /* The block we're currently garbage-collecting */ + + struct list_head clean_list; /* Blocks 100% full of clean data */ + struct list_head very_dirty_list; /* Blocks with lots of dirty space */ + struct list_head dirty_list; /* Blocks with some dirty space */ + struct list_head erasable_list; /* Blocks which are completely dirty, and need erasing */ + struct list_head erasable_pending_wbuf_list; /* Blocks which need erasing but only after the current wbuf is flushed */ + struct list_head erasing_list; /* Blocks which are currently erasing */ + struct list_head erase_pending_list; /* Blocks which need erasing now */ + struct list_head erase_complete_list; /* Blocks which are erased and need the clean marker written to them */ + struct list_head free_list; /* Blocks which are free and ready to be used */ + struct list_head bad_list; /* Bad blocks. */ + struct list_head bad_used_list; /* Bad blocks with valid data in. */ + + spinlock_t erase_completion_lock; /* Protect free_list and erasing_list + against erase completion handler */ + wait_queue_head_t erase_wait; /* For waiting for erases to complete */ + + wait_queue_head_t inocache_wq; + struct jffs2_inode_cache **inocache_list; + spinlock_t inocache_lock; + + /* Sem to allow jffs2_garbage_collect_deletion_dirent to + drop the erase_completion_lock while it's holding a pointer + to an obsoleted node. I don't like this. Alternatives welcomed. */ + struct semaphore erase_free_sem; + + uint32_t wbuf_pagesize; /* 0 for NOR and other flashes with no wbuf */ + +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER + /* Write-behind buffer for NAND flash */ + unsigned char *wbuf; + unsigned char *oobbuf; + uint32_t wbuf_ofs; + uint32_t wbuf_len; + struct jffs2_inodirty *wbuf_inodes; + + struct rw_semaphore wbuf_sem; /* Protects the write buffer */ + + /* Information about out-of-band area usage... */ + struct nand_ecclayout *ecclayout; + uint32_t badblock_pos; + uint32_t fsdata_pos; + uint32_t fsdata_len; +#endif + + struct jffs2_summary *summary; /* Summary information */ + +#ifdef CONFIG_JFFS2_FS_XATTR +#define XATTRINDEX_HASHSIZE (57) + uint32_t highest_xid; + struct list_head xattrindex[XATTRINDEX_HASHSIZE]; + struct list_head xattr_unchecked; + struct jffs2_xattr_ref *xref_temp; + struct rw_semaphore xattr_sem; + uint32_t xdatum_mem_usage; + uint32_t xdatum_mem_threshold; +#endif + /* OS-private pointer for getting back to master superblock info */ + void *os_priv; +}; + +#endif /* _JFFS2_FB_SB */ diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 036cbd11c004..4889d0700c0e 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -26,6 +26,10 @@ static kmem_cache_t *tmp_dnode_info_slab; static kmem_cache_t *raw_node_ref_slab; static kmem_cache_t *node_frag_slab; static kmem_cache_t *inode_cache_slab; +#ifdef CONFIG_JFFS2_FS_XATTR +static kmem_cache_t *xattr_datum_cache; +static kmem_cache_t *xattr_ref_cache; +#endif int __init jffs2_create_slab_caches(void) { @@ -53,8 +57,8 @@ int __init jffs2_create_slab_caches(void) if (!tmp_dnode_info_slab) goto err; - raw_node_ref_slab = kmem_cache_create("jffs2_raw_node_ref", - sizeof(struct jffs2_raw_node_ref), + raw_node_ref_slab = kmem_cache_create("jffs2_refblock", + sizeof(struct jffs2_raw_node_ref) * (REFS_PER_BLOCK + 1), 0, 0, NULL, NULL); if (!raw_node_ref_slab) goto err; @@ -68,8 +72,24 @@ int __init jffs2_create_slab_caches(void) inode_cache_slab = kmem_cache_create("jffs2_inode_cache", sizeof(struct jffs2_inode_cache), 0, 0, NULL, NULL); - if (inode_cache_slab) - return 0; + if (!inode_cache_slab) + goto err; + +#ifdef CONFIG_JFFS2_FS_XATTR + xattr_datum_cache = kmem_cache_create("jffs2_xattr_datum", + sizeof(struct jffs2_xattr_datum), + 0, 0, NULL, NULL); + if (!xattr_datum_cache) + goto err; + + xattr_ref_cache = kmem_cache_create("jffs2_xattr_ref", + sizeof(struct jffs2_xattr_ref), + 0, 0, NULL, NULL); + if (!xattr_ref_cache) + goto err; +#endif + + return 0; err: jffs2_destroy_slab_caches(); return -ENOMEM; @@ -91,6 +111,12 @@ void jffs2_destroy_slab_caches(void) kmem_cache_destroy(node_frag_slab); if(inode_cache_slab) kmem_cache_destroy(inode_cache_slab); +#ifdef CONFIG_JFFS2_FS_XATTR + if (xattr_datum_cache) + kmem_cache_destroy(xattr_datum_cache); + if (xattr_ref_cache) + kmem_cache_destroy(xattr_ref_cache); +#endif } struct jffs2_full_dirent *jffs2_alloc_full_dirent(int namesize) @@ -164,15 +190,65 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x) kmem_cache_free(tmp_dnode_info_slab, x); } -struct jffs2_raw_node_ref *jffs2_alloc_raw_node_ref(void) +struct jffs2_raw_node_ref *jffs2_alloc_refblock(void) { struct jffs2_raw_node_ref *ret; + ret = kmem_cache_alloc(raw_node_ref_slab, GFP_KERNEL); - dbg_memalloc("%p\n", ret); + if (ret) { + int i = 0; + for (i=0; i < REFS_PER_BLOCK; i++) { + ret[i].flash_offset = REF_EMPTY_NODE; + ret[i].next_in_ino = NULL; + } + ret[i].flash_offset = REF_LINK_NODE; + ret[i].next_in_ino = NULL; + } return ret; } -void jffs2_free_raw_node_ref(struct jffs2_raw_node_ref *x) +int jffs2_prealloc_raw_node_refs(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb, int nr) +{ + struct jffs2_raw_node_ref **p, *ref; + int i = nr; + + dbg_memalloc("%d\n", nr); + + p = &jeb->last_node; + ref = *p; + + dbg_memalloc("Reserving %d refs for block @0x%08x\n", nr, jeb->offset); + + /* If jeb->last_node is really a valid node then skip over it */ + if (ref && ref->flash_offset != REF_EMPTY_NODE) + ref++; + + while (i) { + if (!ref) { + dbg_memalloc("Allocating new refblock linked from %p\n", p); + ref = *p = jffs2_alloc_refblock(); + if (!ref) + return -ENOMEM; + } + if (ref->flash_offset == REF_LINK_NODE) { + p = &ref->next_in_ino; + ref = *p; + continue; + } + i--; + ref++; + } + jeb->allocated_refs = nr; + + dbg_memalloc("Reserved %d refs for block @0x%08x, last_node is %p (%08x,%p)\n", + nr, jeb->offset, jeb->last_node, jeb->last_node->flash_offset, + jeb->last_node->next_in_ino); + + return 0; +} + +void jffs2_free_refblock(struct jffs2_raw_node_ref *x) { dbg_memalloc("%p\n", x); kmem_cache_free(raw_node_ref_slab, x); @@ -205,3 +281,40 @@ void jffs2_free_inode_cache(struct jffs2_inode_cache *x) dbg_memalloc("%p\n", x); kmem_cache_free(inode_cache_slab, x); } + +#ifdef CONFIG_JFFS2_FS_XATTR +struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) +{ + struct jffs2_xattr_datum *xd; + xd = kmem_cache_alloc(xattr_datum_cache, GFP_KERNEL); + dbg_memalloc("%p\n", xd); + + memset(xd, 0, sizeof(struct jffs2_xattr_datum)); + xd->class = RAWNODE_CLASS_XATTR_DATUM; + INIT_LIST_HEAD(&xd->xindex); + return xd; +} + +void jffs2_free_xattr_datum(struct jffs2_xattr_datum *xd) +{ + dbg_memalloc("%p\n", xd); + kmem_cache_free(xattr_datum_cache, xd); +} + +struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) +{ + struct jffs2_xattr_ref *ref; + ref = kmem_cache_alloc(xattr_ref_cache, GFP_KERNEL); + dbg_memalloc("%p\n", ref); + + memset(ref, 0, sizeof(struct jffs2_xattr_ref)); + ref->class = RAWNODE_CLASS_XATTR_REF; + return ref; +} + +void jffs2_free_xattr_ref(struct jffs2_xattr_ref *ref) +{ + dbg_memalloc("%p\n", ref); + kmem_cache_free(xattr_ref_cache, ref); +} +#endif diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index d4d0c41490cd..927dfe42ba76 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -438,7 +438,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info if (c->mtd->point) { err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); if (!err && retlen < tn->csize) { - JFFS2_WARNING("MTD point returned len too short: %u instead of %u.\n", retlen, tn->csize); + JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); c->mtd->unpoint(c->mtd, buffer, ofs, len); } else if (err) JFFS2_WARNING("MTD point failed: error code %d.\n", err); @@ -461,7 +461,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info } if (retlen != len) { - JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ofs, retlen, len); + JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", ofs, retlen, len); err = -EIO; goto free_out; } @@ -938,6 +938,7 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c) this = c->inocache_list[i]; while (this) { next = this->next; + jffs2_xattr_free_inode(c, this); jffs2_free_inode_cache(this); this = next; } @@ -952,9 +953,13 @@ void jffs2_free_raw_node_refs(struct jffs2_sb_info *c) for (i=0; i<c->nr_blocks; i++) { this = c->blocks[i].first_node; - while(this) { - next = this->next_phys; - jffs2_free_raw_node_ref(this); + while (this) { + if (this[REFS_PER_BLOCK].flash_offset == REF_LINK_NODE) + next = this[REFS_PER_BLOCK].next_in_ino; + else + next = NULL; + + jffs2_free_refblock(this); this = next; } c->blocks[i].first_node = c->blocks[i].last_node = NULL; @@ -1045,3 +1050,169 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c) cond_resched(); } } + +struct jffs2_raw_node_ref *jffs2_link_node_ref(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb, + uint32_t ofs, uint32_t len, + struct jffs2_inode_cache *ic) +{ + struct jffs2_raw_node_ref *ref; + + BUG_ON(!jeb->allocated_refs); + jeb->allocated_refs--; + + ref = jeb->last_node; + + dbg_noderef("Last node at %p is (%08x,%p)\n", ref, ref->flash_offset, + ref->next_in_ino); + + while (ref->flash_offset != REF_EMPTY_NODE) { + if (ref->flash_offset == REF_LINK_NODE) + ref = ref->next_in_ino; + else + ref++; + } + + dbg_noderef("New ref is %p (%08x becomes %08x,%p) len 0x%x\n", ref, + ref->flash_offset, ofs, ref->next_in_ino, len); + + ref->flash_offset = ofs; + + if (!jeb->first_node) { + jeb->first_node = ref; + BUG_ON(ref_offset(ref) != jeb->offset); + } else if (unlikely(ref_offset(ref) != jeb->offset + c->sector_size - jeb->free_size)) { + uint32_t last_len = ref_totlen(c, jeb, jeb->last_node); + + JFFS2_ERROR("Adding new ref %p at (0x%08x-0x%08x) not immediately after previous (0x%08x-0x%08x)\n", + ref, ref_offset(ref), ref_offset(ref)+len, + ref_offset(jeb->last_node), + ref_offset(jeb->last_node)+last_len); + BUG(); + } + jeb->last_node = ref; + + if (ic) { + ref->next_in_ino = ic->nodes; + ic->nodes = ref; + } else { + ref->next_in_ino = NULL; + } + + switch(ref_flags(ref)) { + case REF_UNCHECKED: + c->unchecked_size += len; + jeb->unchecked_size += len; + break; + + case REF_NORMAL: + case REF_PRISTINE: + c->used_size += len; + jeb->used_size += len; + break; + + case REF_OBSOLETE: + c->dirty_size += len; + jeb->dirty_size += len; + break; + } + c->free_size -= len; + jeb->free_size -= len; + +#ifdef TEST_TOTLEN + /* Set (and test) __totlen field... for now */ + ref->__totlen = len; + ref_totlen(c, jeb, ref); +#endif + return ref; +} + +/* No locking, no reservation of 'ref'. Do not use on a live file system */ +int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, + uint32_t size) +{ + if (!size) + return 0; + if (unlikely(size > jeb->free_size)) { + printk(KERN_CRIT "Dirty space 0x%x larger then free_size 0x%x (wasted 0x%x)\n", + size, jeb->free_size, jeb->wasted_size); + BUG(); + } + /* REF_EMPTY_NODE is !obsolete, so that works OK */ + if (jeb->last_node && ref_obsolete(jeb->last_node)) { +#ifdef TEST_TOTLEN + jeb->last_node->__totlen += size; +#endif + c->dirty_size += size; + c->free_size -= size; + jeb->dirty_size += size; + jeb->free_size -= size; + } else { + uint32_t ofs = jeb->offset + c->sector_size - jeb->free_size; + ofs |= REF_OBSOLETE; + + jffs2_link_node_ref(c, jeb, ofs, size, NULL); + } + + return 0; +} + +/* Calculate totlen from surrounding nodes or eraseblock */ +static inline uint32_t __ref_totlen(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb, + struct jffs2_raw_node_ref *ref) +{ + uint32_t ref_end; + struct jffs2_raw_node_ref *next_ref = ref_next(ref); + + if (next_ref) + ref_end = ref_offset(next_ref); + else { + if (!jeb) + jeb = &c->blocks[ref->flash_offset / c->sector_size]; + + /* Last node in block. Use free_space */ + if (unlikely(ref != jeb->last_node)) { + printk(KERN_CRIT "ref %p @0x%08x is not jeb->last_node (%p @0x%08x)\n", + ref, ref_offset(ref), jeb->last_node, jeb->last_node?ref_offset(jeb->last_node):0); + BUG(); + } + ref_end = jeb->offset + c->sector_size - jeb->free_size; + } + return ref_end - ref_offset(ref); +} + +uint32_t __jffs2_ref_totlen(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, + struct jffs2_raw_node_ref *ref) +{ + uint32_t ret; + + ret = __ref_totlen(c, jeb, ref); + +#ifdef TEST_TOTLEN + if (unlikely(ret != ref->__totlen)) { + if (!jeb) + jeb = &c->blocks[ref->flash_offset / c->sector_size]; + + printk(KERN_CRIT "Totlen for ref at %p (0x%08x-0x%08x) miscalculated as 0x%x instead of %x\n", + ref, ref_offset(ref), ref_offset(ref)+ref->__totlen, + ret, ref->__totlen); + if (ref_next(ref)) { + printk(KERN_CRIT "next %p (0x%08x-0x%08x)\n", ref_next(ref), ref_offset(ref_next(ref)), + ref_offset(ref_next(ref))+ref->__totlen); + } else + printk(KERN_CRIT "No next ref. jeb->last_node is %p\n", jeb->last_node); + + printk(KERN_CRIT "jeb->wasted_size %x, dirty_size %x, used_size %x, free_size %x\n", jeb->wasted_size, jeb->dirty_size, jeb->used_size, jeb->free_size); + +#if defined(JFFS2_DBG_DUMPS) || defined(JFFS2_DBG_PARANOIA_CHECKS) + __jffs2_dbg_dump_node_refs_nolock(c, jeb); +#endif + + WARN_ON(1); + + ret = ref->__totlen; + } +#endif /* TEST_TOTLEN */ + return ret; +} diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index 131b67b6c350..b16c60bbcf6e 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -18,8 +18,10 @@ #include <linux/fs.h> #include <linux/types.h> #include <linux/jffs2.h> -#include <linux/jffs2_fs_sb.h> -#include <linux/jffs2_fs_i.h> +#include "jffs2_fs_sb.h" +#include "jffs2_fs_i.h" +#include "xattr.h" +#include "acl.h" #include "summary.h" #ifdef __ECOS @@ -75,14 +77,50 @@ struct jffs2_raw_node_ref { struct jffs2_raw_node_ref *next_in_ino; /* Points to the next raw_node_ref - for this inode. If this is the last, it points to the inode_cache - for this inode instead. The inode_cache will have NULL in the first - word so you know when you've got there :) */ - struct jffs2_raw_node_ref *next_phys; + for this object. If this _is_ the last, it points to the inode_cache, + xattr_ref or xattr_datum instead. The common part of those structures + has NULL in the first word. See jffs2_raw_ref_to_ic() below */ uint32_t flash_offset; +#define TEST_TOTLEN +#ifdef TEST_TOTLEN uint32_t __totlen; /* This may die; use ref_totlen(c, jeb, ) below */ +#endif }; +#define REF_LINK_NODE ((int32_t)-1) +#define REF_EMPTY_NODE ((int32_t)-2) + +/* Use blocks of about 256 bytes */ +#define REFS_PER_BLOCK ((255/sizeof(struct jffs2_raw_node_ref))-1) + +static inline struct jffs2_raw_node_ref *ref_next(struct jffs2_raw_node_ref *ref) +{ + ref++; + + /* Link to another block of refs */ + if (ref->flash_offset == REF_LINK_NODE) { + ref = ref->next_in_ino; + if (!ref) + return ref; + } + + /* End of chain */ + if (ref->flash_offset == REF_EMPTY_NODE) + return NULL; + + return ref; +} + +static inline struct jffs2_inode_cache *jffs2_raw_ref_to_ic(struct jffs2_raw_node_ref *raw) +{ + while(raw->next_in_ino) + raw = raw->next_in_ino; + + /* NB. This can be a jffs2_xattr_datum or jffs2_xattr_ref and + not actually a jffs2_inode_cache. Check ->class */ + return ((struct jffs2_inode_cache *)raw); +} + /* flash_offset & 3 always has to be zero, because nodes are always aligned at 4 bytes. So we have a couple of extra bits to play with, which indicate the node's status; see below: */ @@ -95,6 +133,11 @@ struct jffs2_raw_node_ref #define ref_obsolete(ref) (((ref)->flash_offset & 3) == REF_OBSOLETE) #define mark_ref_normal(ref) do { (ref)->flash_offset = ref_offset(ref) | REF_NORMAL; } while(0) +/* NB: REF_PRISTINE for an inode-less node (ref->next_in_ino == NULL) indicates + it is an unknown node of type JFFS2_NODETYPE_RWCOMPAT_COPY, so it'll get + copied. If you need to do anything different to GC inode-less nodes, then + you need to modify gc.c accordingly. */ + /* For each inode in the filesystem, we need to keep a record of nlink, because it would be a PITA to scan the whole directory tree at read_inode() time to calculate it, and to keep sufficient information @@ -103,15 +146,27 @@ struct jffs2_raw_node_ref a pointer to the first physical node which is part of this inode, too. */ struct jffs2_inode_cache { + /* First part of structure is shared with other objects which + can terminate the raw node refs' next_in_ino list -- which + currently struct jffs2_xattr_datum and struct jffs2_xattr_ref. */ + struct jffs2_full_dirent *scan_dents; /* Used during scan to hold temporary lists of dirents, and later must be set to NULL to mark the end of the raw_node_ref->next_in_ino chain. */ - struct jffs2_inode_cache *next; struct jffs2_raw_node_ref *nodes; + uint8_t class; /* It's used for identification */ + + /* end of shared structure */ + + uint8_t flags; + uint16_t state; uint32_t ino; + struct jffs2_inode_cache *next; +#ifdef CONFIG_JFFS2_FS_XATTR + struct jffs2_xattr_ref *xref; +#endif int nlink; - int state; }; /* Inode states for 'state' above. We need the 'GC' state to prevent @@ -125,8 +180,16 @@ struct jffs2_inode_cache { #define INO_STATE_READING 5 /* In read_inode() */ #define INO_STATE_CLEARING 6 /* In clear_inode() */ +#define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ + +#define RAWNODE_CLASS_INODE_CACHE 0 +#define RAWNODE_CLASS_XATTR_DATUM 1 +#define RAWNODE_CLASS_XATTR_REF 2 + #define INOCACHE_HASHSIZE 128 +#define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size) + /* Larger representation of a raw node, kept in-core only when the struct inode for this particular ino is instantiated. @@ -192,6 +255,7 @@ struct jffs2_eraseblock uint32_t wasted_size; uint32_t free_size; /* Note that sector_size - free_size is the address of the first free space */ + uint32_t allocated_refs; struct jffs2_raw_node_ref *first_node; struct jffs2_raw_node_ref *last_node; @@ -203,57 +267,7 @@ static inline int jffs2_blocks_use_vmalloc(struct jffs2_sb_info *c) return ((c->flash_size / c->sector_size) * sizeof (struct jffs2_eraseblock)) > (128 * 1024); } -/* Calculate totlen from surrounding nodes or eraseblock */ -static inline uint32_t __ref_totlen(struct jffs2_sb_info *c, - struct jffs2_eraseblock *jeb, - struct jffs2_raw_node_ref *ref) -{ - uint32_t ref_end; - - if (ref->next_phys) - ref_end = ref_offset(ref->next_phys); - else { - if (!jeb) - jeb = &c->blocks[ref->flash_offset / c->sector_size]; - - /* Last node in block. Use free_space */ - BUG_ON(ref != jeb->last_node); - ref_end = jeb->offset + c->sector_size - jeb->free_size; - } - return ref_end - ref_offset(ref); -} - -static inline uint32_t ref_totlen(struct jffs2_sb_info *c, - struct jffs2_eraseblock *jeb, - struct jffs2_raw_node_ref *ref) -{ - uint32_t ret; - -#if CONFIG_JFFS2_FS_DEBUG > 0 - if (jeb && jeb != &c->blocks[ref->flash_offset / c->sector_size]) { - printk(KERN_CRIT "ref_totlen called with wrong block -- at 0x%08x instead of 0x%08x; ref 0x%08x\n", - jeb->offset, c->blocks[ref->flash_offset / c->sector_size].offset, ref_offset(ref)); - BUG(); - } -#endif - -#if 1 - ret = ref->__totlen; -#else - /* This doesn't actually work yet */ - ret = __ref_totlen(c, jeb, ref); - if (ret != ref->__totlen) { - printk(KERN_CRIT "Totlen for ref at %p (0x%08x-0x%08x) miscalculated as 0x%x instead of %x\n", - ref, ref_offset(ref), ref_offset(ref)+ref->__totlen, - ret, ref->__totlen); - if (!jeb) - jeb = &c->blocks[ref->flash_offset / c->sector_size]; - jffs2_dbg_dump_node_refs_nolock(c, jeb); - BUG(); - } -#endif - return ret; -} +#define ref_totlen(a, b, c) __jffs2_ref_totlen((a), (b), (c)) #define ALLOC_NORMAL 0 /* Normal allocation */ #define ALLOC_DELETION 1 /* Deletion node. Best to allow it */ @@ -268,13 +282,15 @@ static inline uint32_t ref_totlen(struct jffs2_sb_info *c, #define PAD(x) (((x)+3)&~3) -static inline struct jffs2_inode_cache *jffs2_raw_ref_to_ic(struct jffs2_raw_node_ref *raw) +static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev) { - while(raw->next_in_ino) { - raw = raw->next_in_ino; + if (old_valid_dev(rdev)) { + jdev->old = cpu_to_je16(old_encode_dev(rdev)); + return sizeof(jdev->old); + } else { + jdev->new = cpu_to_je32(new_encode_dev(rdev)); + return sizeof(jdev->new); } - - return ((struct jffs2_inode_cache *)raw); } static inline struct jffs2_node_frag *frag_first(struct rb_root *root) @@ -323,28 +339,44 @@ void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *t int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn); void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size); int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn); +struct jffs2_raw_node_ref *jffs2_link_node_ref(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb, + uint32_t ofs, uint32_t len, + struct jffs2_inode_cache *ic); +extern uint32_t __jffs2_ref_totlen(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb, + struct jffs2_raw_node_ref *ref); /* nodemgmt.c */ int jffs2_thread_should_wake(struct jffs2_sb_info *c); -int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, +int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *len, int prio, uint32_t sumsize); -int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, +int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *len, uint32_t sumsize); -int jffs2_add_physical_node_ref(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *new); +struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c, + uint32_t ofs, uint32_t len, + struct jffs2_inode_cache *ic); void jffs2_complete_reservation(struct jffs2_sb_info *c); void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *raw); /* write.c */ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri); -struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const unsigned char *data, uint32_t datalen, uint32_t flash_ofs, int alloc_mode); -struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_dirent *rd, const unsigned char *name, uint32_t namelen, uint32_t flash_ofs, int alloc_mode); +struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, + struct jffs2_raw_inode *ri, const unsigned char *data, + uint32_t datalen, int alloc_mode); +struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jffs2_inode_info *f, + struct jffs2_raw_dirent *rd, const unsigned char *name, + uint32_t namelen, int alloc_mode); int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, unsigned char *buf, uint32_t offset, uint32_t writelen, uint32_t *retlen); -int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const char *name, int namelen); -int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name, int namelen, struct jffs2_inode_info *dead_f, uint32_t time); -int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino, uint8_t type, const char *name, int namelen, uint32_t time); +int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, + struct jffs2_raw_inode *ri, const char *name, int namelen); +int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name, + int namelen, struct jffs2_inode_info *dead_f, uint32_t time); +int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino, + uint8_t type, const char *name, int namelen, uint32_t time); /* readinode.c */ @@ -367,12 +399,19 @@ struct jffs2_raw_inode *jffs2_alloc_raw_inode(void); void jffs2_free_raw_inode(struct jffs2_raw_inode *); struct jffs2_tmp_dnode_info *jffs2_alloc_tmp_dnode_info(void); void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *); -struct jffs2_raw_node_ref *jffs2_alloc_raw_node_ref(void); -void jffs2_free_raw_node_ref(struct jffs2_raw_node_ref *); +int jffs2_prealloc_raw_node_refs(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb, int nr); +void jffs2_free_refblock(struct jffs2_raw_node_ref *); struct jffs2_node_frag *jffs2_alloc_node_frag(void); void jffs2_free_node_frag(struct jffs2_node_frag *); struct jffs2_inode_cache *jffs2_alloc_inode_cache(void); void jffs2_free_inode_cache(struct jffs2_inode_cache *); +#ifdef CONFIG_JFFS2_FS_XATTR +struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void); +void jffs2_free_xattr_datum(struct jffs2_xattr_datum *); +struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void); +void jffs2_free_xattr_ref(struct jffs2_xattr_ref *); +#endif /* gc.c */ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c); @@ -392,12 +431,14 @@ int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, uint32_t ofs, uint32_t len); struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino); int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); +int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size); /* build.c */ int jffs2_do_mount_fs(struct jffs2_sb_info *c); /* erase.c */ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count); +void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); #ifdef CONFIG_JFFS2_FS_WRITEBUFFER /* wbuf.c */ diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 49127a1f0458..8bedfd2ff689 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -23,13 +23,12 @@ * jffs2_reserve_space - request physical space to write nodes to flash * @c: superblock info * @minsize: Minimum acceptable size of allocation - * @ofs: Returned value of node offset * @len: Returned value of allocation length * @prio: Allocation type - ALLOC_{NORMAL,DELETION} * * Requests a block of physical space on the flash. Returns zero for success - * and puts 'ofs' and 'len' into the appriopriate place, or returns -ENOSPC - * or other error if appropriate. + * and puts 'len' into the appropriate place, or returns -ENOSPC or other + * error if appropriate. Doesn't return len since that's * * If it returns zero, jffs2_reserve_space() also downs the per-filesystem * allocation semaphore, to prevent more than one allocation from being @@ -40,9 +39,9 @@ */ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, - uint32_t *ofs, uint32_t *len, uint32_t sumsize); + uint32_t *len, uint32_t sumsize); -int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, +int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *len, int prio, uint32_t sumsize) { int ret = -EAGAIN; @@ -132,19 +131,21 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs spin_lock(&c->erase_completion_lock); } - ret = jffs2_do_reserve_space(c, minsize, ofs, len, sumsize); + ret = jffs2_do_reserve_space(c, minsize, len, sumsize); if (ret) { D1(printk(KERN_DEBUG "jffs2_reserve_space: ret is %d\n", ret)); } } spin_unlock(&c->erase_completion_lock); + if (!ret) + ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); if (ret) up(&c->alloc_sem); return ret; } -int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, - uint32_t *len, uint32_t sumsize) +int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, + uint32_t *len, uint32_t sumsize) { int ret = -EAGAIN; minsize = PAD(minsize); @@ -153,12 +154,15 @@ int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t * spin_lock(&c->erase_completion_lock); while(ret == -EAGAIN) { - ret = jffs2_do_reserve_space(c, minsize, ofs, len, sumsize); + ret = jffs2_do_reserve_space(c, minsize, len, sumsize); if (ret) { D1(printk(KERN_DEBUG "jffs2_reserve_space_gc: looping, ret is %d\n", ret)); } } spin_unlock(&c->erase_completion_lock); + if (!ret) + ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); + return ret; } @@ -259,10 +263,11 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c) } /* Called with alloc sem _and_ erase_completion_lock */ -static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs, uint32_t *len, uint32_t sumsize) +static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, + uint32_t *len, uint32_t sumsize) { struct jffs2_eraseblock *jeb = c->nextblock; - uint32_t reserved_size; /* for summary information at the end of the jeb */ + uint32_t reserved_size; /* for summary information at the end of the jeb */ int ret; restart: @@ -312,6 +317,8 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin } } else { if (jeb && minsize > jeb->free_size) { + uint32_t waste; + /* Skip the end of this block and file it as having some dirty space */ /* If there's a pending write to it, flush now */ @@ -324,10 +331,26 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin goto restart; } - c->wasted_size += jeb->free_size; - c->free_size -= jeb->free_size; - jeb->wasted_size += jeb->free_size; - jeb->free_size = 0; + spin_unlock(&c->erase_completion_lock); + + ret = jffs2_prealloc_raw_node_refs(c, jeb, 1); + if (ret) + return ret; + /* Just lock it again and continue. Nothing much can change because + we hold c->alloc_sem anyway. In fact, it's not entirely clear why + we hold c->erase_completion_lock in the majority of this function... + but that's a question for another (more caffeine-rich) day. */ + spin_lock(&c->erase_completion_lock); + + waste = jeb->free_size; + jffs2_link_node_ref(c, jeb, + (jeb->offset + c->sector_size - waste) | REF_OBSOLETE, + waste, NULL); + /* FIXME: that made it count as dirty. Convert to wasted */ + jeb->dirty_size -= waste; + c->dirty_size -= waste; + jeb->wasted_size += waste; + c->wasted_size += waste; jffs2_close_nextblock(c, jeb); jeb = NULL; @@ -349,7 +372,6 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin } /* OK, jeb (==c->nextblock) is now pointing at a block which definitely has enough space */ - *ofs = jeb->offset + (c->sector_size - jeb->free_size); *len = jeb->free_size - reserved_size; if (c->cleanmarker_size && jeb->used_size == c->cleanmarker_size && @@ -365,7 +387,8 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin spin_lock(&c->erase_completion_lock); } - D1(printk(KERN_DEBUG "jffs2_do_reserve_space(): Giving 0x%x bytes at 0x%x\n", *len, *ofs)); + D1(printk(KERN_DEBUG "jffs2_do_reserve_space(): Giving 0x%x bytes at 0x%x\n", + *len, jeb->offset + (c->sector_size - jeb->free_size))); return 0; } @@ -374,7 +397,6 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin * @c: superblock info * @new: new node reference to add * @len: length of this physical node - * @dirty: dirty flag for new node * * Should only be used to report nodes for which space has been allocated * by jffs2_reserve_space. @@ -382,42 +404,30 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uin * Must be called with the alloc_sem held. */ -int jffs2_add_physical_node_ref(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *new) +struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c, + uint32_t ofs, uint32_t len, + struct jffs2_inode_cache *ic) { struct jffs2_eraseblock *jeb; - uint32_t len; + struct jffs2_raw_node_ref *new; - jeb = &c->blocks[new->flash_offset / c->sector_size]; - len = ref_totlen(c, jeb, new); + jeb = &c->blocks[ofs / c->sector_size]; - D1(printk(KERN_DEBUG "jffs2_add_physical_node_ref(): Node at 0x%x(%d), size 0x%x\n", ref_offset(new), ref_flags(new), len)); + D1(printk(KERN_DEBUG "jffs2_add_physical_node_ref(): Node at 0x%x(%d), size 0x%x\n", + ofs & ~3, ofs & 3, len)); #if 1 - /* we could get some obsolete nodes after nextblock was refiled - in wbuf.c */ - if ((c->nextblock || !ref_obsolete(new)) - &&(jeb != c->nextblock || ref_offset(new) != jeb->offset + (c->sector_size - jeb->free_size))) { + /* Allow non-obsolete nodes only to be added at the end of c->nextblock, + if c->nextblock is set. Note that wbuf.c will file obsolete nodes + even after refiling c->nextblock */ + if ((c->nextblock || ((ofs & 3) != REF_OBSOLETE)) + && (jeb != c->nextblock || (ofs & ~3) != jeb->offset + (c->sector_size - jeb->free_size))) { printk(KERN_WARNING "argh. node added in wrong place\n"); - jffs2_free_raw_node_ref(new); - return -EINVAL; + return ERR_PTR(-EINVAL); } #endif spin_lock(&c->erase_completion_lock); - if (!jeb->first_node) - jeb->first_node = new; - if (jeb->last_node) - jeb->last_node->next_phys = new; - jeb->last_node = new; - - jeb->free_size -= len; - c->free_size -= len; - if (ref_obsolete(new)) { - jeb->dirty_size += len; - c->dirty_size += len; - } else { - jeb->used_size += len; - c->used_size += len; - } + new = jffs2_link_node_ref(c, jeb, ofs, len, ic); if (!jeb->free_size && !jeb->dirty_size && !ISDIRTY(jeb->wasted_size)) { /* If it lives on the dirty_list, jffs2_reserve_space will put it there */ @@ -438,7 +448,7 @@ int jffs2_add_physical_node_ref(struct jffs2_sb_info *c, struct jffs2_raw_node_r spin_unlock(&c->erase_completion_lock); - return 0; + return new; } @@ -470,8 +480,9 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref struct jffs2_unknown_node n; int ret, addedsize; size_t retlen; + uint32_t freed_len; - if(!ref) { + if(unlikely(!ref)) { printk(KERN_NOTICE "EEEEEK. jffs2_mark_node_obsolete called with NULL node\n"); return; } @@ -499,32 +510,34 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref spin_lock(&c->erase_completion_lock); + freed_len = ref_totlen(c, jeb, ref); + if (ref_flags(ref) == REF_UNCHECKED) { - D1(if (unlikely(jeb->unchecked_size < ref_totlen(c, jeb, ref))) { + D1(if (unlikely(jeb->unchecked_size < freed_len)) { printk(KERN_NOTICE "raw unchecked node of size 0x%08x freed from erase block %d at 0x%08x, but unchecked_size was already 0x%08x\n", - ref_totlen(c, jeb, ref), blocknr, ref->flash_offset, jeb->used_size); + freed_len, blocknr, ref->flash_offset, jeb->used_size); BUG(); }) - D1(printk(KERN_DEBUG "Obsoleting previously unchecked node at 0x%08x of len %x: ", ref_offset(ref), ref_totlen(c, jeb, ref))); - jeb->unchecked_size -= ref_totlen(c, jeb, ref); - c->unchecked_size -= ref_totlen(c, jeb, ref); + D1(printk(KERN_DEBUG "Obsoleting previously unchecked node at 0x%08x of len %x: ", ref_offset(ref), freed_len)); + jeb->unchecked_size -= freed_len; + c->unchecked_size -= freed_len; } else { - D1(if (unlikely(jeb->used_size < ref_totlen(c, jeb, ref))) { + D1(if (unlikely(jeb->used_size < freed_len)) { printk(KERN_NOTICE "raw node of size 0x%08x freed from erase block %d at 0x%08x, but used_size was already 0x%08x\n", - ref_totlen(c, jeb, ref), blocknr, ref->flash_offset, jeb->used_size); + freed_len, blocknr, ref->flash_offset, jeb->used_size); BUG(); }) - D1(printk(KERN_DEBUG "Obsoleting node at 0x%08x of len %#x: ", ref_offset(ref), ref_totlen(c, jeb, ref))); - jeb->used_size -= ref_totlen(c, jeb, ref); - c->used_size -= ref_totlen(c, jeb, ref); + D1(printk(KERN_DEBUG "Obsoleting node at 0x%08x of len %#x: ", ref_offset(ref), freed_len)); + jeb->used_size -= freed_len; + c->used_size -= freed_len; } // Take care, that wasted size is taken into concern - if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + ref_totlen(c, jeb, ref))) && jeb != c->nextblock) { - D1(printk(KERN_DEBUG "Dirtying\n")); - addedsize = ref_totlen(c, jeb, ref); - jeb->dirty_size += ref_totlen(c, jeb, ref); - c->dirty_size += ref_totlen(c, jeb, ref); + if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + freed_len)) && jeb != c->nextblock) { + D1(printk("Dirtying\n")); + addedsize = freed_len; + jeb->dirty_size += freed_len; + c->dirty_size += freed_len; /* Convert wasted space to dirty, if not a bad block */ if (jeb->wasted_size) { @@ -543,10 +556,10 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref } } } else { - D1(printk(KERN_DEBUG "Wasting\n")); + D1(printk("Wasting\n")); addedsize = 0; - jeb->wasted_size += ref_totlen(c, jeb, ref); - c->wasted_size += ref_totlen(c, jeb, ref); + jeb->wasted_size += freed_len; + c->wasted_size += freed_len; } ref->flash_offset = ref_offset(ref) | REF_OBSOLETE; @@ -622,7 +635,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref /* The erase_free_sem is locked, and has been since before we marked the node obsolete and potentially put its eraseblock onto the erase_pending_list. Thus, we know that the block hasn't _already_ been erased, and that 'ref' itself hasn't been freed yet - by jffs2_free_all_node_refs() in erase.c. Which is nice. */ + by jffs2_free_jeb_node_refs() in erase.c. Which is nice. */ D1(printk(KERN_DEBUG "obliterating obsoleted node at 0x%08x\n", ref_offset(ref))); ret = jffs2_flash_read(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n); @@ -634,8 +647,8 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref printk(KERN_WARNING "Short read from obsoleted node at 0x%08x: %zd\n", ref_offset(ref), retlen); goto out_erase_sem; } - if (PAD(je32_to_cpu(n.totlen)) != PAD(ref_totlen(c, jeb, ref))) { - printk(KERN_WARNING "Node totlen on flash (0x%08x) != totlen from node ref (0x%08x)\n", je32_to_cpu(n.totlen), ref_totlen(c, jeb, ref)); + if (PAD(je32_to_cpu(n.totlen)) != PAD(freed_len)) { + printk(KERN_WARNING "Node totlen on flash (0x%08x) != totlen from node ref (0x%08x)\n", je32_to_cpu(n.totlen), freed_len); goto out_erase_sem; } if (!(je16_to_cpu(n.nodetype) & JFFS2_NODE_ACCURATE)) { @@ -671,6 +684,10 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref spin_lock(&c->erase_completion_lock); ic = jffs2_raw_ref_to_ic(ref); + /* It seems we should never call jffs2_mark_node_obsolete() for + XATTR nodes.... yet. Make sure we notice if/when we change + that :) */ + BUG_ON(ic->class != RAWNODE_CLASS_INODE_CACHE); for (p = &ic->nodes; (*p) != ref; p = &((*p)->next_in_ino)) ; @@ -683,51 +700,6 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref spin_unlock(&c->erase_completion_lock); } - - /* Merge with the next node in the physical list, if there is one - and if it's also obsolete and if it doesn't belong to any inode */ - if (ref->next_phys && ref_obsolete(ref->next_phys) && - !ref->next_phys->next_in_ino) { - struct jffs2_raw_node_ref *n = ref->next_phys; - - spin_lock(&c->erase_completion_lock); - - ref->__totlen += n->__totlen; - ref->next_phys = n->next_phys; - if (jeb->last_node == n) jeb->last_node = ref; - if (jeb->gc_node == n) { - /* gc will be happy continuing gc on this node */ - jeb->gc_node=ref; - } - spin_unlock(&c->erase_completion_lock); - - jffs2_free_raw_node_ref(n); - } - - /* Also merge with the previous node in the list, if there is one - and that one is obsolete */ - if (ref != jeb->first_node ) { - struct jffs2_raw_node_ref *p = jeb->first_node; - - spin_lock(&c->erase_completion_lock); - - while (p->next_phys != ref) - p = p->next_phys; - - if (ref_obsolete(p) && !ref->next_in_ino) { - p->__totlen += ref->__totlen; - if (jeb->last_node == ref) { - jeb->last_node = p; - } - if (jeb->gc_node == ref) { - /* gc will be happy continuing gc on this node */ - jeb->gc_node=p; - } - p->next_phys = ref->next_phys; - jffs2_free_raw_node_ref(ref); - } - spin_unlock(&c->erase_completion_lock); - } out_erase_sem: up(&c->erase_free_sem); } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index d307cf548625..cd4021bcb944 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -31,9 +31,7 @@ struct kvec; #define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode) #define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid) #define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid) - -#define JFFS2_F_I_RDEV_MIN(f) (iminor(OFNI_EDONI_2SFFJ(f))) -#define JFFS2_F_I_RDEV_MAJ(f) (imajor(OFNI_EDONI_2SFFJ(f))) +#define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev) #define ITIME(sec) ((struct timespec){sec, 0}) #define I_SEC(tv) ((tv).tv_sec) @@ -60,6 +58,10 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) f->target = NULL; f->flags = 0; f->usercompr = 0; +#ifdef CONFIG_JFFS2_FS_POSIX_ACL + f->i_acl_access = JFFS2_ACL_NOT_CACHED; + f->i_acl_default = JFFS2_ACL_NOT_CACHED; +#endif } @@ -90,13 +92,10 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #define jffs2_flash_writev(a,b,c,d,e,f) jffs2_flash_direct_writev(a,b,c,d,e) #define jffs2_wbuf_timeout NULL #define jffs2_wbuf_process NULL -#define jffs2_nor_ecc(c) (0) #define jffs2_dataflash(c) (0) -#define jffs2_nor_wbuf_flash(c) (0) -#define jffs2_nor_ecc_flash_setup(c) (0) -#define jffs2_nor_ecc_flash_cleanup(c) do {} while (0) #define jffs2_dataflash_setup(c) (0) #define jffs2_dataflash_cleanup(c) do {} while (0) +#define jffs2_nor_wbuf_flash(c) (0) #define jffs2_nor_wbuf_flash_setup(c) (0) #define jffs2_nor_wbuf_flash_cleanup(c) do {} while (0) @@ -107,9 +106,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #ifdef CONFIG_JFFS2_SUMMARY #define jffs2_can_mark_obsolete(c) (0) #else -#define jffs2_can_mark_obsolete(c) \ - ((c->mtd->type == MTD_NORFLASH && !(c->mtd->flags & (MTD_ECC|MTD_PROGRAM_REGIONS))) || \ - c->mtd->type == MTD_RAM) +#define jffs2_can_mark_obsolete(c) (c->mtd->flags & (MTD_BIT_WRITEABLE)) #endif #define jffs2_cleanmarker_oob(c) (c->mtd->type == MTD_NANDFLASH) @@ -133,15 +130,11 @@ int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c); int jffs2_nand_flash_setup(struct jffs2_sb_info *c); void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c); -#define jffs2_nor_ecc(c) (c->mtd->type == MTD_NORFLASH && (c->mtd->flags & MTD_ECC)) -int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c); -void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c); - #define jffs2_dataflash(c) (c->mtd->type == MTD_DATAFLASH) int jffs2_dataflash_setup(struct jffs2_sb_info *c); void jffs2_dataflash_cleanup(struct jffs2_sb_info *c); -#define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && (c->mtd->flags & MTD_PROGRAM_REGIONS)) +#define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && ! (c->mtd->flags & MTD_BIT_WRITEABLE)) int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c); void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c); diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 6f4a7d846e8c..5fec012b02ed 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -116,19 +116,42 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r uint32_t *latest_mctime, uint32_t *mctime_ver) { struct jffs2_full_dirent *fd; + uint32_t crc; - /* The direntry nodes are checked during the flash scanning */ - BUG_ON(ref_flags(ref) == REF_UNCHECKED); /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */ BUG_ON(ref_obsolete(ref)); - /* Sanity check */ - if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) { - JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n", - ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen)); + crc = crc32(0, rd, sizeof(*rd) - 8); + if (unlikely(crc != je32_to_cpu(rd->node_crc))) { + JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n", + ref_offset(ref), je32_to_cpu(rd->node_crc), crc); return 1; } + /* If we've never checked the CRCs on this node, check them now */ + if (ref_flags(ref) == REF_UNCHECKED) { + struct jffs2_eraseblock *jeb; + int len; + + /* Sanity check */ + if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) { + JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n", + ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen)); + return 1; + } + + jeb = &c->blocks[ref->flash_offset / c->sector_size]; + len = ref_totlen(c, jeb, ref); + + spin_lock(&c->erase_completion_lock); + jeb->used_size += len; + jeb->unchecked_size -= len; + c->used_size += len; + c->unchecked_size -= len; + ref->flash_offset = ref_offset(ref) | REF_PRISTINE; + spin_unlock(&c->erase_completion_lock); + } + fd = jffs2_alloc_full_dirent(rd->nsize + 1); if (unlikely(!fd)) return -ENOMEM; @@ -198,13 +221,21 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref struct jffs2_tmp_dnode_info *tn; uint32_t len, csize; int ret = 1; + uint32_t crc; /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */ BUG_ON(ref_obsolete(ref)); + crc = crc32(0, rd, sizeof(*rd) - 8); + if (unlikely(crc != je32_to_cpu(rd->node_crc))) { + JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n", + ref_offset(ref), je32_to_cpu(rd->node_crc), crc); + return 1; + } + tn = jffs2_alloc_tmp_dnode_info(); if (!tn) { - JFFS2_ERROR("failed to allocate tn (%d bytes).\n", sizeof(*tn)); + JFFS2_ERROR("failed to allocate tn (%zu bytes).\n", sizeof(*tn)); return -ENOMEM; } @@ -213,14 +244,6 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref /* If we've never checked the CRCs on this node, check them now */ if (ref_flags(ref) == REF_UNCHECKED) { - uint32_t crc; - - crc = crc32(0, rd, sizeof(*rd) - 8); - if (unlikely(crc != je32_to_cpu(rd->node_crc))) { - JFFS2_NOTICE("header CRC failed on node at %#08x: read %#08x, calculated %#08x\n", - ref_offset(ref), je32_to_cpu(rd->node_crc), crc); - goto free_out; - } /* Sanity checks */ if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) || @@ -343,7 +366,7 @@ free_out: * Helper function for jffs2_get_inode_nodes(). * It is called every time an unknown node is found. * - * Returns: 0 on succes; + * Returns: 0 on success; * 1 if the node should be marked obsolete; * negative error code on failure. */ @@ -354,37 +377,30 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype)); - if (crc32(0, un, sizeof(struct jffs2_unknown_node) - 4) != je32_to_cpu(un->hdr_crc)) { - /* Hmmm. This should have been caught at scan time. */ - JFFS2_NOTICE("node header CRC failed at %#08x. But it must have been OK earlier.\n", ref_offset(ref)); - jffs2_dbg_dump_node(c, ref_offset(ref)); - return 1; - } else { - switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) { + switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) { - case JFFS2_FEATURE_INCOMPAT: - JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n", - je16_to_cpu(un->nodetype), ref_offset(ref)); - /* EEP */ - BUG(); - break; + case JFFS2_FEATURE_INCOMPAT: + JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n", + je16_to_cpu(un->nodetype), ref_offset(ref)); + /* EEP */ + BUG(); + break; - case JFFS2_FEATURE_ROCOMPAT: - JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n", - je16_to_cpu(un->nodetype), ref_offset(ref)); - BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO)); - break; + case JFFS2_FEATURE_ROCOMPAT: + JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n", + je16_to_cpu(un->nodetype), ref_offset(ref)); + BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO)); + break; - case JFFS2_FEATURE_RWCOMPAT_COPY: - JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n", - je16_to_cpu(un->nodetype), ref_offset(ref)); - break; + case JFFS2_FEATURE_RWCOMPAT_COPY: + JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n", + je16_to_cpu(un->nodetype), ref_offset(ref)); + break; - case JFFS2_FEATURE_RWCOMPAT_DELETE: - JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n", - je16_to_cpu(un->nodetype), ref_offset(ref)); - return 1; - } + case JFFS2_FEATURE_RWCOMPAT_DELETE: + JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n", + je16_to_cpu(un->nodetype), ref_offset(ref)); + return 1; } return 0; @@ -434,7 +450,7 @@ static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, } if (retlen < len) { - JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", + JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", offs, retlen, len); return -EIO; } @@ -542,13 +558,25 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf } if (retlen < len) { - JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ref_offset(ref), retlen, len); + JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", ref_offset(ref), retlen, len); err = -EIO; goto free_out; } node = (union jffs2_node_union *)bufstart; + /* No need to mask in the valid bit; it shouldn't be invalid */ + if (je32_to_cpu(node->u.hdr_crc) != crc32(0, node, sizeof(node->u)-4)) { + JFFS2_NOTICE("Node header CRC failed at %#08x. {%04x,%04x,%08x,%08x}\n", + ref_offset(ref), je16_to_cpu(node->u.magic), + je16_to_cpu(node->u.nodetype), + je32_to_cpu(node->u.totlen), + je32_to_cpu(node->u.hdr_crc)); + jffs2_dbg_dump_node(c, ref_offset(ref)); + jffs2_mark_node_obsolete(c, ref); + goto cont; + } + switch (je16_to_cpu(node->u.nodetype)) { case JFFS2_NODETYPE_DIRENT: @@ -606,6 +634,7 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf goto free_out; } + cont: spin_lock(&c->erase_completion_lock); } diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index cf55b221fc2b..61618080b86f 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -65,6 +65,28 @@ static inline uint32_t EMPTY_SCAN_SIZE(uint32_t sector_size) { return DEFAULT_EMPTY_SCAN_SIZE; } +static int file_dirty(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) +{ + int ret; + + if ((ret = jffs2_prealloc_raw_node_refs(c, jeb, 1))) + return ret; + if ((ret = jffs2_scan_dirty_space(c, jeb, jeb->free_size))) + return ret; + /* Turned wasted size into dirty, since we apparently + think it's recoverable now. */ + jeb->dirty_size += jeb->wasted_size; + c->dirty_size += jeb->wasted_size; + c->wasted_size -= jeb->wasted_size; + jeb->wasted_size = 0; + if (VERYDIRTY(c, jeb->dirty_size)) { + list_add(&jeb->list, &c->very_dirty_list); + } else { + list_add(&jeb->list, &c->dirty_list); + } + return 0; +} + int jffs2_scan_medium(struct jffs2_sb_info *c) { int i, ret; @@ -170,34 +192,20 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) (!c->nextblock || c->nextblock->free_size < jeb->free_size)) { /* Better candidate for the next writes to go to */ if (c->nextblock) { - c->nextblock->dirty_size += c->nextblock->free_size + c->nextblock->wasted_size; - c->dirty_size += c->nextblock->free_size + c->nextblock->wasted_size; - c->free_size -= c->nextblock->free_size; - c->wasted_size -= c->nextblock->wasted_size; - c->nextblock->free_size = c->nextblock->wasted_size = 0; - if (VERYDIRTY(c, c->nextblock->dirty_size)) { - list_add(&c->nextblock->list, &c->very_dirty_list); - } else { - list_add(&c->nextblock->list, &c->dirty_list); - } + ret = file_dirty(c, c->nextblock); + if (ret) + return ret; /* deleting summary information of the old nextblock */ jffs2_sum_reset_collected(c->summary); } - /* update collected summary infromation for the current nextblock */ + /* update collected summary information for the current nextblock */ jffs2_sum_move_collected(c, s); D1(printk(KERN_DEBUG "jffs2_scan_medium(): new nextblock = 0x%08x\n", jeb->offset)); c->nextblock = jeb; } else { - jeb->dirty_size += jeb->free_size + jeb->wasted_size; - c->dirty_size += jeb->free_size + jeb->wasted_size; - c->free_size -= jeb->free_size; - c->wasted_size -= jeb->wasted_size; - jeb->free_size = jeb->wasted_size = 0; - if (VERYDIRTY(c, jeb->dirty_size)) { - list_add(&jeb->list, &c->very_dirty_list); - } else { - list_add(&jeb->list, &c->dirty_list); - } + ret = file_dirty(c, jeb); + if (ret) + return ret; } break; @@ -222,9 +230,6 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) } } - if (jffs2_sum_active() && s) - kfree(s); - /* Nextblock dirty is always seen as wasted, because we cannot recycle it now */ if (c->nextblock && (c->nextblock->dirty_size)) { c->nextblock->wasted_size += c->nextblock->dirty_size; @@ -242,11 +247,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) D1(printk(KERN_DEBUG "jffs2_scan_medium(): Skipping %d bytes in nextblock to ensure page alignment\n", skip)); - c->nextblock->wasted_size += skip; - c->wasted_size += skip; - - c->nextblock->free_size -= skip; - c->free_size -= skip; + jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); + jffs2_scan_dirty_space(c, c->nextblock, skip); } #endif if (c->nr_erasing_blocks) { @@ -266,6 +268,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) else c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); #endif + if (s) + kfree(s); + return ret; } @@ -290,7 +295,7 @@ int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void *buf, int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) { if ((jeb->used_size + jeb->unchecked_size) == PAD(c->cleanmarker_size) && !jeb->dirty_size - && (!jeb->first_node || !jeb->first_node->next_phys) ) + && (!jeb->first_node || !ref_next(jeb->first_node)) ) return BLK_STATE_CLEANMARKER; /* move blocks with max 4 byte dirty space to cleanlist */ @@ -306,11 +311,119 @@ int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *je return BLK_STATE_ALLDIRTY; } +#ifdef CONFIG_JFFS2_FS_XATTR +static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, + struct jffs2_raw_xattr *rx, uint32_t ofs, + struct jffs2_summary *s) +{ + struct jffs2_xattr_datum *xd; + uint32_t totlen, crc; + int err; + + crc = crc32(0, rx, sizeof(struct jffs2_raw_xattr) - 4); + if (crc != je32_to_cpu(rx->node_crc)) { + if (je32_to_cpu(rx->node_crc) != 0xffffffff) + JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", + ofs, je32_to_cpu(rx->node_crc), crc); + if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rx->totlen)))) + return err; + return 0; + } + + totlen = PAD(sizeof(*rx) + rx->name_len + 1 + je16_to_cpu(rx->value_len)); + if (totlen != je32_to_cpu(rx->totlen)) { + JFFS2_WARNING("node length mismatch at %#08x, read=%u, calc=%u\n", + ofs, je32_to_cpu(rx->totlen), totlen); + if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rx->totlen)))) + return err; + return 0; + } + + xd = jffs2_setup_xattr_datum(c, je32_to_cpu(rx->xid), je32_to_cpu(rx->version)); + if (IS_ERR(xd)) { + if (PTR_ERR(xd) == -EEXIST) { + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rx->totlen))))) + return err; + return 0; + } + return PTR_ERR(xd); + } + xd->xprefix = rx->xprefix; + xd->name_len = rx->name_len; + xd->value_len = je16_to_cpu(rx->value_len); + xd->data_crc = je32_to_cpu(rx->data_crc); + + xd->node = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, NULL); + /* FIXME */ xd->node->next_in_ino = (void *)xd; + + if (jffs2_sum_active()) + jffs2_sum_add_xattr_mem(s, rx, ofs - jeb->offset); + dbg_xattr("scaning xdatum at %#08x (xid=%u, version=%u)\n", + ofs, xd->xid, xd->version); + return 0; +} + +static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, + struct jffs2_raw_xref *rr, uint32_t ofs, + struct jffs2_summary *s) +{ + struct jffs2_xattr_ref *ref; + uint32_t crc; + int err; + + crc = crc32(0, rr, sizeof(*rr) - 4); + if (crc != je32_to_cpu(rr->node_crc)) { + if (je32_to_cpu(rr->node_crc) != 0xffffffff) + JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", + ofs, je32_to_cpu(rr->node_crc), crc); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rr->totlen))))) + return err; + return 0; + } + + if (PAD(sizeof(struct jffs2_raw_xref)) != je32_to_cpu(rr->totlen)) { + JFFS2_WARNING("node length mismatch at %#08x, read=%u, calc=%zd\n", + ofs, je32_to_cpu(rr->totlen), + PAD(sizeof(struct jffs2_raw_xref))); + if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rr->totlen)))) + return err; + return 0; + } + + ref = jffs2_alloc_xattr_ref(); + if (!ref) + return -ENOMEM; + + /* BEFORE jffs2_build_xattr_subsystem() called, + * ref->xid is used to store 32bit xid, xd is not used + * ref->ino is used to store 32bit inode-number, ic is not used + * Thoes variables are declared as union, thus using those + * are exclusive. In a similar way, ref->next is temporarily + * used to chain all xattr_ref object. It's re-chained to + * jffs2_inode_cache in jffs2_build_xattr_subsystem() correctly. + */ + ref->ino = je32_to_cpu(rr->ino); + ref->xid = je32_to_cpu(rr->xid); + ref->next = c->xref_temp; + c->xref_temp = ref; + + ref->node = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rr->totlen)), NULL); + /* FIXME */ ref->node->next_in_ino = (void *)ref; + + if (jffs2_sum_active()) + jffs2_sum_add_xref_mem(s, rr, ofs - jeb->offset); + dbg_xattr("scan xref at %#08x (xid=%u, ino=%u)\n", + ofs, ref->xid, ref->ino); + return 0; +} +#endif + +/* Called with 'buf_size == 0' if buf is in fact a pointer _directly_ into + the flash, XIP-style */ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, - unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) { + unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) { struct jffs2_unknown_node *node; struct jffs2_unknown_node crcnode; - struct jffs2_sum_marker *sm; uint32_t ofs, prevofs; uint32_t hdr_crc, buf_ofs, buf_len; int err; @@ -344,44 +457,75 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo #endif if (jffs2_sum_active()) { - sm = kmalloc(sizeof(struct jffs2_sum_marker), GFP_KERNEL); - if (!sm) { - return -ENOMEM; - } - - err = jffs2_fill_scan_buf(c, (unsigned char *) sm, jeb->offset + c->sector_size - - sizeof(struct jffs2_sum_marker), sizeof(struct jffs2_sum_marker)); - if (err) { - kfree(sm); - return err; - } - - if (je32_to_cpu(sm->magic) == JFFS2_SUM_MAGIC ) { - err = jffs2_sum_scan_sumnode(c, jeb, je32_to_cpu(sm->offset), &pseudo_random); - if (err) { - kfree(sm); + struct jffs2_sum_marker *sm; + void *sumptr = NULL; + uint32_t sumlen; + + if (!buf_size) { + /* XIP case. Just look, point at the summary if it's there */ + sm = (void *)buf + c->sector_size - sizeof(*sm); + if (je32_to_cpu(sm->magic) == JFFS2_SUM_MAGIC) { + sumptr = buf + je32_to_cpu(sm->offset); + sumlen = c->sector_size - je32_to_cpu(sm->offset); + } + } else { + /* If NAND flash, read a whole page of it. Else just the end */ + if (c->wbuf_pagesize) + buf_len = c->wbuf_pagesize; + else + buf_len = sizeof(*sm); + + /* Read as much as we want into the _end_ of the preallocated buffer */ + err = jffs2_fill_scan_buf(c, buf + buf_size - buf_len, + jeb->offset + c->sector_size - buf_len, + buf_len); + if (err) return err; + + sm = (void *)buf + buf_size - sizeof(*sm); + if (je32_to_cpu(sm->magic) == JFFS2_SUM_MAGIC) { + sumlen = c->sector_size - je32_to_cpu(sm->offset); + sumptr = buf + buf_size - sumlen; + + /* Now, make sure the summary itself is available */ + if (sumlen > buf_size) { + /* Need to kmalloc for this. */ + sumptr = kmalloc(sumlen, GFP_KERNEL); + if (!sumptr) + return -ENOMEM; + memcpy(sumptr + sumlen - buf_len, buf + buf_size - buf_len, buf_len); + } + if (buf_len < sumlen) { + /* Need to read more so that the entire summary node is present */ + err = jffs2_fill_scan_buf(c, sumptr, + jeb->offset + c->sector_size - sumlen, + sumlen - buf_len); + if (err) + return err; + } } + } - kfree(sm); + if (sumptr) { + err = jffs2_sum_scan_sumnode(c, jeb, sumptr, sumlen, &pseudo_random); - ofs = jeb->offset; - prevofs = jeb->offset - 1; + if (buf_size && sumlen > buf_size) + kfree(sumptr); + /* If it returns with a real error, bail. + If it returns positive, that's a block classification + (i.e. BLK_STATE_xxx) so return that too. + If it returns zero, fall through to full scan. */ + if (err) + return err; + } } buf_ofs = jeb->offset; if (!buf_size) { + /* This is the XIP case -- we're reading _directly_ from the flash chip */ buf_len = c->sector_size; - - if (jffs2_sum_active()) { - /* must reread because of summary test */ - err = jffs2_fill_scan_buf(c, buf, buf_ofs, buf_len); - if (err) - return err; - } - } else { buf_len = EMPTY_SCAN_SIZE(c->sector_size); err = jffs2_fill_scan_buf(c, buf, buf_ofs, buf_len); @@ -418,7 +562,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo if (ofs) { D1(printk(KERN_DEBUG "Free space at %08x ends at %08x\n", jeb->offset, jeb->offset + ofs)); - DIRTY_SPACE(ofs); + if ((err = jffs2_prealloc_raw_node_refs(c, jeb, 1))) + return err; + if ((err = jffs2_scan_dirty_space(c, jeb, ofs))) + return err; } /* Now ofs is a complete physical flash offset as it always was... */ @@ -433,6 +580,11 @@ scan_more: jffs2_dbg_acct_paranoia_check_nolock(c, jeb); + /* Make sure there are node refs available for use */ + err = jffs2_prealloc_raw_node_refs(c, jeb, 2); + if (err) + return err; + cond_resched(); if (ofs & 3) { @@ -442,7 +594,8 @@ scan_more: } if (ofs == prevofs) { printk(KERN_WARNING "ofs 0x%08x has already been seen. Skipping\n", ofs); - DIRTY_SPACE(4); + if ((err = jffs2_scan_dirty_space(c, jeb, 4))) + return err; ofs += 4; continue; } @@ -451,7 +604,8 @@ scan_more: if (jeb->offset + c->sector_size < ofs + sizeof(*node)) { D1(printk(KERN_DEBUG "Fewer than %zd bytes left to end of block. (%x+%x<%x+%zx) Not reading\n", sizeof(struct jffs2_unknown_node), jeb->offset, c->sector_size, ofs, sizeof(*node))); - DIRTY_SPACE((jeb->offset + c->sector_size)-ofs); + if ((err = jffs2_scan_dirty_space(c, jeb, (jeb->offset + c->sector_size)-ofs))) + return err; break; } @@ -481,7 +635,8 @@ scan_more: if (*(uint32_t *)(&buf[inbuf_ofs]) != 0xffffffff) { printk(KERN_WARNING "Empty flash at 0x%08x ends at 0x%08x\n", empty_start, ofs); - DIRTY_SPACE(ofs-empty_start); + if ((err = jffs2_scan_dirty_space(c, jeb, ofs-empty_start))) + return err; goto scan_more; } @@ -494,7 +649,7 @@ scan_more: /* If we're only checking the beginning of a block with a cleanmarker, bail now */ if (buf_ofs == jeb->offset && jeb->used_size == PAD(c->cleanmarker_size) && - c->cleanmarker_size && !jeb->dirty_size && !jeb->first_node->next_phys) { + c->cleanmarker_size && !jeb->dirty_size && !ref_next(jeb->first_node)) { D1(printk(KERN_DEBUG "%d bytes at start of block seems clean... assuming all clean\n", EMPTY_SCAN_SIZE(c->sector_size))); return BLK_STATE_CLEANMARKER; } @@ -518,20 +673,23 @@ scan_more: if (ofs == jeb->offset && je16_to_cpu(node->magic) == KSAMTIB_CIGAM_2SFFJ) { printk(KERN_WARNING "Magic bitmask is backwards at offset 0x%08x. Wrong endian filesystem?\n", ofs); - DIRTY_SPACE(4); + if ((err = jffs2_scan_dirty_space(c, jeb, 4))) + return err; ofs += 4; continue; } if (je16_to_cpu(node->magic) == JFFS2_DIRTY_BITMASK) { D1(printk(KERN_DEBUG "Dirty bitmask at 0x%08x\n", ofs)); - DIRTY_SPACE(4); + if ((err = jffs2_scan_dirty_space(c, jeb, 4))) + return err; ofs += 4; continue; } if (je16_to_cpu(node->magic) == JFFS2_OLD_MAGIC_BITMASK) { printk(KERN_WARNING "Old JFFS2 bitmask found at 0x%08x\n", ofs); printk(KERN_WARNING "You cannot use older JFFS2 filesystems with newer kernels\n"); - DIRTY_SPACE(4); + if ((err = jffs2_scan_dirty_space(c, jeb, 4))) + return err; ofs += 4; continue; } @@ -540,7 +698,8 @@ scan_more: noisy_printk(&noise, "jffs2_scan_eraseblock(): Magic bitmask 0x%04x not found at 0x%08x: 0x%04x instead\n", JFFS2_MAGIC_BITMASK, ofs, je16_to_cpu(node->magic)); - DIRTY_SPACE(4); + if ((err = jffs2_scan_dirty_space(c, jeb, 4))) + return err; ofs += 4; continue; } @@ -557,7 +716,8 @@ scan_more: je32_to_cpu(node->totlen), je32_to_cpu(node->hdr_crc), hdr_crc); - DIRTY_SPACE(4); + if ((err = jffs2_scan_dirty_space(c, jeb, 4))) + return err; ofs += 4; continue; } @@ -568,7 +728,8 @@ scan_more: printk(KERN_WARNING "Node at 0x%08x with length 0x%08x would run over the end of the erase block\n", ofs, je32_to_cpu(node->totlen)); printk(KERN_WARNING "Perhaps the file system was created with the wrong erase size?\n"); - DIRTY_SPACE(4); + if ((err = jffs2_scan_dirty_space(c, jeb, 4))) + return err; ofs += 4; continue; } @@ -576,7 +737,8 @@ scan_more: if (!(je16_to_cpu(node->nodetype) & JFFS2_NODE_ACCURATE)) { /* Wheee. This is an obsoleted node */ D2(printk(KERN_DEBUG "Node at 0x%08x is obsolete. Skipping\n", ofs)); - DIRTY_SPACE(PAD(je32_to_cpu(node->totlen))); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen))))) + return err; ofs += PAD(je32_to_cpu(node->totlen)); continue; } @@ -614,30 +776,59 @@ scan_more: ofs += PAD(je32_to_cpu(node->totlen)); break; +#ifdef CONFIG_JFFS2_FS_XATTR + case JFFS2_NODETYPE_XATTR: + if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) { + buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs); + D1(printk(KERN_DEBUG "Fewer than %d bytes (xattr node)" + " left to end of buf. Reading 0x%x at 0x%08x\n", + je32_to_cpu(node->totlen), buf_len, ofs)); + err = jffs2_fill_scan_buf(c, buf, ofs, buf_len); + if (err) + return err; + buf_ofs = ofs; + node = (void *)buf; + } + err = jffs2_scan_xattr_node(c, jeb, (void *)node, ofs, s); + if (err) + return err; + ofs += PAD(je32_to_cpu(node->totlen)); + break; + case JFFS2_NODETYPE_XREF: + if (buf_ofs + buf_len < ofs + je32_to_cpu(node->totlen)) { + buf_len = min_t(uint32_t, buf_size, jeb->offset + c->sector_size - ofs); + D1(printk(KERN_DEBUG "Fewer than %d bytes (xref node)" + " left to end of buf. Reading 0x%x at 0x%08x\n", + je32_to_cpu(node->totlen), buf_len, ofs)); + err = jffs2_fill_scan_buf(c, buf, ofs, buf_len); + if (err) + return err; + buf_ofs = ofs; + node = (void *)buf; + } + err = jffs2_scan_xref_node(c, jeb, (void *)node, ofs, s); + if (err) + return err; + ofs += PAD(je32_to_cpu(node->totlen)); + break; +#endif /* CONFIG_JFFS2_FS_XATTR */ + case JFFS2_NODETYPE_CLEANMARKER: D1(printk(KERN_DEBUG "CLEANMARKER node found at 0x%08x\n", ofs)); if (je32_to_cpu(node->totlen) != c->cleanmarker_size) { printk(KERN_NOTICE "CLEANMARKER node found at 0x%08x has totlen 0x%x != normal 0x%x\n", ofs, je32_to_cpu(node->totlen), c->cleanmarker_size); - DIRTY_SPACE(PAD(sizeof(struct jffs2_unknown_node))); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(sizeof(struct jffs2_unknown_node))))) + return err; ofs += PAD(sizeof(struct jffs2_unknown_node)); } else if (jeb->first_node) { printk(KERN_NOTICE "CLEANMARKER node found at 0x%08x, not first node in block (0x%08x)\n", ofs, jeb->offset); - DIRTY_SPACE(PAD(sizeof(struct jffs2_unknown_node))); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(sizeof(struct jffs2_unknown_node))))) + return err; ofs += PAD(sizeof(struct jffs2_unknown_node)); } else { - struct jffs2_raw_node_ref *marker_ref = jffs2_alloc_raw_node_ref(); - if (!marker_ref) { - printk(KERN_NOTICE "Failed to allocate node ref for clean marker\n"); - return -ENOMEM; - } - marker_ref->next_in_ino = NULL; - marker_ref->next_phys = NULL; - marker_ref->flash_offset = ofs | REF_NORMAL; - marker_ref->__totlen = c->cleanmarker_size; - jeb->first_node = jeb->last_node = marker_ref; + jffs2_link_node_ref(c, jeb, ofs | REF_NORMAL, c->cleanmarker_size, NULL); - USED_SPACE(PAD(c->cleanmarker_size)); ofs += PAD(c->cleanmarker_size); } break; @@ -645,7 +836,8 @@ scan_more: case JFFS2_NODETYPE_PADDING: if (jffs2_sum_active()) jffs2_sum_add_padding_mem(s, je32_to_cpu(node->totlen)); - DIRTY_SPACE(PAD(je32_to_cpu(node->totlen))); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen))))) + return err; ofs += PAD(je32_to_cpu(node->totlen)); break; @@ -656,7 +848,8 @@ scan_more: c->flags |= JFFS2_SB_FLAG_RO; if (!(jffs2_is_readonly(c))) return -EROFS; - DIRTY_SPACE(PAD(je32_to_cpu(node->totlen))); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen))))) + return err; ofs += PAD(je32_to_cpu(node->totlen)); break; @@ -666,15 +859,21 @@ scan_more: case JFFS2_FEATURE_RWCOMPAT_DELETE: D1(printk(KERN_NOTICE "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs)); - DIRTY_SPACE(PAD(je32_to_cpu(node->totlen))); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen))))) + return err; ofs += PAD(je32_to_cpu(node->totlen)); break; - case JFFS2_FEATURE_RWCOMPAT_COPY: + case JFFS2_FEATURE_RWCOMPAT_COPY: { D1(printk(KERN_NOTICE "Unknown but compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs)); - USED_SPACE(PAD(je32_to_cpu(node->totlen))); + + jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(node->totlen)), NULL); + + /* We can't summarise nodes we don't grok */ + jffs2_sum_disable_collecting(s); ofs += PAD(je32_to_cpu(node->totlen)); break; + } } } } @@ -687,9 +886,9 @@ scan_more: } } - D1(printk(KERN_DEBUG "Block at 0x%08x: free 0x%08x, dirty 0x%08x, unchecked 0x%08x, used 0x%08x\n", jeb->offset, - jeb->free_size, jeb->dirty_size, jeb->unchecked_size, jeb->used_size)); - + D1(printk(KERN_DEBUG "Block at 0x%08x: free 0x%08x, dirty 0x%08x, unchecked 0x%08x, used 0x%08x, wasted 0x%08x\n", + jeb->offset,jeb->free_size, jeb->dirty_size, jeb->unchecked_size, jeb->used_size, jeb->wasted_size)); + /* mark_node_obsolete can add to wasted !! */ if (jeb->wasted_size) { jeb->dirty_size += jeb->wasted_size; @@ -730,9 +929,9 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_raw_inode *ri, uint32_t ofs, struct jffs2_summary *s) { - struct jffs2_raw_node_ref *raw; struct jffs2_inode_cache *ic; uint32_t ino = je32_to_cpu(ri->ino); + int err; D1(printk(KERN_DEBUG "jffs2_scan_inode_node(): Node at 0x%08x\n", ofs)); @@ -745,12 +944,6 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc Which means that the _full_ amount of time to get to proper write mode with GC operational may actually be _longer_ than before. Sucks to be me. */ - raw = jffs2_alloc_raw_node_ref(); - if (!raw) { - printk(KERN_NOTICE "jffs2_scan_inode_node(): allocation of node reference failed\n"); - return -ENOMEM; - } - ic = jffs2_get_ino_cache(c, ino); if (!ic) { /* Inocache get failed. Either we read a bogus ino# or it's just genuinely the @@ -762,30 +955,17 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc printk(KERN_NOTICE "jffs2_scan_inode_node(): CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ofs, je32_to_cpu(ri->node_crc), crc); /* We believe totlen because the CRC on the node _header_ was OK, just the node itself failed. */ - DIRTY_SPACE(PAD(je32_to_cpu(ri->totlen))); - jffs2_free_raw_node_ref(raw); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(ri->totlen))))) + return err; return 0; } ic = jffs2_scan_make_ino_cache(c, ino); - if (!ic) { - jffs2_free_raw_node_ref(raw); + if (!ic) return -ENOMEM; - } } /* Wheee. It worked */ - - raw->flash_offset = ofs | REF_UNCHECKED; - raw->__totlen = PAD(je32_to_cpu(ri->totlen)); - raw->next_phys = NULL; - raw->next_in_ino = ic->nodes; - - ic->nodes = raw; - if (!jeb->first_node) - jeb->first_node = raw; - if (jeb->last_node) - jeb->last_node->next_phys = raw; - jeb->last_node = raw; + jffs2_link_node_ref(c, jeb, ofs | REF_UNCHECKED, PAD(je32_to_cpu(ri->totlen)), ic); D1(printk(KERN_DEBUG "Node is ino #%u, version %d. Range 0x%x-0x%x\n", je32_to_cpu(ri->ino), je32_to_cpu(ri->version), @@ -794,8 +974,6 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc pseudo_random += je32_to_cpu(ri->version); - UNCHECKED_SPACE(PAD(je32_to_cpu(ri->totlen))); - if (jffs2_sum_active()) { jffs2_sum_add_inode_mem(s, ri, ofs - jeb->offset); } @@ -806,10 +984,10 @@ static int jffs2_scan_inode_node(struct jffs2_sb_info *c, struct jffs2_erasebloc static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_raw_dirent *rd, uint32_t ofs, struct jffs2_summary *s) { - struct jffs2_raw_node_ref *raw; struct jffs2_full_dirent *fd; struct jffs2_inode_cache *ic; uint32_t crc; + int err; D1(printk(KERN_DEBUG "jffs2_scan_dirent_node(): Node at 0x%08x\n", ofs)); @@ -821,7 +999,8 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo printk(KERN_NOTICE "jffs2_scan_dirent_node(): Node CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ofs, je32_to_cpu(rd->node_crc), crc); /* We believe totlen because the CRC on the node _header_ was OK, just the node itself failed. */ - DIRTY_SPACE(PAD(je32_to_cpu(rd->totlen))); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rd->totlen))))) + return err; return 0; } @@ -842,40 +1021,23 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo jffs2_free_full_dirent(fd); /* FIXME: Why do we believe totlen? */ /* We believe totlen because the CRC on the node _header_ was OK, just the name failed. */ - DIRTY_SPACE(PAD(je32_to_cpu(rd->totlen))); + if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rd->totlen))))) + return err; return 0; } - raw = jffs2_alloc_raw_node_ref(); - if (!raw) { - jffs2_free_full_dirent(fd); - printk(KERN_NOTICE "jffs2_scan_dirent_node(): allocation of node reference failed\n"); - return -ENOMEM; - } ic = jffs2_scan_make_ino_cache(c, je32_to_cpu(rd->pino)); if (!ic) { jffs2_free_full_dirent(fd); - jffs2_free_raw_node_ref(raw); return -ENOMEM; } - raw->__totlen = PAD(je32_to_cpu(rd->totlen)); - raw->flash_offset = ofs | REF_PRISTINE; - raw->next_phys = NULL; - raw->next_in_ino = ic->nodes; - ic->nodes = raw; - if (!jeb->first_node) - jeb->first_node = raw; - if (jeb->last_node) - jeb->last_node->next_phys = raw; - jeb->last_node = raw; + fd->raw = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rd->totlen)), ic); - fd->raw = raw; fd->next = NULL; fd->version = je32_to_cpu(rd->version); fd->ino = je32_to_cpu(rd->ino); fd->nhash = full_name_hash(fd->name, rd->nsize); fd->type = rd->type; - USED_SPACE(PAD(je32_to_cpu(rd->totlen))); jffs2_add_fd_to_list(c, fd, &ic->scan_dents); if (jffs2_sum_active()) { diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c new file mode 100644 index 000000000000..52a9894a6364 --- /dev/null +++ b/fs/jffs2/security.c @@ -0,0 +1,82 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2006 NEC Corporation + * + * Created by KaiGai Kohei <kaigai@ak.jp.nec.com> + * + * For licensing information, see the file 'LICENCE' in this directory. + * + */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/crc32.h> +#include <linux/jffs2.h> +#include <linux/xattr.h> +#include <linux/mtd/mtd.h> +#include <linux/security.h> +#include "nodelist.h" + +/* ---- Initial Security Label Attachment -------------- */ +int jffs2_init_security(struct inode *inode, struct inode *dir) +{ + int rc; + size_t len; + void *value; + char *name; + + rc = security_inode_init_security(inode, dir, &name, &value, &len); + if (rc) { + if (rc == -EOPNOTSUPP) + return 0; + return rc; + } + rc = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, value, len, 0); + + kfree(name); + kfree(value); + return rc; +} + +/* ---- XATTR Handler for "security.*" ----------------- */ +static int jffs2_security_getxattr(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (!strcmp(name, "")) + return -EINVAL; + + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size); +} + +static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) +{ + if (!strcmp(name, "")) + return -EINVAL; + + return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags); +} + +static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1; + + if (list && retlen <= list_size) { + strcpy(list, XATTR_SECURITY_PREFIX); + strcpy(list + XATTR_SECURITY_PREFIX_LEN, name); + } + + return retlen; +} + +struct xattr_handler jffs2_security_xattr_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = jffs2_security_listxattr, + .set = jffs2_security_setxattr, + .get = jffs2_security_getxattr +}; diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index fb9cec61fcf2..0b02fc79e4d1 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -5,6 +5,7 @@ * Zoltan Sogor <weth@inf.u-szeged.hu>, * Patrik Kluba <pajko@halom.u-szeged.hu>, * University of Szeged, Hungary + * 2005 KaiGai Kohei <kaigai@ak.jp.nec.com> * * For licensing information, see the file 'LICENCE' in this directory. * @@ -81,6 +82,19 @@ static int jffs2_sum_add_mem(struct jffs2_summary *s, union jffs2_sum_mem *item) dbg_summary("dirent (%u) added to summary\n", je32_to_cpu(item->d.ino)); break; +#ifdef CONFIG_JFFS2_FS_XATTR + case JFFS2_NODETYPE_XATTR: + s->sum_size += JFFS2_SUMMARY_XATTR_SIZE; + s->sum_num++; + dbg_summary("xattr (xid=%u, version=%u) added to summary\n", + je32_to_cpu(item->x.xid), je32_to_cpu(item->x.version)); + break; + case JFFS2_NODETYPE_XREF: + s->sum_size += JFFS2_SUMMARY_XREF_SIZE; + s->sum_num++; + dbg_summary("xref added to summary\n"); + break; +#endif default: JFFS2_WARNING("UNKNOWN node type %u\n", je16_to_cpu(item->u.nodetype)); @@ -141,6 +155,40 @@ int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *r return jffs2_sum_add_mem(s, (union jffs2_sum_mem *)temp); } +#ifdef CONFIG_JFFS2_FS_XATTR +int jffs2_sum_add_xattr_mem(struct jffs2_summary *s, struct jffs2_raw_xattr *rx, uint32_t ofs) +{ + struct jffs2_sum_xattr_mem *temp; + + temp = kmalloc(sizeof(struct jffs2_sum_xattr_mem), GFP_KERNEL); + if (!temp) + return -ENOMEM; + + temp->nodetype = rx->nodetype; + temp->xid = rx->xid; + temp->version = rx->version; + temp->offset = cpu_to_je32(ofs); + temp->totlen = rx->totlen; + temp->next = NULL; + + return jffs2_sum_add_mem(s, (union jffs2_sum_mem *)temp); +} + +int jffs2_sum_add_xref_mem(struct jffs2_summary *s, struct jffs2_raw_xref *rr, uint32_t ofs) +{ + struct jffs2_sum_xref_mem *temp; + + temp = kmalloc(sizeof(struct jffs2_sum_xref_mem), GFP_KERNEL); + if (!temp) + return -ENOMEM; + + temp->nodetype = rr->nodetype; + temp->offset = cpu_to_je32(ofs); + temp->next = NULL; + + return jffs2_sum_add_mem(s, (union jffs2_sum_mem *)temp); +} +#endif /* Cleanup every collected summary information */ static void jffs2_sum_clean_collected(struct jffs2_summary *s) @@ -259,7 +307,40 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs, return jffs2_sum_add_mem(c->summary, (union jffs2_sum_mem *)temp); } +#ifdef CONFIG_JFFS2_FS_XATTR + case JFFS2_NODETYPE_XATTR: { + struct jffs2_sum_xattr_mem *temp; + if (je32_to_cpu(node->x.version) == 0xffffffff) + return 0; + temp = kmalloc(sizeof(struct jffs2_sum_xattr_mem), GFP_KERNEL); + if (!temp) + goto no_mem; + + temp->nodetype = node->x.nodetype; + temp->xid = node->x.xid; + temp->version = node->x.version; + temp->totlen = node->x.totlen; + temp->offset = cpu_to_je32(ofs); + temp->next = NULL; + + return jffs2_sum_add_mem(c->summary, (union jffs2_sum_mem *)temp); + } + case JFFS2_NODETYPE_XREF: { + struct jffs2_sum_xref_mem *temp; + + if (je32_to_cpu(node->r.ino) == 0xffffffff + && je32_to_cpu(node->r.xid) == 0xffffffff) + return 0; + temp = kmalloc(sizeof(struct jffs2_sum_xref_mem), GFP_KERNEL); + if (!temp) + goto no_mem; + temp->nodetype = node->r.nodetype; + temp->offset = cpu_to_je32(ofs); + temp->next = NULL; + return jffs2_sum_add_mem(c->summary, (union jffs2_sum_mem *)temp); + } +#endif case JFFS2_NODETYPE_PADDING: dbg_summary("node PADDING\n"); c->summary->sum_padded += je32_to_cpu(node->u.totlen); @@ -288,23 +369,41 @@ no_mem: return -ENOMEM; } +static struct jffs2_raw_node_ref *sum_link_node_ref(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb, + uint32_t ofs, uint32_t len, + struct jffs2_inode_cache *ic) +{ + /* If there was a gap, mark it dirty */ + if ((ofs & ~3) > c->sector_size - jeb->free_size) { + /* Ew. Summary doesn't actually tell us explicitly about dirty space */ + jffs2_scan_dirty_space(c, jeb, (ofs & ~3) - (c->sector_size - jeb->free_size)); + } + + return jffs2_link_node_ref(c, jeb, jeb->offset + ofs, len, ic); +} /* Process the stored summary information - helper function for jffs2_sum_scan_sumnode() */ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_raw_summary *summary, uint32_t *pseudo_random) { - struct jffs2_raw_node_ref *raw; struct jffs2_inode_cache *ic; struct jffs2_full_dirent *fd; void *sp; int i, ino; + int err; sp = summary->sum; for (i=0; i<je32_to_cpu(summary->sum_num); i++) { dbg_summary("processing summary index %d\n", i); + /* Make sure there's a spare ref for dirty space */ + err = jffs2_prealloc_raw_node_refs(c, jeb, 2); + if (err) + return err; + switch (je16_to_cpu(((struct jffs2_sum_unknown_flash *)sp)->nodetype)) { case JFFS2_NODETYPE_INODE: { struct jffs2_sum_inode_flash *spi; @@ -312,38 +411,20 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras ino = je32_to_cpu(spi->inode); - dbg_summary("Inode at 0x%08x\n", - jeb->offset + je32_to_cpu(spi->offset)); - - raw = jffs2_alloc_raw_node_ref(); - if (!raw) { - JFFS2_NOTICE("allocation of node reference failed\n"); - kfree(summary); - return -ENOMEM; - } + dbg_summary("Inode at 0x%08x-0x%08x\n", + jeb->offset + je32_to_cpu(spi->offset), + jeb->offset + je32_to_cpu(spi->offset) + je32_to_cpu(spi->totlen)); ic = jffs2_scan_make_ino_cache(c, ino); if (!ic) { JFFS2_NOTICE("scan_make_ino_cache failed\n"); - jffs2_free_raw_node_ref(raw); - kfree(summary); return -ENOMEM; } - raw->flash_offset = (jeb->offset + je32_to_cpu(spi->offset)) | REF_UNCHECKED; - raw->__totlen = PAD(je32_to_cpu(spi->totlen)); - raw->next_phys = NULL; - raw->next_in_ino = ic->nodes; - - ic->nodes = raw; - if (!jeb->first_node) - jeb->first_node = raw; - if (jeb->last_node) - jeb->last_node->next_phys = raw; - jeb->last_node = raw; - *pseudo_random += je32_to_cpu(spi->version); + sum_link_node_ref(c, jeb, je32_to_cpu(spi->offset) | REF_UNCHECKED, + PAD(je32_to_cpu(spi->totlen)), ic); - UNCHECKED_SPACE(PAD(je32_to_cpu(spi->totlen))); + *pseudo_random += je32_to_cpu(spi->version); sp += JFFS2_SUMMARY_INODE_SIZE; @@ -354,52 +435,33 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras struct jffs2_sum_dirent_flash *spd; spd = sp; - dbg_summary("Dirent at 0x%08x\n", - jeb->offset + je32_to_cpu(spd->offset)); + dbg_summary("Dirent at 0x%08x-0x%08x\n", + jeb->offset + je32_to_cpu(spd->offset), + jeb->offset + je32_to_cpu(spd->offset) + je32_to_cpu(spd->totlen)); + fd = jffs2_alloc_full_dirent(spd->nsize+1); - if (!fd) { - kfree(summary); + if (!fd) return -ENOMEM; - } memcpy(&fd->name, spd->name, spd->nsize); fd->name[spd->nsize] = 0; - raw = jffs2_alloc_raw_node_ref(); - if (!raw) { - jffs2_free_full_dirent(fd); - JFFS2_NOTICE("allocation of node reference failed\n"); - kfree(summary); - return -ENOMEM; - } - ic = jffs2_scan_make_ino_cache(c, je32_to_cpu(spd->pino)); if (!ic) { jffs2_free_full_dirent(fd); - jffs2_free_raw_node_ref(raw); - kfree(summary); return -ENOMEM; } - raw->__totlen = PAD(je32_to_cpu(spd->totlen)); - raw->flash_offset = (jeb->offset + je32_to_cpu(spd->offset)) | REF_PRISTINE; - raw->next_phys = NULL; - raw->next_in_ino = ic->nodes; - ic->nodes = raw; - if (!jeb->first_node) - jeb->first_node = raw; - if (jeb->last_node) - jeb->last_node->next_phys = raw; - jeb->last_node = raw; - - fd->raw = raw; + fd->raw = sum_link_node_ref(c, jeb, je32_to_cpu(spd->offset) | REF_UNCHECKED, + PAD(je32_to_cpu(spd->totlen)), ic); + fd->next = NULL; fd->version = je32_to_cpu(spd->version); fd->ino = je32_to_cpu(spd->ino); fd->nhash = full_name_hash(fd->name, spd->nsize); fd->type = spd->type; - USED_SPACE(PAD(je32_to_cpu(spd->totlen))); + jffs2_add_fd_to_list(c, fd, &ic->scan_dents); *pseudo_random += je32_to_cpu(spd->version); @@ -408,48 +470,105 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras break; } +#ifdef CONFIG_JFFS2_FS_XATTR + case JFFS2_NODETYPE_XATTR: { + struct jffs2_xattr_datum *xd; + struct jffs2_sum_xattr_flash *spx; + + spx = (struct jffs2_sum_xattr_flash *)sp; + dbg_summary("xattr at %#08x-%#08x (xid=%u, version=%u)\n", + jeb->offset + je32_to_cpu(spx->offset), + jeb->offset + je32_to_cpu(spx->offset) + je32_to_cpu(spx->totlen), + je32_to_cpu(spx->xid), je32_to_cpu(spx->version)); + + xd = jffs2_setup_xattr_datum(c, je32_to_cpu(spx->xid), + je32_to_cpu(spx->version)); + if (IS_ERR(xd)) { + if (PTR_ERR(xd) == -EEXIST) { + /* a newer version of xd exists */ + if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(spx->totlen)))) + return err; + sp += JFFS2_SUMMARY_XATTR_SIZE; + break; + } + JFFS2_NOTICE("allocation of xattr_datum failed\n"); + return PTR_ERR(xd); + } + + xd->node = sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED, + PAD(je32_to_cpu(spx->totlen)), NULL); + /* FIXME */ xd->node->next_in_ino = (void *)xd; + + *pseudo_random += je32_to_cpu(spx->xid); + sp += JFFS2_SUMMARY_XATTR_SIZE; + + break; + } + case JFFS2_NODETYPE_XREF: { + struct jffs2_xattr_ref *ref; + struct jffs2_sum_xref_flash *spr; + + spr = (struct jffs2_sum_xref_flash *)sp; + dbg_summary("xref at %#08x-%#08x\n", + jeb->offset + je32_to_cpu(spr->offset), + jeb->offset + je32_to_cpu(spr->offset) + + (uint32_t)PAD(sizeof(struct jffs2_raw_xref))); + + ref = jffs2_alloc_xattr_ref(); + if (!ref) { + JFFS2_NOTICE("allocation of xattr_datum failed\n"); + return -ENOMEM; + } + ref->ino = 0xfffffffe; + ref->xid = 0xfffffffd; + ref->next = c->xref_temp; + c->xref_temp = ref; + ref->node = sum_link_node_ref(c, jeb, je32_to_cpu(spr->offset) | REF_UNCHECKED, + PAD(sizeof(struct jffs2_raw_xref)), NULL); + /* FIXME */ ref->node->next_in_ino = (void *)ref; + + *pseudo_random += ref->node->flash_offset; + sp += JFFS2_SUMMARY_XREF_SIZE; + + break; + } +#endif default : { - JFFS2_WARNING("Unsupported node type found in summary! Exiting..."); - kfree(summary); - return -EIO; + uint16_t nodetype = je16_to_cpu(((struct jffs2_sum_unknown_flash *)sp)->nodetype); + JFFS2_WARNING("Unsupported node type %x found in summary! Exiting...\n", nodetype); + if ((nodetype & JFFS2_COMPAT_MASK) == JFFS2_FEATURE_INCOMPAT) + return -EIO; + + /* For compatible node types, just fall back to the full scan */ + c->wasted_size -= jeb->wasted_size; + c->free_size += c->sector_size - jeb->free_size; + c->used_size -= jeb->used_size; + c->dirty_size -= jeb->dirty_size; + jeb->wasted_size = jeb->used_size = jeb->dirty_size = 0; + jeb->free_size = c->sector_size; + + jffs2_free_jeb_node_refs(c, jeb); + return -ENOTRECOVERABLE; } } } - - kfree(summary); return 0; } /* Process the summary node - called from jffs2_scan_eraseblock() */ - int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, - uint32_t ofs, uint32_t *pseudo_random) + struct jffs2_raw_summary *summary, uint32_t sumsize, + uint32_t *pseudo_random) { struct jffs2_unknown_node crcnode; - struct jffs2_raw_node_ref *cache_ref; - struct jffs2_raw_summary *summary; - int ret, sumsize; + int ret, ofs; uint32_t crc; - sumsize = c->sector_size - ofs; - ofs += jeb->offset; + ofs = c->sector_size - sumsize; dbg_summary("summary found for 0x%08x at 0x%08x (0x%x bytes)\n", - jeb->offset, ofs, sumsize); - - summary = kmalloc(sumsize, GFP_KERNEL); - - if (!summary) { - return -ENOMEM; - } - - ret = jffs2_fill_scan_buf(c, (unsigned char *)summary, ofs, sumsize); - - if (ret) { - kfree(summary); - return ret; - } + jeb->offset, jeb->offset + ofs, sumsize); /* OK, now check for node validity and CRC */ crcnode.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -486,66 +605,49 @@ int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb dbg_summary("Summary : CLEANMARKER node \n"); + ret = jffs2_prealloc_raw_node_refs(c, jeb, 1); + if (ret) + return ret; + if (je32_to_cpu(summary->cln_mkr) != c->cleanmarker_size) { dbg_summary("CLEANMARKER node has totlen 0x%x != normal 0x%x\n", je32_to_cpu(summary->cln_mkr), c->cleanmarker_size); - UNCHECKED_SPACE(PAD(je32_to_cpu(summary->cln_mkr))); + if ((ret = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(summary->cln_mkr))))) + return ret; } else if (jeb->first_node) { dbg_summary("CLEANMARKER node not first node in block " "(0x%08x)\n", jeb->offset); - UNCHECKED_SPACE(PAD(je32_to_cpu(summary->cln_mkr))); + if ((ret = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(summary->cln_mkr))))) + return ret; } else { - struct jffs2_raw_node_ref *marker_ref = jffs2_alloc_raw_node_ref(); - - if (!marker_ref) { - JFFS2_NOTICE("Failed to allocate node ref for clean marker\n"); - kfree(summary); - return -ENOMEM; - } - - marker_ref->next_in_ino = NULL; - marker_ref->next_phys = NULL; - marker_ref->flash_offset = jeb->offset | REF_NORMAL; - marker_ref->__totlen = je32_to_cpu(summary->cln_mkr); - jeb->first_node = jeb->last_node = marker_ref; - - USED_SPACE( PAD(je32_to_cpu(summary->cln_mkr)) ); + jffs2_link_node_ref(c, jeb, jeb->offset | REF_NORMAL, + je32_to_cpu(summary->cln_mkr), NULL); } } - if (je32_to_cpu(summary->padded)) { - DIRTY_SPACE(je32_to_cpu(summary->padded)); - } - ret = jffs2_sum_process_sum_data(c, jeb, summary, pseudo_random); + /* -ENOTRECOVERABLE isn't a fatal error -- it means we should do a full + scan of this eraseblock. So return zero */ + if (ret == -ENOTRECOVERABLE) + return 0; if (ret) - return ret; + return ret; /* real error */ /* for PARANOIA_CHECK */ - cache_ref = jffs2_alloc_raw_node_ref(); - - if (!cache_ref) { - JFFS2_NOTICE("Failed to allocate node ref for cache\n"); - return -ENOMEM; - } - - cache_ref->next_in_ino = NULL; - cache_ref->next_phys = NULL; - cache_ref->flash_offset = ofs | REF_NORMAL; - cache_ref->__totlen = sumsize; - - if (!jeb->first_node) - jeb->first_node = cache_ref; - if (jeb->last_node) - jeb->last_node->next_phys = cache_ref; - jeb->last_node = cache_ref; + ret = jffs2_prealloc_raw_node_refs(c, jeb, 2); + if (ret) + return ret; - USED_SPACE(sumsize); + sum_link_node_ref(c, jeb, ofs | REF_NORMAL, sumsize, NULL); - jeb->wasted_size += jeb->free_size; - c->wasted_size += jeb->free_size; - c->free_size -= jeb->free_size; - jeb->free_size = 0; + if (unlikely(jeb->free_size)) { + JFFS2_WARNING("Free size 0x%x bytes in eraseblock @0x%08x with summary?\n", + jeb->free_size, jeb->offset); + jeb->wasted_size += jeb->free_size; + c->wasted_size += jeb->free_size; + c->free_size -= jeb->free_size; + jeb->free_size = 0; + } return jffs2_scan_classify_jeb(c, jeb); @@ -564,6 +666,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock union jffs2_sum_mem *temp; struct jffs2_sum_marker *sm; struct kvec vecs[2]; + uint32_t sum_ofs; void *wpage; int ret; size_t retlen; @@ -581,16 +684,17 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock wpage = c->summary->sum_buf; while (c->summary->sum_num) { + temp = c->summary->sum_list_head; - switch (je16_to_cpu(c->summary->sum_list_head->u.nodetype)) { + switch (je16_to_cpu(temp->u.nodetype)) { case JFFS2_NODETYPE_INODE: { struct jffs2_sum_inode_flash *sino_ptr = wpage; - sino_ptr->nodetype = c->summary->sum_list_head->i.nodetype; - sino_ptr->inode = c->summary->sum_list_head->i.inode; - sino_ptr->version = c->summary->sum_list_head->i.version; - sino_ptr->offset = c->summary->sum_list_head->i.offset; - sino_ptr->totlen = c->summary->sum_list_head->i.totlen; + sino_ptr->nodetype = temp->i.nodetype; + sino_ptr->inode = temp->i.inode; + sino_ptr->version = temp->i.version; + sino_ptr->offset = temp->i.offset; + sino_ptr->totlen = temp->i.totlen; wpage += JFFS2_SUMMARY_INODE_SIZE; @@ -600,30 +704,60 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock case JFFS2_NODETYPE_DIRENT: { struct jffs2_sum_dirent_flash *sdrnt_ptr = wpage; - sdrnt_ptr->nodetype = c->summary->sum_list_head->d.nodetype; - sdrnt_ptr->totlen = c->summary->sum_list_head->d.totlen; - sdrnt_ptr->offset = c->summary->sum_list_head->d.offset; - sdrnt_ptr->pino = c->summary->sum_list_head->d.pino; - sdrnt_ptr->version = c->summary->sum_list_head->d.version; - sdrnt_ptr->ino = c->summary->sum_list_head->d.ino; - sdrnt_ptr->nsize = c->summary->sum_list_head->d.nsize; - sdrnt_ptr->type = c->summary->sum_list_head->d.type; + sdrnt_ptr->nodetype = temp->d.nodetype; + sdrnt_ptr->totlen = temp->d.totlen; + sdrnt_ptr->offset = temp->d.offset; + sdrnt_ptr->pino = temp->d.pino; + sdrnt_ptr->version = temp->d.version; + sdrnt_ptr->ino = temp->d.ino; + sdrnt_ptr->nsize = temp->d.nsize; + sdrnt_ptr->type = temp->d.type; - memcpy(sdrnt_ptr->name, c->summary->sum_list_head->d.name, - c->summary->sum_list_head->d.nsize); + memcpy(sdrnt_ptr->name, temp->d.name, + temp->d.nsize); - wpage += JFFS2_SUMMARY_DIRENT_SIZE(c->summary->sum_list_head->d.nsize); + wpage += JFFS2_SUMMARY_DIRENT_SIZE(temp->d.nsize); break; } +#ifdef CONFIG_JFFS2_FS_XATTR + case JFFS2_NODETYPE_XATTR: { + struct jffs2_sum_xattr_flash *sxattr_ptr = wpage; + + temp = c->summary->sum_list_head; + sxattr_ptr->nodetype = temp->x.nodetype; + sxattr_ptr->xid = temp->x.xid; + sxattr_ptr->version = temp->x.version; + sxattr_ptr->offset = temp->x.offset; + sxattr_ptr->totlen = temp->x.totlen; + + wpage += JFFS2_SUMMARY_XATTR_SIZE; + break; + } + case JFFS2_NODETYPE_XREF: { + struct jffs2_sum_xref_flash *sxref_ptr = wpage; + temp = c->summary->sum_list_head; + sxref_ptr->nodetype = temp->r.nodetype; + sxref_ptr->offset = temp->r.offset; + + wpage += JFFS2_SUMMARY_XREF_SIZE; + break; + } +#endif default : { - BUG(); /* unknown node in summary information */ + if ((je16_to_cpu(temp->u.nodetype) & JFFS2_COMPAT_MASK) + == JFFS2_FEATURE_RWCOMPAT_COPY) { + dbg_summary("Writing unknown RWCOMPAT_COPY node type %x\n", + je16_to_cpu(temp->u.nodetype)); + jffs2_sum_disable_collecting(c->summary); + } else { + BUG(); /* unknown node in summary information */ + } } } - temp = c->summary->sum_list_head; - c->summary->sum_list_head = c->summary->sum_list_head->u.next; + c->summary->sum_list_head = temp->u.next; kfree(temp); c->summary->sum_num--; @@ -645,25 +779,34 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock vecs[1].iov_base = c->summary->sum_buf; vecs[1].iov_len = datasize; - dbg_summary("JFFS2: writing out data to flash to pos : 0x%08x\n", - jeb->offset + c->sector_size - jeb->free_size); + sum_ofs = jeb->offset + c->sector_size - jeb->free_size; - spin_unlock(&c->erase_completion_lock); - ret = jffs2_flash_writev(c, vecs, 2, jeb->offset + c->sector_size - - jeb->free_size, &retlen, 0); - spin_lock(&c->erase_completion_lock); + dbg_summary("JFFS2: writing out data to flash to pos : 0x%08x\n", + sum_ofs); + ret = jffs2_flash_writev(c, vecs, 2, sum_ofs, &retlen, 0); if (ret || (retlen != infosize)) { - JFFS2_WARNING("Write of %zd bytes at 0x%08x failed. returned %d, retlen %zd\n", - infosize, jeb->offset + c->sector_size - jeb->free_size, ret, retlen); + + JFFS2_WARNING("Write of %u bytes at 0x%08x failed. returned %d, retlen %zd\n", + infosize, sum_ofs, ret, retlen); + + if (retlen) { + /* Waste remaining space */ + spin_lock(&c->erase_completion_lock); + jffs2_link_node_ref(c, jeb, sum_ofs | REF_OBSOLETE, infosize, NULL); + spin_unlock(&c->erase_completion_lock); + } c->summary->sum_size = JFFS2_SUMMARY_NOSUM_SIZE; - WASTED_SPACE(infosize); - return 1; + return 0; } + spin_lock(&c->erase_completion_lock); + jffs2_link_node_ref(c, jeb, sum_ofs | REF_NORMAL, infosize, NULL); + spin_unlock(&c->erase_completion_lock); + return 0; } @@ -671,13 +814,16 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock int jffs2_sum_write_sumnode(struct jffs2_sb_info *c) { - struct jffs2_raw_node_ref *summary_ref; - int datasize, infosize, padsize, ret; + int datasize, infosize, padsize; struct jffs2_eraseblock *jeb; + int ret; dbg_summary("called\n"); + spin_unlock(&c->erase_completion_lock); + jeb = c->nextblock; + jffs2_prealloc_raw_node_refs(c, jeb, 1); if (!c->summary->sum_num || !c->summary->sum_list_head) { JFFS2_WARNING("Empty summary info!!!\n"); @@ -696,35 +842,11 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c) jffs2_sum_disable_collecting(c->summary); JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize); + spin_lock(&c->erase_completion_lock); return 0; } ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize); - if (ret) - return 0; /* can't write out summary, block is marked as NOSUM_SIZE */ - - /* for ACCT_PARANOIA_CHECK */ - spin_unlock(&c->erase_completion_lock); - summary_ref = jffs2_alloc_raw_node_ref(); spin_lock(&c->erase_completion_lock); - - if (!summary_ref) { - JFFS2_NOTICE("Failed to allocate node ref for summary\n"); - return -ENOMEM; - } - - summary_ref->next_in_ino = NULL; - summary_ref->next_phys = NULL; - summary_ref->flash_offset = (jeb->offset + c->sector_size - jeb->free_size) | REF_NORMAL; - summary_ref->__totlen = infosize; - - if (!jeb->first_node) - jeb->first_node = summary_ref; - if (jeb->last_node) - jeb->last_node->next_phys = summary_ref; - jeb->last_node = summary_ref; - - USED_SPACE(infosize); - - return 0; + return ret; } diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h index b7a678be1709..6bf1f6aa4552 100644 --- a/fs/jffs2/summary.h +++ b/fs/jffs2/summary.h @@ -18,23 +18,6 @@ #include <linux/uio.h> #include <linux/jffs2.h> -#define DIRTY_SPACE(x) do { typeof(x) _x = (x); \ - c->free_size -= _x; c->dirty_size += _x; \ - jeb->free_size -= _x ; jeb->dirty_size += _x; \ - }while(0) -#define USED_SPACE(x) do { typeof(x) _x = (x); \ - c->free_size -= _x; c->used_size += _x; \ - jeb->free_size -= _x ; jeb->used_size += _x; \ - }while(0) -#define WASTED_SPACE(x) do { typeof(x) _x = (x); \ - c->free_size -= _x; c->wasted_size += _x; \ - jeb->free_size -= _x ; jeb->wasted_size += _x; \ - }while(0) -#define UNCHECKED_SPACE(x) do { typeof(x) _x = (x); \ - c->free_size -= _x; c->unchecked_size += _x; \ - jeb->free_size -= _x ; jeb->unchecked_size += _x; \ - }while(0) - #define BLK_STATE_ALLFF 0 #define BLK_STATE_CLEAN 1 #define BLK_STATE_PARTDIRTY 2 @@ -45,6 +28,8 @@ #define JFFS2_SUMMARY_NOSUM_SIZE 0xffffffff #define JFFS2_SUMMARY_INODE_SIZE (sizeof(struct jffs2_sum_inode_flash)) #define JFFS2_SUMMARY_DIRENT_SIZE(x) (sizeof(struct jffs2_sum_dirent_flash) + (x)) +#define JFFS2_SUMMARY_XATTR_SIZE (sizeof(struct jffs2_sum_xattr_flash)) +#define JFFS2_SUMMARY_XREF_SIZE (sizeof(struct jffs2_sum_xref_flash)) /* Summary structures used on flash */ @@ -75,11 +60,28 @@ struct jffs2_sum_dirent_flash uint8_t name[0]; /* dirent name */ } __attribute__((packed)); +struct jffs2_sum_xattr_flash +{ + jint16_t nodetype; /* == JFFS2_NODETYPE_XATR */ + jint32_t xid; /* xattr identifier */ + jint32_t version; /* version number */ + jint32_t offset; /* offset on jeb */ + jint32_t totlen; /* node length */ +} __attribute__((packed)); + +struct jffs2_sum_xref_flash +{ + jint16_t nodetype; /* == JFFS2_NODETYPE_XREF */ + jint32_t offset; /* offset on jeb */ +} __attribute__((packed)); + union jffs2_sum_flash { struct jffs2_sum_unknown_flash u; struct jffs2_sum_inode_flash i; struct jffs2_sum_dirent_flash d; + struct jffs2_sum_xattr_flash x; + struct jffs2_sum_xref_flash r; }; /* Summary structures used in the memory */ @@ -114,11 +116,30 @@ struct jffs2_sum_dirent_mem uint8_t name[0]; /* dirent name */ } __attribute__((packed)); +struct jffs2_sum_xattr_mem +{ + union jffs2_sum_mem *next; + jint16_t nodetype; + jint32_t xid; + jint32_t version; + jint32_t offset; + jint32_t totlen; +} __attribute__((packed)); + +struct jffs2_sum_xref_mem +{ + union jffs2_sum_mem *next; + jint16_t nodetype; + jint32_t offset; +} __attribute__((packed)); + union jffs2_sum_mem { struct jffs2_sum_unknown_mem u; struct jffs2_sum_inode_mem i; struct jffs2_sum_dirent_mem d; + struct jffs2_sum_xattr_mem x; + struct jffs2_sum_xref_mem r; }; /* Summary related information stored in superblock */ @@ -159,8 +180,11 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c); int jffs2_sum_add_padding_mem(struct jffs2_summary *s, uint32_t size); int jffs2_sum_add_inode_mem(struct jffs2_summary *s, struct jffs2_raw_inode *ri, uint32_t ofs); int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *rd, uint32_t ofs); +int jffs2_sum_add_xattr_mem(struct jffs2_summary *s, struct jffs2_raw_xattr *rx, uint32_t ofs); +int jffs2_sum_add_xref_mem(struct jffs2_summary *s, struct jffs2_raw_xref *rr, uint32_t ofs); int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, - uint32_t ofs, uint32_t *pseudo_random); + struct jffs2_raw_summary *summary, uint32_t sumlen, + uint32_t *pseudo_random); #else /* SUMMARY DISABLED */ @@ -176,7 +200,9 @@ int jffs2_sum_scan_sumnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb #define jffs2_sum_add_padding_mem(a,b) #define jffs2_sum_add_inode_mem(a,b,c) #define jffs2_sum_add_dirent_mem(a,b,c) -#define jffs2_sum_scan_sumnode(a,b,c,d) (0) +#define jffs2_sum_add_xattr_mem(a,b,c) +#define jffs2_sum_add_xref_mem(a,b,c) +#define jffs2_sum_scan_sumnode(a,b,c,d,e) (0) #endif /* CONFIG_JFFS2_SUMMARY */ diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index ffd8e84b22cc..9d0521451f59 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -151,7 +151,10 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, sb->s_op = &jffs2_super_operations; sb->s_flags = flags | MS_NOATIME; - + sb->s_xattr = jffs2_xattr_handlers; +#ifdef CONFIG_JFFS2_FS_POSIX_ACL + sb->s_flags |= MS_POSIXACL; +#endif ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (ret) { @@ -293,6 +296,7 @@ static void jffs2_put_super (struct super_block *sb) kfree(c->blocks); jffs2_flash_cleanup(c); kfree(c->inocache_list); + jffs2_clear_xattr_subsystem(c); if (c->mtd->sync) c->mtd->sync(c->mtd); @@ -320,6 +324,18 @@ static int __init init_jffs2_fs(void) { int ret; + /* Paranoia checks for on-medium structures. If we ask GCC + to pack them with __attribute__((packed)) then it _also_ + assumes that they're not aligned -- so it emits crappy + code on some architectures. Ideally we want an attribute + which means just 'no padding', without the alignment + thing. But GCC doesn't have that -- we have to just + hope the structs are the right sizes, instead. */ + BUG_ON(sizeof(struct jffs2_unknown_node) != 12); + BUG_ON(sizeof(struct jffs2_raw_dirent) != 40); + BUG_ON(sizeof(struct jffs2_raw_inode) != 68); + BUG_ON(sizeof(struct jffs2_raw_summary) != 32); + printk(KERN_INFO "JFFS2 version 2.2." #ifdef CONFIG_JFFS2_FS_WRITEBUFFER " (NAND)" @@ -327,7 +343,7 @@ static int __init init_jffs2_fs(void) #ifdef CONFIG_JFFS2_SUMMARY " (SUMMARY) " #endif - " (C) 2001-2003 Red Hat, Inc.\n"); + " (C) 2001-2006 Red Hat, Inc.\n"); jffs2_inode_cachep = kmem_cache_create("jffs2_i", sizeof(struct jffs2_inode_info), diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index d55754fe8925..fc211b6e9b03 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c @@ -24,7 +24,12 @@ struct inode_operations jffs2_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = jffs2_follow_link, - .setattr = jffs2_setattr + .permission = jffs2_permission, + .setattr = jffs2_setattr, + .setxattr = jffs2_setxattr, + .getxattr = jffs2_getxattr, + .listxattr = jffs2_listxattr, + .removexattr = jffs2_removexattr }; static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 4cebf0e57c46..a7f153f79ecb 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -156,69 +156,130 @@ static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock jffs2_erase_pending_trigger(c); } - /* Adjust its size counts accordingly */ - c->wasted_size += jeb->free_size; - c->free_size -= jeb->free_size; - jeb->wasted_size += jeb->free_size; - jeb->free_size = 0; + if (!jffs2_prealloc_raw_node_refs(c, jeb, 1)) { + uint32_t oldfree = jeb->free_size; + + jffs2_link_node_ref(c, jeb, + (jeb->offset+c->sector_size-oldfree) | REF_OBSOLETE, + oldfree, NULL); + /* convert to wasted */ + c->wasted_size += oldfree; + jeb->wasted_size += oldfree; + c->dirty_size -= oldfree; + jeb->dirty_size -= oldfree; + } jffs2_dbg_dump_block_lists_nolock(c); jffs2_dbg_acct_sanity_check_nolock(c,jeb); jffs2_dbg_acct_paranoia_check_nolock(c, jeb); } +static struct jffs2_raw_node_ref **jffs2_incore_replace_raw(struct jffs2_sb_info *c, + struct jffs2_inode_info *f, + struct jffs2_raw_node_ref *raw, + union jffs2_node_union *node) +{ + struct jffs2_node_frag *frag; + struct jffs2_full_dirent *fd; + + dbg_noderef("incore_replace_raw: node at %p is {%04x,%04x}\n", + node, je16_to_cpu(node->u.magic), je16_to_cpu(node->u.nodetype)); + + BUG_ON(je16_to_cpu(node->u.magic) != 0x1985 && + je16_to_cpu(node->u.magic) != 0); + + switch (je16_to_cpu(node->u.nodetype)) { + case JFFS2_NODETYPE_INODE: + if (f->metadata && f->metadata->raw == raw) { + dbg_noderef("Will replace ->raw in f->metadata at %p\n", f->metadata); + return &f->metadata->raw; + } + frag = jffs2_lookup_node_frag(&f->fragtree, je32_to_cpu(node->i.offset)); + BUG_ON(!frag); + /* Find a frag which refers to the full_dnode we want to modify */ + while (!frag->node || frag->node->raw != raw) { + frag = frag_next(frag); + BUG_ON(!frag); + } + dbg_noderef("Will replace ->raw in full_dnode at %p\n", frag->node); + return &frag->node->raw; + + case JFFS2_NODETYPE_DIRENT: + for (fd = f->dents; fd; fd = fd->next) { + if (fd->raw == raw) { + dbg_noderef("Will replace ->raw in full_dirent at %p\n", fd); + return &fd->raw; + } + } + BUG(); + + default: + dbg_noderef("Don't care about replacing raw for nodetype %x\n", + je16_to_cpu(node->u.nodetype)); + break; + } + return NULL; +} + /* Recover from failure to write wbuf. Recover the nodes up to the * wbuf, not the one which we were starting to try to write. */ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) { struct jffs2_eraseblock *jeb, *new_jeb; - struct jffs2_raw_node_ref **first_raw, **raw; + struct jffs2_raw_node_ref *raw, *next, *first_raw = NULL; size_t retlen; int ret; + int nr_refile = 0; unsigned char *buf; uint32_t start, end, ofs, len; - spin_lock(&c->erase_completion_lock); - jeb = &c->blocks[c->wbuf_ofs / c->sector_size]; + spin_lock(&c->erase_completion_lock); jffs2_block_refile(c, jeb, REFILE_NOTEMPTY); + spin_unlock(&c->erase_completion_lock); + + BUG_ON(!ref_obsolete(jeb->last_node)); /* Find the first node to be recovered, by skipping over every node which ends before the wbuf starts, or which is obsolete. */ - first_raw = &jeb->first_node; - while (*first_raw && - (ref_obsolete(*first_raw) || - (ref_offset(*first_raw)+ref_totlen(c, jeb, *first_raw)) < c->wbuf_ofs)) { - D1(printk(KERN_DEBUG "Skipping node at 0x%08x(%d)-0x%08x which is either before 0x%08x or obsolete\n", - ref_offset(*first_raw), ref_flags(*first_raw), - (ref_offset(*first_raw) + ref_totlen(c, jeb, *first_raw)), - c->wbuf_ofs)); - first_raw = &(*first_raw)->next_phys; + for (next = raw = jeb->first_node; next; raw = next) { + next = ref_next(raw); + + if (ref_obsolete(raw) || + (next && ref_offset(next) <= c->wbuf_ofs)) { + dbg_noderef("Skipping node at 0x%08x(%d)-0x%08x which is either before 0x%08x or obsolete\n", + ref_offset(raw), ref_flags(raw), + (ref_offset(raw) + ref_totlen(c, jeb, raw)), + c->wbuf_ofs); + continue; + } + dbg_noderef("First node to be recovered is at 0x%08x(%d)-0x%08x\n", + ref_offset(raw), ref_flags(raw), + (ref_offset(raw) + ref_totlen(c, jeb, raw))); + + first_raw = raw; + break; } - if (!*first_raw) { + if (!first_raw) { /* All nodes were obsolete. Nothing to recover. */ D1(printk(KERN_DEBUG "No non-obsolete nodes to be recovered. Just filing block bad\n")); - spin_unlock(&c->erase_completion_lock); + c->wbuf_len = 0; return; } - start = ref_offset(*first_raw); - end = ref_offset(*first_raw) + ref_totlen(c, jeb, *first_raw); - - /* Find the last node to be recovered */ - raw = first_raw; - while ((*raw)) { - if (!ref_obsolete(*raw)) - end = ref_offset(*raw) + ref_totlen(c, jeb, *raw); + start = ref_offset(first_raw); + end = ref_offset(jeb->last_node); + nr_refile = 1; - raw = &(*raw)->next_phys; - } - spin_unlock(&c->erase_completion_lock); + /* Count the number of refs which need to be copied */ + while ((raw = ref_next(raw)) != jeb->last_node) + nr_refile++; - D1(printk(KERN_DEBUG "wbuf recover %08x-%08x\n", start, end)); + dbg_noderef("wbuf recover %08x-%08x (%d bytes in %d nodes)\n", + start, end, end - start, nr_refile); buf = NULL; if (start < c->wbuf_ofs) { @@ -233,28 +294,37 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) } /* Do the read... */ - if (jffs2_cleanmarker_oob(c)) - ret = c->mtd->read_ecc(c->mtd, start, c->wbuf_ofs - start, &retlen, buf, NULL, c->oobinfo); - else - ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf); + ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf); - if (ret == -EBADMSG && retlen == c->wbuf_ofs - start) { - /* ECC recovered */ + /* ECC recovered ? */ + if ((ret == -EUCLEAN || ret == -EBADMSG) && + (retlen == c->wbuf_ofs - start)) ret = 0; - } + if (ret || retlen != c->wbuf_ofs - start) { printk(KERN_CRIT "Old data are already lost in wbuf recovery. Data loss ensues.\n"); kfree(buf); buf = NULL; read_failed: - first_raw = &(*first_raw)->next_phys; + first_raw = ref_next(first_raw); + nr_refile--; + while (first_raw && ref_obsolete(first_raw)) { + first_raw = ref_next(first_raw); + nr_refile--; + } + /* If this was the only node to be recovered, give up */ - if (!(*first_raw)) + if (!first_raw) { + c->wbuf_len = 0; return; + } /* It wasn't. Go on and try to recover nodes complete in the wbuf */ - start = ref_offset(*first_raw); + start = ref_offset(first_raw); + dbg_noderef("wbuf now recover %08x-%08x (%d bytes in %d nodes)\n", + start, end, end - start, nr_refile); + } else { /* Read succeeded. Copy the remaining data from the wbuf */ memcpy(buf + (c->wbuf_ofs - start), c->wbuf, end - c->wbuf_ofs); @@ -263,14 +333,23 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) /* OK... we're to rewrite (end-start) bytes of data from first_raw onwards. Either 'buf' contains the data, or we find it in the wbuf */ - /* ... and get an allocation of space from a shiny new block instead */ - ret = jffs2_reserve_space_gc(c, end-start, &ofs, &len, JFFS2_SUMMARY_NOSUM_SIZE); + ret = jffs2_reserve_space_gc(c, end-start, &len, JFFS2_SUMMARY_NOSUM_SIZE); if (ret) { printk(KERN_WARNING "Failed to allocate space for wbuf recovery. Data loss ensues.\n"); kfree(buf); return; } + + ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, nr_refile); + if (ret) { + printk(KERN_WARNING "Failed to allocate node refs for wbuf recovery. Data loss ensues.\n"); + kfree(buf); + return; + } + + ofs = write_ofs(c); + if (end-start >= c->wbuf_pagesize) { /* Need to do another write immediately, but it's possible that this is just because the wbuf itself is completely @@ -288,36 +367,22 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) if (breakme++ == 20) { printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs); breakme = 0; - c->mtd->write_ecc(c->mtd, ofs, towrite, &retlen, - brokenbuf, NULL, c->oobinfo); + c->mtd->write(c->mtd, ofs, towrite, &retlen, + brokenbuf); ret = -EIO; } else #endif - if (jffs2_cleanmarker_oob(c)) - ret = c->mtd->write_ecc(c->mtd, ofs, towrite, &retlen, - rewrite_buf, NULL, c->oobinfo); - else - ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, rewrite_buf); + ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, + rewrite_buf); if (ret || retlen != towrite) { /* Argh. We tried. Really we did. */ printk(KERN_CRIT "Recovery of wbuf failed due to a second write error\n"); kfree(buf); - if (retlen) { - struct jffs2_raw_node_ref *raw2; - - raw2 = jffs2_alloc_raw_node_ref(); - if (!raw2) - return; + if (retlen) + jffs2_add_physical_node_ref(c, ofs | REF_OBSOLETE, ref_totlen(c, jeb, first_raw), NULL); - raw2->flash_offset = ofs | REF_OBSOLETE; - raw2->__totlen = ref_totlen(c, jeb, *first_raw); - raw2->next_phys = NULL; - raw2->next_in_ino = NULL; - - jffs2_add_physical_node_ref(c, raw2); - } return; } printk(KERN_NOTICE "Recovery of wbuf succeeded to %08x\n", ofs); @@ -326,12 +391,10 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) c->wbuf_ofs = ofs + towrite; memmove(c->wbuf, rewrite_buf + towrite, c->wbuf_len); /* Don't muck about with c->wbuf_inodes. False positives are harmless. */ - kfree(buf); } else { /* OK, now we're left with the dregs in whichever buffer we're using */ if (buf) { memcpy(c->wbuf, buf, end-start); - kfree(buf); } else { memmove(c->wbuf, c->wbuf + (start - c->wbuf_ofs), end - start); } @@ -343,62 +406,111 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) new_jeb = &c->blocks[ofs / c->sector_size]; spin_lock(&c->erase_completion_lock); - if (new_jeb->first_node) { - /* Odd, but possible with ST flash later maybe */ - new_jeb->last_node->next_phys = *first_raw; - } else { - new_jeb->first_node = *first_raw; - } - - raw = first_raw; - while (*raw) { - uint32_t rawlen = ref_totlen(c, jeb, *raw); + for (raw = first_raw; raw != jeb->last_node; raw = ref_next(raw)) { + uint32_t rawlen = ref_totlen(c, jeb, raw); + struct jffs2_inode_cache *ic; + struct jffs2_raw_node_ref *new_ref; + struct jffs2_raw_node_ref **adjust_ref = NULL; + struct jffs2_inode_info *f = NULL; D1(printk(KERN_DEBUG "Refiling block of %08x at %08x(%d) to %08x\n", - rawlen, ref_offset(*raw), ref_flags(*raw), ofs)); + rawlen, ref_offset(raw), ref_flags(raw), ofs)); + + ic = jffs2_raw_ref_to_ic(raw); + + /* Ick. This XATTR mess should be fixed shortly... */ + if (ic && ic->class == RAWNODE_CLASS_XATTR_DATUM) { + struct jffs2_xattr_datum *xd = (void *)ic; + BUG_ON(xd->node != raw); + adjust_ref = &xd->node; + raw->next_in_ino = NULL; + ic = NULL; + } else if (ic && ic->class == RAWNODE_CLASS_XATTR_REF) { + struct jffs2_xattr_datum *xr = (void *)ic; + BUG_ON(xr->node != raw); + adjust_ref = &xr->node; + raw->next_in_ino = NULL; + ic = NULL; + } else if (ic && ic->class == RAWNODE_CLASS_INODE_CACHE) { + struct jffs2_raw_node_ref **p = &ic->nodes; + + /* Remove the old node from the per-inode list */ + while (*p && *p != (void *)ic) { + if (*p == raw) { + (*p) = (raw->next_in_ino); + raw->next_in_ino = NULL; + break; + } + p = &((*p)->next_in_ino); + } - if (ref_obsolete(*raw)) { - /* Shouldn't really happen much */ - new_jeb->dirty_size += rawlen; - new_jeb->free_size -= rawlen; - c->dirty_size += rawlen; - } else { - new_jeb->used_size += rawlen; - new_jeb->free_size -= rawlen; + if (ic->state == INO_STATE_PRESENT && !ref_obsolete(raw)) { + /* If it's an in-core inode, then we have to adjust any + full_dirent or full_dnode structure to point to the + new version instead of the old */ + f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink); + if (IS_ERR(f)) { + /* Should never happen; it _must_ be present */ + JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n", + ic->ino, PTR_ERR(f)); + BUG(); + } + /* We don't lock f->sem. There's a number of ways we could + end up in here with it already being locked, and nobody's + going to modify it on us anyway because we hold the + alloc_sem. We're only changing one ->raw pointer too, + which we can get away with without upsetting readers. */ + adjust_ref = jffs2_incore_replace_raw(c, f, raw, + (void *)(buf?:c->wbuf) + (ref_offset(raw) - start)); + } else if (unlikely(ic->state != INO_STATE_PRESENT && + ic->state != INO_STATE_CHECKEDABSENT && + ic->state != INO_STATE_GC)) { + JFFS2_ERROR("Inode #%u is in strange state %d!\n", ic->ino, ic->state); + BUG(); + } + } + + new_ref = jffs2_link_node_ref(c, new_jeb, ofs | ref_flags(raw), rawlen, ic); + + if (adjust_ref) { + BUG_ON(*adjust_ref != raw); + *adjust_ref = new_ref; + } + if (f) + jffs2_gc_release_inode(c, f); + + if (!ref_obsolete(raw)) { jeb->dirty_size += rawlen; jeb->used_size -= rawlen; c->dirty_size += rawlen; + c->used_size -= rawlen; + raw->flash_offset = ref_offset(raw) | REF_OBSOLETE; + BUG_ON(raw->next_in_ino); } - c->free_size -= rawlen; - (*raw)->flash_offset = ofs | ref_flags(*raw); ofs += rawlen; - new_jeb->last_node = *raw; - - raw = &(*raw)->next_phys; } + kfree(buf); + /* Fix up the original jeb now it's on the bad_list */ - *first_raw = NULL; - if (first_raw == &jeb->first_node) { - jeb->last_node = NULL; + if (first_raw == jeb->first_node) { D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); list_del(&jeb->list); list_add(&jeb->list, &c->erase_pending_list); c->nr_erasing_blocks++; jffs2_erase_pending_trigger(c); } - else - jeb->last_node = container_of(first_raw, struct jffs2_raw_node_ref, next_phys); jffs2_dbg_acct_sanity_check_nolock(c, jeb); - jffs2_dbg_acct_paranoia_check_nolock(c, jeb); + jffs2_dbg_acct_paranoia_check_nolock(c, jeb); jffs2_dbg_acct_sanity_check_nolock(c, new_jeb); - jffs2_dbg_acct_paranoia_check_nolock(c, new_jeb); + jffs2_dbg_acct_paranoia_check_nolock(c, new_jeb); spin_unlock(&c->erase_completion_lock); - D1(printk(KERN_DEBUG "wbuf recovery completed OK\n")); + D1(printk(KERN_DEBUG "wbuf recovery completed OK. wbuf_ofs 0x%08x, len 0x%x\n", c->wbuf_ofs, c->wbuf_len)); + } /* Meaning of pad argument: @@ -412,6 +524,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) { + struct jffs2_eraseblock *wbuf_jeb; int ret; size_t retlen; @@ -429,6 +542,10 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) if (!c->wbuf_len) /* already checked c->wbuf above */ return 0; + wbuf_jeb = &c->blocks[c->wbuf_ofs / c->sector_size]; + if (jffs2_prealloc_raw_node_refs(c, wbuf_jeb, c->nextblock->allocated_refs + 1)) + return -ENOMEM; + /* claim remaining space on the page this happens, if we have a change to a new block, or if fsync forces us to flush the writebuffer. @@ -458,15 +575,12 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) if (breakme++ == 20) { printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs); breakme = 0; - c->mtd->write_ecc(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, - &retlen, brokenbuf, NULL, c->oobinfo); + c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, + brokenbuf); ret = -EIO; } else #endif - if (jffs2_cleanmarker_oob(c)) - ret = c->mtd->write_ecc(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf, NULL, c->oobinfo); - else ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf); if (ret || retlen != c->wbuf_pagesize) { @@ -483,32 +597,34 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) return ret; } - spin_lock(&c->erase_completion_lock); - /* Adjust free size of the block if we padded. */ if (pad) { - struct jffs2_eraseblock *jeb; - - jeb = &c->blocks[c->wbuf_ofs / c->sector_size]; + uint32_t waste = c->wbuf_pagesize - c->wbuf_len; D1(printk(KERN_DEBUG "jffs2_flush_wbuf() adjusting free_size of %sblock at %08x\n", - (jeb==c->nextblock)?"next":"", jeb->offset)); + (wbuf_jeb==c->nextblock)?"next":"", wbuf_jeb->offset)); /* wbuf_pagesize - wbuf_len is the amount of space that's to be padded. If there is less free space in the block than that, something screwed up */ - if (jeb->free_size < (c->wbuf_pagesize - c->wbuf_len)) { + if (wbuf_jeb->free_size < waste) { printk(KERN_CRIT "jffs2_flush_wbuf(): Accounting error. wbuf at 0x%08x has 0x%03x bytes, 0x%03x left.\n", - c->wbuf_ofs, c->wbuf_len, c->wbuf_pagesize-c->wbuf_len); + c->wbuf_ofs, c->wbuf_len, waste); printk(KERN_CRIT "jffs2_flush_wbuf(): But free_size for block at 0x%08x is only 0x%08x\n", - jeb->offset, jeb->free_size); + wbuf_jeb->offset, wbuf_jeb->free_size); BUG(); } - jeb->free_size -= (c->wbuf_pagesize - c->wbuf_len); - c->free_size -= (c->wbuf_pagesize - c->wbuf_len); - jeb->wasted_size += (c->wbuf_pagesize - c->wbuf_len); - c->wasted_size += (c->wbuf_pagesize - c->wbuf_len); - } + + spin_lock(&c->erase_completion_lock); + + jffs2_link_node_ref(c, wbuf_jeb, (c->wbuf_ofs + c->wbuf_len) | REF_OBSOLETE, waste, NULL); + /* FIXME: that made it count as dirty. Convert to wasted */ + wbuf_jeb->dirty_size -= waste; + c->dirty_size -= waste; + wbuf_jeb->wasted_size += waste; + c->wasted_size += waste; + } else + spin_lock(&c->erase_completion_lock); /* Stick any now-obsoleted blocks on the erase_pending_list */ jffs2_refile_wbuf_blocks(c); @@ -603,20 +719,30 @@ int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c) return ret; } -int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsigned long count, loff_t to, size_t *retlen, uint32_t ino) + +static size_t jffs2_fill_wbuf(struct jffs2_sb_info *c, const uint8_t *buf, + size_t len) { - struct kvec outvecs[3]; - uint32_t totlen = 0; - uint32_t split_ofs = 0; - uint32_t old_totlen; - int ret, splitvec = -1; - int invec, outvec; - size_t wbuf_retlen; - unsigned char *wbuf_ptr; - size_t donelen = 0; + if (len && !c->wbuf_len && (len >= c->wbuf_pagesize)) + return 0; + + if (len > (c->wbuf_pagesize - c->wbuf_len)) + len = c->wbuf_pagesize - c->wbuf_len; + memcpy(c->wbuf + c->wbuf_len, buf, len); + c->wbuf_len += (uint32_t) len; + return len; +} + +int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, + unsigned long count, loff_t to, size_t *retlen, + uint32_t ino) +{ + struct jffs2_eraseblock *jeb; + size_t wbuf_retlen, donelen = 0; uint32_t outvec_to = to; + int ret, invec; - /* If not NAND flash, don't bother */ + /* If not writebuffered flash, don't bother */ if (!jffs2_is_writebuffered(c)) return jffs2_flash_direct_writev(c, invecs, count, to, retlen); @@ -629,34 +755,22 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsig memset(c->wbuf,0xff,c->wbuf_pagesize); } - /* Fixup the wbuf if we are moving to a new eraseblock. The checks below - fail for ECC'd NOR because cleanmarker == 16, so a block starts at - xxx0010. */ - if (jffs2_nor_ecc(c)) { - if (((c->wbuf_ofs % c->sector_size) == 0) && !c->wbuf_len) { - c->wbuf_ofs = PAGE_DIV(to); - c->wbuf_len = PAGE_MOD(to); - memset(c->wbuf,0xff,c->wbuf_pagesize); - } - } - - /* Sanity checks on target address. - It's permitted to write at PAD(c->wbuf_len+c->wbuf_ofs), - and it's permitted to write at the beginning of a new - erase block. Anything else, and you die. - New block starts at xxx000c (0-b = block header) - */ + /* + * Sanity checks on target address. It's permitted to write + * at PAD(c->wbuf_len+c->wbuf_ofs), and it's permitted to + * write at the beginning of a new erase block. Anything else, + * and you die. New block starts at xxx000c (0-b = block + * header) + */ if (SECTOR_ADDR(to) != SECTOR_ADDR(c->wbuf_ofs)) { /* It's a write to a new block */ if (c->wbuf_len) { - D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx causes flush of wbuf at 0x%08x\n", (unsigned long)to, c->wbuf_ofs)); + D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx " + "causes flush of wbuf at 0x%08x\n", + (unsigned long)to, c->wbuf_ofs)); ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT); - if (ret) { - /* the underlying layer has to check wbuf_len to do the cleanup */ - D1(printk(KERN_WARNING "jffs2_flush_wbuf() called from jffs2_flash_writev() failed %d\n", ret)); - *retlen = 0; - goto exit; - } + if (ret) + goto outerr; } /* set pointer to new block */ c->wbuf_ofs = PAGE_DIV(to); @@ -665,165 +779,70 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsig if (to != PAD(c->wbuf_ofs + c->wbuf_len)) { /* We're not writing immediately after the writebuffer. Bad. */ - printk(KERN_CRIT "jffs2_flash_writev(): Non-contiguous write to %08lx\n", (unsigned long)to); + printk(KERN_CRIT "jffs2_flash_writev(): Non-contiguous write " + "to %08lx\n", (unsigned long)to); if (c->wbuf_len) printk(KERN_CRIT "wbuf was previously %08x-%08x\n", - c->wbuf_ofs, c->wbuf_ofs+c->wbuf_len); + c->wbuf_ofs, c->wbuf_ofs+c->wbuf_len); BUG(); } - /* Note outvecs[3] above. We know count is never greater than 2 */ - if (count > 2) { - printk(KERN_CRIT "jffs2_flash_writev(): count is %ld\n", count); - BUG(); - } - - invec = 0; - outvec = 0; - - /* Fill writebuffer first, if already in use */ - if (c->wbuf_len) { - uint32_t invec_ofs = 0; - - /* adjust alignment offset */ - if (c->wbuf_len != PAGE_MOD(to)) { - c->wbuf_len = PAGE_MOD(to); - /* take care of alignment to next page */ - if (!c->wbuf_len) - c->wbuf_len = c->wbuf_pagesize; - } - - while(c->wbuf_len < c->wbuf_pagesize) { - uint32_t thislen; - - if (invec == count) - goto alldone; - - thislen = c->wbuf_pagesize - c->wbuf_len; - - if (thislen >= invecs[invec].iov_len) - thislen = invecs[invec].iov_len; - - invec_ofs = thislen; - - memcpy(c->wbuf + c->wbuf_len, invecs[invec].iov_base, thislen); - c->wbuf_len += thislen; - donelen += thislen; - /* Get next invec, if actual did not fill the buffer */ - if (c->wbuf_len < c->wbuf_pagesize) - invec++; - } - - /* write buffer is full, flush buffer */ - ret = __jffs2_flush_wbuf(c, NOPAD); - if (ret) { - /* the underlying layer has to check wbuf_len to do the cleanup */ - D1(printk(KERN_WARNING "jffs2_flush_wbuf() called from jffs2_flash_writev() failed %d\n", ret)); - /* Retlen zero to make sure our caller doesn't mark the space dirty. - We've already done everything that's necessary */ - *retlen = 0; - goto exit; - } - outvec_to += donelen; - c->wbuf_ofs = outvec_to; - - /* All invecs done ? */ - if (invec == count) - goto alldone; - - /* Set up the first outvec, containing the remainder of the - invec we partially used */ - if (invecs[invec].iov_len > invec_ofs) { - outvecs[0].iov_base = invecs[invec].iov_base+invec_ofs; - totlen = outvecs[0].iov_len = invecs[invec].iov_len-invec_ofs; - if (totlen > c->wbuf_pagesize) { - splitvec = outvec; - split_ofs = outvecs[0].iov_len - PAGE_MOD(totlen); - } - outvec++; - } - invec++; - } - - /* OK, now we've flushed the wbuf and the start of the bits - we have been asked to write, now to write the rest.... */ - - /* totlen holds the amount of data still to be written */ - old_totlen = totlen; - for ( ; invec < count; invec++,outvec++ ) { - outvecs[outvec].iov_base = invecs[invec].iov_base; - totlen += outvecs[outvec].iov_len = invecs[invec].iov_len; - if (PAGE_DIV(totlen) != PAGE_DIV(old_totlen)) { - splitvec = outvec; - split_ofs = outvecs[outvec].iov_len - PAGE_MOD(totlen); - old_totlen = totlen; + /* adjust alignment offset */ + if (c->wbuf_len != PAGE_MOD(to)) { + c->wbuf_len = PAGE_MOD(to); + /* take care of alignment to next page */ + if (!c->wbuf_len) { + c->wbuf_len = c->wbuf_pagesize; + ret = __jffs2_flush_wbuf(c, NOPAD); + if (ret) + goto outerr; } } - /* Now the outvecs array holds all the remaining data to write */ - /* Up to splitvec,split_ofs is to be written immediately. The rest - goes into the (now-empty) wbuf */ - - if (splitvec != -1) { - uint32_t remainder; - - remainder = outvecs[splitvec].iov_len - split_ofs; - outvecs[splitvec].iov_len = split_ofs; - - /* We did cross a page boundary, so we write some now */ - if (jffs2_cleanmarker_oob(c)) - ret = c->mtd->writev_ecc(c->mtd, outvecs, splitvec+1, outvec_to, &wbuf_retlen, NULL, c->oobinfo); - else - ret = jffs2_flash_direct_writev(c, outvecs, splitvec+1, outvec_to, &wbuf_retlen); - - if (ret < 0 || wbuf_retlen != PAGE_DIV(totlen)) { - /* At this point we have no problem, - c->wbuf is empty. However refile nextblock to avoid - writing again to same address. - */ - struct jffs2_eraseblock *jeb; + for (invec = 0; invec < count; invec++) { + int vlen = invecs[invec].iov_len; + uint8_t *v = invecs[invec].iov_base; - spin_lock(&c->erase_completion_lock); + wbuf_retlen = jffs2_fill_wbuf(c, v, vlen); - jeb = &c->blocks[outvec_to / c->sector_size]; - jffs2_block_refile(c, jeb, REFILE_ANYWAY); - - *retlen = 0; - spin_unlock(&c->erase_completion_lock); - goto exit; + if (c->wbuf_len == c->wbuf_pagesize) { + ret = __jffs2_flush_wbuf(c, NOPAD); + if (ret) + goto outerr; } - + vlen -= wbuf_retlen; + outvec_to += wbuf_retlen; donelen += wbuf_retlen; - c->wbuf_ofs = PAGE_DIV(outvec_to) + PAGE_DIV(totlen); - - if (remainder) { - outvecs[splitvec].iov_base += split_ofs; - outvecs[splitvec].iov_len = remainder; - } else { - splitvec++; + v += wbuf_retlen; + + if (vlen >= c->wbuf_pagesize) { + ret = c->mtd->write(c->mtd, outvec_to, PAGE_DIV(vlen), + &wbuf_retlen, v); + if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen)) + goto outfile; + + vlen -= wbuf_retlen; + outvec_to += wbuf_retlen; + c->wbuf_ofs = outvec_to; + donelen += wbuf_retlen; + v += wbuf_retlen; } - } else { - splitvec = 0; - } - - /* Now splitvec points to the start of the bits we have to copy - into the wbuf */ - wbuf_ptr = c->wbuf; + wbuf_retlen = jffs2_fill_wbuf(c, v, vlen); + if (c->wbuf_len == c->wbuf_pagesize) { + ret = __jffs2_flush_wbuf(c, NOPAD); + if (ret) + goto outerr; + } - for ( ; splitvec < outvec; splitvec++) { - /* Don't copy the wbuf into itself */ - if (outvecs[splitvec].iov_base == c->wbuf) - continue; - memcpy(wbuf_ptr, outvecs[splitvec].iov_base, outvecs[splitvec].iov_len); - wbuf_ptr += outvecs[splitvec].iov_len; - donelen += outvecs[splitvec].iov_len; + outvec_to += wbuf_retlen; + donelen += wbuf_retlen; } - c->wbuf_len = wbuf_ptr - c->wbuf; - /* If there's a remainder in the wbuf and it's a non-GC write, - remember that the wbuf affects this ino */ -alldone: + /* + * If there's a remainder in the wbuf and it's a non-GC write, + * remember that the wbuf affects this ino + */ *retlen = donelen; if (jffs2_sum_active()) { @@ -836,8 +855,24 @@ alldone: jffs2_wbuf_dirties_inode(c, ino); ret = 0; + up_write(&c->wbuf_sem); + return ret; -exit: +outfile: + /* + * At this point we have no problem, c->wbuf is empty. However + * refile nextblock to avoid writing again to same address. + */ + + spin_lock(&c->erase_completion_lock); + + jeb = &c->blocks[outvec_to / c->sector_size]; + jffs2_block_refile(c, jeb, REFILE_ANYWAY); + + spin_unlock(&c->erase_completion_lock); + +outerr: + *retlen = 0; up_write(&c->wbuf_sem); return ret; } @@ -846,7 +881,8 @@ exit: * This is the entry for flash write. * Check, if we work on NAND FLASH, if so build an kvec and write it via vritev */ -int jffs2_flash_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *retlen, const u_char *buf) +int jffs2_flash_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, + size_t *retlen, const u_char *buf) { struct kvec vecs[1]; @@ -871,25 +907,23 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re /* Read flash */ down_read(&c->wbuf_sem); - if (jffs2_cleanmarker_oob(c)) - ret = c->mtd->read_ecc(c->mtd, ofs, len, retlen, buf, NULL, c->oobinfo); - else - ret = c->mtd->read(c->mtd, ofs, len, retlen, buf); - - if ( (ret == -EBADMSG) && (*retlen == len) ) { - printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx) returned ECC error\n", - len, ofs); + ret = c->mtd->read(c->mtd, ofs, len, retlen, buf); + + if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) { + if (ret == -EBADMSG) + printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx)" + " returned ECC error\n", len, ofs); /* - * We have the raw data without ECC correction in the buffer, maybe - * we are lucky and all data or parts are correct. We check the node. - * If data are corrupted node check will sort it out. - * We keep this block, it will fail on write or erase and the we - * mark it bad. Or should we do that now? But we should give him a chance. - * Maybe we had a system crash or power loss before the ecc write or - * a erase was completed. + * We have the raw data without ECC correction in the buffer, + * maybe we are lucky and all data or parts are correct. We + * check the node. If data are corrupted node check will sort + * it out. We keep this block, it will fail on write or erase + * and the we mark it bad. Or should we do that now? But we + * should give him a chance. Maybe we had a system crash or + * power loss before the ecc write or a erase was completed. * So we return success. :) */ - ret = 0; + ret = 0; } /* if no writebuffer available or write buffer empty, return */ @@ -911,7 +945,7 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re orbf = (c->wbuf_ofs - ofs); /* offset in read buffer */ if (orbf > len) /* is write beyond write buffer ? */ goto exit; - lwbf = len - orbf; /* number of bytes to copy */ + lwbf = len - orbf; /* number of bytes to copy */ if (lwbf > c->wbuf_len) lwbf = c->wbuf_len; } @@ -923,158 +957,159 @@ exit: return ret; } +#define NR_OOB_SCAN_PAGES 4 + /* - * Check, if the out of band area is empty + * Check, if the out of band area is empty */ -int jffs2_check_oob_empty( struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, int mode) +int jffs2_check_oob_empty(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb, int mode) { - unsigned char *buf; - int ret = 0; - int i,len,page; - size_t retlen; - int oob_size; - - /* allocate a buffer for all oob data in this sector */ - oob_size = c->mtd->oobsize; - len = 4 * oob_size; - buf = kmalloc(len, GFP_KERNEL); - if (!buf) { - printk(KERN_NOTICE "jffs2_check_oob_empty(): allocation of temporary data buffer for oob check failed\n"); - return -ENOMEM; - } - /* - * if mode = 0, we scan for a total empty oob area, else we have - * to take care of the cleanmarker in the first page of the block - */ - ret = jffs2_flash_read_oob(c, jeb->offset, len , &retlen, buf); + int i, page, ret; + int oobsize = c->mtd->oobsize; + struct mtd_oob_ops ops; + + ops.len = NR_OOB_SCAN_PAGES * oobsize; + ops.ooblen = oobsize; + ops.oobbuf = c->oobbuf; + ops.ooboffs = 0; + ops.datbuf = NULL; + ops.mode = MTD_OOB_PLACE; + + ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); if (ret) { - D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB failed %d for block at %08x\n", ret, jeb->offset)); - goto out; + D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB " + "failed %d for block at %08x\n", ret, jeb->offset)); + return ret; } - if (retlen < len) { - D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB return short read " - "(%zd bytes not %d) for block at %08x\n", retlen, len, jeb->offset)); - ret = -EIO; - goto out; + if (ops.retlen < ops.len) { + D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB " + "returned short read (%zd bytes not %d) for block " + "at %08x\n", ops.retlen, ops.len, jeb->offset)); + return -EIO; } /* Special check for first page */ - for(i = 0; i < oob_size ; i++) { + for(i = 0; i < oobsize ; i++) { /* Yeah, we know about the cleanmarker. */ if (mode && i >= c->fsdata_pos && i < c->fsdata_pos + c->fsdata_len) continue; - if (buf[i] != 0xFF) { - D2(printk(KERN_DEBUG "Found %02x at %x in OOB for %08x\n", - buf[i], i, jeb->offset)); - ret = 1; - goto out; + if (ops.oobbuf[i] != 0xFF) { + D2(printk(KERN_DEBUG "Found %02x at %x in OOB for " + "%08x\n", ops.oobbuf[i], i, jeb->offset)); + return 1; } } /* we know, we are aligned :) */ - for (page = oob_size; page < len; page += sizeof(long)) { - unsigned long dat = *(unsigned long *)(&buf[page]); - if(dat != -1) { - ret = 1; - goto out; - } + for (page = oobsize; page < ops.len; page += sizeof(long)) { + long dat = *(long *)(&ops.oobbuf[page]); + if(dat != -1) + return 1; } - -out: - kfree(buf); - - return ret; + return 0; } /* -* Scan for a valid cleanmarker and for bad blocks -* For virtual blocks (concatenated physical blocks) check the cleanmarker -* only in the first page of the first physical block, but scan for bad blocks in all -* physical blocks -*/ -int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) + * Scan for a valid cleanmarker and for bad blocks + */ +int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb) { struct jffs2_unknown_node n; - unsigned char buf[2 * NAND_MAX_OOBSIZE]; - unsigned char *p; - int ret, i, cnt, retval = 0; - size_t retlen, offset; - int oob_size; - - offset = jeb->offset; - oob_size = c->mtd->oobsize; - - /* Loop through the physical blocks */ - for (cnt = 0; cnt < (c->sector_size / c->mtd->erasesize); cnt++) { - /* Check first if the block is bad. */ - if (c->mtd->block_isbad (c->mtd, offset)) { - D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Bad block at %08x\n", jeb->offset)); - return 2; - } - /* - * We read oob data from page 0 and 1 of the block. - * page 0 contains cleanmarker and badblock info - * page 1 contains failure count of this block - */ - ret = c->mtd->read_oob (c->mtd, offset, oob_size << 1, &retlen, buf); + struct mtd_oob_ops ops; + int oobsize = c->mtd->oobsize; + unsigned char *p,*b; + int i, ret; + size_t offset = jeb->offset; + + /* Check first if the block is bad. */ + if (c->mtd->block_isbad(c->mtd, offset)) { + D1 (printk(KERN_WARNING "jffs2_check_nand_cleanmarker()" + ": Bad block at %08x\n", jeb->offset)); + return 2; + } - if (ret) { - D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Read OOB failed %d for block at %08x\n", ret, jeb->offset)); - return ret; - } - if (retlen < (oob_size << 1)) { - D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Read OOB return short read (%zd bytes not %d) for block at %08x\n", retlen, oob_size << 1, jeb->offset)); - return -EIO; - } + ops.len = oobsize; + ops.ooblen = oobsize; + ops.oobbuf = c->oobbuf; + ops.ooboffs = 0; + ops.datbuf = NULL; + ops.mode = MTD_OOB_PLACE; - /* Check cleanmarker only on the first physical block */ - if (!cnt) { - n.magic = cpu_to_je16 (JFFS2_MAGIC_BITMASK); - n.nodetype = cpu_to_je16 (JFFS2_NODETYPE_CLEANMARKER); - n.totlen = cpu_to_je32 (8); - p = (unsigned char *) &n; + ret = c->mtd->read_oob(c->mtd, offset, &ops); + if (ret) { + D1 (printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): " + "Read OOB failed %d for block at %08x\n", + ret, jeb->offset)); + return ret; + } - for (i = 0; i < c->fsdata_len; i++) { - if (buf[c->fsdata_pos + i] != p[i]) { - retval = 1; - } - } - D1(if (retval == 1) { - printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): Cleanmarker node not detected in block at %08x\n", jeb->offset); - printk(KERN_WARNING "OOB at %08x was ", offset); - for (i=0; i < oob_size; i++) { - printk("%02x ", buf[i]); - } - printk("\n"); - }) - } - offset += c->mtd->erasesize; + if (ops.retlen < ops.len) { + D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): " + "Read OOB return short read (%zd bytes not %d) " + "for block at %08x\n", ops.retlen, ops.len, + jeb->offset)); + return -EIO; } - return retval; + + n.magic = cpu_to_je16 (JFFS2_MAGIC_BITMASK); + n.nodetype = cpu_to_je16 (JFFS2_NODETYPE_CLEANMARKER); + n.totlen = cpu_to_je32 (8); + p = (unsigned char *) &n; + b = c->oobbuf + c->fsdata_pos; + + for (i = c->fsdata_len; i; i--) { + if (*b++ != *p++) + ret = 1; + } + + D1(if (ret == 1) { + printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): " + "Cleanmarker node not detected in block at %08x\n", + offset); + printk(KERN_WARNING "OOB at %08zx was ", offset); + for (i=0; i < oobsize; i++) + printk("%02x ", c->oobbuf[i]); + printk("\n"); + }); + return ret; } -int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) +int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, + struct jffs2_eraseblock *jeb) { - struct jffs2_unknown_node n; - int ret; - size_t retlen; + struct jffs2_unknown_node n; + int ret; + struct mtd_oob_ops ops; n.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); n.nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER); n.totlen = cpu_to_je32(8); - ret = jffs2_flash_write_oob(c, jeb->offset + c->fsdata_pos, c->fsdata_len, &retlen, (unsigned char *)&n); + ops.len = c->fsdata_len; + ops.ooblen = c->fsdata_len;; + ops.oobbuf = (uint8_t *)&n; + ops.ooboffs = c->fsdata_pos; + ops.datbuf = NULL; + ops.mode = MTD_OOB_PLACE; + + ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops); if (ret) { - D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Write failed for block at %08x: error %d\n", jeb->offset, ret)); + D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): " + "Write failed for block at %08x: error %d\n", + jeb->offset, ret)); return ret; } - if (retlen != c->fsdata_len) { - D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Short write for block at %08x: %zd not %d\n", jeb->offset, retlen, c->fsdata_len)); - return ret; + if (ops.retlen != ops.len) { + D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): " + "Short write for block at %08x: %zd not %d\n", + jeb->offset, ops.retlen, ops.len)); + return -EIO; } return 0; } @@ -1108,18 +1143,9 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock * return 1; } -#define NAND_JFFS2_OOB16_FSDALEN 8 - -static struct nand_oobinfo jffs2_oobinfo_docecc = { - .useecc = MTD_NANDECC_PLACE, - .eccbytes = 6, - .eccpos = {0,1,2,3,4,5} -}; - - static int jffs2_nand_set_oobinfo(struct jffs2_sb_info *c) { - struct nand_oobinfo *oinfo = &c->mtd->oobinfo; + struct nand_ecclayout *oinfo = c->mtd->ecclayout; /* Do this only, if we have an oob buffer */ if (!c->mtd->oobsize) @@ -1129,33 +1155,23 @@ static int jffs2_nand_set_oobinfo(struct jffs2_sb_info *c) c->cleanmarker_size = 0; /* Should we use autoplacement ? */ - if (oinfo && oinfo->useecc == MTD_NANDECC_AUTOPLACE) { - D1(printk(KERN_DEBUG "JFFS2 using autoplace on NAND\n")); - /* Get the position of the free bytes */ - if (!oinfo->oobfree[0][1]) { - printk (KERN_WARNING "jffs2_nand_set_oobinfo(): Eeep. Autoplacement selected and no empty space in oob\n"); - return -ENOSPC; - } - c->fsdata_pos = oinfo->oobfree[0][0]; - c->fsdata_len = oinfo->oobfree[0][1]; - if (c->fsdata_len > 8) - c->fsdata_len = 8; - } else { - /* This is just a legacy fallback and should go away soon */ - switch(c->mtd->ecctype) { - case MTD_ECC_RS_DiskOnChip: - printk(KERN_WARNING "JFFS2 using DiskOnChip hardware ECC without autoplacement. Fix it!\n"); - c->oobinfo = &jffs2_oobinfo_docecc; - c->fsdata_pos = 6; - c->fsdata_len = NAND_JFFS2_OOB16_FSDALEN; - c->badblock_pos = 15; - break; + if (!oinfo) { + D1(printk(KERN_DEBUG "JFFS2 on NAND. No autoplacment info found\n")); + return -EINVAL; + } - default: - D1(printk(KERN_DEBUG "JFFS2 on NAND. No autoplacment info found\n")); - return -EINVAL; - } + D1(printk(KERN_DEBUG "JFFS2 using autoplace on NAND\n")); + /* Get the position of the free bytes */ + if (!oinfo->oobfree[0].length) { + printk (KERN_WARNING "jffs2_nand_set_oobinfo(): Eeep." + " Autoplacement selected and no empty space in oob\n"); + return -ENOSPC; } + c->fsdata_pos = oinfo->oobfree[0].offset; + c->fsdata_len = oinfo->oobfree[0].length; + if (c->fsdata_len > 8) + c->fsdata_len = 8; + return 0; } @@ -1165,13 +1181,17 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) /* Initialise write buffer */ init_rwsem(&c->wbuf_sem); - c->wbuf_pagesize = c->mtd->oobblock; + c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); if (!c->wbuf) return -ENOMEM; + c->oobbuf = kmalloc(NR_OOB_SCAN_PAGES * c->mtd->oobsize, GFP_KERNEL); + if (!c->oobbuf) + return -ENOMEM; + res = jffs2_nand_set_oobinfo(c); #ifdef BREAKME @@ -1189,6 +1209,7 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c) { kfree(c->wbuf); + kfree(c->oobbuf); } int jffs2_dataflash_setup(struct jffs2_sb_info *c) { @@ -1236,33 +1257,14 @@ void jffs2_dataflash_cleanup(struct jffs2_sb_info *c) { kfree(c->wbuf); } -int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c) { - /* Cleanmarker is actually larger on the flashes */ - c->cleanmarker_size = 16; - - /* Initialize write buffer */ - init_rwsem(&c->wbuf_sem); - c->wbuf_pagesize = c->mtd->eccsize; - c->wbuf_ofs = 0xFFFFFFFF; - - c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); - if (!c->wbuf) - return -ENOMEM; - - return 0; -} - -void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c) { - kfree(c->wbuf); -} - int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { - /* Cleanmarker currently occupies a whole programming region */ - c->cleanmarker_size = MTD_PROGREGION_SIZE(c->mtd); + /* Cleanmarker currently occupies whole programming regions, + * either one or 2 for 8Byte STMicro flashes. */ + c->cleanmarker_size = max(16u, c->mtd->writesize); /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - c->wbuf_pagesize = MTD_PROGREGION_SIZE(c->mtd); + c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c index 1342f0158e9b..67176792e138 100644 --- a/fs/jffs2/write.c +++ b/fs/jffs2/write.c @@ -37,7 +37,6 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; f->inocache->state = INO_STATE_PRESENT; - jffs2_add_ino_cache(c, f->inocache); D1(printk(KERN_DEBUG "jffs2_do_new_inode(): Assigned ino# %d\n", f->inocache->ino)); ri->ino = cpu_to_je32(f->inocache->ino); @@ -57,12 +56,14 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint /* jffs2_write_dnode - given a raw_inode, allocate a full_dnode for it, write it to the flash, link it into the existing inode/fragment list */ -struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const unsigned char *data, uint32_t datalen, uint32_t flash_ofs, int alloc_mode) +struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, + struct jffs2_raw_inode *ri, const unsigned char *data, + uint32_t datalen, int alloc_mode) { - struct jffs2_raw_node_ref *raw; struct jffs2_full_dnode *fn; size_t retlen; + uint32_t flash_ofs; struct kvec vecs[2]; int ret; int retried = 0; @@ -78,34 +79,21 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 vecs[1].iov_base = (unsigned char *)data; vecs[1].iov_len = datalen; - jffs2_dbg_prewrite_paranoia_check(c, flash_ofs, vecs[0].iov_len + vecs[1].iov_len); - if (je32_to_cpu(ri->totlen) != sizeof(*ri) + datalen) { printk(KERN_WARNING "jffs2_write_dnode: ri->totlen (0x%08x) != sizeof(*ri) (0x%08zx) + datalen (0x%08x)\n", je32_to_cpu(ri->totlen), sizeof(*ri), datalen); } - raw = jffs2_alloc_raw_node_ref(); - if (!raw) - return ERR_PTR(-ENOMEM); fn = jffs2_alloc_full_dnode(); - if (!fn) { - jffs2_free_raw_node_ref(raw); + if (!fn) return ERR_PTR(-ENOMEM); - } - - fn->ofs = je32_to_cpu(ri->offset); - fn->size = je32_to_cpu(ri->dsize); - fn->frags = 0; /* check number of valid vecs */ if (!datalen || !data) cnt = 1; retry: - fn->raw = raw; + flash_ofs = write_ofs(c); - raw->flash_offset = flash_ofs; - raw->__totlen = PAD(sizeof(*ri)+datalen); - raw->next_phys = NULL; + jffs2_dbg_prewrite_paranoia_check(c, flash_ofs, vecs[0].iov_len + vecs[1].iov_len); if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(ri->version) < f->highest_version)) { BUG_ON(!retried); @@ -125,22 +113,16 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 /* Mark the space as dirtied */ if (retlen) { - /* Doesn't belong to any inode */ - raw->next_in_ino = NULL; - /* Don't change raw->size to match retlen. We may have written the node header already, and only the data will seem corrupted, in which case the scan would skip over any node we write before the original intended end of this node */ - raw->flash_offset |= REF_OBSOLETE; - jffs2_add_physical_node_ref(c, raw); - jffs2_mark_node_obsolete(c, raw); + jffs2_add_physical_node_ref(c, flash_ofs | REF_OBSOLETE, PAD(sizeof(*ri)+datalen), NULL); } else { - printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", raw->flash_offset); - jffs2_free_raw_node_ref(raw); + printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", flash_ofs); } - if (!retried && alloc_mode != ALLOC_NORETRY && (raw = jffs2_alloc_raw_node_ref())) { + if (!retried && alloc_mode != ALLOC_NORETRY) { /* Try to reallocate space and retry */ uint32_t dummy; struct jffs2_eraseblock *jeb = &c->blocks[flash_ofs / c->sector_size]; @@ -153,19 +135,20 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 jffs2_dbg_acct_paranoia_check(c, jeb); if (alloc_mode == ALLOC_GC) { - ret = jffs2_reserve_space_gc(c, sizeof(*ri) + datalen, &flash_ofs, - &dummy, JFFS2_SUMMARY_INODE_SIZE); + ret = jffs2_reserve_space_gc(c, sizeof(*ri) + datalen, &dummy, + JFFS2_SUMMARY_INODE_SIZE); } else { /* Locking pain */ up(&f->sem); jffs2_complete_reservation(c); - ret = jffs2_reserve_space(c, sizeof(*ri) + datalen, &flash_ofs, - &dummy, alloc_mode, JFFS2_SUMMARY_INODE_SIZE); + ret = jffs2_reserve_space(c, sizeof(*ri) + datalen, &dummy, + alloc_mode, JFFS2_SUMMARY_INODE_SIZE); down(&f->sem); } if (!ret) { + flash_ofs = write_ofs(c); D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", flash_ofs)); jffs2_dbg_acct_sanity_check(c,jeb); @@ -174,7 +157,6 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 goto retry; } D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); - jffs2_free_raw_node_ref(raw); } /* Release the full_dnode which is now useless, and return */ jffs2_free_full_dnode(fn); @@ -188,20 +170,17 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 if ((je32_to_cpu(ri->dsize) >= PAGE_CACHE_SIZE) || ( ((je32_to_cpu(ri->offset)&(PAGE_CACHE_SIZE-1))==0) && (je32_to_cpu(ri->dsize)+je32_to_cpu(ri->offset) == je32_to_cpu(ri->isize)))) { - raw->flash_offset |= REF_PRISTINE; + flash_ofs |= REF_PRISTINE; } else { - raw->flash_offset |= REF_NORMAL; + flash_ofs |= REF_NORMAL; } - jffs2_add_physical_node_ref(c, raw); - - /* Link into per-inode list */ - spin_lock(&c->erase_completion_lock); - raw->next_in_ino = f->inocache->nodes; - f->inocache->nodes = raw; - spin_unlock(&c->erase_completion_lock); + fn->raw = jffs2_add_physical_node_ref(c, flash_ofs, PAD(sizeof(*ri)+datalen), f->inocache); + fn->ofs = je32_to_cpu(ri->offset); + fn->size = je32_to_cpu(ri->dsize); + fn->frags = 0; D1(printk(KERN_DEBUG "jffs2_write_dnode wrote node at 0x%08x(%d) with dsize 0x%x, csize 0x%x, node_crc 0x%08x, data_crc 0x%08x, totlen 0x%08x\n", - flash_ofs, ref_flags(raw), je32_to_cpu(ri->dsize), + flash_ofs & ~3, flash_ofs & 3, je32_to_cpu(ri->dsize), je32_to_cpu(ri->csize), je32_to_cpu(ri->node_crc), je32_to_cpu(ri->data_crc), je32_to_cpu(ri->totlen))); @@ -212,12 +191,14 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 return fn; } -struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_raw_dirent *rd, const unsigned char *name, uint32_t namelen, uint32_t flash_ofs, int alloc_mode) +struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jffs2_inode_info *f, + struct jffs2_raw_dirent *rd, const unsigned char *name, + uint32_t namelen, int alloc_mode) { - struct jffs2_raw_node_ref *raw; struct jffs2_full_dirent *fd; size_t retlen; struct kvec vecs[2]; + uint32_t flash_ofs; int retried = 0; int ret; @@ -228,26 +209,16 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff D1(if(je32_to_cpu(rd->hdr_crc) != crc32(0, rd, sizeof(struct jffs2_unknown_node)-4)) { printk(KERN_CRIT "Eep. CRC not correct in jffs2_write_dirent()\n"); BUG(); - } - ); + }); vecs[0].iov_base = rd; vecs[0].iov_len = sizeof(*rd); vecs[1].iov_base = (unsigned char *)name; vecs[1].iov_len = namelen; - jffs2_dbg_prewrite_paranoia_check(c, flash_ofs, vecs[0].iov_len + vecs[1].iov_len); - - raw = jffs2_alloc_raw_node_ref(); - - if (!raw) - return ERR_PTR(-ENOMEM); - fd = jffs2_alloc_full_dirent(namelen+1); - if (!fd) { - jffs2_free_raw_node_ref(raw); + if (!fd) return ERR_PTR(-ENOMEM); - } fd->version = je32_to_cpu(rd->version); fd->ino = je32_to_cpu(rd->ino); @@ -257,11 +228,9 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff fd->name[namelen]=0; retry: - fd->raw = raw; + flash_ofs = write_ofs(c); - raw->flash_offset = flash_ofs; - raw->__totlen = PAD(sizeof(*rd)+namelen); - raw->next_phys = NULL; + jffs2_dbg_prewrite_paranoia_check(c, flash_ofs, vecs[0].iov_len + vecs[1].iov_len); if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(rd->version) < f->highest_version)) { BUG_ON(!retried); @@ -280,15 +249,11 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff sizeof(*rd)+namelen, flash_ofs, ret, retlen); /* Mark the space as dirtied */ if (retlen) { - raw->next_in_ino = NULL; - raw->flash_offset |= REF_OBSOLETE; - jffs2_add_physical_node_ref(c, raw); - jffs2_mark_node_obsolete(c, raw); + jffs2_add_physical_node_ref(c, flash_ofs | REF_OBSOLETE, PAD(sizeof(*rd)+namelen), NULL); } else { - printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", raw->flash_offset); - jffs2_free_raw_node_ref(raw); + printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", flash_ofs); } - if (!retried && (raw = jffs2_alloc_raw_node_ref())) { + if (!retried) { /* Try to reallocate space and retry */ uint32_t dummy; struct jffs2_eraseblock *jeb = &c->blocks[flash_ofs / c->sector_size]; @@ -301,39 +266,33 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff jffs2_dbg_acct_paranoia_check(c, jeb); if (alloc_mode == ALLOC_GC) { - ret = jffs2_reserve_space_gc(c, sizeof(*rd) + namelen, &flash_ofs, - &dummy, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); + ret = jffs2_reserve_space_gc(c, sizeof(*rd) + namelen, &dummy, + JFFS2_SUMMARY_DIRENT_SIZE(namelen)); } else { /* Locking pain */ up(&f->sem); jffs2_complete_reservation(c); - ret = jffs2_reserve_space(c, sizeof(*rd) + namelen, &flash_ofs, - &dummy, alloc_mode, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); + ret = jffs2_reserve_space(c, sizeof(*rd) + namelen, &dummy, + alloc_mode, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); down(&f->sem); } if (!ret) { + flash_ofs = write_ofs(c); D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", flash_ofs)); jffs2_dbg_acct_sanity_check(c,jeb); jffs2_dbg_acct_paranoia_check(c, jeb); goto retry; } D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); - jffs2_free_raw_node_ref(raw); } /* Release the full_dnode which is now useless, and return */ jffs2_free_full_dirent(fd); return ERR_PTR(ret?ret:-EIO); } /* Mark the space used */ - raw->flash_offset |= REF_PRISTINE; - jffs2_add_physical_node_ref(c, raw); - - spin_lock(&c->erase_completion_lock); - raw->next_in_ino = f->inocache->nodes; - f->inocache->nodes = raw; - spin_unlock(&c->erase_completion_lock); + fd->raw = jffs2_add_physical_node_ref(c, flash_ofs | REF_PRISTINE, PAD(sizeof(*rd)+namelen), f->inocache); if (retried) { jffs2_dbg_acct_sanity_check(c,NULL); @@ -359,14 +318,14 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn; unsigned char *comprbuf = NULL; uint16_t comprtype = JFFS2_COMPR_NONE; - uint32_t phys_ofs, alloclen; + uint32_t alloclen; uint32_t datalen, cdatalen; int retried = 0; retry: D2(printk(KERN_DEBUG "jffs2_commit_write() loop: 0x%x to write to 0x%x\n", writelen, offset)); - ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, + ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) { D1(printk(KERN_DEBUG "jffs2_reserve_space returned %d\n", ret)); @@ -394,7 +353,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f, ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); ri->data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen)); - fn = jffs2_write_dnode(c, f, ri, comprbuf, cdatalen, phys_ofs, ALLOC_NORETRY); + fn = jffs2_write_dnode(c, f, ri, comprbuf, cdatalen, ALLOC_NORETRY); jffs2_free_comprbuf(comprbuf, buf); @@ -448,13 +407,13 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str struct jffs2_raw_dirent *rd; struct jffs2_full_dnode *fn; struct jffs2_full_dirent *fd; - uint32_t alloclen, phys_ofs; + uint32_t alloclen; int ret; /* Try to reserve enough space for both node and dirent. * Just the node will do for now, though */ - ret = jffs2_reserve_space(c, sizeof(*ri), &phys_ofs, &alloclen, ALLOC_NORMAL, + ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); if (ret) { @@ -465,7 +424,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str ri->data_crc = cpu_to_je32(0); ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); - fn = jffs2_write_dnode(c, f, ri, NULL, 0, phys_ofs, ALLOC_NORMAL); + fn = jffs2_write_dnode(c, f, ri, NULL, 0, ALLOC_NORMAL); D1(printk(KERN_DEBUG "jffs2_do_create created file with mode 0x%x\n", jemode_to_cpu(ri->mode))); @@ -484,7 +443,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str up(&f->sem); jffs2_complete_reservation(c); - ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, + ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); if (ret) { @@ -516,7 +475,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); - fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, phys_ofs, ALLOC_NORMAL); + fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL); jffs2_free_raw_dirent(rd); @@ -545,7 +504,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, { struct jffs2_raw_dirent *rd; struct jffs2_full_dirent *fd; - uint32_t alloclen, phys_ofs; + uint32_t alloclen; int ret; if (1 /* alternative branch needs testing */ || @@ -556,7 +515,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, if (!rd) return -ENOMEM; - ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, + ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, ALLOC_DELETION, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); if (ret) { jffs2_free_raw_dirent(rd); @@ -580,7 +539,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); - fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, phys_ofs, ALLOC_DELETION); + fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_DELETION); jffs2_free_raw_dirent(rd); @@ -659,14 +618,14 @@ int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint { struct jffs2_raw_dirent *rd; struct jffs2_full_dirent *fd; - uint32_t alloclen, phys_ofs; + uint32_t alloclen; int ret; rd = jffs2_alloc_raw_dirent(); if (!rd) return -ENOMEM; - ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &phys_ofs, &alloclen, + ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); if (ret) { jffs2_free_raw_dirent(rd); @@ -692,7 +651,7 @@ int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); - fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, phys_ofs, ALLOC_NORMAL); + fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL); jffs2_free_raw_dirent(rd); diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c new file mode 100644 index 000000000000..2d82e250be34 --- /dev/null +++ b/fs/jffs2/xattr.c @@ -0,0 +1,1238 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2006 NEC Corporation + * + * Created by KaiGai Kohei <kaigai@ak.jp.nec.com> + * + * For licensing information, see the file 'LICENCE' in this directory. + * + */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/crc32.h> +#include <linux/jffs2.h> +#include <linux/xattr.h> +#include <linux/mtd/mtd.h> +#include "nodelist.h" +/* -------- xdatum related functions ---------------- + * xattr_datum_hashkey(xprefix, xname, xvalue, xsize) + * is used to calcurate xdatum hashkey. The reminder of hashkey into XATTRINDEX_HASHSIZE is + * the index of the xattr name/value pair cache (c->xattrindex). + * unload_xattr_datum(c, xd) + * is used to release xattr name/value pair and detach from c->xattrindex. + * reclaim_xattr_datum(c) + * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when + * memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold + * is hard coded as 32KiB. + * delete_xattr_datum_node(c, xd) + * is used to delete a jffs2 node is dominated by xdatum. When EBS(Erase Block Summary) is + * enabled, it overwrites the obsolete node by myself. + * delete_xattr_datum(c, xd) + * is used to delete jffs2_xattr_datum object. It must be called with 0-value of reference + * counter. (It means how many jffs2_xattr_ref object refers this xdatum.) + * do_verify_xattr_datum(c, xd) + * is used to load the xdatum informations without name/value pair from the medium. + * It's necessary once, because those informations are not collected during mounting + * process when EBS is enabled. + * 0 will be returned, if success. An negative return value means recoverable error, and + * positive return value means unrecoverable error. Thus, caller must remove this xdatum + * and xref when it returned positive value. + * do_load_xattr_datum(c, xd) + * is used to load name/value pair from the medium. + * The meanings of return value is same as do_verify_xattr_datum(). + * load_xattr_datum(c, xd) + * is used to be as a wrapper of do_verify_xattr_datum() and do_load_xattr_datum(). + * If xd need to call do_verify_xattr_datum() at first, it's called before calling + * do_load_xattr_datum(). The meanings of return value is same as do_verify_xattr_datum(). + * save_xattr_datum(c, xd) + * is used to write xdatum to medium. xd->version will be incremented. + * create_xattr_datum(c, xprefix, xname, xvalue, xsize) + * is used to create new xdatum and write to medium. + * -------------------------------------------------- */ + +static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize) +{ + int name_len = strlen(xname); + + return crc32(xprefix, xname, name_len) ^ crc32(xprefix, xvalue, xsize); +} + +static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem) */ + D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version)); + if (xd->xname) { + c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len); + kfree(xd->xname); + } + + list_del_init(&xd->xindex); + xd->hashkey = 0; + xd->xname = NULL; + xd->xvalue = NULL; +} + +static void reclaim_xattr_datum(struct jffs2_sb_info *c) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_xattr_datum *xd, *_xd; + uint32_t target, before; + static int index = 0; + int count; + + if (c->xdatum_mem_threshold > c->xdatum_mem_usage) + return; + + before = c->xdatum_mem_usage; + target = c->xdatum_mem_usage * 4 / 5; /* 20% reduction */ + for (count = 0; count < XATTRINDEX_HASHSIZE; count++) { + list_for_each_entry_safe(xd, _xd, &c->xattrindex[index], xindex) { + if (xd->flags & JFFS2_XFLAGS_HOT) { + xd->flags &= ~JFFS2_XFLAGS_HOT; + } else if (!(xd->flags & JFFS2_XFLAGS_BIND)) { + unload_xattr_datum(c, xd); + } + if (c->xdatum_mem_usage <= target) + goto out; + } + index = (index+1) % XATTRINDEX_HASHSIZE; + } + out: + JFFS2_NOTICE("xdatum_mem_usage from %u byte to %u byte (%u byte reclaimed)\n", + before, c->xdatum_mem_usage, before - c->xdatum_mem_usage); +} + +static void delete_xattr_datum_node(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_raw_xattr rx; + size_t length; + int rc; + + if (!xd->node) { + JFFS2_WARNING("xdatum (xid=%u) is removed twice.\n", xd->xid); + return; + } + if (jffs2_sum_active()) { + memset(&rx, 0xff, sizeof(struct jffs2_raw_xattr)); + rc = jffs2_flash_read(c, ref_offset(xd->node), + sizeof(struct jffs2_unknown_node), + &length, (char *)&rx); + if (rc || length != sizeof(struct jffs2_unknown_node)) { + JFFS2_ERROR("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n", + rc, sizeof(struct jffs2_unknown_node), + length, ref_offset(xd->node)); + } + rc = jffs2_flash_write(c, ref_offset(xd->node), sizeof(rx), + &length, (char *)&rx); + if (rc || length != sizeof(struct jffs2_raw_xattr)) { + JFFS2_ERROR("jffs2_flash_write()=%d, req=%zu, wrote=%zu ar %#08x\n", + rc, sizeof(rx), length, ref_offset(xd->node)); + } + } + spin_lock(&c->erase_completion_lock); + xd->node->next_in_ino = NULL; + spin_unlock(&c->erase_completion_lock); + jffs2_mark_node_obsolete(c, xd->node); + xd->node = NULL; +} + +static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem) */ + BUG_ON(xd->refcnt); + + unload_xattr_datum(c, xd); + if (xd->node) { + delete_xattr_datum_node(c, xd); + xd->node = NULL; + } + jffs2_free_xattr_datum(xd); +} + +static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_eraseblock *jeb; + struct jffs2_raw_xattr rx; + size_t readlen; + uint32_t crc, totlen; + int rc; + + BUG_ON(!xd->node); + BUG_ON(ref_flags(xd->node) != REF_UNCHECKED); + + rc = jffs2_flash_read(c, ref_offset(xd->node), sizeof(rx), &readlen, (char *)&rx); + if (rc || readlen != sizeof(rx)) { + JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n", + rc, sizeof(rx), readlen, ref_offset(xd->node)); + return rc ? rc : -EIO; + } + crc = crc32(0, &rx, sizeof(rx) - 4); + if (crc != je32_to_cpu(rx.node_crc)) { + if (je32_to_cpu(rx.node_crc) != 0xffffffff) + JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", + ref_offset(xd->node), je32_to_cpu(rx.hdr_crc), crc); + return EIO; + } + totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len)); + if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK + || je16_to_cpu(rx.nodetype) != JFFS2_NODETYPE_XATTR + || je32_to_cpu(rx.totlen) != totlen + || je32_to_cpu(rx.xid) != xd->xid + || je32_to_cpu(rx.version) != xd->version) { + JFFS2_ERROR("inconsistent xdatum at %#08x, magic=%#04x/%#04x, " + "nodetype=%#04x/%#04x, totlen=%u/%u, xid=%u/%u, version=%u/%u\n", + ref_offset(xd->node), je16_to_cpu(rx.magic), JFFS2_MAGIC_BITMASK, + je16_to_cpu(rx.nodetype), JFFS2_NODETYPE_XATTR, + je32_to_cpu(rx.totlen), totlen, + je32_to_cpu(rx.xid), xd->xid, + je32_to_cpu(rx.version), xd->version); + return EIO; + } + xd->xprefix = rx.xprefix; + xd->name_len = rx.name_len; + xd->value_len = je16_to_cpu(rx.value_len); + xd->data_crc = je32_to_cpu(rx.data_crc); + + /* This JFFS2_NODETYPE_XATTR node is checked */ + jeb = &c->blocks[ref_offset(xd->node) / c->sector_size]; + totlen = PAD(je32_to_cpu(rx.totlen)); + + spin_lock(&c->erase_completion_lock); + c->unchecked_size -= totlen; c->used_size += totlen; + jeb->unchecked_size -= totlen; jeb->used_size += totlen; + xd->node->flash_offset = ref_offset(xd->node) | REF_PRISTINE; + spin_unlock(&c->erase_completion_lock); + + /* unchecked xdatum is chained with c->xattr_unchecked */ + list_del_init(&xd->xindex); + + dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", + xd->xid, xd->version); + + return 0; +} + +static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem) */ + char *data; + size_t readlen; + uint32_t crc, length; + int i, ret, retry = 0; + + BUG_ON(!xd->node); + BUG_ON(ref_flags(xd->node) != REF_PRISTINE); + BUG_ON(!list_empty(&xd->xindex)); + retry: + length = xd->name_len + 1 + xd->value_len; + data = kmalloc(length, GFP_KERNEL); + if (!data) + return -ENOMEM; + + ret = jffs2_flash_read(c, ref_offset(xd->node)+sizeof(struct jffs2_raw_xattr), + length, &readlen, data); + + if (ret || length!=readlen) { + JFFS2_WARNING("jffs2_flash_read() returned %d, request=%d, readlen=%zu, at %#08x\n", + ret, length, readlen, ref_offset(xd->node)); + kfree(data); + return ret ? ret : -EIO; + } + + data[xd->name_len] = '\0'; + crc = crc32(0, data, length); + if (crc != xd->data_crc) { + JFFS2_WARNING("node CRC failed (JFFS2_NODETYPE_XREF)" + " at %#08x, read: 0x%08x calculated: 0x%08x\n", + ref_offset(xd->node), xd->data_crc, crc); + kfree(data); + return EIO; + } + + xd->flags |= JFFS2_XFLAGS_HOT; + xd->xname = data; + xd->xvalue = data + xd->name_len+1; + + c->xdatum_mem_usage += length; + + xd->hashkey = xattr_datum_hashkey(xd->xprefix, xd->xname, xd->xvalue, xd->value_len); + i = xd->hashkey % XATTRINDEX_HASHSIZE; + list_add(&xd->xindex, &c->xattrindex[i]); + if (!retry) { + retry = 1; + reclaim_xattr_datum(c); + if (!xd->xname) + goto retry; + } + + dbg_xattr("success on loading xdatum (xid=%u, xprefix=%u, xname='%s')\n", + xd->xid, xd->xprefix, xd->xname); + + return 0; +} + +static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem); + * rc < 0 : recoverable error, try again + * rc = 0 : success + * rc > 0 : Unrecoverable error, this node should be deleted. + */ + int rc = 0; + BUG_ON(xd->xname); + if (!xd->node) + return EIO; + if (unlikely(ref_flags(xd->node) != REF_PRISTINE)) { + rc = do_verify_xattr_datum(c, xd); + if (rc > 0) { + list_del_init(&xd->xindex); + delete_xattr_datum_node(c, xd); + } + } + if (!rc) + rc = do_load_xattr_datum(c, xd); + return rc; +} + +static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_raw_node_ref *raw; + struct jffs2_raw_xattr rx; + struct kvec vecs[2]; + size_t length; + int rc, totlen; + uint32_t phys_ofs = write_ofs(c); + + BUG_ON(!xd->xname); + + vecs[0].iov_base = ℞ + vecs[0].iov_len = PAD(sizeof(rx)); + vecs[1].iov_base = xd->xname; + vecs[1].iov_len = xd->name_len + 1 + xd->value_len; + totlen = vecs[0].iov_len + vecs[1].iov_len; + + /* Setup raw-xattr */ + rx.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); + rx.nodetype = cpu_to_je16(JFFS2_NODETYPE_XATTR); + rx.totlen = cpu_to_je32(PAD(totlen)); + rx.hdr_crc = cpu_to_je32(crc32(0, &rx, sizeof(struct jffs2_unknown_node) - 4)); + + rx.xid = cpu_to_je32(xd->xid); + rx.version = cpu_to_je32(++xd->version); + rx.xprefix = xd->xprefix; + rx.name_len = xd->name_len; + rx.value_len = cpu_to_je16(xd->value_len); + rx.data_crc = cpu_to_je32(crc32(0, vecs[1].iov_base, vecs[1].iov_len)); + rx.node_crc = cpu_to_je32(crc32(0, &rx, sizeof(struct jffs2_raw_xattr) - 4)); + + rc = jffs2_flash_writev(c, vecs, 2, phys_ofs, &length, 0); + if (rc || totlen != length) { + JFFS2_WARNING("jffs2_flash_writev()=%d, req=%u, wrote=%zu, at %#08x\n", + rc, totlen, length, phys_ofs); + rc = rc ? rc : -EIO; + if (length) + jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, PAD(totlen), NULL); + + return rc; + } + + /* success */ + raw = jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(totlen), NULL); + /* FIXME */ raw->next_in_ino = (void *)xd; + + if (xd->node) + delete_xattr_datum_node(c, xd); + xd->node = raw; + + dbg_xattr("success on saving xdatum (xid=%u, version=%u, xprefix=%u, xname='%s')\n", + xd->xid, xd->version, xd->xprefix, xd->xname); + + return 0; +} + +static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c, + int xprefix, const char *xname, + const char *xvalue, int xsize) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_xattr_datum *xd; + uint32_t hashkey, name_len; + char *data; + int i, rc; + + /* Search xattr_datum has same xname/xvalue by index */ + hashkey = xattr_datum_hashkey(xprefix, xname, xvalue, xsize); + i = hashkey % XATTRINDEX_HASHSIZE; + list_for_each_entry(xd, &c->xattrindex[i], xindex) { + if (xd->hashkey==hashkey + && xd->xprefix==xprefix + && xd->value_len==xsize + && !strcmp(xd->xname, xname) + && !memcmp(xd->xvalue, xvalue, xsize)) { + xd->refcnt++; + return xd; + } + } + + /* Not found, Create NEW XATTR-Cache */ + name_len = strlen(xname); + + xd = jffs2_alloc_xattr_datum(); + if (!xd) + return ERR_PTR(-ENOMEM); + + data = kmalloc(name_len + 1 + xsize, GFP_KERNEL); + if (!data) { + jffs2_free_xattr_datum(xd); + return ERR_PTR(-ENOMEM); + } + strcpy(data, xname); + memcpy(data + name_len + 1, xvalue, xsize); + + xd->refcnt = 1; + xd->xid = ++c->highest_xid; + xd->flags |= JFFS2_XFLAGS_HOT; + xd->xprefix = xprefix; + + xd->hashkey = hashkey; + xd->xname = data; + xd->xvalue = data + name_len + 1; + xd->name_len = name_len; + xd->value_len = xsize; + xd->data_crc = crc32(0, data, xd->name_len + 1 + xd->value_len); + + rc = save_xattr_datum(c, xd); + if (rc) { + kfree(xd->xname); + jffs2_free_xattr_datum(xd); + return ERR_PTR(rc); + } + + /* Insert Hash Index */ + i = hashkey % XATTRINDEX_HASHSIZE; + list_add(&xd->xindex, &c->xattrindex[i]); + + c->xdatum_mem_usage += (xd->name_len + 1 + xd->value_len); + reclaim_xattr_datum(c); + + return xd; +} + +/* -------- xref related functions ------------------ + * verify_xattr_ref(c, ref) + * is used to load xref information from medium. Because summary data does not + * contain xid/ino, it's necessary to verify once while mounting process. + * delete_xattr_ref_node(c, ref) + * is used to delete a jffs2 node is dominated by xref. When EBS is enabled, + * it overwrites the obsolete node by myself. + * delete_xattr_ref(c, ref) + * is used to delete jffs2_xattr_ref object. If the reference counter of xdatum + * is refered by this xref become 0, delete_xattr_datum() is called later. + * save_xattr_ref(c, ref) + * is used to write xref to medium. + * create_xattr_ref(c, ic, xd) + * is used to create a new xref and write to medium. + * jffs2_xattr_delete_inode(c, ic) + * is called to remove xrefs related to obsolete inode when inode is unlinked. + * jffs2_xattr_free_inode(c, ic) + * is called to release xattr related objects when unmounting. + * check_xattr_ref_inode(c, ic) + * is used to confirm inode does not have duplicate xattr name/value pair. + * -------------------------------------------------- */ +static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) +{ + struct jffs2_eraseblock *jeb; + struct jffs2_raw_xref rr; + size_t readlen; + uint32_t crc, totlen; + int rc; + + BUG_ON(ref_flags(ref->node) != REF_UNCHECKED); + + rc = jffs2_flash_read(c, ref_offset(ref->node), sizeof(rr), &readlen, (char *)&rr); + if (rc || sizeof(rr) != readlen) { + JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu, at %#08x\n", + rc, sizeof(rr), readlen, ref_offset(ref->node)); + return rc ? rc : -EIO; + } + /* obsolete node */ + crc = crc32(0, &rr, sizeof(rr) - 4); + if (crc != je32_to_cpu(rr.node_crc)) { + if (je32_to_cpu(rr.node_crc) != 0xffffffff) + JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", + ref_offset(ref->node), je32_to_cpu(rr.node_crc), crc); + return EIO; + } + if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK + || je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF + || je32_to_cpu(rr.totlen) != PAD(sizeof(rr))) { + JFFS2_ERROR("inconsistent xref at %#08x, magic=%#04x/%#04x, " + "nodetype=%#04x/%#04x, totlen=%u/%zu\n", + ref_offset(ref->node), je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK, + je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF, + je32_to_cpu(rr.totlen), PAD(sizeof(rr))); + return EIO; + } + ref->ino = je32_to_cpu(rr.ino); + ref->xid = je32_to_cpu(rr.xid); + + /* fixup superblock/eraseblock info */ + jeb = &c->blocks[ref_offset(ref->node) / c->sector_size]; + totlen = PAD(sizeof(rr)); + + spin_lock(&c->erase_completion_lock); + c->unchecked_size -= totlen; c->used_size += totlen; + jeb->unchecked_size -= totlen; jeb->used_size += totlen; + ref->node->flash_offset = ref_offset(ref->node) | REF_PRISTINE; + spin_unlock(&c->erase_completion_lock); + + dbg_xattr("success on verifying xref (ino=%u, xid=%u) at %#08x\n", + ref->ino, ref->xid, ref_offset(ref->node)); + return 0; +} + +static void delete_xattr_ref_node(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) +{ + struct jffs2_raw_xref rr; + size_t length; + int rc; + + if (jffs2_sum_active()) { + memset(&rr, 0xff, sizeof(rr)); + rc = jffs2_flash_read(c, ref_offset(ref->node), + sizeof(struct jffs2_unknown_node), + &length, (char *)&rr); + if (rc || length != sizeof(struct jffs2_unknown_node)) { + JFFS2_ERROR("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n", + rc, sizeof(struct jffs2_unknown_node), + length, ref_offset(ref->node)); + } + rc = jffs2_flash_write(c, ref_offset(ref->node), sizeof(rr), + &length, (char *)&rr); + if (rc || length != sizeof(struct jffs2_raw_xref)) { + JFFS2_ERROR("jffs2_flash_write()=%d, req=%zu, wrote=%zu at %#08x\n", + rc, sizeof(rr), length, ref_offset(ref->node)); + } + } + spin_lock(&c->erase_completion_lock); + ref->node->next_in_ino = NULL; + spin_unlock(&c->erase_completion_lock); + jffs2_mark_node_obsolete(c, ref->node); + ref->node = NULL; +} + +static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_xattr_datum *xd; + + BUG_ON(!ref->node); + delete_xattr_ref_node(c, ref); + + xd = ref->xd; + xd->refcnt--; + if (!xd->refcnt) + delete_xattr_datum(c, xd); + jffs2_free_xattr_ref(ref); +} + +static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_raw_node_ref *raw; + struct jffs2_raw_xref rr; + size_t length; + uint32_t phys_ofs = write_ofs(c); + int ret; + + rr.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); + rr.nodetype = cpu_to_je16(JFFS2_NODETYPE_XREF); + rr.totlen = cpu_to_je32(PAD(sizeof(rr))); + rr.hdr_crc = cpu_to_je32(crc32(0, &rr, sizeof(struct jffs2_unknown_node) - 4)); + + rr.ino = cpu_to_je32(ref->ic->ino); + rr.xid = cpu_to_je32(ref->xd->xid); + rr.node_crc = cpu_to_je32(crc32(0, &rr, sizeof(rr) - 4)); + + ret = jffs2_flash_write(c, phys_ofs, sizeof(rr), &length, (char *)&rr); + if (ret || sizeof(rr) != length) { + JFFS2_WARNING("jffs2_flash_write() returned %d, request=%zu, retlen=%zu, at %#08x\n", + ret, sizeof(rr), length, phys_ofs); + ret = ret ? ret : -EIO; + if (length) + jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, PAD(sizeof(rr)), NULL); + + return ret; + } + + raw = jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(sizeof(rr)), NULL); + /* FIXME */ raw->next_in_ino = (void *)ref; + if (ref->node) + delete_xattr_ref_node(c, ref); + ref->node = raw; + + dbg_xattr("success on saving xref (ino=%u, xid=%u)\n", ref->ic->ino, ref->xd->xid); + + return 0; +} + +static struct jffs2_xattr_ref *create_xattr_ref(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, + struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_xattr_ref *ref; + int ret; + + ref = jffs2_alloc_xattr_ref(); + if (!ref) + return ERR_PTR(-ENOMEM); + ref->ic = ic; + ref->xd = xd; + + ret = save_xattr_ref(c, ref); + if (ret) { + jffs2_free_xattr_ref(ref); + return ERR_PTR(ret); + } + + /* Chain to inode */ + ref->next = ic->xref; + ic->xref = ref; + + return ref; /* success */ +} + +void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) +{ + /* It's called from jffs2_clear_inode() on inode removing. + When an inode with XATTR is removed, those XATTRs must be removed. */ + struct jffs2_xattr_ref *ref, *_ref; + + if (!ic || ic->nlink > 0) + return; + + down_write(&c->xattr_sem); + for (ref = ic->xref; ref; ref = _ref) { + _ref = ref->next; + delete_xattr_ref(c, ref); + } + ic->xref = NULL; + up_write(&c->xattr_sem); +} + +void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) +{ + /* It's called from jffs2_free_ino_caches() until unmounting FS. */ + struct jffs2_xattr_datum *xd; + struct jffs2_xattr_ref *ref, *_ref; + + down_write(&c->xattr_sem); + for (ref = ic->xref; ref; ref = _ref) { + _ref = ref->next; + xd = ref->xd; + xd->refcnt--; + if (!xd->refcnt) { + unload_xattr_datum(c, xd); + jffs2_free_xattr_datum(xd); + } + jffs2_free_xattr_ref(ref); + } + ic->xref = NULL; + up_write(&c->xattr_sem); +} + +static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) +{ + /* success of check_xattr_ref_inode() means taht inode (ic) dose not have + * duplicate name/value pairs. If duplicate name/value pair would be found, + * one will be removed. + */ + struct jffs2_xattr_ref *ref, *cmp, **pref; + int rc = 0; + + if (likely(ic->flags & INO_FLAGS_XATTR_CHECKED)) + return 0; + down_write(&c->xattr_sem); + retry: + rc = 0; + for (ref=ic->xref, pref=&ic->xref; ref; pref=&ref->next, ref=ref->next) { + if (!ref->xd->xname) { + rc = load_xattr_datum(c, ref->xd); + if (unlikely(rc > 0)) { + *pref = ref->next; + delete_xattr_ref(c, ref); + goto retry; + } else if (unlikely(rc < 0)) + goto out; + } + for (cmp=ref->next, pref=&ref->next; cmp; pref=&cmp->next, cmp=cmp->next) { + if (!cmp->xd->xname) { + ref->xd->flags |= JFFS2_XFLAGS_BIND; + rc = load_xattr_datum(c, cmp->xd); + ref->xd->flags &= ~JFFS2_XFLAGS_BIND; + if (unlikely(rc > 0)) { + *pref = cmp->next; + delete_xattr_ref(c, cmp); + goto retry; + } else if (unlikely(rc < 0)) + goto out; + } + if (ref->xd->xprefix == cmp->xd->xprefix + && !strcmp(ref->xd->xname, cmp->xd->xname)) { + *pref = cmp->next; + delete_xattr_ref(c, cmp); + goto retry; + } + } + } + ic->flags |= INO_FLAGS_XATTR_CHECKED; + out: + up_write(&c->xattr_sem); + + return rc; +} + +/* -------- xattr subsystem functions --------------- + * jffs2_init_xattr_subsystem(c) + * is used to initialize semaphore and list_head, and some variables. + * jffs2_find_xattr_datum(c, xid) + * is used to lookup xdatum while scanning process. + * jffs2_clear_xattr_subsystem(c) + * is used to release any xattr related objects. + * jffs2_build_xattr_subsystem(c) + * is used to associate xdatum and xref while super block building process. + * jffs2_setup_xattr_datum(c, xid, version) + * is used to insert xdatum while scanning process. + * -------------------------------------------------- */ +void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c) +{ + int i; + + for (i=0; i < XATTRINDEX_HASHSIZE; i++) + INIT_LIST_HEAD(&c->xattrindex[i]); + INIT_LIST_HEAD(&c->xattr_unchecked); + c->xref_temp = NULL; + + init_rwsem(&c->xattr_sem); + c->xdatum_mem_usage = 0; + c->xdatum_mem_threshold = 32 * 1024; /* Default 32KB */ +} + +static struct jffs2_xattr_datum *jffs2_find_xattr_datum(struct jffs2_sb_info *c, uint32_t xid) +{ + struct jffs2_xattr_datum *xd; + int i = xid % XATTRINDEX_HASHSIZE; + + /* It's only used in scanning/building process. */ + BUG_ON(!(c->flags & (JFFS2_SB_FLAG_SCANNING|JFFS2_SB_FLAG_BUILDING))); + + list_for_each_entry(xd, &c->xattrindex[i], xindex) { + if (xd->xid==xid) + return xd; + } + return NULL; +} + +void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c) +{ + struct jffs2_xattr_datum *xd, *_xd; + struct jffs2_xattr_ref *ref, *_ref; + int i; + + for (ref=c->xref_temp; ref; ref = _ref) { + _ref = ref->next; + jffs2_free_xattr_ref(ref); + } + c->xref_temp = NULL; + + for (i=0; i < XATTRINDEX_HASHSIZE; i++) { + list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) { + list_del(&xd->xindex); + if (xd->xname) + kfree(xd->xname); + jffs2_free_xattr_datum(xd); + } + } +} + +void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) +{ + struct jffs2_xattr_ref *ref, *_ref; + struct jffs2_xattr_datum *xd, *_xd; + struct jffs2_inode_cache *ic; + int i, xdatum_count =0, xdatum_unchecked_count = 0, xref_count = 0; + + BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING)); + + /* Phase.1 */ + for (ref=c->xref_temp; ref; ref=_ref) { + _ref = ref->next; + /* checking REF_UNCHECKED nodes */ + if (ref_flags(ref->node) != REF_PRISTINE) { + if (verify_xattr_ref(c, ref)) { + delete_xattr_ref_node(c, ref); + jffs2_free_xattr_ref(ref); + continue; + } + } + /* At this point, ref->xid and ref->ino contain XID and inode number. + ref->xd and ref->ic are not valid yet. */ + xd = jffs2_find_xattr_datum(c, ref->xid); + ic = jffs2_get_ino_cache(c, ref->ino); + if (!xd || !ic) { + if (ref_flags(ref->node) != REF_UNCHECKED) + JFFS2_WARNING("xref(ino=%u, xid=%u) is orphan. \n", + ref->ino, ref->xid); + delete_xattr_ref_node(c, ref); + jffs2_free_xattr_ref(ref); + continue; + } + ref->xd = xd; + ref->ic = ic; + xd->refcnt++; + ref->next = ic->xref; + ic->xref = ref; + xref_count++; + } + c->xref_temp = NULL; + /* After this, ref->xid/ino are NEVER used. */ + + /* Phase.2 */ + for (i=0; i < XATTRINDEX_HASHSIZE; i++) { + list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) { + list_del_init(&xd->xindex); + if (!xd->refcnt) { + if (ref_flags(xd->node) != REF_UNCHECKED) + JFFS2_WARNING("orphan xdatum(xid=%u, version=%u) at %#08x\n", + xd->xid, xd->version, ref_offset(xd->node)); + delete_xattr_datum(c, xd); + continue; + } + if (ref_flags(xd->node) != REF_PRISTINE) { + dbg_xattr("unchecked xdatum(xid=%u) at %#08x\n", + xd->xid, ref_offset(xd->node)); + list_add(&xd->xindex, &c->xattr_unchecked); + xdatum_unchecked_count++; + } + xdatum_count++; + } + } + /* build complete */ + JFFS2_NOTICE("complete building xattr subsystem, %u of xdatum (%u unchecked) and " + "%u of xref found.\n", xdatum_count, xdatum_unchecked_count, xref_count); +} + +struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, + uint32_t xid, uint32_t version) +{ + struct jffs2_xattr_datum *xd, *_xd; + + _xd = jffs2_find_xattr_datum(c, xid); + if (_xd) { + dbg_xattr("duplicate xdatum (xid=%u, version=%u/%u) at %#08x\n", + xid, version, _xd->version, ref_offset(_xd->node)); + if (version < _xd->version) + return ERR_PTR(-EEXIST); + } + xd = jffs2_alloc_xattr_datum(); + if (!xd) + return ERR_PTR(-ENOMEM); + xd->xid = xid; + xd->version = version; + if (xd->xid > c->highest_xid) + c->highest_xid = xd->xid; + list_add_tail(&xd->xindex, &c->xattrindex[xid % XATTRINDEX_HASHSIZE]); + + if (_xd) { + list_del_init(&_xd->xindex); + delete_xattr_datum_node(c, _xd); + jffs2_free_xattr_datum(_xd); + } + return xd; +} + +/* -------- xattr subsystem functions --------------- + * xprefix_to_handler(xprefix) + * is used to translate xprefix into xattr_handler. + * jffs2_listxattr(dentry, buffer, size) + * is an implementation of listxattr handler on jffs2. + * do_jffs2_getxattr(inode, xprefix, xname, buffer, size) + * is an implementation of getxattr handler on jffs2. + * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags) + * is an implementation of setxattr handler on jffs2. + * -------------------------------------------------- */ +struct xattr_handler *jffs2_xattr_handlers[] = { + &jffs2_user_xattr_handler, +#ifdef CONFIG_JFFS2_FS_SECURITY + &jffs2_security_xattr_handler, +#endif +#ifdef CONFIG_JFFS2_FS_POSIX_ACL + &jffs2_acl_access_xattr_handler, + &jffs2_acl_default_xattr_handler, +#endif + &jffs2_trusted_xattr_handler, + NULL +}; + +static struct xattr_handler *xprefix_to_handler(int xprefix) { + struct xattr_handler *ret; + + switch (xprefix) { + case JFFS2_XPREFIX_USER: + ret = &jffs2_user_xattr_handler; + break; +#ifdef CONFIG_JFFS2_FS_SECURITY + case JFFS2_XPREFIX_SECURITY: + ret = &jffs2_security_xattr_handler; + break; +#endif +#ifdef CONFIG_JFFS2_FS_POSIX_ACL + case JFFS2_XPREFIX_ACL_ACCESS: + ret = &jffs2_acl_access_xattr_handler; + break; + case JFFS2_XPREFIX_ACL_DEFAULT: + ret = &jffs2_acl_default_xattr_handler; + break; +#endif + case JFFS2_XPREFIX_TRUSTED: + ret = &jffs2_trusted_xattr_handler; + break; + default: + ret = NULL; + break; + } + return ret; +} + +ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_inode_cache *ic = f->inocache; + struct jffs2_xattr_ref *ref, **pref; + struct jffs2_xattr_datum *xd; + struct xattr_handler *xhandle; + ssize_t len, rc; + int retry = 0; + + rc = check_xattr_ref_inode(c, ic); + if (unlikely(rc)) + return rc; + + down_read(&c->xattr_sem); + retry: + len = 0; + for (ref=ic->xref, pref=&ic->xref; ref; pref=&ref->next, ref=ref->next) { + BUG_ON(ref->ic != ic); + xd = ref->xd; + if (!xd->xname) { + /* xdatum is unchached */ + if (!retry) { + retry = 1; + up_read(&c->xattr_sem); + down_write(&c->xattr_sem); + goto retry; + } else { + rc = load_xattr_datum(c, xd); + if (unlikely(rc > 0)) { + *pref = ref->next; + delete_xattr_ref(c, ref); + goto retry; + } else if (unlikely(rc < 0)) + goto out; + } + } + xhandle = xprefix_to_handler(xd->xprefix); + if (!xhandle) + continue; + if (buffer) { + rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len); + } else { + rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len); + } + if (rc < 0) + goto out; + len += rc; + } + rc = len; + out: + if (!retry) { + up_read(&c->xattr_sem); + } else { + up_write(&c->xattr_sem); + } + return rc; +} + +int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname, + char *buffer, size_t size) +{ + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_inode_cache *ic = f->inocache; + struct jffs2_xattr_datum *xd; + struct jffs2_xattr_ref *ref, **pref; + int rc, retry = 0; + + rc = check_xattr_ref_inode(c, ic); + if (unlikely(rc)) + return rc; + + down_read(&c->xattr_sem); + retry: + for (ref=ic->xref, pref=&ic->xref; ref; pref=&ref->next, ref=ref->next) { + BUG_ON(ref->ic!=ic); + + xd = ref->xd; + if (xd->xprefix != xprefix) + continue; + if (!xd->xname) { + /* xdatum is unchached */ + if (!retry) { + retry = 1; + up_read(&c->xattr_sem); + down_write(&c->xattr_sem); + goto retry; + } else { + rc = load_xattr_datum(c, xd); + if (unlikely(rc > 0)) { + *pref = ref->next; + delete_xattr_ref(c, ref); + goto retry; + } else if (unlikely(rc < 0)) { + goto out; + } + } + } + if (!strcmp(xname, xd->xname)) { + rc = xd->value_len; + if (buffer) { + if (size < rc) { + rc = -ERANGE; + } else { + memcpy(buffer, xd->xvalue, rc); + } + } + goto out; + } + } + rc = -ENODATA; + out: + if (!retry) { + up_read(&c->xattr_sem); + } else { + up_write(&c->xattr_sem); + } + return rc; +} + +int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, + const char *buffer, size_t size, int flags) +{ + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_inode_cache *ic = f->inocache; + struct jffs2_xattr_datum *xd; + struct jffs2_xattr_ref *ref, *newref, **pref; + uint32_t length, request; + int rc; + + rc = check_xattr_ref_inode(c, ic); + if (unlikely(rc)) + return rc; + + request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size); + rc = jffs2_reserve_space(c, request, &length, + ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE); + if (rc) { + JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request); + return rc; + } + + /* Find existing xattr */ + down_write(&c->xattr_sem); + retry: + for (ref=ic->xref, pref=&ic->xref; ref; pref=&ref->next, ref=ref->next) { + xd = ref->xd; + if (xd->xprefix != xprefix) + continue; + if (!xd->xname) { + rc = load_xattr_datum(c, xd); + if (unlikely(rc > 0)) { + *pref = ref->next; + delete_xattr_ref(c, ref); + goto retry; + } else if (unlikely(rc < 0)) + goto out; + } + if (!strcmp(xd->xname, xname)) { + if (flags & XATTR_CREATE) { + rc = -EEXIST; + goto out; + } + if (!buffer) { + *pref = ref->next; + delete_xattr_ref(c, ref); + rc = 0; + goto out; + } + goto found; + } + } + /* not found */ + if (flags & XATTR_REPLACE) { + rc = -ENODATA; + goto out; + } + if (!buffer) { + rc = -EINVAL; + goto out; + } + found: + xd = create_xattr_datum(c, xprefix, xname, buffer, size); + if (IS_ERR(xd)) { + rc = PTR_ERR(xd); + goto out; + } + up_write(&c->xattr_sem); + jffs2_complete_reservation(c); + + /* create xattr_ref */ + request = PAD(sizeof(struct jffs2_raw_xref)); + rc = jffs2_reserve_space(c, request, &length, + ALLOC_NORMAL, JFFS2_SUMMARY_XREF_SIZE); + if (rc) { + JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request); + down_write(&c->xattr_sem); + xd->refcnt--; + if (!xd->refcnt) + delete_xattr_datum(c, xd); + up_write(&c->xattr_sem); + return rc; + } + down_write(&c->xattr_sem); + if (ref) + *pref = ref->next; + newref = create_xattr_ref(c, ic, xd); + if (IS_ERR(newref)) { + if (ref) { + ref->next = ic->xref; + ic->xref = ref; + } + rc = PTR_ERR(newref); + xd->refcnt--; + if (!xd->refcnt) + delete_xattr_datum(c, xd); + } else if (ref) { + delete_xattr_ref(c, ref); + } + out: + up_write(&c->xattr_sem); + jffs2_complete_reservation(c); + return rc; +} + +/* -------- garbage collector functions ------------- + * jffs2_garbage_collect_xattr_datum(c, xd) + * is used to move xdatum into new node. + * jffs2_garbage_collect_xattr_ref(c, ref) + * is used to move xref into new node. + * jffs2_verify_xattr(c) + * is used to call do_verify_xattr_datum() before garbage collecting. + * -------------------------------------------------- */ +int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + uint32_t totlen, length, old_ofs; + int rc = -EINVAL; + + down_write(&c->xattr_sem); + BUG_ON(!xd->node); + + old_ofs = ref_offset(xd->node); + totlen = ref_totlen(c, c->gcblock, xd->node); + if (totlen < sizeof(struct jffs2_raw_xattr)) + goto out; + + if (!xd->xname) { + rc = load_xattr_datum(c, xd); + if (unlikely(rc > 0)) { + delete_xattr_datum_node(c, xd); + rc = 0; + goto out; + } else if (unlikely(rc < 0)) + goto out; + } + rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE); + if (rc || length < totlen) { + JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, totlen); + rc = rc ? rc : -EBADFD; + goto out; + } + rc = save_xattr_datum(c, xd); + if (!rc) + dbg_xattr("xdatum (xid=%u, version=%u) GC'ed from %#08x to %08x\n", + xd->xid, xd->version, old_ofs, ref_offset(xd->node)); + out: + up_write(&c->xattr_sem); + return rc; +} + + +int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) +{ + uint32_t totlen, length, old_ofs; + int rc = -EINVAL; + + down_write(&c->xattr_sem); + BUG_ON(!ref->node); + + old_ofs = ref_offset(ref->node); + totlen = ref_totlen(c, c->gcblock, ref->node); + if (totlen != sizeof(struct jffs2_raw_xref)) + goto out; + + rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); + if (rc || length < totlen) { + JFFS2_WARNING("%s: jffs2_reserve_space() = %d, request = %u\n", + __FUNCTION__, rc, totlen); + rc = rc ? rc : -EBADFD; + goto out; + } + rc = save_xattr_ref(c, ref); + if (!rc) + dbg_xattr("xref (ino=%u, xid=%u) GC'ed from %#08x to %08x\n", + ref->ic->ino, ref->xd->xid, old_ofs, ref_offset(ref->node)); + out: + up_write(&c->xattr_sem); + return rc; +} + +int jffs2_verify_xattr(struct jffs2_sb_info *c) +{ + struct jffs2_xattr_datum *xd, *_xd; + int rc; + + down_write(&c->xattr_sem); + list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) { + rc = do_verify_xattr_datum(c, xd); + if (rc == 0) { + list_del_init(&xd->xindex); + break; + } else if (rc > 0) { + list_del_init(&xd->xindex); + delete_xattr_datum_node(c, xd); + } + } + up_write(&c->xattr_sem); + + return list_empty(&c->xattr_unchecked) ? 1 : 0; +} diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h new file mode 100644 index 000000000000..2c199856c582 --- /dev/null +++ b/fs/jffs2/xattr.h @@ -0,0 +1,116 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2006 NEC Corporation + * + * Created by KaiGai Kohei <kaigai@ak.jp.nec.com> + * + * For licensing information, see the file 'LICENCE' in this directory. + * + */ +#ifndef _JFFS2_FS_XATTR_H_ +#define _JFFS2_FS_XATTR_H_ + +#include <linux/xattr.h> +#include <linux/list.h> + +#define JFFS2_XFLAGS_HOT (0x01) /* This datum is HOT */ +#define JFFS2_XFLAGS_BIND (0x02) /* This datum is not reclaimed */ + +struct jffs2_xattr_datum +{ + void *always_null; + struct jffs2_raw_node_ref *node; + uint8_t class; + uint8_t flags; + uint16_t xprefix; /* see JFFS2_XATTR_PREFIX_* */ + + struct list_head xindex; /* chained from c->xattrindex[n] */ + uint32_t refcnt; /* # of xattr_ref refers this */ + uint32_t xid; + uint32_t version; + + uint32_t data_crc; + uint32_t hashkey; + char *xname; /* XATTR name without prefix */ + uint32_t name_len; /* length of xname */ + char *xvalue; /* XATTR value */ + uint32_t value_len; /* length of xvalue */ +}; + +struct jffs2_inode_cache; +struct jffs2_xattr_ref +{ + void *always_null; + struct jffs2_raw_node_ref *node; + uint8_t class; + uint8_t flags; /* Currently unused */ + u16 unused; + + union { + struct jffs2_inode_cache *ic; /* reference to jffs2_inode_cache */ + uint32_t ino; /* only used in scanning/building */ + }; + union { + struct jffs2_xattr_datum *xd; /* reference to jffs2_xattr_datum */ + uint32_t xid; /* only used in sccanning/building */ + }; + struct jffs2_xattr_ref *next; /* chained from ic->xref_list */ +}; + +#ifdef CONFIG_JFFS2_FS_XATTR + +extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c); +extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c); +extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c); + +extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, + uint32_t xid, uint32_t version); + +extern void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic); +extern void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic); + +extern int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd); +extern int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref); +extern int jffs2_verify_xattr(struct jffs2_sb_info *c); + +extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname, + char *buffer, size_t size); +extern int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, + const char *buffer, size_t size, int flags); + +extern struct xattr_handler *jffs2_xattr_handlers[]; +extern struct xattr_handler jffs2_user_xattr_handler; +extern struct xattr_handler jffs2_trusted_xattr_handler; + +extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); +#define jffs2_getxattr generic_getxattr +#define jffs2_setxattr generic_setxattr +#define jffs2_removexattr generic_removexattr + +#else + +#define jffs2_init_xattr_subsystem(c) +#define jffs2_build_xattr_subsystem(c) +#define jffs2_clear_xattr_subsystem(c) + +#define jffs2_xattr_delete_inode(c, ic) +#define jffs2_xattr_free_inode(c, ic) +#define jffs2_verify_xattr(c) (1) + +#define jffs2_xattr_handlers NULL +#define jffs2_listxattr NULL +#define jffs2_getxattr NULL +#define jffs2_setxattr NULL +#define jffs2_removexattr NULL + +#endif /* CONFIG_JFFS2_FS_XATTR */ + +#ifdef CONFIG_JFFS2_FS_SECURITY +extern int jffs2_init_security(struct inode *inode, struct inode *dir); +extern struct xattr_handler jffs2_security_xattr_handler; +#else +#define jffs2_init_security(inode,dir) (0) +#endif /* CONFIG_JFFS2_FS_SECURITY */ + +#endif /* _JFFS2_FS_XATTR_H_ */ diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c new file mode 100644 index 000000000000..ed046e19dbfa --- /dev/null +++ b/fs/jffs2/xattr_trusted.c @@ -0,0 +1,52 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2006 NEC Corporation + * + * Created by KaiGai Kohei <kaigai@ak.jp.nec.com> + * + * For licensing information, see the file 'LICENCE' in this directory. + * + */ +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/jffs2.h> +#include <linux/xattr.h> +#include <linux/mtd/mtd.h> +#include "nodelist.h" + +static int jffs2_trusted_getxattr(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (!strcmp(name, "")) + return -EINVAL; + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size); +} + +static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) +{ + if (!strcmp(name, "")) + return -EINVAL; + return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags); +} + +static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1; + + if (list && retlen<=list_size) { + strcpy(list, XATTR_TRUSTED_PREFIX); + strcpy(list + XATTR_TRUSTED_PREFIX_LEN, name); + } + + return retlen; +} + +struct xattr_handler jffs2_trusted_xattr_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .list = jffs2_trusted_listxattr, + .set = jffs2_trusted_setxattr, + .get = jffs2_trusted_getxattr +}; diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c new file mode 100644 index 000000000000..2f8e9aa01ea0 --- /dev/null +++ b/fs/jffs2/xattr_user.c @@ -0,0 +1,52 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2006 NEC Corporation + * + * Created by KaiGai Kohei <kaigai@ak.jp.nec.com> + * + * For licensing information, see the file 'LICENCE' in this directory. + * + */ +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/jffs2.h> +#include <linux/xattr.h> +#include <linux/mtd/mtd.h> +#include "nodelist.h" + +static int jffs2_user_getxattr(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (!strcmp(name, "")) + return -EINVAL; + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size); +} + +static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) +{ + if (!strcmp(name, "")) + return -EINVAL; + return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags); +} + +static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1; + + if (list && retlen <= list_size) { + strcpy(list, XATTR_USER_PREFIX); + strcpy(list + XATTR_USER_PREFIX_LEN, name); + } + + return retlen; +} + +struct xattr_handler jffs2_user_xattr_handler = { + .prefix = XATTR_USER_PREFIX, + .list = jffs2_user_listxattr, + .set = jffs2_user_setxattr, + .get = jffs2_user_getxattr +}; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index f28696f235c4..2b220dd6b4e7 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -542,7 +542,7 @@ add_failed: static int metapage_releasepage(struct page *page, gfp_t gfp_mask) { struct metapage *mp; - int busy = 0; + int ret = 1; unsigned int offset; for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { @@ -552,30 +552,20 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) continue; jfs_info("metapage_releasepage: mp = 0x%p", mp); - if (mp->count || mp->nohomeok) { + if (mp->count || mp->nohomeok || + test_bit(META_dirty, &mp->flag)) { jfs_info("count = %ld, nohomeok = %d", mp->count, mp->nohomeok); - busy = 1; + ret = 0; continue; } - wait_on_page_writeback(page); - //WARN_ON(test_bit(META_dirty, &mp->flag)); - if (test_bit(META_dirty, &mp->flag)) { - dump_mem("dirty mp in metapage_releasepage", mp, - sizeof(struct metapage)); - dump_mem("page", page, sizeof(struct page)); - dump_stack(); - } if (mp->lsn) remove_from_logsync(mp); remove_metapage(page, mp); INCREMENT(mpStat.pagefree); free_metapage(mp); } - if (busy) - return -1; - - return 0; + return ret; } static void metapage_invalidatepage(struct page *page, unsigned long offset) diff --git a/fs/locks.c b/fs/locks.c index efad798824dc..ab61a8b54829 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -446,15 +446,14 @@ static struct lock_manager_operations lease_manager_ops = { */ static int lease_init(struct file *filp, int type, struct file_lock *fl) { + if (assign_type(fl, type) != 0) + return -EINVAL; + fl->fl_owner = current->files; fl->fl_pid = current->tgid; fl->fl_file = filp; fl->fl_flags = FL_LEASE; - if (assign_type(fl, type) != 0) { - locks_free_lock(fl); - return -EINVAL; - } fl->fl_start = 0; fl->fl_end = OFFSET_MAX; fl->fl_ops = NULL; @@ -466,16 +465,19 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl) static int lease_alloc(struct file *filp, int type, struct file_lock **flp) { struct file_lock *fl = locks_alloc_lock(); - int error; + int error = -ENOMEM; if (fl == NULL) - return -ENOMEM; + goto out; error = lease_init(filp, type, fl); - if (error) - return error; + if (error) { + locks_free_lock(fl); + fl = NULL; + } +out: *flp = fl; - return 0; + return error; } /* Check if two locks overlap each other. @@ -753,6 +755,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) if (request->fl_type == F_UNLCK) goto out; + error = -ENOMEM; new_fl = locks_alloc_lock(); if (new_fl == NULL) goto out; @@ -779,6 +782,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) locks_copy_lock(new_fl, request); locks_insert_lock(&inode->i_flock, new_fl); new_fl = NULL; + error = 0; out: unlock_kernel(); @@ -1372,6 +1376,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) goto out; if (my_before != NULL) { + *flp = *my_before; error = lease->fl_lmops->fl_change(my_before, arg); goto out; } diff --git a/fs/namei.c b/fs/namei.c index 96723ae83c89..d6e2ee251736 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1080,8 +1080,8 @@ static int fastcall do_path_lookup(int dfd, const char *name, nd->flags = flags; nd->depth = 0; - read_lock(¤t->fs->lock); if (*name=='/') { + read_lock(¤t->fs->lock); if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { nd->mnt = mntget(current->fs->altrootmnt); nd->dentry = dget(current->fs->altroot); @@ -1092,33 +1092,35 @@ static int fastcall do_path_lookup(int dfd, const char *name, } nd->mnt = mntget(current->fs->rootmnt); nd->dentry = dget(current->fs->root); + read_unlock(¤t->fs->lock); } else if (dfd == AT_FDCWD) { + read_lock(¤t->fs->lock); nd->mnt = mntget(current->fs->pwdmnt); nd->dentry = dget(current->fs->pwd); + read_unlock(¤t->fs->lock); } else { struct dentry *dentry; file = fget_light(dfd, &fput_needed); retval = -EBADF; if (!file) - goto unlock_fail; + goto out_fail; dentry = file->f_dentry; retval = -ENOTDIR; if (!S_ISDIR(dentry->d_inode->i_mode)) - goto fput_unlock_fail; + goto fput_fail; retval = file_permission(file, MAY_EXEC); if (retval) - goto fput_unlock_fail; + goto fput_fail; nd->mnt = mntget(file->f_vfsmnt); nd->dentry = dget(dentry); fput_light(file, fput_needed); } - read_unlock(¤t->fs->lock); current->total_link_count = 0; retval = link_path_walk(name, nd); out: @@ -1127,13 +1129,12 @@ out: nd->dentry->d_inode)) audit_inode(name, nd->dentry->d_inode, flags); } +out_fail: return retval; -fput_unlock_fail: +fput_fail: fput_light(file, fput_needed); -unlock_fail: - read_unlock(¤t->fs->lock); - return retval; + goto out_fail; } int fastcall path_lookup(const char *name, unsigned int flags, diff --git a/fs/namespace.c b/fs/namespace.c index 2c5f1f80bdc2..bf478addb852 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -899,13 +899,11 @@ static int do_change_type(struct nameidata *nd, int flag) /* * do loopback mount. */ -static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags) +static int do_loopback(struct nameidata *nd, char *old_name, int recurse) { struct nameidata old_nd; struct vfsmount *mnt = NULL; - int recurse = flags & MS_REC; int err = mount_is_safe(nd); - if (err) return err; if (!old_name || !*old_name) @@ -939,7 +937,6 @@ static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags spin_unlock(&vfsmount_lock); release_mounts(&umount_list); } - mnt->mnt_flags = mnt_flags; out: up_write(&namespace_sem); @@ -1353,7 +1350,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, data_page); else if (flags & MS_BIND) - retval = do_loopback(&nd, dev_name, flags, mnt_flags); + retval = do_loopback(&nd, dev_name, flags & MS_REC); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&nd, flags); else if (flags & MS_MOVE) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 4e0578121d9a..3eec30000f3f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1066,9 +1066,11 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, rv = nfserr_perm; else if (IS_ERR(exp)) rv = nfserrno(PTR_ERR(exp)); - else + else { rv = fh_compose(fhp, exp, fsid_key->ek_dentry, NULL); + exp_put(exp); + } cache_put(&fsid_key->h, &svc_expkey_cache); return rv; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6aa92d0e6876..1d65f13f458c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1922,11 +1922,10 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; - size = posix_acl_to_xattr(acl, value, size); - if (size < 0) { - error = size; + error = posix_acl_to_xattr(acl, value, size); + if (error < 0) goto getout; - } + size = error; } else size = 0; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0d858d0b25be..47152bf9a7f2 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -276,13 +276,29 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) return ret; } +/* This can also be called from ocfs2_write_zero_page() which has done + * it's own cluster locking. */ +int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + int ret; + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + + ret = block_prepare_write(page, from, to, ocfs2_get_block); + + up_read(&OCFS2_I(inode)->ip_alloc_sem); + + return ret; +} + /* * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called * from loopback. It must be able to perform its own locking around * ocfs2_get_block(). */ -int ocfs2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ocfs2_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) { struct inode *inode = page->mapping->host; int ret; @@ -295,11 +311,7 @@ int ocfs2_prepare_write(struct file *file, struct page *page, goto out; } - down_read(&OCFS2_I(inode)->ip_alloc_sem); - - ret = block_prepare_write(page, from, to, ocfs2_get_block); - - up_read(&OCFS2_I(inode)->ip_alloc_sem); + ret = ocfs2_prepare_write_nolock(inode, page, from, to); ocfs2_meta_unlock(inode, 0); out: @@ -625,11 +637,31 @@ static ssize_t ocfs2_direct_IO(int rw, int ret; mlog_entry_void(); + + /* + * We get PR data locks even for O_DIRECT. This allows + * concurrent O_DIRECT I/O but doesn't let O_DIRECT with + * extending and buffered zeroing writes race. If they did + * race then the buffered zeroing could be written back after + * the O_DIRECT I/O. It's one thing to tell people not to mix + * buffered and O_DIRECT writes, but expecting them to + * understand that file extension is also an implicit buffered + * write is too much. By getting the PR we force writeback of + * the buffered zeroing before proceeding. + */ + ret = ocfs2_data_lock(inode, 0); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + ocfs2_data_unlock(inode, 0); + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ocfs2_direct_IO_get_blocks, ocfs2_dio_end_io); +out: mlog_exit(ret); return ret; } diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index d40456d509a0..e88c3f0b8fa9 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -22,8 +22,8 @@ #ifndef OCFS2_AOPS_H #define OCFS2_AOPS_H -int ocfs2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to); +int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, + unsigned from, unsigned to); struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode, struct page *page, diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 4601fc256f11..1a5c69071df6 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -569,7 +569,7 @@ static int ocfs2_extent_map_insert(struct inode *inode, ret = -ENOMEM; ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.new_ent) { mlog_errno(ret); return ret; @@ -583,14 +583,14 @@ static int ocfs2_extent_map_insert(struct inode *inode, if (ctxt.need_left && !ctxt.left_ent) { ctxt.left_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.left_ent) break; } if (ctxt.need_right && !ctxt.right_ent) { ctxt.right_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.right_ent) break; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 581eb451a41a..a9559c874530 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -613,7 +613,8 @@ leave: /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to - * worry about recursive locking in ->commit_write(). */ + * worry about recursive locking in ->prepare_write() and + * ->commit_write(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 size) { @@ -641,7 +642,7 @@ static int ocfs2_write_zero_page(struct inode *inode, goto out; } - ret = ocfs2_prepare_write(NULL, page, offset, offset); + ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); if (ret < 0) { mlog_errno(ret); goto out_unlock; @@ -695,13 +696,26 @@ out: return ret; } +/* + * A tail_to_skip value > 0 indicates that we're being called from + * ocfs2_file_aio_write(). This has the following implications: + * + * - we don't want to update i_size + * - di_bh will be NULL, which is fine because it's only used in the + * case where we want to update i_size. + * - ocfs2_zero_extend() will then only be filling the hole created + * between i_size and the start of the write. + */ static int ocfs2_extend_file(struct inode *inode, struct buffer_head *di_bh, - u64 new_i_size) + u64 new_i_size, + size_t tail_to_skip) { int ret = 0; u32 clusters_to_add; + BUG_ON(!tail_to_skip && !di_bh); + /* setattr sometimes calls us like this. */ if (new_i_size == 0) goto out; @@ -714,27 +728,44 @@ static int ocfs2_extend_file(struct inode *inode, OCFS2_I(inode)->ip_clusters; if (clusters_to_add) { - ret = ocfs2_extend_allocation(inode, clusters_to_add); + /* + * protect the pages that ocfs2_zero_extend is going to + * be pulling into the page cache.. we do this before the + * metadata extend so that we don't get into the situation + * where we've extended the metadata but can't get the data + * lock to zero. + */ + ret = ocfs2_data_lock(inode, 1); if (ret < 0) { mlog_errno(ret); goto out; } - ret = ocfs2_zero_extend(inode, new_i_size); + ret = ocfs2_extend_allocation(inode, clusters_to_add); if (ret < 0) { mlog_errno(ret); - goto out; + goto out_unlock; } - } - /* No allocation required, we just use this helper to - * do a trivial update of i_size. */ - ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); - if (ret < 0) { - mlog_errno(ret); - goto out; + ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip); + if (ret < 0) { + mlog_errno(ret); + goto out_unlock; + } + } + + if (!tail_to_skip) { + /* We're being called from ocfs2_setattr() which wants + * us to update i_size */ + ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); + if (ret < 0) + mlog_errno(ret); } +out_unlock: + if (clusters_to_add) /* this is the only case in which we lock */ + ocfs2_data_unlock(inode, 1); + out: return ret; } @@ -793,7 +824,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if (i_size_read(inode) > attr->ia_size) status = ocfs2_truncate_file(inode, bh, attr->ia_size); else - status = ocfs2_extend_file(inode, bh, attr->ia_size); + status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -1049,21 +1080,12 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (!clusters) break; - ret = ocfs2_extend_allocation(inode, clusters); + ret = ocfs2_extend_file(inode, NULL, newsize, count); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); goto out; } - - /* Fill any holes which would've been created by this - * write. If we're O_APPEND, this will wind up - * (correctly) being a noop. */ - ret = ocfs2_zero_extend(inode, (u64) newsize - count); - if (ret < 0) { - mlog_errno(ret); - goto out; - } break; } @@ -1146,6 +1168,22 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, ocfs2_iocb_set_rw_locked(iocb); } + /* + * We're fine letting folks race truncates and extending + * writes with read across the cluster, just like they can + * locally. Hence no rw_lock during read. + * + * Take and drop the meta data lock to update inode fields + * like i_size. This allows the checks down below + * generic_file_aio_read() a chance of actually working. + */ + ret = ocfs2_meta_lock(inode, NULL, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto bail; + } + ocfs2_meta_unlock(inode, 0); + ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos); if (ret == -EINVAL) mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 6a610ae53583..eebc3cfa6be8 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -117,7 +117,7 @@ struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb) { struct ocfs2_journal_handle *retval = NULL; - retval = kcalloc(1, sizeof(*retval), GFP_KERNEL); + retval = kcalloc(1, sizeof(*retval), GFP_NOFS); if (!retval) { mlog(ML_ERROR, "Failed to allocate memory for journal " "handle!\n"); @@ -870,9 +870,11 @@ static int ocfs2_force_read_journal(struct inode *inode) if (p_blocks > CONCURRENT_JOURNAL_FILL) p_blocks = CONCURRENT_JOURNAL_FILL; + /* We are reading journal data which should not + * be put in the uptodate cache */ status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), p_blkno, p_blocks, bhs, 0, - inode); + NULL); if (status < 0) { mlog_errno(status); goto bail; @@ -982,7 +984,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, { struct ocfs2_la_recovery_item *item; - item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_KERNEL); + item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); if (!item) { /* Though we wish to avoid it, we are in fact safe in * skipping local alloc cleanup as fsck.ocfs2 is more diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 04a684dfdd96..b8a00a793326 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -337,7 +337,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, (unsigned long long)oi->ip_blkno, (unsigned long long)block, expand_tree); - new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL); + new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); if (!new) { mlog_errno(-ENOMEM); return; @@ -349,7 +349,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, * has no way of tracking that. */ for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, - GFP_KERNEL); + GFP_NOFS); if (!tree[i]) { mlog_errno(-ENOMEM); goto out_free; diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 53049a204197..ee42765a8553 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c @@ -586,7 +586,7 @@ static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response { struct ocfs2_net_wait_ctxt *w; - w = kcalloc(1, sizeof(*w), GFP_KERNEL); + w = kcalloc(1, sizeof(*w), GFP_NOFS); if (!w) { mlog_errno(-ENOMEM); goto bail; @@ -749,7 +749,7 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, BUG_ON(!ocfs2_is_valid_vote_request(type)); - request = kcalloc(1, sizeof(*request), GFP_KERNEL); + request = kcalloc(1, sizeof(*request), GFP_NOFS); if (!request) { mlog_errno(-ENOMEM); } else { @@ -1129,7 +1129,7 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg, struct ocfs2_super *osb = data; struct ocfs2_vote_work *work; - work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_KERNEL); + work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS); if (!work) { status = -ENOMEM; mlog_errno(status); diff --git a/fs/open.c b/fs/open.c index 53ec28c36777..317b7c7f38a7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1124,7 +1124,6 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, prevent_tail_call(ret); return ret; } -EXPORT_SYMBOL_GPL(sys_openat); #ifndef __alpha__ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 45ae7dd3c650..7ef1f094de91 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -533,6 +533,7 @@ void del_gendisk(struct gendisk *disk) devfs_remove_disk(disk); + kobject_uevent(&disk->kobj, KOBJ_REMOVE); if (disk->holder_dir) kobject_unregister(disk->holder_dir); if (disk->slave_dir) @@ -545,7 +546,7 @@ void del_gendisk(struct gendisk *disk) kfree(disk_name); } put_device(disk->driverfs_dev); + disk->driverfs_dev = NULL; } - kobject_uevent(&disk->kobj, KOBJ_REMOVE); kobject_del(&disk->kobj); } diff --git a/fs/pipe.c b/fs/pipe.c index 7fefb10db8d9..5acd8954aaa0 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe) } static int -pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) +pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, + int atomic) { unsigned long copy; @@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) iov++; copy = min_t(unsigned long, len, iov->iov_len); - if (copy_from_user(to, iov->iov_base, copy)) - return -EFAULT; + if (atomic) { + if (__copy_from_user_inatomic(to, iov->iov_base, copy)) + return -EFAULT; + } else { + if (copy_from_user(to, iov->iov_base, copy)) + return -EFAULT; + } to += copy; len -= copy; iov->iov_base += copy; @@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) } static int -pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) +pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, + int atomic) { unsigned long copy; @@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) iov++; copy = min_t(unsigned long, len, iov->iov_len); - if (copy_to_user(iov->iov_base, from, copy)) - return -EFAULT; + if (atomic) { + if (__copy_to_user_inatomic(iov->iov_base, from, copy)) + return -EFAULT; + } else { + if (copy_to_user(iov->iov_base, from, copy)) + return -EFAULT; + } from += copy; len -= copy; iov->iov_base += copy; @@ -94,13 +106,52 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) return 0; } +/* + * Attempt to pre-fault in the user memory, so we can use atomic copies. + * Returns the number of bytes not faulted in. + */ +static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) +{ + while (!iov->iov_len) + iov++; + + while (len > 0) { + unsigned long this_len; + + this_len = min_t(unsigned long, len, iov->iov_len); + if (fault_in_pages_writeable(iov->iov_base, this_len)) + break; + + len -= this_len; + iov++; + } + + return len; +} + +/* + * Pre-fault in the user memory, so we can use atomic copies. + */ +static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) +{ + while (!iov->iov_len) + iov++; + + while (len > 0) { + unsigned long this_len; + + this_len = min_t(unsigned long, len, iov->iov_len); + fault_in_pages_readable(iov->iov_base, this_len); + len -= this_len; + iov++; + } +} + static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; - buf->flags &= ~PIPE_BUF_FLAG_STOLEN; - /* * If nobody else uses this page, and we don't already have a * temporary page, let's keep track of it as a one-deep @@ -112,38 +163,58 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, page_cache_release(page); } -static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +void *generic_pipe_buf_map(struct pipe_inode_info *pipe, + struct pipe_buffer *buf, int atomic) { + if (atomic) { + buf->flags |= PIPE_BUF_FLAG_ATOMIC; + return kmap_atomic(buf->page, KM_USER0); + } + return kmap(buf->page); } -static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, + struct pipe_buffer *buf, void *map_data) { - kunmap(buf->page); + if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { + buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; + kunmap_atomic(map_data, KM_USER0); + } else + kunmap(buf->page); } -static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +int generic_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { - buf->flags |= PIPE_BUF_FLAG_STOLEN; - return 0; + struct page *page = buf->page; + + if (page_count(page) == 1) { + lock_page(page); + return 0; + } + + return 1; } -static void anon_pipe_buf_get(struct pipe_inode_info *info, - struct pipe_buffer *buf) +void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_get(buf->page); } +int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf) +{ + return 0; +} + static struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, - .map = anon_pipe_buf_map, - .unmap = anon_pipe_buf_unmap, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = generic_pipe_buf_pin, .release = anon_pipe_buf_release, - .steal = anon_pipe_buf_steal, - .get = anon_pipe_buf_get, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, }; static ssize_t @@ -174,22 +245,33 @@ pipe_readv(struct file *filp, const struct iovec *_iov, struct pipe_buf_operations *ops = buf->ops; void *addr; size_t chars = buf->len; - int error; + int error, atomic; if (chars > total_len) chars = total_len; - addr = ops->map(filp, pipe, buf); - if (IS_ERR(addr)) { + error = ops->pin(pipe, buf); + if (error) { if (!ret) - ret = PTR_ERR(addr); + error = ret; break; } - error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); - ops->unmap(pipe, buf); + + atomic = !iov_fault_in_pages_write(iov, chars); +redo: + addr = ops->map(pipe, buf, atomic); + error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); + ops->unmap(pipe, buf, addr); if (unlikely(error)) { + /* + * Just retry with the slow path if we failed. + */ + if (atomic) { + atomic = 0; + goto redo; + } if (!ret) - ret = -EFAULT; + ret = error; break; } ret += chars; @@ -293,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov, int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { + int error, atomic = 1; void *addr; - int error; - addr = ops->map(filp, pipe, buf); - if (IS_ERR(addr)) { - error = PTR_ERR(addr); + error = ops->pin(pipe, buf); + if (error) goto out; - } + + iov_fault_in_pages_read(iov, chars); +redo1: + addr = ops->map(pipe, buf, atomic); error = pipe_iov_copy_from_user(offset + addr, iov, - chars); - ops->unmap(pipe, buf); + chars, atomic); + ops->unmap(pipe, buf, addr); ret = error; do_wakeup = 1; - if (error) + if (error) { + if (atomic) { + atomic = 0; + goto redo1; + } goto out; + } buf->len += chars; total_len -= chars; ret = chars; @@ -330,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov, int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); struct pipe_buffer *buf = pipe->bufs + newbuf; struct page *page = pipe->tmp_page; - int error; + char *src; + int error, atomic = 1; if (!page) { page = alloc_page(GFP_HIGHUSER); @@ -350,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov, if (chars > total_len) chars = total_len; - error = pipe_iov_copy_from_user(kmap(page), iov, chars); - kunmap(page); + iov_fault_in_pages_read(iov, chars); +redo2: + if (atomic) + src = kmap_atomic(page, KM_USER0); + else + src = kmap(page); + + error = pipe_iov_copy_from_user(src, iov, chars, + atomic); + if (atomic) + kunmap_atomic(src, KM_USER0); + else + kunmap(page); + if (unlikely(error)) { + if (atomic) { + atomic = 0; + goto redo2; + } if (!ret) - ret = -EFAULT; + ret = error; break; } ret += chars; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 58c418fbca2c..97ae1b92bc47 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -408,8 +408,9 @@ int reiserfs_cache_default_acl(struct inode *inode) acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); reiserfs_read_unlock_xattrs(inode->i_sb); reiserfs_read_unlock_xattr_i(inode); - ret = acl ? 1 : 0; - posix_acl_release(acl); + ret = (acl && !IS_ERR(acl)); + if (ret) + posix_acl_release(acl); } return ret; diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 34c7a11d91f0..70d9c5a37f5a 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -434,6 +434,11 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (dentry->d_name.len > SMB_MAXNAMELEN) goto out; + /* Do not allow lookup of names with backslashes in */ + error = -EINVAL; + if (memchr(dentry->d_name.name, '\\', dentry->d_name.len)) + goto out; + lock_kernel(); error = smb_proc_getattr(dentry, &finfo); #ifdef SMBFS_PARANOIA diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index c71c375863cc..c71dd2760d32 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -339,9 +339,11 @@ int smb_add_request(struct smb_request *req) /* * On timeout or on interrupt we want to try and remove the * request from the recvq/xmitq. + * First check if the request is still part of a queue. (May + * have been removed by some error condition) */ smb_lock_server(server); - if (!(req->rq_flags & SMB_REQ_RECEIVED)) { + if (!list_empty(&req->rq_queue)) { list_del_init(&req->rq_queue); smb_rput(req); } diff --git a/fs/splice.c b/fs/splice.c index 0559e7577a04..a285fd746dc0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -27,15 +27,22 @@ #include <linux/buffer_head.h> #include <linux/module.h> #include <linux/syscalls.h> +#include <linux/uio.h> + +struct partial_page { + unsigned int offset; + unsigned int len; +}; /* - * Passed to the actors + * Passed to splice_to_pipe */ -struct splice_desc { - unsigned int len, total_len; /* current and remaining length */ +struct splice_pipe_desc { + struct page **pages; /* page map */ + struct partial_page *partial; /* pages[] may not be contig */ + int nr_pages; /* number of pages in map */ unsigned int flags; /* splice flags */ - struct file *file; /* file to read/write */ - loff_t pos; /* file position */ + struct pipe_buf_operations *ops;/* ops associated with output pipe */ }; /* @@ -44,7 +51,7 @@ struct splice_desc { * addition of remove_mapping(). If success is returned, the caller may * attempt to reuse this page for another destination. */ -static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, +static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; @@ -71,21 +78,19 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, return 1; } - buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; + buf->flags |= PIPE_BUF_FLAG_LRU; return 0; } -static void page_cache_pipe_buf_release(struct pipe_inode_info *info, +static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { page_cache_release(buf->page); - buf->page = NULL; - buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); + buf->flags &= ~PIPE_BUF_FLAG_LRU; } -static void *page_cache_pipe_buf_map(struct file *file, - struct pipe_inode_info *info, - struct pipe_buffer *buf) +static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { struct page *page = buf->page; int err; @@ -111,51 +116,59 @@ static void *page_cache_pipe_buf_map(struct file *file, } /* - * Page is ok afterall, fall through to mapping. + * Page is ok afterall, we are done. */ unlock_page(page); } - return kmap(page); + return 0; error: unlock_page(page); - return ERR_PTR(err); + return err; } -static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, - struct pipe_buffer *buf) -{ - kunmap(buf->page); -} +static struct pipe_buf_operations page_cache_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = page_cache_pipe_buf_pin, + .release = page_cache_pipe_buf_release, + .steal = page_cache_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; -static void page_cache_pipe_buf_get(struct pipe_inode_info *info, +static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - page_cache_get(buf->page); + if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) + return 1; + + buf->flags |= PIPE_BUF_FLAG_LRU; + return generic_pipe_buf_steal(pipe, buf); } -static struct pipe_buf_operations page_cache_pipe_buf_ops = { +static struct pipe_buf_operations user_page_pipe_buf_ops = { .can_merge = 0, - .map = page_cache_pipe_buf_map, - .unmap = page_cache_pipe_buf_unmap, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = generic_pipe_buf_pin, .release = page_cache_pipe_buf_release, - .steal = page_cache_pipe_buf_steal, - .get = page_cache_pipe_buf_get, + .steal = user_page_pipe_buf_steal, + .get = generic_pipe_buf_get, }; /* * Pipe output worker. This sets up our pipe format with the page cache * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). */ -static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, - int nr_pages, unsigned long len, - unsigned int offset, unsigned int flags) +static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) { - int ret, do_wakeup, i; + int ret, do_wakeup, page_nr; ret = 0; do_wakeup = 0; - i = 0; + page_nr = 0; if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); @@ -171,27 +184,22 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, if (pipe->nrbufs < PIPE_BUFFERS) { int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); struct pipe_buffer *buf = pipe->bufs + newbuf; - struct page *page = pages[i++]; - unsigned long this_len; - this_len = PAGE_CACHE_SIZE - offset; - if (this_len > len) - this_len = len; + buf->page = spd->pages[page_nr]; + buf->offset = spd->partial[page_nr].offset; + buf->len = spd->partial[page_nr].len; + buf->ops = spd->ops; + if (spd->flags & SPLICE_F_GIFT) + buf->flags |= PIPE_BUF_FLAG_GIFT; - buf->page = page; - buf->offset = offset; - buf->len = this_len; - buf->ops = &page_cache_pipe_buf_ops; pipe->nrbufs++; + page_nr++; + ret += buf->len; + if (pipe->inode) do_wakeup = 1; - ret += this_len; - len -= this_len; - offset = 0; - if (!--nr_pages) - break; - if (!len) + if (!--spd->nr_pages) break; if (pipe->nrbufs < PIPE_BUFFERS) continue; @@ -199,7 +207,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, break; } - if (flags & SPLICE_F_NONBLOCK) { + if (spd->flags & SPLICE_F_NONBLOCK) { if (!ret) ret = -EAGAIN; break; @@ -234,8 +242,8 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - while (i < nr_pages) - page_cache_release(pages[i++]); + while (page_nr < spd->nr_pages) + page_cache_release(spd->pages[page_nr++]); return ret; } @@ -246,17 +254,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, unsigned int flags) { struct address_space *mapping = in->f_mapping; - unsigned int loff, offset, nr_pages; + unsigned int loff, nr_pages; struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; struct page *page; pgoff_t index, end_index; loff_t isize; - size_t bytes; - int i, error; + size_t total_len; + int error, page_nr; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &page_cache_pipe_buf_ops, + }; index = *ppos >> PAGE_CACHE_SHIFT; - loff = offset = *ppos & ~PAGE_CACHE_MASK; - nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + loff = *ppos & ~PAGE_CACHE_MASK; + nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (nr_pages > PIPE_BUFFERS) nr_pages = PIPE_BUFFERS; @@ -266,47 +281,83 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * read-ahead if this is a non-zero offset (we are likely doing small * chunk splice and the page is already there) for a single page. */ - if (!offset || nr_pages > 1) - do_page_cache_readahead(mapping, in, index, nr_pages); + if (!loff || nr_pages > 1) + page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); /* * Now fill in the holes: */ error = 0; - bytes = 0; - for (i = 0; i < nr_pages; i++, index++) { - unsigned int this_len; + total_len = 0; - if (!len) - break; + /* + * Lookup the (hopefully) full range of pages we need. + */ + spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); + /* + * If find_get_pages_contig() returned fewer pages than we needed, + * allocate the rest. + */ + index += spd.nr_pages; + while (spd.nr_pages < nr_pages) { /* - * this_len is the max we'll use from this page - */ - this_len = min(len, PAGE_CACHE_SIZE - loff); -find_page: - /* - * lookup the page for this index + * Page could be there, find_get_pages_contig() breaks on + * the first hole. */ page = find_get_page(mapping, index); if (!page) { /* - * page didn't exist, allocate one + * Make sure the read-ahead engine is notified + * about this failure. + */ + handle_ra_miss(mapping, &in->f_ra, index); + + /* + * page didn't exist, allocate one. */ page = page_cache_alloc_cold(mapping); if (!page) break; error = add_to_page_cache_lru(page, mapping, index, - mapping_gfp_mask(mapping)); + mapping_gfp_mask(mapping)); if (unlikely(error)) { page_cache_release(page); + if (error == -EEXIST) + continue; break; } - - goto readpage; + /* + * add_to_page_cache() locks the page, unlock it + * to avoid convoluting the logic below even more. + */ + unlock_page(page); } + pages[spd.nr_pages++] = page; + index++; + } + + /* + * Now loop over the map and see if we need to start IO on any + * pages, fill in the partial map, etc. + */ + index = *ppos >> PAGE_CACHE_SHIFT; + nr_pages = spd.nr_pages; + spd.nr_pages = 0; + for (page_nr = 0; page_nr < nr_pages; page_nr++) { + unsigned int this_len; + + if (!len) + break; + + /* + * this_len is the max we'll use from this page + */ + this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); + page = pages[page_nr]; + /* * If the page isn't uptodate, we may need to start io on it */ @@ -327,7 +378,6 @@ find_page: */ if (!page->mapping) { unlock_page(page); - page_cache_release(page); break; } /* @@ -338,16 +388,20 @@ find_page: goto fill_it; } -readpage: /* * need to read in the page */ error = mapping->a_ops->readpage(in, page); - if (unlikely(error)) { - page_cache_release(page); + /* + * We really should re-lookup the page here, + * but it complicates things a lot. Instead + * lets just do what we already stored, and + * we'll get it the next time we are called. + */ if (error == AOP_TRUNCATED_PAGE) - goto find_page; + error = 0; + break; } @@ -356,10 +410,8 @@ readpage: */ isize = i_size_read(mapping->host); end_index = (isize - 1) >> PAGE_CACHE_SHIFT; - if (unlikely(!isize || index > end_index)) { - page_cache_release(page); + if (unlikely(!isize || index > end_index)) break; - } /* * if this is the last page, see if we need to shrink @@ -367,26 +419,35 @@ readpage: */ if (end_index == index) { loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); - if (bytes + loff > isize) { - page_cache_release(page); + if (total_len + loff > isize) break; - } /* * force quit after adding this page */ - nr_pages = i; + len = this_len; this_len = min(this_len, loff); + loff = 0; } } fill_it: - pages[i] = page; - bytes += this_len; + partial[page_nr].offset = loff; + partial[page_nr].len = this_len; len -= this_len; + total_len += this_len; loff = 0; + spd.nr_pages++; + index++; } - if (i) - return move_to_pipe(pipe, pages, i, bytes, offset, flags); + /* + * Release any pages at the end, if we quit early. 'i' is how far + * we got, 'nr_pages' is how many pages are in the map. + */ + while (page_nr < nr_pages) + page_cache_release(pages[page_nr++]); + + if (spd.nr_pages) + return splice_to_pipe(pipe, &spd); return error; } @@ -439,38 +500,24 @@ EXPORT_SYMBOL(generic_file_splice_read); /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' - * using sendpage(). + * using sendpage(). Return the number of bytes sent. */ -static int pipe_to_sendpage(struct pipe_inode_info *info, +static int pipe_to_sendpage(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct file *file = sd->file; loff_t pos = sd->pos; - unsigned int offset; - ssize_t ret; - void *ptr; - int more; + int ret, more; - /* - * Sub-optimal, but we are limited by the pipe ->map. We don't - * need a kmap'ed buffer here, we just want to make sure we - * have the page pinned if the pipe page originates from the - * page cache. - */ - ptr = buf->ops->map(file, info, buf); - if (IS_ERR(ptr)) - return PTR_ERR(ptr); - - offset = pos & ~PAGE_CACHE_MASK; - more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; + ret = buf->ops->pin(pipe, buf); + if (!ret) { + more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; - ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); - - buf->ops->unmap(info, buf); - if (ret == sd->len) - return 0; + ret = file->f_op->sendpage(file, buf->page, buf->offset, + sd->len, &pos, more); + } - return -EIO; + return ret; } /* @@ -493,43 +540,51 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. */ -static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, +static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct file *file = sd->file; struct address_space *mapping = file->f_mapping; gfp_t gfp_mask = mapping_gfp_mask(mapping); - unsigned int offset; + unsigned int offset, this_len; struct page *page; pgoff_t index; - char *src; int ret; /* * make sure the data in this buffer is uptodate */ - src = buf->ops->map(file, info, buf); - if (IS_ERR(src)) - return PTR_ERR(src); + ret = buf->ops->pin(pipe, buf); + if (unlikely(ret)) + return ret; index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; + this_len = sd->len; + if (this_len + offset > PAGE_CACHE_SIZE) + this_len = PAGE_CACHE_SIZE - offset; + /* - * Reuse buf page, if SPLICE_F_MOVE is set. + * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full + * page. */ - if (sd->flags & SPLICE_F_MOVE) { + if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { /* - * If steal succeeds, buf->page is now pruned from the vm - * side (LRU and page cache) and we can reuse it. The page - * will also be looked on successful return. + * If steal succeeds, buf->page is now pruned from the + * pagecache and we can reuse it. The page will also be + * locked on successful return. */ - if (buf->ops->steal(info, buf)) + if (buf->ops->steal(pipe, buf)) goto find_page; page = buf->page; - if (add_to_page_cache(page, mapping, index, gfp_mask)) + if (add_to_page_cache(page, mapping, index, gfp_mask)) { + unlock_page(page); goto find_page; + } + + page_cache_get(page); if (!(buf->flags & PIPE_BUF_FLAG_LRU)) lru_cache_add(page); @@ -558,7 +613,7 @@ find_page: * the full page. */ if (!PageUptodate(page)) { - if (sd->len < PAGE_CACHE_SIZE) { + if (this_len < PAGE_CACHE_SIZE) { ret = mapping->a_ops->readpage(file, page); if (unlikely(ret)) goto out; @@ -582,51 +637,67 @@ find_page: } } - ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); - if (ret == AOP_TRUNCATED_PAGE) { + ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); + if (unlikely(ret)) { + loff_t isize = i_size_read(mapping->host); + + if (ret != AOP_TRUNCATED_PAGE) + unlock_page(page); page_cache_release(page); - goto find_page; - } else if (ret) + if (ret == AOP_TRUNCATED_PAGE) + goto find_page; + + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. + */ + if (sd->pos + this_len > isize) + vmtruncate(mapping->host, isize); + goto out; + } - if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { - char *dst = kmap_atomic(page, KM_USER0); + if (buf->page != page) { + /* + * Careful, ->map() uses KM_USER0! + */ + char *src = buf->ops->map(pipe, buf, 1); + char *dst = kmap_atomic(page, KM_USER1); - memcpy(dst + offset, src + buf->offset, sd->len); + memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); - kunmap_atomic(dst, KM_USER0); + kunmap_atomic(dst, KM_USER1); + buf->ops->unmap(pipe, buf, src); } - ret = mapping->a_ops->commit_write(file, page, 0, sd->len); - if (ret == AOP_TRUNCATED_PAGE) { + ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); + if (!ret) { + /* + * Return the number of bytes written and mark page as + * accessed, we are now done! + */ + ret = this_len; + mark_page_accessed(page); + balance_dirty_pages_ratelimited(mapping); + } else if (ret == AOP_TRUNCATED_PAGE) { page_cache_release(page); goto find_page; - } else if (ret) - goto out; - - mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); + } out: - if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) - page_cache_release(page); - + page_cache_release(page); unlock_page(page); out_nomem: - buf->ops->unmap(info, buf); return ret; } -typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, - struct splice_desc *); - /* * Pipe input worker. Most of this logic works like a regular pipe, the * key here is the 'actor' worker passed in that actually moves the data * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. */ -static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags, - splice_actor *actor) +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor) { int ret, do_wakeup, err; struct splice_desc sd; @@ -652,16 +723,22 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, sd.len = sd.total_len; err = actor(pipe, buf, &sd); - if (err) { + if (err <= 0) { if (!ret && err != -ENODATA) ret = err; break; } - ret += sd.len; - buf->offset += sd.len; - buf->len -= sd.len; + ret += err; + buf->offset += err; + buf->len -= err; + + sd.len -= err; + sd.pos += err; + sd.total_len -= err; + if (sd.len) + continue; if (!buf->len) { buf->ops = NULL; @@ -672,8 +749,6 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, do_wakeup = 1; } - sd.pos += sd.len; - sd.total_len -= sd.len; if (!sd.total_len) break; } @@ -741,7 +816,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, struct address_space *mapping = out->f_mapping; ssize_t ret; - ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { struct inode *inode = mapping->host; @@ -783,7 +858,7 @@ EXPORT_SYMBOL(generic_file_splice_write); ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); + return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); } EXPORT_SYMBOL(generic_splice_sendpage); @@ -870,7 +945,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, /* * We don't have an immediate reader, but we'll read the stuff - * out of the pipe right after the move_to_pipe(). So set + * out of the pipe right after the splice_to_pipe(). So set * PIPE_READERS appropriately. */ pipe->readers = 1; @@ -1010,6 +1085,184 @@ static long do_splice(struct file *in, loff_t __user *off_in, return -EINVAL; } +/* + * Map an iov into an array of pages and offset/length tupples. With the + * partial_page structure, we can map several non-contiguous ranges into + * our ones pages[] map instead of splitting that operation into pieces. + * Could easily be exported as a generic helper for other users, in which + * case one would probably want to add a 'max_nr_pages' parameter as well. + */ +static int get_iovec_page_array(const struct iovec __user *iov, + unsigned int nr_vecs, struct page **pages, + struct partial_page *partial, int aligned) +{ + int buffers = 0, error = 0; + + /* + * It's ok to take the mmap_sem for reading, even + * across a "get_user()". + */ + down_read(¤t->mm->mmap_sem); + + while (nr_vecs) { + unsigned long off, npages; + void __user *base; + size_t len; + int i; + + /* + * Get user address base and length for this iovec. + */ + error = get_user(base, &iov->iov_base); + if (unlikely(error)) + break; + error = get_user(len, &iov->iov_len); + if (unlikely(error)) + break; + + /* + * Sanity check this iovec. 0 read succeeds. + */ + if (unlikely(!len)) + break; + error = -EFAULT; + if (unlikely(!base)) + break; + + /* + * Get this base offset and number of pages, then map + * in the user pages. + */ + off = (unsigned long) base & ~PAGE_MASK; + + /* + * If asked for alignment, the offset must be zero and the + * length a multiple of the PAGE_SIZE. + */ + error = -EINVAL; + if (aligned && (off || len & ~PAGE_MASK)) + break; + + npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (npages > PIPE_BUFFERS - buffers) + npages = PIPE_BUFFERS - buffers; + + error = get_user_pages(current, current->mm, + (unsigned long) base, npages, 0, 0, + &pages[buffers], NULL); + + if (unlikely(error <= 0)) + break; + + /* + * Fill this contiguous range into the partial page map. + */ + for (i = 0; i < error; i++) { + const int plen = min_t(size_t, len, PAGE_SIZE - off); + + partial[buffers].offset = off; + partial[buffers].len = plen; + + off = 0; + len -= plen; + buffers++; + } + + /* + * We didn't complete this iov, stop here since it probably + * means we have to move some of this into a pipe to + * be able to continue. + */ + if (len) + break; + + /* + * Don't continue if we mapped fewer pages than we asked for, + * or if we mapped the max number of pages that we have + * room for. + */ + if (error < npages || buffers == PIPE_BUFFERS) + break; + + nr_vecs--; + iov++; + } + + up_read(¤t->mm->mmap_sem); + + if (buffers) + return buffers; + + return error; +} + +/* + * vmsplice splices a user address range into a pipe. It can be thought of + * as splice-from-memory, where the regular splice is splice-from-file (or + * to file). In both cases the output is a pipe, naturally. + * + * Note that vmsplice only supports splicing _from_ user memory to a pipe, + * not the other way around. Splicing from user memory is a simple operation + * that can be supported without any funky alignment restrictions or nasty + * vm tricks. We simply map in the user memory and fill them into a pipe. + * The reverse isn't quite as easy, though. There are two possible solutions + * for that: + * + * - memcpy() the data internally, at which point we might as well just + * do a regular read() on the buffer anyway. + * - Lots of nasty vm tricks, that are neither fast nor flexible (it + * has restriction limitations on both ends of the pipe). + * + * Alas, it isn't here. + * + */ +static long do_vmsplice(struct file *file, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) +{ + struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe; + struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &user_page_pipe_buf_ops, + }; + + if (unlikely(!pipe)) + return -EBADF; + if (unlikely(nr_segs > UIO_MAXIOV)) + return -EINVAL; + else if (unlikely(!nr_segs)) + return 0; + + spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, + flags & SPLICE_F_GIFT); + if (spd.nr_pages <= 0) + return spd.nr_pages; + + return splice_to_pipe(pipe, &spd); +} + +asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) +{ + struct file *file; + long error; + int fput; + + error = -EBADF; + file = fget_light(fd, &fput); + if (file) { + if (file->f_mode & FMODE_WRITE) + error = do_vmsplice(file, iov, nr_segs, flags); + + fput_light(file, fput); + } + + return error; +} + asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, int fd_out, loff_t __user *off_out, size_t len, unsigned int flags) @@ -1092,6 +1345,12 @@ static int link_pipe(struct pipe_inode_info *ipipe, obuf = opipe->bufs + nbuf; *obuf = *ibuf; + /* + * Don't inherit the gift flag, we need to + * prevent multiple steals of this page. + */ + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + if (obuf->len > len) obuf->len = len; diff --git a/fs/stat.c b/fs/stat.c index 9948cc1685a4..0f282face322 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -261,7 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf) return error; } -#ifndef __ARCH_WANT_STAT64 +#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) asmlinkage long sys_newfstatat(int dfd, char __user *filename, struct stat __user *statbuf, int flag) { diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 64ee07db0d5e..8558226281c4 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1942,8 +1942,10 @@ xfs_alloc_fix_freelist( /* * Allocate as many blocks as possible at once. */ - if ((error = xfs_alloc_ag_vextent(&targs))) + if ((error = xfs_alloc_ag_vextent(&targs))) { + xfs_trans_brelse(tp, agflbp); return error; + } /* * Stop if we run out. Won't happen if callers are obeying * the restrictions correctly. Can happen for free calls @@ -1960,6 +1962,7 @@ xfs_alloc_fix_freelist( return error; } } + xfs_trans_brelse(tp, agflbp); args->agbp = agbp; return 0; } diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 81a05cfd77d2..1f148762eb28 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -316,6 +316,18 @@ xfs_rename( } } + /* + * If we are using project inheritance, we only allow renames + * into our tree when the project IDs are the same; else the + * tree quota mechanism would be circumvented. + */ + if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && + (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { + error = XFS_ERROR(EXDEV); + xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); + goto rele_return; + } + new_parent = (src_dp != target_dp); src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index f0e09ca14139..36ea1b2094f2 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -669,31 +669,22 @@ xfs_mntupdate( xfs_mount_t *mp = XFS_BHVTOM(bdp); int error; - if (args->flags & XFSMNT_BARRIER) - mp->m_flags |= XFS_MOUNT_BARRIER; - else - mp->m_flags &= ~XFS_MOUNT_BARRIER; - - if ((vfsp->vfs_flag & VFS_RDONLY) && - !(*flags & MS_RDONLY)) { - vfsp->vfs_flag &= ~VFS_RDONLY; - - if (args->flags & XFSMNT_BARRIER) + if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */ + if (vfsp->vfs_flag & VFS_RDONLY) + vfsp->vfs_flag &= ~VFS_RDONLY; + if (args->flags & XFSMNT_BARRIER) { + mp->m_flags |= XFS_MOUNT_BARRIER; xfs_mountfs_check_barriers(mp); - } - - if (!(vfsp->vfs_flag & VFS_RDONLY) && - (*flags & MS_RDONLY)) { + } else { + mp->m_flags &= ~XFS_MOUNT_BARRIER; + } + } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error); - xfs_quiesce_fs(mp); - - /* Ok now write out an unmount record */ xfs_log_unmount_write(mp); xfs_unmountfs_writesb(mp); vfsp->vfs_flag |= VFS_RDONLY; } - return 0; } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index fa71b305ba5c..7027ae68ee38 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2663,7 +2663,7 @@ xfs_link( */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (tdp->i_d.di_projid != sip->i_d.di_projid))) { - error = XFS_ERROR(EPERM); + error = XFS_ERROR(EXDEV); goto error_return; } |