summaryrefslogtreecommitdiffstats
path: root/fs/orangefs/dir.c
blob: cf0ebb06b84e9e1c09091834fe9cff6375f2840f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/*
 * Copyright 2017 Omnibond Systems, L.L.C.
 */

#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"

/*
 * There can be up to 512 directory entries.  Each entry is encoded as
 * follows:
 * 4 bytes: string size (n)
 * n bytes: string
 * 1 byte: trailing zero
 * padding to 8 bytes
 * 16 bytes: khandle
 * padding to 8 bytes
 *
 * MAX_DIRECTORY is the size in bytes of 512 worst-case entries.  Per
 * entry: 4 (string size) + 257 (string plus trailing zero; presumably
 * a 256 byte maximum name, confirm against the protocol's name limit)
 * + 3 (padding from 261 up to 264) + 16 (khandle, already a multiple
 * of 8) = 280 bytes.
 */
#define MAX_DIRECTORY ((4 + 257 + 3 + 16)*512)

/*
 * Per-open-file readdir state, stored in file->private_data.
 */
struct orangefs_dir {
	/* Token sent with the next readdir upcall; replaced by the
	 * token found in each response. */
	__u64 token;
	/* Buffer of MAX_DIRECTORY bytes holding the encoded entries
	 * received so far. */
	void *directory;
	/* Byte offset into directory of the next entry to emit. */
	size_t i;
	/* Number of bytes of directory that hold received data. */
	size_t len;
	/* First error encountered; once nonzero, iteration returns it
	 * without doing any further work. */
	int error;
};

/*
 * The userspace component sends several directory entries in the
 * following format.  The first four bytes are the string length, not
 * including the trailing zero byte.  This is followed by the string
 * and its trailing zero; the length, string, and zero together are
 * padded to the next eight byte boundary.  This is followed by the
 * sixteen byte khandle, padded to the next eight byte boundary.
 *
 * The trailer_buf starts with a struct orangefs_readdir_response_s
 * which must be skipped to get to the directory data.
 */

/*
 * Ask userspace for the next batch of directory entries and append the
 * payload to od->directory.
 *
 * One ORANGEFS_VFS_OP_READDIR upcall is issued for oi->refn, resuming
 * from od->token.  On success od->token is replaced by the token found
 * in the response and od->len grows by the number of payload bytes
 * copied.
 *
 * Returns 0 on success.  On failure returns the error (a negative
 * errno, or the nonzero downcall status) and also records it in
 * od->error.
 */
static int orangefs_dir_more(struct orangefs_inode_s *oi,
    struct orangefs_dir *od, struct dentry *dentry)
{
	const size_t offset =
	    sizeof(struct orangefs_readdir_response_s);
	struct orangefs_readdir_response_s *resp;
	struct orangefs_kernel_op_s *op;
	size_t datalen;
	int bufi, r;

	op = op_alloc(ORANGEFS_VFS_OP_READDIR);
	if (!op) {
		od->error = -ENOMEM;
		return -ENOMEM;
	}

	/*
	 * Despite the badly named field, readdir does not use shared
	 * memory.  However, there are a limited number of readdir
	 * slots, which must be allocated here.  This flag simply tells
	 * the op scheduler to return the op here for retry.
	 */
	op->uses_shared_memory = 1;
	op->upcall.req.readdir.refn = oi->refn;
	op->upcall.req.readdir.token = od->token;
	op->upcall.req.readdir.max_dirent_count =
	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;

again:
	bufi = orangefs_readdir_index_get();
	if (bufi < 0) {
		/* No trailer is owned at this point, so only the op is
		 * released. */
		r = bufi;
		goto out_release;
	}

	op->upcall.req.readdir.buf_index = bufi;

	r = service_operation(op, "orangefs_readdir",
	    get_interruptible_flag(dentry->d_inode));

	orangefs_readdir_index_put(bufi);

	if (op_state_purged(op) && r == -EAGAIN) {
		/*
		 * Clear the pointer after freeing it so that a failure
		 * on the retry cannot free the same buffer twice.
		 */
		vfree(op->downcall.trailer_buf);
		op->downcall.trailer_buf = NULL;
		goto again;
	}

	if (r < 0)
		goto out_free;

	if (op->downcall.status) {
		/* Read the status before the op is released below. */
		r = op->downcall.status;
		goto out_free;
	}

	/*
	 * The trailer must at least hold the response header, otherwise
	 * reading the token would run off the buffer and the payload
	 * size computed below would wrap.
	 */
	if (!op->downcall.trailer_buf ||
	    op->downcall.trailer_size < (long long)offset) {
		gossip_err("orangefs_dir_more: userspace sent too little data\n");
		r = -EIO;
		goto out_free;
	}

	resp = (struct orangefs_readdir_response_s *)
	    op->downcall.trailer_buf;
	od->token = resp->token;

	datalen = op->downcall.trailer_size - offset;
	/* od->len never exceeds MAX_DIRECTORY, so this cannot wrap. */
	if (datalen > MAX_DIRECTORY - od->len) {
		/* This limit was chosen based on protocol limits. */
		gossip_err("orangefs_dir_more: userspace sent too much data\n");
		r = -EIO;
		goto out_free;
	}

	memcpy(od->directory + od->len,
	    op->downcall.trailer_buf + offset, datalen);
	od->len += datalen;
	r = 0;

out_free:
	vfree(op->downcall.trailer_buf);
out_release:
	op_release(op);
	if (r)
		od->error = r;
	return r;
}

/*
 * Hand stored directory entries to the VFS, starting at byte offset
 * od->i, until the stored data is exhausted or dir_emit refuses an
 * entry.  od->i and ctx->pos advance past every entry that was
 * accepted.
 *
 * oi and dentry are not used here.
 *
 * Returns 0 on success, including when dir_emit stops early.  Returns
 * -EIO, and records it in od->error, if the stored data is malformed.
 */
static int orangefs_dir_fill(struct orangefs_inode_s *oi,
    struct orangefs_dir *od, struct dentry *dentry,
    struct dir_context *ctx)
{
	struct orangefs_khandle *khandle;
	size_t avail, namelen, padlen;
	__u32 *len;
	char *s;

	while (od->i < od->len) {
		avail = od->len - od->i;
		if (avail < sizeof *len)
			goto eio;
		len = od->directory + od->i;
		/*
		 * The length comes from userspace.  Reject anything
		 * larger than the remaining data before doing any
		 * arithmetic on it, so that the sums below can neither
		 * wrap nor be truncated.
		 */
		namelen = *len;
		if (namelen > avail)
			goto eio;
		/*
		 * namelen is the size of the string itself.  padlen is
		 * the total size of the encoded string: the length
		 * word, the string and its trailing zero, rounded up
		 * to a multiple of eight.
		 */
		padlen = sizeof *len + namelen + 1;
		padlen += (8 - padlen%8)%8;
		if (avail < padlen + sizeof *khandle)
			goto eio;
		s = od->directory + od->i + sizeof *len;
		if (s[namelen] != 0)
			goto eio;
		khandle = od->directory + od->i + padlen;

		if (!dir_emit(ctx, s, *len,
		    orangefs_khandle_to_ino(khandle), DT_UNKNOWN))
			return 0;
		od->i += padlen + sizeof *khandle;
		/* The khandle is padded to eight bytes as well. */
		od->i = od->i + (8 - od->i%8)%8;
		/* Positions 0 and 1 are "." and "..". */
		ctx->pos = 2 + od->i;
	}
	BUG_ON(od->i > od->len);
	return 0;
eio:
	gossip_err("orangefs_dir_fill: userspace returns corrupt data\n");
	od->error = -EIO;
	return -EIO;
}

/*
 * Directory iteration entry point.
 *
 * Emits "." and ".." at positions 0 and 1, then the entries already
 * stored in the open file's orangefs_dir, and finally, unless the
 * token says the directory is exhausted, fetches one more batch from
 * userspace and emits from it.
 *
 * Returns 0 on success or when the caller cannot accept more entries.
 * Otherwise returns the error reported by orangefs_dir_fill or
 * orangefs_dir_more; an error recorded earlier in od->error is
 * returned again without doing any work.
 */
static int orangefs_dir_iterate(struct file *file,
    struct dir_context *ctx)
{
	struct orangefs_inode_s *oi;
	struct orangefs_dir *od;
	struct dentry *dentry;
	int r;

	dentry = file->f_path.dentry;
	oi = ORANGEFS_I(dentry->d_inode);
	od = file->private_data;

	if (od->error)
		return od->error;

	if (ctx->pos == 0) {
		if (!dir_emit_dot(file, ctx))
			return 0;
		ctx->pos++;
	}
	if (ctx->pos == 1) {
		if (!dir_emit_dotdot(file, ctx))
			return 0;
		ctx->pos++;
	}

	r = 0;

	/* First hand out whatever is left from earlier batches. */
	if (od->i < od->len) {
		r = orangefs_dir_fill(oi, od, dentry, ctx);
		if (r)
			return r;
		/*
		 * Stored entries remain, so dir_emit refused one: the
		 * caller has no room left.  Fetching another batch now
		 * would cost an upcall whose entries could not be
		 * emitted, and would fill the buffer sooner.
		 */
		if (od->i < od->len)
			return 0;
	}

	/* Then get some more and try to emit from that. */
	if (od->token != ORANGEFS_READDIR_END) {
		r = orangefs_dir_more(oi, od, dentry);
		if (r)
			return r;
		r = orangefs_dir_fill(oi, od, dentry, ctx);
	}

	return r;
}

/*
 * Allocate and initialise the readdir state for a newly opened
 * directory and attach it to file->private_data.
 *
 * Returns 0 on success or -ENOMEM if either allocation fails.  On
 * failure file->private_data is NULL, so it never refers to freed
 * memory.
 */
static int orangefs_dir_open(struct inode *inode, struct file *file)
{
	struct orangefs_dir *od;

	file->private_data = NULL;

	od = kmalloc(sizeof *od, GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	/*
	 * XXX: It seems wasteful to allocate such a large buffer for
	 * each request.  Most will be much smaller.
	 */
	od->directory = alloc_pages_exact(MAX_DIRECTORY, GFP_KERNEL);
	if (!od->directory) {
		kfree(od);
		return -ENOMEM;
	}

	od->token = ORANGEFS_READDIR_START;
	od->i = 0;
	od->len = 0;
	od->error = 0;

	/* Publish the state only once it is completely set up. */
	file->private_data = od;
	return 0;
}

/*
 * Flush the inode, then free the directory buffer and the readdir
 * state that orangefs_dir_open attached to the file.  Always returns 0.
 */
static int orangefs_dir_release(struct inode *inode, struct file *file)
{
	struct orangefs_dir *state;

	orangefs_flush_inode(inode);

	state = file->private_data;
	/* The size must match the one passed to alloc_pages_exact. */
	free_pages_exact(state->directory, MAX_DIRECTORY);
	kfree(state);

	return 0;
}

/*
 * File operations for directories.  open and release manage the
 * per-file readdir state, iterate emits the entries, and read is
 * delegated to generic_read_dir.
 */
const struct file_operations orangefs_dir_operations = {
	.open = orangefs_dir_open,
	.release = orangefs_dir_release,
	.read = generic_read_dir,
	.iterate = orangefs_dir_iterate,
};