1 /* vi: set sw=4 ts=4: */
3 * unix_io.c --- This is the Unix (well, really POSIX) implementation
6 * Implements a one-block write-through cache.
8 * Includes support for Windows NT under Cygwin.
10 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
11 * 2002 by Theodore Ts'o.
14 * This file may be redistributed under the terms of the GNU Public
30 #include <sys/utsname.h>
36 #include <sys/types.h>
38 #include <sys/resource.h>
44 * For checking structure magic numbers...
/*
 * Return `code` from the calling function when (struct)->magic does not
 * match it; otherwise fall through to the next statement.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement.  A bare `if` expansion would capture a following `else`
 * whenever the macro is used as the body of an unbraced if/else.
 * Callers keep writing EXT2_CHECK_MAGIC(ptr, CODE); as before.
 */
#define EXT2_CHECK_MAGIC(struct, code) \
	do { \
		if ((struct)->magic != (code)) \
			return (code); \
	} while (0)
/*
 * Block-count limits for the cached I/O paths.  unix_read_blk() and
 * unix_write_blk() both bypass the cache when count exceeds
 * WRITE_DIRECT_SIZE; unix_read_blk() sizes its reuse[] array with
 * READ_DIRECT_SIZE.
 */
59 #define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */
60 #define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */
/*
 * Private state of a channel opened through this I/O manager; unix_open()
 * stores a pointer to it in io->private_data.  Besides the block cache,
 * code in this file also uses its magic, dev, offset and access_time
 * members.
 */
62 struct unix_private_data {
68 struct unix_cache cache[CACHE_SIZE];
/*
 * Forward declarations for the channel operations implemented in this file
 * and for the reuse_cache() helper.
 */
71 static errcode_t unix_open(const char *name, int flags, io_channel *channel);
72 static errcode_t unix_close(io_channel channel);
73 static errcode_t unix_set_blksize(io_channel channel, int blksize);
74 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
75 int count, void *data);
76 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
77 int count, const void *data);
78 static errcode_t unix_flush(io_channel channel);
79 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
80 int size, const void *data);
81 static errcode_t unix_set_option(io_channel channel, const char *option,
84 static void reuse_cache(io_channel channel, struct unix_private_data *data,
85 struct unix_cache *cache, unsigned long block);
87 /* __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel
88 * does not know buffered block devices - everything is raw. */
/* The platforms tested here get the bounce-buffer variant of raw_read_blk(). */
89 #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
90 #define NEED_BOUNCE_BUFFER
92 #undef NEED_BOUNCE_BUFFER
/*
 * Descriptor for this I/O manager; its first member is the
 * EXT2_ET_MAGIC_IO_MANAGER magic.  unix_io_manager is the non-static handle
 * that points at it, and unix_open() stores that handle in io->manager.
 */
95 static struct struct_io_manager struct_unix_manager = {
96 EXT2_ET_MAGIC_IO_MANAGER,
104 #ifdef NEED_BOUNCE_BUFFER
112 io_manager unix_io_manager = &struct_unix_manager;
115 * Here are the raw I/O functions
117 #ifndef NEED_BOUNCE_BUFFER
/*
 * Raw (uncached) read, compiled when NEED_BOUNCE_BUFFER is not defined.
 *
 * A non-negative count requests count blocks of channel->block_size bytes;
 * a negative count requests -count bytes.  The transfer starts at byte
 * position block * block_size + data->offset on data->dev.
 *
 * A seek that does not land on that position produces errno, or
 * EXT2_ET_LLSEEK_FAILED when errno is 0.  When read() returns something
 * other than the requested size, EXT2_ET_SHORT_READ is one of the possible
 * results.  The function also zero-fills buf past the bytes actually read
 * and, when channel->read_error is set, takes its result from that hook.
 */
118 static errcode_t raw_read_blk(io_channel channel,
119 struct unix_private_data *data,
121 int count, void *buf)
125 ext2_loff_t location;
128 size = (count < 0) ? -count : count * channel->block_size; /* bytes to transfer */
129 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
130 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
131 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
134 actual = read(data->dev, buf, size);
135 if (actual != size) {
138 retval = EXT2_ET_SHORT_READ;
144 memset((char *) buf+actual, 0, size-actual); /* clear the unread tail */
145 if (channel->read_error)
146 retval = (channel->read_error)(channel, block, count, buf,
147 size, actual, retval);
150 #else /* NEED_BOUNCE_BUFFER */
152 * Windows and FreeBSD block devices only allow sector-aligned I/O (in both offset and size)
/*
 * Raw (uncached) read for platforms that define NEED_BOUNCE_BUFFER.
 *
 * The request (count blocks, or -count bytes when count is negative) is
 * split at a BLOCKALIGN (512-byte) boundary: the aligned leading part is
 * read straight into buf, and a trailing fragment is served by reading one
 * whole sector into a local buffer and copying only the fragment out.
 *
 * NOTE(review): the printf() arguments do not all match their conversions
 * (size is a size_t printed with %d, location is an ext2_loff_t printed
 * with %lx) -- presumably debug-only output; confirm before enabling it.
 * NOTE(review): the zero-fill offsets buf by total but sizes the fill with
 * actual; check that the two agree on every short-read path.
 * NOTE(review): buf+alignsize is arithmetic on a void pointer, which is a
 * compiler extension rather than standard C.
 */
154 static errcode_t raw_read_blk(io_channel channel,
155 struct unix_private_data *data,
157 int count, void *buf)
160 size_t size, alignsize, fragment;
161 ext2_loff_t location;
162 int total = 0, actual;
163 #define BLOCKALIGN 512
164 char sector[BLOCKALIGN];
166 size = (count < 0) ? -count : count * channel->block_size;
167 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
169 printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n",
170 count, size, block, channel->block_size, location);
172 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
173 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
176 fragment = size % BLOCKALIGN;
177 alignsize = size - fragment;
179 actual = read(data->dev, buf, alignsize);
180 if (actual != alignsize)
184 actual = read(data->dev, sector, BLOCKALIGN);
185 if (actual != BLOCKALIGN)
187 memcpy(buf+alignsize, sector, fragment);
194 retval = EXT2_ET_SHORT_READ;
197 memset((char *) buf+total, 0, size-actual);
198 if (channel->read_error)
199 retval = (channel->read_error)(channel, block, count, buf,
200 size, actual, retval);
/*
 * Raw (uncached) write of buf to data->dev.
 *
 * The byte length is derived from count and channel->block_size, and the
 * target position is block * block_size + data->offset.  A seek that does
 * not land on that position produces errno, or EXT2_ET_LLSEEK_FAILED when
 * errno is 0.  A write() that does not transfer the full length produces
 * EXT2_ET_SHORT_WRITE.  When channel->write_error is set, the result is
 * taken from that hook.
 */
205 static errcode_t raw_write_blk(io_channel channel,
206 struct unix_private_data *data,
208 int count, const void *buf)
211 ext2_loff_t location;
216 size = channel->block_size;
221 size = count * channel->block_size;
224 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
225 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
226 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
230 actual = write(data->dev, buf, size);
231 if (actual != size) {
232 retval = EXT2_ET_SHORT_WRITE;
238 if (channel->write_error)
239 retval = (channel->write_error)(channel, block, count, buf,
240 size, actual, retval);
246 * Here we implement the cache functions
249 /*
 * Allocate the cache buffers.
 *
 * Resets data->access_time and the access_time of every entry in
 * data->cache, and requests memory sized by channel->block_size through
 * ext2fs_get_mem() (presumably one buffer per cache entry -- TODO confirm).
 */
250 static errcode_t alloc_cache(io_channel channel,
251 struct unix_private_data *data)
254 struct unix_cache *cache;
257 data->access_time = 0;
258 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
260 cache->access_time = 0;
263 if ((retval = ext2fs_get_mem(channel->block_size,
270 /*
 * Free the cache buffers.
 *
 * Resets data->access_time and the access_time of every entry in
 * data->cache, and releases entry buffers with ext2fs_free_mem().
 */
271 static void free_cache(struct unix_private_data *data)
273 struct unix_cache *cache;
276 data->access_time = 0;
277 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
279 cache->access_time = 0;
282 ext2fs_free_mem(&cache->buf);
289 * Try to find a block in the cache. If the block is not found, and
290 * eldest is a non-zero pointer, then fill in eldest with the cache
291 * entry that should be reused.
/*
 * Scan data->cache for an entry holding the requested block.  While
 * scanning, the function tracks an entry that is not in use and the entry
 * with the smallest access_time, so that a replacement candidate can be
 * reported through eldest.
 */
293 static struct unix_cache *find_cached_block(struct unix_private_data *data,
295 struct unix_cache **eldest)
297 struct unix_cache *cache, *unused_cache, *oldest_cache;
300 unused_cache = oldest_cache = 0;
301 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
302 if (!cache->in_use) {
304 unused_cache = cache;
307 if (cache->block == block) {
308 cache->access_time = ++data->access_time; /* a hit refreshes the timestamp */
312 (cache->access_time < oldest_cache->access_time))
313 oldest_cache = cache;
316 *eldest = (unused_cache) ? unused_cache : oldest_cache; /* prefer an unused entry over the oldest one */
321 * Reuse a particular cache entry for another block.
/*
 * Rebind a cache entry to a different block.  An entry that is both in use
 * and dirty is first written out through raw_write_blk(); the entry then
 * takes the new block number and a fresh access_time.
 *
 * NOTE(review): the result of raw_write_blk() is discarded, so a failed
 * write-back is not reported to the caller -- confirm this is intended.
 */
323 static void reuse_cache(io_channel channel, struct unix_private_data *data,
324 struct unix_cache *cache, unsigned long block)
326 if (cache->dirty && cache->in_use)
327 raw_write_blk(channel, data, cache->block, 1, cache->buf);
331 cache->block = block;
332 cache->access_time = ++data->access_time;
336 * Flush all of the blocks in the cache
/*
 * Walk every entry of data->cache and write cached blocks back to the
 * device, one block per raw_write_blk() call.
 *
 * Callers in this file pass 0 or 1 as the third argument; presumably it
 * selects whether entries are invalidated as well -- TODO confirm.
 */
338 static errcode_t flush_cached_blocks(io_channel channel,
339 struct unix_private_data *data,
343 struct unix_cache *cache;
344 errcode_t retval, retval2;
348 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
358 retval = raw_write_blk(channel, data,
359 cache->block, 1, cache->buf);
367 #endif /* NO_IO_CACHE */
/*
 * Open the device or file called name and set up a channel for it
 * (presumably handed back through *channel -- TODO confirm).
 *
 * Allocates the channel, its private data and a copy of name; sets the
 * channel magic, the manager and a default block_size of 1024; allocates
 * the block cache; then opens the device with O_RDWR when flags contains
 * IO_FLAG_RW and with O_RDONLY otherwise.  One early exit returns
 * EXT2_ET_BAD_DEVICE_NAME.  The data and io allocations are released with
 * ext2fs_free_mem() at the end of the function (presumably the failure
 * path -- TODO confirm).
 *
 * For read-write opens of block devices on matching 2.4 kernels, the
 * RLIMIT_FSIZE limit is raised.
 * NOTE(review): the release check accepts a sixth character from 0 up to
 * but not including 8, i.e. 2.4.10 through 2.4.17, while the accompanying
 * comment speaks of 2.4.10-2.4.18 -- confirm which range is intended.
 */
369 static errcode_t unix_open(const char *name, int flags, io_channel *channel)
371 io_channel io = NULL;
372 struct unix_private_data *data = NULL;
381 return EXT2_ET_BAD_DEVICE_NAME;
382 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
385 memset(io, 0, sizeof(struct struct_io_channel));
386 io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
387 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
391 io->manager = unix_io_manager;
392 retval = ext2fs_get_mem(strlen(name)+1, &io->name);
396 strcpy(io->name, name);
397 io->private_data = data;
398 io->block_size = 1024;
403 memset(data, 0, sizeof(struct unix_private_data));
404 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
406 if ((retval = alloc_cache(io, data)))
409 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
411 data->dev = open64(io->name, open_flags);
413 data->dev = open(io->name, open_flags);
422 #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
423 #define RLIM_INFINITY ((unsigned long)(~0UL>>1))
425 #define RLIM_INFINITY (~0UL)
428 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
429 * block devices are wrongly getting hit by the filesize
430 * limit. This workaround isn't perfect, since it won't work
431 * if glibc wasn't built against 2.2 header files. (Sigh.)
434 if ((flags & IO_FLAG_RW) &&
436 ((ut.release[0] == '2') && (ut.release[1] == '.') &&
437 (ut.release[2] == '4') && (ut.release[3] == '.') &&
438 (ut.release[4] == '1') && (ut.release[5] >= '0') &&
439 (ut.release[5] < '8')) &&
440 (fstat(data->dev, &st) == 0) &&
441 (S_ISBLK(st.st_mode))) {
444 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
445 setrlimit(RLIMIT_FSIZE, &rlim);
446 getrlimit(RLIMIT_FSIZE, &rlim);
447 if (((unsigned long) rlim.rlim_cur) <
448 ((unsigned long) rlim.rlim_max)) {
449 rlim.rlim_cur = rlim.rlim_max;
450 setrlimit(RLIMIT_FSIZE, &rlim);
460 ext2fs_free_mem(&data);
462 ext2fs_free_mem(&io);
/*
 * Drop one reference to the channel.  The refcount is decremented first;
 * while it stays positive the function presumably returns without tearing
 * anything down (TODO confirm).  Otherwise the cache is flushed, the
 * descriptor is closed, and the private data, the name and the channel
 * itself are freed.
 */
466 static errcode_t unix_close(io_channel channel)
468 struct unix_private_data *data;
469 errcode_t retval = 0;
471 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
472 data = (struct unix_private_data *) channel->private_data;
473 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
475 if (--channel->refcount > 0)
479 retval = flush_cached_blocks(channel, data, 0);
482 if (close(data->dev) < 0)
486 ext2fs_free_mem(&channel->private_data);
487 ext2fs_free_mem(&channel->name);
488 ext2fs_free_mem(&channel);
/*
 * Change the block size of the channel.  After the magic checks, a test
 * that blksize differs from channel->block_size guards the update: the
 * cache is flushed, the new size is stored, and the cache is set up again
 * with alloc_cache().
 */
492 static errcode_t unix_set_blksize(io_channel channel, int blksize)
494 struct unix_private_data *data;
497 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
498 data = (struct unix_private_data *) channel->private_data;
499 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
501 if (channel->block_size != blksize) {
503 if ((retval = flush_cached_blocks(channel, data, 0)))
507 channel->block_size = blksize;
509 if ((retval = alloc_cache(channel, data)))
/*
 * Read count blocks starting at block into buf, going through the block
 * cache.  A negative count is passed through to raw_read_blk() unchanged.
 *
 * Negative or large counts flush the cache and go straight to
 * raw_read_blk().  Otherwise a block found in the cache is copied out of
 * it, and a run of uncached blocks is fetched with a single raw_read_blk()
 * call and then stored in cache entries via reuse_cache().
 *
 * NOTE(review): reuse[] has READ_DIRECT_SIZE slots and is indexed up to
 * count - 1, but the cached path is entered for counts up to
 * WRITE_DIRECT_SIZE.  The two constants are equal today; raising
 * WRITE_DIRECT_SIZE alone would overrun reuse[].
 * NOTE(review): the printf() calls format block, an unsigned long, with
 * %d -- presumably debug-only output.
 */
516 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
517 int count, void *buf)
519 struct unix_private_data *data;
520 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
525 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
526 data = (struct unix_private_data *) channel->private_data;
527 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
530 return raw_read_blk(channel, data, block, count, buf);
533 * If we're doing an odd-sized read or a very large read,
534 * flush out the cache and then do a direct read.
536 if (count < 0 || count > WRITE_DIRECT_SIZE) {
537 if ((retval = flush_cached_blocks(channel, data, 0)))
539 return raw_read_blk(channel, data, block, count, buf);
544 /* If it's in the cache, use it! */
545 if ((cache = find_cached_block(data, block, &reuse[0]))) {
547 printf("Using cached block %d\n", block);
549 memcpy(cp, cache->buf, channel->block_size);
552 cp += channel->block_size;
556 * Find the number of uncached blocks so we can do a
557 * single read request
559 for (i=1; i < count; i++)
560 if (find_cached_block(data, block+i, &reuse[i]))
563 printf("Reading %d blocks starting at %d\n", i, block);
565 if ((retval = raw_read_blk(channel, data, block, i, cp)))
568 /* Save the results in the cache */
569 for (j=0; j < i; j++) {
572 reuse_cache(channel, data, cache, block++);
573 memcpy(cache->buf, cp, channel->block_size);
574 cp += channel->block_size;
578 #endif /* NO_IO_CACHE */
/*
 * Write count blocks from buf starting at block, keeping the cache in step.
 *
 * Negative or large counts (above WRITE_DIRECT_SIZE) flush the cache and
 * go straight to raw_write_blk().  Otherwise, per the in-code comment, the
 * data is written to the device first when the channel has
 * CHANNEL_FLAGS_WRITETHROUGH set; each block is then copied into a cache
 * entry, which is marked dirty only when write-through is off.
 */
581 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
582 int count, const void *buf)
584 struct unix_private_data *data;
585 struct unix_cache *cache, *reuse;
586 errcode_t retval = 0;
590 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
591 data = (struct unix_private_data *) channel->private_data;
592 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
595 return raw_write_blk(channel, data, block, count, buf);
598 * If we're doing an odd-sized write or a very large write,
599 * flush out the cache completely and then do a direct write.
601 if (count < 0 || count > WRITE_DIRECT_SIZE) {
602 if ((retval = flush_cached_blocks(channel, data, 1)))
604 return raw_write_blk(channel, data, block, count, buf);
608 * For a moderate-sized multi-block write, first force a write
609 * if we're in write-through cache mode, and then fill the
610 * cache with the blocks.
612 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
614 retval = raw_write_blk(channel, data, block, count, buf);
618 cache = find_cached_block(data, block, &reuse);
621 reuse_cache(channel, data, cache, block);
623 memcpy(cache->buf, cp, channel->block_size);
624 cache->dirty = !writethrough;
627 cp += channel->block_size;
630 #endif /* NO_IO_CACHE */
/*
 * Write size bytes from buf at byte position offset + data->offset,
 * bypassing the block cache.  The cache is flushed first, the descriptor
 * is positioned with lseek(), and the data is written with write().
 * EXT2_ET_SHORT_WRITE is one of the possible results of the write step.
 *
 * NOTE(review): this path positions with lseek() while the raw block
 * routines use ext2fs_llseek(); check that large offsets behave the same.
 */
633 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
634 int size, const void *buf)
636 struct unix_private_data *data;
637 errcode_t retval = 0;
640 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
641 data = (struct unix_private_data *) channel->private_data;
642 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
646 * Flush out the cache completely
648 if ((retval = flush_cached_blocks(channel, data, 1)))
652 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
655 actual = write(data->dev, buf, size);
657 return EXT2_ET_SHORT_WRITE;
663 * Flush data buffers to disk.
/*
 * Flush the channel: after the magic checks on the channel and its private
 * data, the cached blocks are written out with
 * flush_cached_blocks(channel, data, 0).
 */
665 static errcode_t unix_flush(io_channel channel)
667 struct unix_private_data *data;
668 errcode_t retval = 0;
670 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
671 data = (struct unix_private_data *) channel->private_data;
672 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
675 retval = flush_cached_blocks(channel, data, 0);
/*
 * Handle a named option for the channel.  The only option name compared
 * here is offset, whose argument is parsed with strtoul() using base 0, so
 * decimal, octal and hexadecimal spellings are accepted.  Rejected input
 * yields EXT2_ET_INVALID_ARGUMENT.
 */
681 static errcode_t unix_set_option(io_channel channel, const char *option,
684 struct unix_private_data *data;
688 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
689 data = (struct unix_private_data *) channel->private_data;
690 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
692 if (!strcmp(option, "offset")) {
694 return EXT2_ET_INVALID_ARGUMENT;
696 tmp = strtoul(arg, &end, 0);
698 return EXT2_ET_INVALID_ARGUMENT;
702 return EXT2_ET_INVALID_ARGUMENT;