Introduce bdrv_check (Kevin Wolf)
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33
34 #ifdef HOST_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
47 #define SECTOR_BITS 9
48 #define SECTOR_SIZE (1 << SECTOR_BITS)
49
/* Per-request control block whose size bdrv_register() installs as
   aiocb_size when it adds the AIO emulation layer to a driver. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;
    int is_write;
} BlockDriverAIOCBSync;
59
60 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
61         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62         BlockDriverCompletionFunc *cb, void *opaque);
63 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
64         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
65         BlockDriverCompletionFunc *cb, void *opaque);
66 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
67 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
68                         uint8_t *buf, int nb_sectors);
69 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
70                          const uint8_t *buf, int nb_sectors);
71
72 BlockDriverState *bdrv_first;
73
74 static BlockDriver *first_drv;
75
/* Return non-zero when 'path' is absolute. Everything up to and
   including the first ':' (if any) is skipped before looking for the
   leading directory separator. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*start == '\\')
        return 1;
#endif
    return (*start == '/');
}
95
/* Store in 'dest' (capacity 'dest_size') the location of 'filename'.
   An absolute 'filename' is copied unchanged; otherwise it is appended
   to the leading part of 'base_path' that ends after its last
   directory separator or after its first ':', whichever is later. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash;
        backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep)
            after_sep = backslash;
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    dir_end = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1)
        prefix_len = dest_size - 1;
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
139
140
141 static void bdrv_register(BlockDriver *bdrv)
142 {
143     if (!bdrv->bdrv_aio_readv) {
144         /* add AIO emulation layer */
145         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
146         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
147         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
148         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
149     } else if (!bdrv->bdrv_read) {
150         /* add synchronous IO emulation layer */
151         bdrv->bdrv_read = bdrv_read_em;
152         bdrv->bdrv_write = bdrv_write_em;
153     }
154     aio_pool_init(&bdrv->aio_pool, bdrv->aiocb_size, bdrv->bdrv_aio_cancel);
155     bdrv->next = first_drv;
156     first_drv = bdrv;
157 }
158
159 /* create a new block device (by default it is empty) */
160 BlockDriverState *bdrv_new(const char *device_name)
161 {
162     BlockDriverState **pbs, *bs;
163
164     bs = qemu_mallocz(sizeof(BlockDriverState));
165     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
166     if (device_name[0] != '\0') {
167         /* insert at the end */
168         pbs = &bdrv_first;
169         while (*pbs != NULL)
170             pbs = &(*pbs)->next;
171         *pbs = bs;
172     }
173     return bs;
174 }
175
176 BlockDriver *bdrv_find_format(const char *format_name)
177 {
178     BlockDriver *drv1;
179     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
180         if (!strcmp(drv1->format_name, format_name))
181             return drv1;
182     }
183     return NULL;
184 }
185
186 int bdrv_create2(BlockDriver *drv,
187                 const char *filename, int64_t size_in_sectors,
188                 const char *backing_file, const char *backing_format,
189                 int flags)
190 {
191     if (drv->bdrv_create2)
192         return drv->bdrv_create2(filename, size_in_sectors, backing_file,
193                                  backing_format, flags);
194     if (drv->bdrv_create)
195         return drv->bdrv_create(filename, size_in_sectors, backing_file,
196                                 flags);
197     return -ENOTSUP;
198 }
199
200 int bdrv_create(BlockDriver *drv,
201                 const char *filename, int64_t size_in_sectors,
202                 const char *backing_file, int flags)
203 {
204     if (!drv->bdrv_create)
205         return -ENOTSUP;
206     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
207 }
208
#ifdef _WIN32
/* Store the name of a fresh temporary file in 'filename'.
   NOTE(review): 'size' is not used here; GetTempFileName() is given
   'filename' directly, so the caller's buffer presumably has to hold
   MAX_PATH characters -- confirm against callers. */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/* Store in 'filename' (capacity 'size') the name of a temporary file
   created under $TMPDIR, or under /tmp when TMPDIR is unset. The file
   is created by mkstemp() and closed again right away. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; do not hand that to close() */
    if (fd >= 0)
        close(fd);
}
#endif
231
232 #ifdef _WIN32
/* Non-zero when 'filename' starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper)
        return 0;
    return (filename[1] == ':');
}
239
/* Non-zero when 'filename' is exactly a drive letter plus ':' or
   starts with the "\\.\" or "//./" prefix. */
static int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';

    if (bare_drive)
        return 1;
    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL));
}
250 #endif
251
252 static BlockDriver *find_protocol(const char *filename)
253 {
254     BlockDriver *drv1;
255     char protocol[128];
256     int len;
257     const char *p;
258
259 #ifdef _WIN32
260     if (is_windows_drive(filename) ||
261         is_windows_drive_prefix(filename))
262         return &bdrv_raw;
263 #endif
264     p = strchr(filename, ':');
265     if (!p)
266         return &bdrv_raw;
267     len = p - filename;
268     if (len > sizeof(protocol) - 1)
269         len = sizeof(protocol) - 1;
270     memcpy(protocol, filename, len);
271     protocol[len] = '\0';
272     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
273         if (drv1->protocol_name &&
274             !strcmp(drv1->protocol_name, protocol))
275             return drv1;
276     }
277     return NULL;
278 }
279
280 /* XXX: force raw format if block or character device ? It would
281    simplify the BSD case */
282 static BlockDriver *find_image_format(const char *filename)
283 {
284     int ret, score, score_max;
285     BlockDriver *drv1, *drv;
286     uint8_t buf[2048];
287     BlockDriverState *bs;
288
289     /* detect host devices. By convention, /dev/cdrom[N] is always
290        recognized as a host CDROM */
291     if (strstart(filename, "/dev/cdrom", NULL))
292         return &bdrv_host_device;
293 #ifdef _WIN32
294     if (is_windows_drive(filename))
295         return &bdrv_host_device;
296 #else
297     {
298         struct stat st;
299         if (stat(filename, &st) >= 0 &&
300             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
301             return &bdrv_host_device;
302         }
303     }
304 #endif
305
306     drv = find_protocol(filename);
307     /* no need to test disk image formats for vvfat */
308     if (drv == &bdrv_vvfat)
309         return drv;
310
311     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
312     if (ret < 0)
313         return NULL;
314     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
315     bdrv_delete(bs);
316     if (ret < 0) {
317         return NULL;
318     }
319
320     score_max = 0;
321     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
322         if (drv1->bdrv_probe) {
323             score = drv1->bdrv_probe(buf, ret, filename);
324             if (score > score_max) {
325                 score_max = score;
326                 drv = drv1;
327             }
328         }
329     }
330     return drv;
331 }
332
333 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
334 {
335     BlockDriverState *bs;
336     int ret;
337
338     bs = bdrv_new("");
339     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
340     if (ret < 0) {
341         bdrv_delete(bs);
342         return ret;
343     }
344     bs->growable = 1;
345     *pbs = bs;
346     return 0;
347 }
348
349 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
350 {
351     return bdrv_open2(bs, filename, flags, NULL);
352 }
353
354 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
355                BlockDriver *drv)
356 {
357     int ret, open_flags;
358     char tmp_filename[PATH_MAX];
359     char backing_filename[PATH_MAX];
360
361     bs->read_only = 0;
362     bs->is_temporary = 0;
363     bs->encrypted = 0;
364     bs->valid_key = 0;
365
366     if (flags & BDRV_O_SNAPSHOT) {
367         BlockDriverState *bs1;
368         int64_t total_size;
369         int is_protocol = 0;
370
371         /* if snapshot, we create a temporary backing file and open it
372            instead of opening 'filename' directly */
373
374         /* if there is a backing file, use it */
375         bs1 = bdrv_new("");
376         ret = bdrv_open2(bs1, filename, 0, drv);
377         if (ret < 0) {
378             bdrv_delete(bs1);
379             return ret;
380         }
381         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
382
383         if (bs1->drv && bs1->drv->protocol_name)
384             is_protocol = 1;
385
386         bdrv_delete(bs1);
387
388         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
389
390         /* Real path is meaningless for protocols */
391         if (is_protocol)
392             snprintf(backing_filename, sizeof(backing_filename),
393                      "%s", filename);
394         else
395             realpath(filename, backing_filename);
396
397         ret = bdrv_create2(&bdrv_qcow2, tmp_filename,
398                            total_size, backing_filename, 
399                            (drv ? drv->format_name : NULL), 0);
400         if (ret < 0) {
401             return ret;
402         }
403         filename = tmp_filename;
404         drv = &bdrv_qcow2;
405         bs->is_temporary = 1;
406     }
407
408     pstrcpy(bs->filename, sizeof(bs->filename), filename);
409     if (flags & BDRV_O_FILE) {
410         drv = find_protocol(filename);
411     } else if (!drv) {
412         drv = find_image_format(filename);
413     }
414     if (!drv) {
415         ret = -ENOENT;
416         goto unlink_and_fail;
417     }
418     bs->drv = drv;
419     bs->opaque = qemu_mallocz(drv->instance_size);
420     /* Note: for compatibility, we open disk image files as RDWR, and
421        RDONLY as fallback */
422     if (!(flags & BDRV_O_FILE))
423         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
424     else
425         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
426     ret = drv->bdrv_open(bs, filename, open_flags);
427     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
428         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
429         bs->read_only = 1;
430     }
431     if (ret < 0) {
432         qemu_free(bs->opaque);
433         bs->opaque = NULL;
434         bs->drv = NULL;
435     unlink_and_fail:
436         if (bs->is_temporary)
437             unlink(filename);
438         return ret;
439     }
440     if (drv->bdrv_getlength) {
441         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
442     }
443 #ifndef _WIN32
444     if (bs->is_temporary) {
445         unlink(filename);
446     }
447 #endif
448     if (bs->backing_file[0] != '\0') {
449         /* if there is a backing file, use it */
450         BlockDriver *back_drv = NULL;
451         bs->backing_hd = bdrv_new("");
452         path_combine(backing_filename, sizeof(backing_filename),
453                      filename, bs->backing_file);
454         if (bs->backing_format[0] != '\0')
455             back_drv = bdrv_find_format(bs->backing_format);
456         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
457                          back_drv);
458         if (ret < 0) {
459             bdrv_close(bs);
460             return ret;
461         }
462     }
463
464     if (!bdrv_key_required(bs)) {
465         /* call the change callback */
466         bs->media_changed = 1;
467         if (bs->change_cb)
468             bs->change_cb(bs->change_opaque);
469     }
470     return 0;
471 }
472
473 void bdrv_close(BlockDriverState *bs)
474 {
475     if (bs->drv) {
476         if (bs->backing_hd)
477             bdrv_delete(bs->backing_hd);
478         bs->drv->bdrv_close(bs);
479         qemu_free(bs->opaque);
480 #ifdef _WIN32
481         if (bs->is_temporary) {
482             unlink(bs->filename);
483         }
484 #endif
485         bs->opaque = NULL;
486         bs->drv = NULL;
487
488         /* call the change callback */
489         bs->media_changed = 1;
490         if (bs->change_cb)
491             bs->change_cb(bs->change_opaque);
492     }
493 }
494
495 void bdrv_delete(BlockDriverState *bs)
496 {
497     BlockDriverState **pbs;
498
499     pbs = &bdrv_first;
500     while (*pbs != bs && *pbs != NULL)
501         pbs = &(*pbs)->next;
502     if (*pbs == bs)
503         *pbs = bs->next;
504
505     bdrv_close(bs);
506     qemu_free(bs);
507 }
508
509 /*
510  * Run consistency checks on an image
511  *
512  * Returns the number of errors or -errno when an internal error occurs
513  */
514 int bdrv_check(BlockDriverState *bs)
515 {
516     if (bs->drv->bdrv_check == NULL) {
517         return -ENOTSUP;
518     }
519
520     return bs->drv->bdrv_check(bs);
521 }
522
523 /* commit COW file into the raw image */
524 int bdrv_commit(BlockDriverState *bs)
525 {
526     BlockDriver *drv = bs->drv;
527     int64_t i, total_sectors;
528     int n, j;
529     unsigned char sector[512];
530
531     if (!drv)
532         return -ENOMEDIUM;
533
534     if (bs->read_only) {
535         return -EACCES;
536     }
537
538     if (!bs->backing_hd) {
539         return -ENOTSUP;
540     }
541
542     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
543     for (i = 0; i < total_sectors;) {
544         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
545             for(j = 0; j < n; j++) {
546                 if (bdrv_read(bs, i, sector, 1) != 0) {
547                     return -EIO;
548                 }
549
550                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
551                     return -EIO;
552                 }
553                 i++;
554             }
555         } else {
556             i += n;
557         }
558     }
559
560     if (drv->bdrv_make_empty)
561         return drv->bdrv_make_empty(bs);
562
563     return 0;
564 }
565
566 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
567                                    size_t size)
568 {
569     int64_t len;
570
571     if (!bdrv_is_inserted(bs))
572         return -ENOMEDIUM;
573
574     if (bs->growable)
575         return 0;
576
577     len = bdrv_getlength(bs);
578
579     if ((offset + size) > len)
580         return -EIO;
581
582     return 0;
583 }
584
585 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
586                               int nb_sectors)
587 {
588     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
589 }
590
591 /* return < 0 if error. See bdrv_write() for the return codes */
592 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
593               uint8_t *buf, int nb_sectors)
594 {
595     BlockDriver *drv = bs->drv;
596
597     if (!drv)
598         return -ENOMEDIUM;
599     if (bdrv_check_request(bs, sector_num, nb_sectors))
600         return -EIO;
601
602     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
603 }
604
605 /* Return < 0 if error. Important errors are:
606   -EIO         generic I/O error (may happen for all errors)
607   -ENOMEDIUM   No media inserted.
608   -EINVAL      Invalid sector number or nb_sectors
609   -EACCES      Trying to write a read-only device
610 */
611 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
612                const uint8_t *buf, int nb_sectors)
613 {
614     BlockDriver *drv = bs->drv;
615     if (!bs->drv)
616         return -ENOMEDIUM;
617     if (bs->read_only)
618         return -EACCES;
619     if (bdrv_check_request(bs, sector_num, nb_sectors))
620         return -EIO;
621
622     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
623 }
624
625 int bdrv_pread(BlockDriverState *bs, int64_t offset,
626                void *buf, int count1)
627 {
628     uint8_t tmp_buf[SECTOR_SIZE];
629     int len, nb_sectors, count;
630     int64_t sector_num;
631
632     count = count1;
633     /* first read to align to sector start */
634     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
635     if (len > count)
636         len = count;
637     sector_num = offset >> SECTOR_BITS;
638     if (len > 0) {
639         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
640             return -EIO;
641         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
642         count -= len;
643         if (count == 0)
644             return count1;
645         sector_num++;
646         buf += len;
647     }
648
649     /* read the sectors "in place" */
650     nb_sectors = count >> SECTOR_BITS;
651     if (nb_sectors > 0) {
652         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
653             return -EIO;
654         sector_num += nb_sectors;
655         len = nb_sectors << SECTOR_BITS;
656         buf += len;
657         count -= len;
658     }
659
660     /* add data from the last sector */
661     if (count > 0) {
662         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
663             return -EIO;
664         memcpy(buf, tmp_buf, count);
665     }
666     return count1;
667 }
668
669 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
670                 const void *buf, int count1)
671 {
672     uint8_t tmp_buf[SECTOR_SIZE];
673     int len, nb_sectors, count;
674     int64_t sector_num;
675
676     count = count1;
677     /* first write to align to sector start */
678     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
679     if (len > count)
680         len = count;
681     sector_num = offset >> SECTOR_BITS;
682     if (len > 0) {
683         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
684             return -EIO;
685         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
686         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
687             return -EIO;
688         count -= len;
689         if (count == 0)
690             return count1;
691         sector_num++;
692         buf += len;
693     }
694
695     /* write the sectors "in place" */
696     nb_sectors = count >> SECTOR_BITS;
697     if (nb_sectors > 0) {
698         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
699             return -EIO;
700         sector_num += nb_sectors;
701         len = nb_sectors << SECTOR_BITS;
702         buf += len;
703         count -= len;
704     }
705
706     /* add data from the last sector */
707     if (count > 0) {
708         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
709             return -EIO;
710         memcpy(tmp_buf, buf, count);
711         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
712             return -EIO;
713     }
714     return count1;
715 }
716
717 /**
718  * Truncate file to 'offset' bytes (needed only for file protocols)
719  */
720 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
721 {
722     BlockDriver *drv = bs->drv;
723     if (!drv)
724         return -ENOMEDIUM;
725     if (!drv->bdrv_truncate)
726         return -ENOTSUP;
727     return drv->bdrv_truncate(bs, offset);
728 }
729
730 /**
731  * Length of a file in bytes. Return < 0 if error or unknown.
732  */
733 int64_t bdrv_getlength(BlockDriverState *bs)
734 {
735     BlockDriver *drv = bs->drv;
736     if (!drv)
737         return -ENOMEDIUM;
738     if (!drv->bdrv_getlength) {
739         /* legacy mode */
740         return bs->total_sectors * SECTOR_SIZE;
741     }
742     return drv->bdrv_getlength(bs);
743 }
744
745 /* return 0 as number of sectors if no device present or error */
746 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
747 {
748     int64_t length;
749     length = bdrv_getlength(bs);
750     if (length < 0)
751         length = 0;
752     else
753         length = length >> SECTOR_BITS;
754     *nb_sectors_ptr = length;
755 }
756
/* One entry of the MSDOS partition table; guess_disk_lchs() reads four
   of these starting at offset 0x1be of sector 0. The struct is packed,
   and nr_sects is converted with le32_to_cpu() before use. */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
769
770 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
771 static int guess_disk_lchs(BlockDriverState *bs,
772                            int *pcylinders, int *pheads, int *psectors)
773 {
774     uint8_t buf[512];
775     int ret, i, heads, sectors, cylinders;
776     struct partition *p;
777     uint32_t nr_sects;
778     uint64_t nb_sectors;
779
780     bdrv_get_geometry(bs, &nb_sectors);
781
782     ret = bdrv_read(bs, 0, buf, 1);
783     if (ret < 0)
784         return -1;
785     /* test msdos magic */
786     if (buf[510] != 0x55 || buf[511] != 0xaa)
787         return -1;
788     for(i = 0; i < 4; i++) {
789         p = ((struct partition *)(buf + 0x1be)) + i;
790         nr_sects = le32_to_cpu(p->nr_sects);
791         if (nr_sects && p->end_head) {
792             /* We make the assumption that the partition terminates on
793                a cylinder boundary */
794             heads = p->end_head + 1;
795             sectors = p->end_sector & 63;
796             if (sectors == 0)
797                 continue;
798             cylinders = nb_sectors / (heads * sectors);
799             if (cylinders < 1 || cylinders > 16383)
800                 continue;
801             *pheads = heads;
802             *psectors = sectors;
803             *pcylinders = cylinders;
804 #if 0
805             printf("guessed geometry: LCHS=%d %d %d\n",
806                    cylinders, heads, sectors);
807 #endif
808             return 0;
809         }
810     }
811     return -1;
812 }
813
814 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
815 {
816     int translation, lba_detected = 0;
817     int cylinders, heads, secs;
818     uint64_t nb_sectors;
819
820     /* if a geometry hint is available, use it */
821     bdrv_get_geometry(bs, &nb_sectors);
822     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
823     translation = bdrv_get_translation_hint(bs);
824     if (cylinders != 0) {
825         *pcyls = cylinders;
826         *pheads = heads;
827         *psecs = secs;
828     } else {
829         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
830             if (heads > 16) {
831                 /* if heads > 16, it means that a BIOS LBA
832                    translation was active, so the default
833                    hardware geometry is OK */
834                 lba_detected = 1;
835                 goto default_geometry;
836             } else {
837                 *pcyls = cylinders;
838                 *pheads = heads;
839                 *psecs = secs;
840                 /* disable any translation to be in sync with
841                    the logical geometry */
842                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
843                     bdrv_set_translation_hint(bs,
844                                               BIOS_ATA_TRANSLATION_NONE);
845                 }
846             }
847         } else {
848         default_geometry:
849             /* if no geometry, use a standard physical disk geometry */
850             cylinders = nb_sectors / (16 * 63);
851
852             if (cylinders > 16383)
853                 cylinders = 16383;
854             else if (cylinders < 2)
855                 cylinders = 2;
856             *pcyls = cylinders;
857             *pheads = 16;
858             *psecs = 63;
859             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
860                 if ((*pcyls * *pheads) <= 131072) {
861                     bdrv_set_translation_hint(bs,
862                                               BIOS_ATA_TRANSLATION_LARGE);
863                 } else {
864                     bdrv_set_translation_hint(bs,
865                                               BIOS_ATA_TRANSLATION_LBA);
866                 }
867             }
868         }
869         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
870     }
871 }
872
873 void bdrv_set_geometry_hint(BlockDriverState *bs,
874                             int cyls, int heads, int secs)
875 {
876     bs->cyls = cyls;
877     bs->heads = heads;
878     bs->secs = secs;
879 }
880
881 void bdrv_set_type_hint(BlockDriverState *bs, int type)
882 {
883     bs->type = type;
884     bs->removable = ((type == BDRV_TYPE_CDROM ||
885                       type == BDRV_TYPE_FLOPPY));
886 }
887
888 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
889 {
890     bs->translation = translation;
891 }
892
893 void bdrv_get_geometry_hint(BlockDriverState *bs,
894                             int *pcyls, int *pheads, int *psecs)
895 {
896     *pcyls = bs->cyls;
897     *pheads = bs->heads;
898     *psecs = bs->secs;
899 }
900
901 int bdrv_get_type_hint(BlockDriverState *bs)
902 {
903     return bs->type;
904 }
905
906 int bdrv_get_translation_hint(BlockDriverState *bs)
907 {
908     return bs->translation;
909 }
910
911 int bdrv_is_removable(BlockDriverState *bs)
912 {
913     return bs->removable;
914 }
915
916 int bdrv_is_read_only(BlockDriverState *bs)
917 {
918     return bs->read_only;
919 }
920
921 int bdrv_is_sg(BlockDriverState *bs)
922 {
923     return bs->sg;
924 }
925
926 /* XXX: no longer used */
927 void bdrv_set_change_cb(BlockDriverState *bs,
928                         void (*change_cb)(void *opaque), void *opaque)
929 {
930     bs->change_cb = change_cb;
931     bs->change_opaque = opaque;
932 }
933
934 int bdrv_is_encrypted(BlockDriverState *bs)
935 {
936     if (bs->backing_hd && bs->backing_hd->encrypted)
937         return 1;
938     return bs->encrypted;
939 }
940
941 int bdrv_key_required(BlockDriverState *bs)
942 {
943     BlockDriverState *backing_hd = bs->backing_hd;
944
945     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
946         return 1;
947     return (bs->encrypted && !bs->valid_key);
948 }
949
950 int bdrv_set_key(BlockDriverState *bs, const char *key)
951 {
952     int ret;
953     if (bs->backing_hd && bs->backing_hd->encrypted) {
954         ret = bdrv_set_key(bs->backing_hd, key);
955         if (ret < 0)
956             return ret;
957         if (!bs->encrypted)
958             return 0;
959     }
960     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
961         return -1;
962     ret = bs->drv->bdrv_set_key(bs, key);
963     if (ret < 0) {
964         bs->valid_key = 0;
965     } else if (!bs->valid_key) {
966         bs->valid_key = 1;
967         /* call the change callback now, we skipped it on open */
968         bs->media_changed = 1;
969         if (bs->change_cb)
970             bs->change_cb(bs->change_opaque);
971     }
972     return ret;
973 }
974
975 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
976 {
977     if (!bs->drv) {
978         buf[0] = '\0';
979     } else {
980         pstrcpy(buf, buf_size, bs->drv->format_name);
981     }
982 }
983
984 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
985                          void *opaque)
986 {
987     BlockDriver *drv;
988
989     for (drv = first_drv; drv != NULL; drv = drv->next) {
990         it(opaque, drv->format_name);
991     }
992 }
993
994 BlockDriverState *bdrv_find(const char *name)
995 {
996     BlockDriverState *bs;
997
998     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
999         if (!strcmp(name, bs->device_name))
1000             return bs;
1001     }
1002     return NULL;
1003 }
1004
1005 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1006 {
1007     BlockDriverState *bs;
1008
1009     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1010         it(opaque, bs);
1011     }
1012 }
1013
1014 const char *bdrv_get_device_name(BlockDriverState *bs)
1015 {
1016     return bs->device_name;
1017 }
1018
1019 void bdrv_flush(BlockDriverState *bs)
1020 {
1021     if (!bs->drv)
1022         return;
1023     if (bs->drv->bdrv_flush)
1024         bs->drv->bdrv_flush(bs);
1025     if (bs->backing_hd)
1026         bdrv_flush(bs->backing_hd);
1027 }
1028
1029 void bdrv_flush_all(void)
1030 {
1031     BlockDriverState *bs;
1032
1033     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1034         if (bs->drv && !bdrv_is_read_only(bs) && 
1035             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1036             bdrv_flush(bs);
1037 }
1038
1039 /*
1040  * Returns true iff the specified sector is present in the disk image. Drivers
1041  * not implementing the functionality are assumed to not support backing files,
1042  * hence all their sectors are reported as allocated.
1043  *
1044  * 'pnum' is set to the number of sectors (including and immediately following
1045  * the specified sector) that are known to be in the same
1046  * allocated/unallocated state.
1047  *
1048  * 'nb_sectors' is the max value 'pnum' should be set to.
1049  */
1050 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1051         int *pnum)
1052 {
1053     int64_t n;
1054     if (!bs->drv->bdrv_is_allocated) {
1055         if (sector_num >= bs->total_sectors) {
1056             *pnum = 0;
1057             return 0;
1058         }
1059         n = bs->total_sectors - sector_num;
1060         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1061         return 1;
1062     }
1063     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1064 }
1065
1066 void bdrv_info(Monitor *mon)
1067 {
1068     BlockDriverState *bs;
1069
1070     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1071         monitor_printf(mon, "%s:", bs->device_name);
1072         monitor_printf(mon, " type=");
1073         switch(bs->type) {
1074         case BDRV_TYPE_HD:
1075             monitor_printf(mon, "hd");
1076             break;
1077         case BDRV_TYPE_CDROM:
1078             monitor_printf(mon, "cdrom");
1079             break;
1080         case BDRV_TYPE_FLOPPY:
1081             monitor_printf(mon, "floppy");
1082             break;
1083         }
1084         monitor_printf(mon, " removable=%d", bs->removable);
1085         if (bs->removable) {
1086             monitor_printf(mon, " locked=%d", bs->locked);
1087         }
1088         if (bs->drv) {
1089             monitor_printf(mon, " file=");
1090             monitor_print_filename(mon, bs->filename);
1091             if (bs->backing_file[0] != '\0') {
1092                 monitor_printf(mon, " backing_file=");
1093                 monitor_print_filename(mon, bs->backing_file);
1094             }
1095             monitor_printf(mon, " ro=%d", bs->read_only);
1096             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1097             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1098         } else {
1099             monitor_printf(mon, " [not inserted]");
1100         }
1101         monitor_printf(mon, "\n");
1102     }
1103 }
1104
1105 /* The "info blockstats" command. */
1106 void bdrv_info_stats(Monitor *mon)
1107 {
1108     BlockDriverState *bs;
1109
1110     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1111         monitor_printf(mon, "%s:"
1112                        " rd_bytes=%" PRIu64
1113                        " wr_bytes=%" PRIu64
1114                        " rd_operations=%" PRIu64
1115                        " wr_operations=%" PRIu64
1116                        "\n",
1117                        bs->device_name,
1118                        bs->rd_bytes, bs->wr_bytes,
1119                        bs->rd_ops, bs->wr_ops);
1120     }
1121 }
1122
1123 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1124 {
1125     if (bs->backing_hd && bs->backing_hd->encrypted)
1126         return bs->backing_file;
1127     else if (bs->encrypted)
1128         return bs->filename;
1129     else
1130         return NULL;
1131 }
1132
1133 void bdrv_get_backing_filename(BlockDriverState *bs,
1134                                char *filename, int filename_size)
1135 {
1136     if (!bs->backing_hd) {
1137         pstrcpy(filename, filename_size, "");
1138     } else {
1139         pstrcpy(filename, filename_size, bs->backing_file);
1140     }
1141 }
1142
1143 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1144                           const uint8_t *buf, int nb_sectors)
1145 {
1146     BlockDriver *drv = bs->drv;
1147     if (!drv)
1148         return -ENOMEDIUM;
1149     if (!drv->bdrv_write_compressed)
1150         return -ENOTSUP;
1151     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1152 }
1153
1154 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1155 {
1156     BlockDriver *drv = bs->drv;
1157     if (!drv)
1158         return -ENOMEDIUM;
1159     if (!drv->bdrv_get_info)
1160         return -ENOTSUP;
1161     memset(bdi, 0, sizeof(*bdi));
1162     return drv->bdrv_get_info(bs, bdi);
1163 }
1164
1165 int bdrv_put_buffer(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size)
1166 {
1167     BlockDriver *drv = bs->drv;
1168     if (!drv)
1169         return -ENOMEDIUM;
1170     if (!drv->bdrv_put_buffer)
1171         return -ENOTSUP;
1172     return drv->bdrv_put_buffer(bs, buf, pos, size);
1173 }
1174
1175 int bdrv_get_buffer(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size)
1176 {
1177     BlockDriver *drv = bs->drv;
1178     if (!drv)
1179         return -ENOMEDIUM;
1180     if (!drv->bdrv_get_buffer)
1181         return -ENOTSUP;
1182     return drv->bdrv_get_buffer(bs, buf, pos, size);
1183 }
1184
1185 /**************************************************************/
1186 /* handling of snapshots */
1187
1188 int bdrv_snapshot_create(BlockDriverState *bs,
1189                          QEMUSnapshotInfo *sn_info)
1190 {
1191     BlockDriver *drv = bs->drv;
1192     if (!drv)
1193         return -ENOMEDIUM;
1194     if (!drv->bdrv_snapshot_create)
1195         return -ENOTSUP;
1196     return drv->bdrv_snapshot_create(bs, sn_info);
1197 }
1198
1199 int bdrv_snapshot_goto(BlockDriverState *bs,
1200                        const char *snapshot_id)
1201 {
1202     BlockDriver *drv = bs->drv;
1203     if (!drv)
1204         return -ENOMEDIUM;
1205     if (!drv->bdrv_snapshot_goto)
1206         return -ENOTSUP;
1207     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1208 }
1209
1210 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1211 {
1212     BlockDriver *drv = bs->drv;
1213     if (!drv)
1214         return -ENOMEDIUM;
1215     if (!drv->bdrv_snapshot_delete)
1216         return -ENOTSUP;
1217     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1218 }
1219
1220 int bdrv_snapshot_list(BlockDriverState *bs,
1221                        QEMUSnapshotInfo **psn_info)
1222 {
1223     BlockDriver *drv = bs->drv;
1224     if (!drv)
1225         return -ENOMEDIUM;
1226     if (!drv->bdrv_snapshot_list)
1227         return -ENOTSUP;
1228     return drv->bdrv_snapshot_list(bs, psn_info);
1229 }
1230
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes) in a compact form.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and tagged K, M, G or T: one decimal is shown while the
 * scaled value is below 10, otherwise the value is rounded to the nearest
 * integer. Anything too large for T is still expressed in T.
 *
 * Returns 'buf'.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* move to the next unit while the value needs 4+ digits in this one */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        /* adding half a unit rounds to nearest instead of truncating */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit,
                 suffixes[idx]);
    }
    return buf;
}
1260
1261 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1262 {
1263     char buf1[128], date_buf[128], clock_buf[128];
1264 #ifdef _WIN32
1265     struct tm *ptm;
1266 #else
1267     struct tm tm;
1268 #endif
1269     time_t ti;
1270     int64_t secs;
1271
1272     if (!sn) {
1273         snprintf(buf, buf_size,
1274                  "%-10s%-20s%7s%20s%15s",
1275                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1276     } else {
1277         ti = sn->date_sec;
1278 #ifdef _WIN32
1279         ptm = localtime(&ti);
1280         strftime(date_buf, sizeof(date_buf),
1281                  "%Y-%m-%d %H:%M:%S", ptm);
1282 #else
1283         localtime_r(&ti, &tm);
1284         strftime(date_buf, sizeof(date_buf),
1285                  "%Y-%m-%d %H:%M:%S", &tm);
1286 #endif
1287         secs = sn->vm_clock_nsec / 1000000000;
1288         snprintf(clock_buf, sizeof(clock_buf),
1289                  "%02d:%02d:%02d.%03d",
1290                  (int)(secs / 3600),
1291                  (int)((secs / 60) % 60),
1292                  (int)(secs % 60),
1293                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1294         snprintf(buf, buf_size,
1295                  "%-10s%-20s%7s%20s%15s",
1296                  sn->id_str, sn->name,
1297                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1298                  date_buf,
1299                  clock_buf);
1300     }
1301     return buf;
1302 }
1303
1304
1305 /**************************************************************/
1306 /* async I/Os */
1307
1308 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1309                                  QEMUIOVector *qiov, int nb_sectors,
1310                                  BlockDriverCompletionFunc *cb, void *opaque)
1311 {
1312     BlockDriver *drv = bs->drv;
1313     BlockDriverAIOCB *ret;
1314
1315     if (!drv)
1316         return NULL;
1317     if (bdrv_check_request(bs, sector_num, nb_sectors))
1318         return NULL;
1319
1320     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1321                               cb, opaque);
1322
1323     if (ret) {
1324         /* Update stats even though technically transfer has not happened. */
1325         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1326         bs->rd_ops ++;
1327     }
1328
1329     return ret;
1330 }
1331
1332 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1333                                   QEMUIOVector *qiov, int nb_sectors,
1334                                   BlockDriverCompletionFunc *cb, void *opaque)
1335 {
1336     BlockDriver *drv = bs->drv;
1337     BlockDriverAIOCB *ret;
1338
1339     if (!drv)
1340         return NULL;
1341     if (bs->read_only)
1342         return NULL;
1343     if (bdrv_check_request(bs, sector_num, nb_sectors))
1344         return NULL;
1345
1346     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1347                                cb, opaque);
1348
1349     if (ret) {
1350         /* Update stats even though technically transfer has not happened. */
1351         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1352         bs->wr_ops ++;
1353     }
1354
1355     return ret;
1356 }
1357
1358 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1359 {
1360     acb->pool->cancel(acb);
1361 }
1362
1363
1364 /**************************************************************/
1365 /* async block device emulation */
1366
1367 static void bdrv_aio_bh_cb(void *opaque)
1368 {
1369     BlockDriverAIOCBSync *acb = opaque;
1370
1371     if (!acb->is_write)
1372         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
1373     qemu_vfree(acb->bounce);
1374     acb->common.cb(acb->common.opaque, acb->ret);
1375
1376     qemu_aio_release(acb);
1377 }
1378
1379 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1380                                             int64_t sector_num,
1381                                             QEMUIOVector *qiov,
1382                                             int nb_sectors,
1383                                             BlockDriverCompletionFunc *cb,
1384                                             void *opaque,
1385                                             int is_write)
1386
1387 {
1388     BlockDriverAIOCBSync *acb;
1389
1390     acb = qemu_aio_get(bs, cb, opaque);
1391     acb->is_write = is_write;
1392     acb->qiov = qiov;
1393     acb->bounce = qemu_memalign(512, qiov->size);
1394
1395     if (!acb->bh)
1396         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1397
1398     if (is_write) {
1399         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1400         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
1401     } else {
1402         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
1403     }
1404
1405     qemu_bh_schedule(acb->bh);
1406
1407     return &acb->common;
1408 }
1409
1410 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
1411         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1412         BlockDriverCompletionFunc *cb, void *opaque)
1413 {
1414     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
1415 }
1416
1417 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
1418         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1419         BlockDriverCompletionFunc *cb, void *opaque)
1420 {
1421     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
1422 }
1423
1424
1425 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1426 {
1427     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1428     qemu_bh_cancel(acb->bh);
1429     qemu_aio_release(acb);
1430 }
1431
1432 /**************************************************************/
1433 /* sync block device emulation */
1434
/*
 * Completion callback for the synchronous emulation: 'opaque' points at the
 * caller's int result slot, which receives 'ret'.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1439
1440 #define NOT_DONE 0x7fffffff
1441
1442 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1443                         uint8_t *buf, int nb_sectors)
1444 {
1445     int async_ret;
1446     BlockDriverAIOCB *acb;
1447     struct iovec iov;
1448     QEMUIOVector qiov;
1449
1450     async_ret = NOT_DONE;
1451     iov.iov_base = (void *)buf;
1452     iov.iov_len = nb_sectors * 512;
1453     qemu_iovec_init_external(&qiov, &iov, 1);
1454     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
1455         bdrv_rw_em_cb, &async_ret);
1456     if (acb == NULL)
1457         return -1;
1458
1459     while (async_ret == NOT_DONE) {
1460         qemu_aio_wait();
1461     }
1462
1463     return async_ret;
1464 }
1465
1466 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1467                          const uint8_t *buf, int nb_sectors)
1468 {
1469     int async_ret;
1470     BlockDriverAIOCB *acb;
1471     struct iovec iov;
1472     QEMUIOVector qiov;
1473
1474     async_ret = NOT_DONE;
1475     iov.iov_base = (void *)buf;
1476     iov.iov_len = nb_sectors * 512;
1477     qemu_iovec_init_external(&qiov, &iov, 1);
1478     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
1479         bdrv_rw_em_cb, &async_ret);
1480     if (acb == NULL)
1481         return -1;
1482     while (async_ret == NOT_DONE) {
1483         qemu_aio_wait();
1484     }
1485     return async_ret;
1486 }
1487
1488 void bdrv_init(void)
1489 {
1490     bdrv_register(&bdrv_raw);
1491     bdrv_register(&bdrv_host_device);
1492 #ifndef _WIN32
1493     bdrv_register(&bdrv_cow);
1494 #endif
1495     bdrv_register(&bdrv_qcow);
1496     bdrv_register(&bdrv_vmdk);
1497     bdrv_register(&bdrv_cloop);
1498     bdrv_register(&bdrv_dmg);
1499     bdrv_register(&bdrv_bochs);
1500     bdrv_register(&bdrv_vpc);
1501     bdrv_register(&bdrv_vvfat);
1502     bdrv_register(&bdrv_qcow2);
1503     bdrv_register(&bdrv_parallels);
1504     bdrv_register(&bdrv_nbd);
1505 }
1506
1507 void aio_pool_init(AIOPool *pool, int aiocb_size,
1508                    void (*cancel)(BlockDriverAIOCB *acb))
1509 {
1510     pool->aiocb_size = aiocb_size;
1511     pool->cancel = cancel;
1512     pool->free_aiocb = NULL;
1513 }
1514
1515 void *qemu_aio_get_pool(AIOPool *pool, BlockDriverState *bs,
1516                         BlockDriverCompletionFunc *cb, void *opaque)
1517 {
1518     BlockDriverAIOCB *acb;
1519
1520     if (pool->free_aiocb) {
1521         acb = pool->free_aiocb;
1522         pool->free_aiocb = acb->next;
1523     } else {
1524         acb = qemu_mallocz(pool->aiocb_size);
1525         acb->pool = pool;
1526     }
1527     acb->bs = bs;
1528     acb->cb = cb;
1529     acb->opaque = opaque;
1530     return acb;
1531 }
1532
1533 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1534                    void *opaque)
1535 {
1536     return qemu_aio_get_pool(&bs->drv->aio_pool, bs, cb, opaque);
1537 }
1538
1539 void qemu_aio_release(void *p)
1540 {
1541     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1542     AIOPool *pool = acb->pool;
1543     acb->next = pool->free_aiocb;
1544     pool->free_aiocb = acb;
1545 }
1546
1547 /**************************************************************/
1548 /* removable device support */
1549
1550 /**
1551  * Return TRUE if the media is present
1552  */
1553 int bdrv_is_inserted(BlockDriverState *bs)
1554 {
1555     BlockDriver *drv = bs->drv;
1556     int ret;
1557     if (!drv)
1558         return 0;
1559     if (!drv->bdrv_is_inserted)
1560         return 1;
1561     ret = drv->bdrv_is_inserted(bs);
1562     return ret;
1563 }
1564
1565 /**
1566  * Return TRUE if the media changed since the last call to this
1567  * function. It is currently only used for floppy disks
1568  */
1569 int bdrv_media_changed(BlockDriverState *bs)
1570 {
1571     BlockDriver *drv = bs->drv;
1572     int ret;
1573
1574     if (!drv || !drv->bdrv_media_changed)
1575         ret = -ENOTSUP;
1576     else
1577         ret = drv->bdrv_media_changed(bs);
1578     if (ret == -ENOTSUP)
1579         ret = bs->media_changed;
1580     bs->media_changed = 0;
1581     return ret;
1582 }
1583
1584 /**
1585  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1586  */
1587 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1588 {
1589     BlockDriver *drv = bs->drv;
1590     int ret;
1591
1592     if (!drv || !drv->bdrv_eject) {
1593         ret = -ENOTSUP;
1594     } else {
1595         ret = drv->bdrv_eject(bs, eject_flag);
1596     }
1597     if (ret == -ENOTSUP) {
1598         if (eject_flag)
1599             bdrv_close(bs);
1600     }
1601 }
1602
1603 int bdrv_is_locked(BlockDriverState *bs)
1604 {
1605     return bs->locked;
1606 }
1607
1608 /**
1609  * Lock or unlock the media (if it is locked, the user won't be able
1610  * to eject it manually).
1611  */
1612 void bdrv_set_locked(BlockDriverState *bs, int locked)
1613 {
1614     BlockDriver *drv = bs->drv;
1615
1616     bs->locked = locked;
1617     if (drv && drv->bdrv_set_locked) {
1618         drv->bdrv_set_locked(bs, locked);
1619     }
1620 }
1621
1622 /* needed for generic scsi interface */
1623
1624 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1625 {
1626     BlockDriver *drv = bs->drv;
1627
1628     if (drv && drv->bdrv_ioctl)
1629         return drv->bdrv_ioctl(bs, req, buf);
1630     return -ENOTSUP;
1631 }
1632
1633 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1634         unsigned long int req, void *buf,
1635         BlockDriverCompletionFunc *cb, void *opaque)
1636 {
1637     BlockDriver *drv = bs->drv;
1638
1639     if (drv && drv->bdrv_aio_ioctl)
1640         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1641     return NULL;
1642 }