kvm: Drop kvm_patch_opcode_byte (Jan Kiszka)
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33
34 #ifdef HOST_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
/* log2 of the sector size used by the helpers in this file */
#define SECTOR_BITS 9
/* sector size in bytes: 1 << 9 == 512 */
#define SECTOR_SIZE (1 << SECTOR_BITS)

/* NOTE(review): not referenced in the visible part of this file; presumably
   the pool backing vectored AIO requests -- confirm against the rest of
   the file */
static AIOPool vectored_aio_pool;

/* AIO control block of the emulation layer; bdrv_register() stores its
   size in the driver's aiocb_size when it installs the AIO emulation */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
} BlockDriverAIOCBSync;

/* Emulation entry points that bdrv_register() plugs into drivers which
   implement only one of the synchronous / asynchronous interfaces */
static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);

/* head of the list of named block devices; bdrv_new() appends to it */
BlockDriverState *bdrv_first;

/* head of the list of registered drivers; bdrv_register() prepends to it */
static BlockDriver *first_drv;
73
/* Return non-zero if 'path' is absolute.  Everything up to and including
   the first ':' (if any) is skipped before the check, so "proto:/x"
   counts as absolute.  On Windows '\\' is accepted as a separator too. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
93
/* Build in 'dest' (at most dest_size bytes, always terminated) the path
   of 'filename' interpreted relative to 'base_path'.  An absolute
   'filename' is copied unchanged.  Otherwise the directory part of
   'base_path' -- everything up to the last path separator, or up to the
   first ':' when that comes later -- is prepended, which keeps URL-like
   prefixes intact. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *last_sep, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just after the first ':' of the base, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just after the last path separator of the base */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');
        if (!last_sep || last_backslash > last_sep) {
            last_sep = last_backslash;
        }
    }
#endif
    after_sep = last_sep ? last_sep + 1 : base_path;

    /* keep whichever prefix is longer */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
137
138
139 static void bdrv_register(BlockDriver *bdrv)
140 {
141     if (!bdrv->bdrv_aio_read) {
142         /* add AIO emulation layer */
143         bdrv->bdrv_aio_read = bdrv_aio_read_em;
144         bdrv->bdrv_aio_write = bdrv_aio_write_em;
145         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
146         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
147     } else if (!bdrv->bdrv_read) {
148         /* add synchronous IO emulation layer */
149         bdrv->bdrv_read = bdrv_read_em;
150         bdrv->bdrv_write = bdrv_write_em;
151     }
152     aio_pool_init(&bdrv->aio_pool, bdrv->aiocb_size, bdrv->bdrv_aio_cancel);
153     bdrv->next = first_drv;
154     first_drv = bdrv;
155 }
156
157 /* create a new block device (by default it is empty) */
158 BlockDriverState *bdrv_new(const char *device_name)
159 {
160     BlockDriverState **pbs, *bs;
161
162     bs = qemu_mallocz(sizeof(BlockDriverState));
163     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
164     if (device_name[0] != '\0') {
165         /* insert at the end */
166         pbs = &bdrv_first;
167         while (*pbs != NULL)
168             pbs = &(*pbs)->next;
169         *pbs = bs;
170     }
171     return bs;
172 }
173
174 BlockDriver *bdrv_find_format(const char *format_name)
175 {
176     BlockDriver *drv1;
177     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
178         if (!strcmp(drv1->format_name, format_name))
179             return drv1;
180     }
181     return NULL;
182 }
183
184 int bdrv_create(BlockDriver *drv,
185                 const char *filename, int64_t size_in_sectors,
186                 const char *backing_file, int flags)
187 {
188     if (!drv->bdrv_create)
189         return -ENOTSUP;
190     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
191 }
192
193 #ifdef _WIN32
194 void get_tmp_filename(char *filename, int size)
195 {
196     char temp_dir[MAX_PATH];
197
198     GetTempPath(MAX_PATH, temp_dir);
199     GetTempFileName(temp_dir, "qem", 0, filename);
200 }
201 #else
/* POSIX flavour: create a unique empty file "vl.XXXXXX" in $TMPDIR (or
   /tmp when TMPDIR is unset) and store its name in 'filename', a buffer
   of 'size' bytes.
   If the file cannot be created, 'filename' is set to the empty string
   so that the caller's subsequent open/create fails cleanly instead of
   operating on the predictable, non-unique template name. */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible (the file is closed here and reopened
       by name later) */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd < 0) {
        /* also reached when the template was truncated by snprintf */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
214 #endif
215
216 #ifdef _WIN32
/* Return 1 if 'filename' starts with an ASCII letter followed by ':'
   (e.g. "c:"), 0 otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    return is_letter && filename[1] == ':';
}
223
/* Return 1 if 'filename' names a whole drive: either exactly "X:" or
   anything starting with the device prefix "\\.\" or "//./". */
static int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
234 #endif
235
236 static BlockDriver *find_protocol(const char *filename)
237 {
238     BlockDriver *drv1;
239     char protocol[128];
240     int len;
241     const char *p;
242
243 #ifdef _WIN32
244     if (is_windows_drive(filename) ||
245         is_windows_drive_prefix(filename))
246         return &bdrv_raw;
247 #endif
248     p = strchr(filename, ':');
249     if (!p)
250         return &bdrv_raw;
251     len = p - filename;
252     if (len > sizeof(protocol) - 1)
253         len = sizeof(protocol) - 1;
254     memcpy(protocol, filename, len);
255     protocol[len] = '\0';
256     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
257         if (drv1->protocol_name &&
258             !strcmp(drv1->protocol_name, protocol))
259             return drv1;
260     }
261     return NULL;
262 }
263
264 /* XXX: force raw format if block or character device ? It would
265    simplify the BSD case */
266 static BlockDriver *find_image_format(const char *filename)
267 {
268     int ret, score, score_max;
269     BlockDriver *drv1, *drv;
270     uint8_t buf[2048];
271     BlockDriverState *bs;
272
273     /* detect host devices. By convention, /dev/cdrom[N] is always
274        recognized as a host CDROM */
275     if (strstart(filename, "/dev/cdrom", NULL))
276         return &bdrv_host_device;
277 #ifdef _WIN32
278     if (is_windows_drive(filename))
279         return &bdrv_host_device;
280 #else
281     {
282         struct stat st;
283         if (stat(filename, &st) >= 0 &&
284             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
285             return &bdrv_host_device;
286         }
287     }
288 #endif
289
290     drv = find_protocol(filename);
291     /* no need to test disk image formats for vvfat */
292     if (drv == &bdrv_vvfat)
293         return drv;
294
295     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
296     if (ret < 0)
297         return NULL;
298     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
299     bdrv_delete(bs);
300     if (ret < 0) {
301         return NULL;
302     }
303
304     score_max = 0;
305     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
306         if (drv1->bdrv_probe) {
307             score = drv1->bdrv_probe(buf, ret, filename);
308             if (score > score_max) {
309                 score_max = score;
310                 drv = drv1;
311             }
312         }
313     }
314     return drv;
315 }
316
317 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
318 {
319     BlockDriverState *bs;
320     int ret;
321
322     bs = bdrv_new("");
323     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
324     if (ret < 0) {
325         bdrv_delete(bs);
326         return ret;
327     }
328     bs->growable = 1;
329     *pbs = bs;
330     return 0;
331 }
332
333 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
334 {
335     return bdrv_open2(bs, filename, flags, NULL);
336 }
337
338 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
339                BlockDriver *drv)
340 {
341     int ret, open_flags;
342     char tmp_filename[PATH_MAX];
343     char backing_filename[PATH_MAX];
344
345     bs->read_only = 0;
346     bs->is_temporary = 0;
347     bs->encrypted = 0;
348     bs->valid_key = 0;
349
350     if (flags & BDRV_O_SNAPSHOT) {
351         BlockDriverState *bs1;
352         int64_t total_size;
353         int is_protocol = 0;
354
355         /* if snapshot, we create a temporary backing file and open it
356            instead of opening 'filename' directly */
357
358         /* if there is a backing file, use it */
359         bs1 = bdrv_new("");
360         ret = bdrv_open(bs1, filename, 0);
361         if (ret < 0) {
362             bdrv_delete(bs1);
363             return ret;
364         }
365         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
366
367         if (bs1->drv && bs1->drv->protocol_name)
368             is_protocol = 1;
369
370         bdrv_delete(bs1);
371
372         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
373
374         /* Real path is meaningless for protocols */
375         if (is_protocol)
376             snprintf(backing_filename, sizeof(backing_filename),
377                      "%s", filename);
378         else
379             realpath(filename, backing_filename);
380
381         ret = bdrv_create(&bdrv_qcow2, tmp_filename,
382                           total_size, backing_filename, 0);
383         if (ret < 0) {
384             return ret;
385         }
386         filename = tmp_filename;
387         bs->is_temporary = 1;
388     }
389
390     pstrcpy(bs->filename, sizeof(bs->filename), filename);
391     if (flags & BDRV_O_FILE) {
392         drv = find_protocol(filename);
393     } else if (!drv) {
394         drv = find_image_format(filename);
395     }
396     if (!drv) {
397         ret = -ENOENT;
398         goto unlink_and_fail;
399     }
400     bs->drv = drv;
401     bs->opaque = qemu_mallocz(drv->instance_size);
402     /* Note: for compatibility, we open disk image files as RDWR, and
403        RDONLY as fallback */
404     if (!(flags & BDRV_O_FILE))
405         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
406     else
407         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
408     ret = drv->bdrv_open(bs, filename, open_flags);
409     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
410         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
411         bs->read_only = 1;
412     }
413     if (ret < 0) {
414         qemu_free(bs->opaque);
415         bs->opaque = NULL;
416         bs->drv = NULL;
417     unlink_and_fail:
418         if (bs->is_temporary)
419             unlink(filename);
420         return ret;
421     }
422     if (drv->bdrv_getlength) {
423         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
424     }
425 #ifndef _WIN32
426     if (bs->is_temporary) {
427         unlink(filename);
428     }
429 #endif
430     if (bs->backing_file[0] != '\0') {
431         /* if there is a backing file, use it */
432         bs->backing_hd = bdrv_new("");
433         path_combine(backing_filename, sizeof(backing_filename),
434                      filename, bs->backing_file);
435         ret = bdrv_open(bs->backing_hd, backing_filename, open_flags);
436         if (ret < 0) {
437             bdrv_close(bs);
438             return ret;
439         }
440     }
441
442     if (!bdrv_key_required(bs)) {
443         /* call the change callback */
444         bs->media_changed = 1;
445         if (bs->change_cb)
446             bs->change_cb(bs->change_opaque);
447     }
448     return 0;
449 }
450
451 void bdrv_close(BlockDriverState *bs)
452 {
453     if (bs->drv) {
454         if (bs->backing_hd)
455             bdrv_delete(bs->backing_hd);
456         bs->drv->bdrv_close(bs);
457         qemu_free(bs->opaque);
458 #ifdef _WIN32
459         if (bs->is_temporary) {
460             unlink(bs->filename);
461         }
462 #endif
463         bs->opaque = NULL;
464         bs->drv = NULL;
465
466         /* call the change callback */
467         bs->media_changed = 1;
468         if (bs->change_cb)
469             bs->change_cb(bs->change_opaque);
470     }
471 }
472
473 void bdrv_delete(BlockDriverState *bs)
474 {
475     BlockDriverState **pbs;
476
477     pbs = &bdrv_first;
478     while (*pbs != bs && *pbs != NULL)
479         pbs = &(*pbs)->next;
480     if (*pbs == bs)
481         *pbs = bs->next;
482
483     bdrv_close(bs);
484     qemu_free(bs);
485 }
486
487 /* commit COW file into the raw image */
488 int bdrv_commit(BlockDriverState *bs)
489 {
490     BlockDriver *drv = bs->drv;
491     int64_t i, total_sectors;
492     int n, j;
493     unsigned char sector[512];
494
495     if (!drv)
496         return -ENOMEDIUM;
497
498     if (bs->read_only) {
499         return -EACCES;
500     }
501
502     if (!bs->backing_hd) {
503         return -ENOTSUP;
504     }
505
506     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
507     for (i = 0; i < total_sectors;) {
508         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
509             for(j = 0; j < n; j++) {
510                 if (bdrv_read(bs, i, sector, 1) != 0) {
511                     return -EIO;
512                 }
513
514                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
515                     return -EIO;
516                 }
517                 i++;
518             }
519         } else {
520             i += n;
521         }
522     }
523
524     if (drv->bdrv_make_empty)
525         return drv->bdrv_make_empty(bs);
526
527     return 0;
528 }
529
530 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
531                                    size_t size)
532 {
533     int64_t len;
534
535     if (!bdrv_is_inserted(bs))
536         return -ENOMEDIUM;
537
538     if (bs->growable)
539         return 0;
540
541     len = bdrv_getlength(bs);
542
543     if ((offset + size) > len)
544         return -EIO;
545
546     return 0;
547 }
548
549 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
550                               int nb_sectors)
551 {
552     int64_t offset;
553
554     /* Deal with byte accesses */
555     if (sector_num < 0)
556         offset = -sector_num;
557     else
558         offset = sector_num * 512;
559
560     return bdrv_check_byte_request(bs, offset, nb_sectors * 512);
561 }
562
563 /* return < 0 if error. See bdrv_write() for the return codes */
564 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
565               uint8_t *buf, int nb_sectors)
566 {
567     BlockDriver *drv = bs->drv;
568
569     if (!drv)
570         return -ENOMEDIUM;
571     if (bdrv_check_request(bs, sector_num, nb_sectors))
572         return -EIO;
573
574     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
575 }
576
577 /* Return < 0 if error. Important errors are:
578   -EIO         generic I/O error (may happen for all errors)
579   -ENOMEDIUM   No media inserted.
580   -EINVAL      Invalid sector number or nb_sectors
581   -EACCES      Trying to write a read-only device
582 */
583 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
584                const uint8_t *buf, int nb_sectors)
585 {
586     BlockDriver *drv = bs->drv;
587     if (!bs->drv)
588         return -ENOMEDIUM;
589     if (bs->read_only)
590         return -EACCES;
591     if (bdrv_check_request(bs, sector_num, nb_sectors))
592         return -EIO;
593
594     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
595 }
596
597 int bdrv_pread(BlockDriverState *bs, int64_t offset,
598                void *buf, int count1)
599 {
600     uint8_t tmp_buf[SECTOR_SIZE];
601     int len, nb_sectors, count;
602     int64_t sector_num;
603
604     count = count1;
605     /* first read to align to sector start */
606     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
607     if (len > count)
608         len = count;
609     sector_num = offset >> SECTOR_BITS;
610     if (len > 0) {
611         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
612             return -EIO;
613         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
614         count -= len;
615         if (count == 0)
616             return count1;
617         sector_num++;
618         buf += len;
619     }
620
621     /* read the sectors "in place" */
622     nb_sectors = count >> SECTOR_BITS;
623     if (nb_sectors > 0) {
624         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
625             return -EIO;
626         sector_num += nb_sectors;
627         len = nb_sectors << SECTOR_BITS;
628         buf += len;
629         count -= len;
630     }
631
632     /* add data from the last sector */
633     if (count > 0) {
634         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
635             return -EIO;
636         memcpy(buf, tmp_buf, count);
637     }
638     return count1;
639 }
640
641 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
642                 const void *buf, int count1)
643 {
644     uint8_t tmp_buf[SECTOR_SIZE];
645     int len, nb_sectors, count;
646     int64_t sector_num;
647
648     count = count1;
649     /* first write to align to sector start */
650     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
651     if (len > count)
652         len = count;
653     sector_num = offset >> SECTOR_BITS;
654     if (len > 0) {
655         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
656             return -EIO;
657         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
658         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
659             return -EIO;
660         count -= len;
661         if (count == 0)
662             return count1;
663         sector_num++;
664         buf += len;
665     }
666
667     /* write the sectors "in place" */
668     nb_sectors = count >> SECTOR_BITS;
669     if (nb_sectors > 0) {
670         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
671             return -EIO;
672         sector_num += nb_sectors;
673         len = nb_sectors << SECTOR_BITS;
674         buf += len;
675         count -= len;
676     }
677
678     /* add data from the last sector */
679     if (count > 0) {
680         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
681             return -EIO;
682         memcpy(tmp_buf, buf, count);
683         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
684             return -EIO;
685     }
686     return count1;
687 }
688
689 /**
690  * Truncate file to 'offset' bytes (needed only for file protocols)
691  */
692 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
693 {
694     BlockDriver *drv = bs->drv;
695     if (!drv)
696         return -ENOMEDIUM;
697     if (!drv->bdrv_truncate)
698         return -ENOTSUP;
699     return drv->bdrv_truncate(bs, offset);
700 }
701
702 /**
703  * Length of a file in bytes. Return < 0 if error or unknown.
704  */
705 int64_t bdrv_getlength(BlockDriverState *bs)
706 {
707     BlockDriver *drv = bs->drv;
708     if (!drv)
709         return -ENOMEDIUM;
710     if (!drv->bdrv_getlength) {
711         /* legacy mode */
712         return bs->total_sectors * SECTOR_SIZE;
713     }
714     return drv->bdrv_getlength(bs);
715 }
716
717 /* return 0 as number of sectors if no device present or error */
718 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
719 {
720     int64_t length;
721     length = bdrv_getlength(bs);
722     if (length < 0)
723         length = 0;
724     else
725         length = length >> SECTOR_BITS;
726     *nb_sectors_ptr = length;
727 }
728
/* One packed 16-byte entry of the MSDOS partition table */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
741
742 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
743 static int guess_disk_lchs(BlockDriverState *bs,
744                            int *pcylinders, int *pheads, int *psectors)
745 {
746     uint8_t buf[512];
747     int ret, i, heads, sectors, cylinders;
748     struct partition *p;
749     uint32_t nr_sects;
750     uint64_t nb_sectors;
751
752     bdrv_get_geometry(bs, &nb_sectors);
753
754     ret = bdrv_read(bs, 0, buf, 1);
755     if (ret < 0)
756         return -1;
757     /* test msdos magic */
758     if (buf[510] != 0x55 || buf[511] != 0xaa)
759         return -1;
760     for(i = 0; i < 4; i++) {
761         p = ((struct partition *)(buf + 0x1be)) + i;
762         nr_sects = le32_to_cpu(p->nr_sects);
763         if (nr_sects && p->end_head) {
764             /* We make the assumption that the partition terminates on
765                a cylinder boundary */
766             heads = p->end_head + 1;
767             sectors = p->end_sector & 63;
768             if (sectors == 0)
769                 continue;
770             cylinders = nb_sectors / (heads * sectors);
771             if (cylinders < 1 || cylinders > 16383)
772                 continue;
773             *pheads = heads;
774             *psectors = sectors;
775             *pcylinders = cylinders;
776 #if 0
777             printf("guessed geometry: LCHS=%d %d %d\n",
778                    cylinders, heads, sectors);
779 #endif
780             return 0;
781         }
782     }
783     return -1;
784 }
785
786 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
787 {
788     int translation, lba_detected = 0;
789     int cylinders, heads, secs;
790     uint64_t nb_sectors;
791
792     /* if a geometry hint is available, use it */
793     bdrv_get_geometry(bs, &nb_sectors);
794     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
795     translation = bdrv_get_translation_hint(bs);
796     if (cylinders != 0) {
797         *pcyls = cylinders;
798         *pheads = heads;
799         *psecs = secs;
800     } else {
801         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
802             if (heads > 16) {
803                 /* if heads > 16, it means that a BIOS LBA
804                    translation was active, so the default
805                    hardware geometry is OK */
806                 lba_detected = 1;
807                 goto default_geometry;
808             } else {
809                 *pcyls = cylinders;
810                 *pheads = heads;
811                 *psecs = secs;
812                 /* disable any translation to be in sync with
813                    the logical geometry */
814                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
815                     bdrv_set_translation_hint(bs,
816                                               BIOS_ATA_TRANSLATION_NONE);
817                 }
818             }
819         } else {
820         default_geometry:
821             /* if no geometry, use a standard physical disk geometry */
822             cylinders = nb_sectors / (16 * 63);
823
824             if (cylinders > 16383)
825                 cylinders = 16383;
826             else if (cylinders < 2)
827                 cylinders = 2;
828             *pcyls = cylinders;
829             *pheads = 16;
830             *psecs = 63;
831             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
832                 if ((*pcyls * *pheads) <= 131072) {
833                     bdrv_set_translation_hint(bs,
834                                               BIOS_ATA_TRANSLATION_LARGE);
835                 } else {
836                     bdrv_set_translation_hint(bs,
837                                               BIOS_ATA_TRANSLATION_LBA);
838                 }
839             }
840         }
841         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
842     }
843 }
844
845 void bdrv_set_geometry_hint(BlockDriverState *bs,
846                             int cyls, int heads, int secs)
847 {
848     bs->cyls = cyls;
849     bs->heads = heads;
850     bs->secs = secs;
851 }
852
853 void bdrv_set_type_hint(BlockDriverState *bs, int type)
854 {
855     bs->type = type;
856     bs->removable = ((type == BDRV_TYPE_CDROM ||
857                       type == BDRV_TYPE_FLOPPY));
858 }
859
860 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
861 {
862     bs->translation = translation;
863 }
864
865 void bdrv_get_geometry_hint(BlockDriverState *bs,
866                             int *pcyls, int *pheads, int *psecs)
867 {
868     *pcyls = bs->cyls;
869     *pheads = bs->heads;
870     *psecs = bs->secs;
871 }
872
873 int bdrv_get_type_hint(BlockDriverState *bs)
874 {
875     return bs->type;
876 }
877
878 int bdrv_get_translation_hint(BlockDriverState *bs)
879 {
880     return bs->translation;
881 }
882
883 int bdrv_is_removable(BlockDriverState *bs)
884 {
885     return bs->removable;
886 }
887
888 int bdrv_is_read_only(BlockDriverState *bs)
889 {
890     return bs->read_only;
891 }
892
893 int bdrv_is_sg(BlockDriverState *bs)
894 {
895     return bs->sg;
896 }
897
898 /* XXX: no longer used */
899 void bdrv_set_change_cb(BlockDriverState *bs,
900                         void (*change_cb)(void *opaque), void *opaque)
901 {
902     bs->change_cb = change_cb;
903     bs->change_opaque = opaque;
904 }
905
906 int bdrv_is_encrypted(BlockDriverState *bs)
907 {
908     if (bs->backing_hd && bs->backing_hd->encrypted)
909         return 1;
910     return bs->encrypted;
911 }
912
913 int bdrv_key_required(BlockDriverState *bs)
914 {
915     BlockDriverState *backing_hd = bs->backing_hd;
916
917     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
918         return 1;
919     return (bs->encrypted && !bs->valid_key);
920 }
921
922 int bdrv_set_key(BlockDriverState *bs, const char *key)
923 {
924     int ret;
925     if (bs->backing_hd && bs->backing_hd->encrypted) {
926         ret = bdrv_set_key(bs->backing_hd, key);
927         if (ret < 0)
928             return ret;
929         if (!bs->encrypted)
930             return 0;
931     }
932     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
933         return -1;
934     ret = bs->drv->bdrv_set_key(bs, key);
935     if (ret < 0) {
936         bs->valid_key = 0;
937     } else if (!bs->valid_key) {
938         bs->valid_key = 1;
939         /* call the change callback now, we skipped it on open */
940         bs->media_changed = 1;
941         if (bs->change_cb)
942             bs->change_cb(bs->change_opaque);
943     }
944     return ret;
945 }
946
947 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
948 {
949     if (!bs->drv) {
950         buf[0] = '\0';
951     } else {
952         pstrcpy(buf, buf_size, bs->drv->format_name);
953     }
954 }
955
956 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
957                          void *opaque)
958 {
959     BlockDriver *drv;
960
961     for (drv = first_drv; drv != NULL; drv = drv->next) {
962         it(opaque, drv->format_name);
963     }
964 }
965
966 BlockDriverState *bdrv_find(const char *name)
967 {
968     BlockDriverState *bs;
969
970     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
971         if (!strcmp(name, bs->device_name))
972             return bs;
973     }
974     return NULL;
975 }
976
977 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
978 {
979     BlockDriverState *bs;
980
981     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
982         it(opaque, bs);
983     }
984 }
985
986 const char *bdrv_get_device_name(BlockDriverState *bs)
987 {
988     return bs->device_name;
989 }
990
991 void bdrv_flush(BlockDriverState *bs)
992 {
993     if (bs->drv->bdrv_flush)
994         bs->drv->bdrv_flush(bs);
995     if (bs->backing_hd)
996         bdrv_flush(bs->backing_hd);
997 }
998
999 void bdrv_flush_all(void)
1000 {
1001     BlockDriverState *bs;
1002
1003     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1004         if (bs->drv && !bdrv_is_read_only(bs) && 
1005             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1006             bdrv_flush(bs);
1007 }
1008
1009 /*
1010  * Returns true iff the specified sector is present in the disk image. Drivers
1011  * not implementing the functionality are assumed to not support backing files,
1012  * hence all their sectors are reported as allocated.
1013  *
1014  * 'pnum' is set to the number of sectors (including and immediately following
1015  * the specified sector) that are known to be in the same
1016  * allocated/unallocated state.
1017  *
1018  * 'nb_sectors' is the max value 'pnum' should be set to.
1019  */
1020 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1021         int *pnum)
1022 {
1023     int64_t n;
1024     if (!bs->drv->bdrv_is_allocated) {
1025         if (sector_num >= bs->total_sectors) {
1026             *pnum = 0;
1027             return 0;
1028         }
1029         n = bs->total_sectors - sector_num;
1030         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1031         return 1;
1032     }
1033     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1034 }
1035
1036 void bdrv_info(Monitor *mon)
1037 {
1038     BlockDriverState *bs;
1039
1040     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1041         monitor_printf(mon, "%s:", bs->device_name);
1042         monitor_printf(mon, " type=");
1043         switch(bs->type) {
1044         case BDRV_TYPE_HD:
1045             monitor_printf(mon, "hd");
1046             break;
1047         case BDRV_TYPE_CDROM:
1048             monitor_printf(mon, "cdrom");
1049             break;
1050         case BDRV_TYPE_FLOPPY:
1051             monitor_printf(mon, "floppy");
1052             break;
1053         }
1054         monitor_printf(mon, " removable=%d", bs->removable);
1055         if (bs->removable) {
1056             monitor_printf(mon, " locked=%d", bs->locked);
1057         }
1058         if (bs->drv) {
1059             monitor_printf(mon, " file=");
1060             monitor_print_filename(mon, bs->filename);
1061             if (bs->backing_file[0] != '\0') {
1062                 monitor_printf(mon, " backing_file=");
1063                 monitor_print_filename(mon, bs->backing_file);
1064             }
1065             monitor_printf(mon, " ro=%d", bs->read_only);
1066             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1067             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1068         } else {
1069             monitor_printf(mon, " [not inserted]");
1070         }
1071         monitor_printf(mon, "\n");
1072     }
1073 }
1074
1075 /* The "info blockstats" command. */
1076 void bdrv_info_stats(Monitor *mon)
1077 {
1078     BlockDriverState *bs;
1079
1080     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1081         monitor_printf(mon, "%s:"
1082                        " rd_bytes=%" PRIu64
1083                        " wr_bytes=%" PRIu64
1084                        " rd_operations=%" PRIu64
1085                        " wr_operations=%" PRIu64
1086                        "\n",
1087                        bs->device_name,
1088                        bs->rd_bytes, bs->wr_bytes,
1089                        bs->rd_ops, bs->wr_ops);
1090     }
1091 }
1092
1093 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1094 {
1095     if (bs->backing_hd && bs->backing_hd->encrypted)
1096         return bs->backing_file;
1097     else if (bs->encrypted)
1098         return bs->filename;
1099     else
1100         return NULL;
1101 }
1102
1103 void bdrv_get_backing_filename(BlockDriverState *bs,
1104                                char *filename, int filename_size)
1105 {
1106     if (!bs->backing_hd) {
1107         pstrcpy(filename, filename_size, "");
1108     } else {
1109         pstrcpy(filename, filename_size, bs->backing_file);
1110     }
1111 }
1112
1113 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1114                           const uint8_t *buf, int nb_sectors)
1115 {
1116     BlockDriver *drv = bs->drv;
1117     if (!drv)
1118         return -ENOMEDIUM;
1119     if (!drv->bdrv_write_compressed)
1120         return -ENOTSUP;
1121     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1122 }
1123
1124 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1125 {
1126     BlockDriver *drv = bs->drv;
1127     if (!drv)
1128         return -ENOMEDIUM;
1129     if (!drv->bdrv_get_info)
1130         return -ENOTSUP;
1131     memset(bdi, 0, sizeof(*bdi));
1132     return drv->bdrv_get_info(bs, bdi);
1133 }
1134
1135 /**************************************************************/
1136 /* handling of snapshots */
1137
1138 int bdrv_snapshot_create(BlockDriverState *bs,
1139                          QEMUSnapshotInfo *sn_info)
1140 {
1141     BlockDriver *drv = bs->drv;
1142     if (!drv)
1143         return -ENOMEDIUM;
1144     if (!drv->bdrv_snapshot_create)
1145         return -ENOTSUP;
1146     return drv->bdrv_snapshot_create(bs, sn_info);
1147 }
1148
1149 int bdrv_snapshot_goto(BlockDriverState *bs,
1150                        const char *snapshot_id)
1151 {
1152     BlockDriver *drv = bs->drv;
1153     if (!drv)
1154         return -ENOMEDIUM;
1155     if (!drv->bdrv_snapshot_goto)
1156         return -ENOTSUP;
1157     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1158 }
1159
1160 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1161 {
1162     BlockDriver *drv = bs->drv;
1163     if (!drv)
1164         return -ENOMEDIUM;
1165     if (!drv->bdrv_snapshot_delete)
1166         return -ENOTSUP;
1167     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1168 }
1169
1170 int bdrv_snapshot_list(BlockDriverState *bs,
1171                        QEMUSnapshotInfo **psn_info)
1172 {
1173     BlockDriver *drv = bs->drv;
1174     if (!drv)
1175         return -ENOMEDIUM;
1176     if (!drv->bdrv_snapshot_list)
1177         return -ENOTSUP;
1178     return drv->bdrv_snapshot_list(bs, psn_info);
1179 }
1180
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (capacity 'buf_size') in a compact form and
 * return 'buf'.
 *
 * Values up to 999 (including negative ones) are printed as plain integers.
 * Larger values are scaled by powers of 1024 and suffixed with K, M, G or T:
 * one decimal digit is shown while the scaled value is below 10, otherwise
 * the value is rounded to the nearest integer. Values too large for the
 * 'T' range are still printed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char unit_chars[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Move to the next unit while the value would need four or more
     * integer digits and a larger unit is still available. */
    while (size >= (1000 * unit) && idx < (NB_SUFFIXES - 1)) {
        unit = unit * 1024;
        idx++;
    }

    if (size < (10 * unit)) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 unit_chars[idx]);
    } else {
        /* adding half a unit rounds to nearest instead of truncating */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 ((size + (unit >> 1)) / unit),
                 unit_chars[idx]);
    }
    return buf;
}
1210
1211 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1212 {
1213     char buf1[128], date_buf[128], clock_buf[128];
1214 #ifdef _WIN32
1215     struct tm *ptm;
1216 #else
1217     struct tm tm;
1218 #endif
1219     time_t ti;
1220     int64_t secs;
1221
1222     if (!sn) {
1223         snprintf(buf, buf_size,
1224                  "%-10s%-20s%7s%20s%15s",
1225                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1226     } else {
1227         ti = sn->date_sec;
1228 #ifdef _WIN32
1229         ptm = localtime(&ti);
1230         strftime(date_buf, sizeof(date_buf),
1231                  "%Y-%m-%d %H:%M:%S", ptm);
1232 #else
1233         localtime_r(&ti, &tm);
1234         strftime(date_buf, sizeof(date_buf),
1235                  "%Y-%m-%d %H:%M:%S", &tm);
1236 #endif
1237         secs = sn->vm_clock_nsec / 1000000000;
1238         snprintf(clock_buf, sizeof(clock_buf),
1239                  "%02d:%02d:%02d.%03d",
1240                  (int)(secs / 3600),
1241                  (int)((secs / 60) % 60),
1242                  (int)(secs % 60),
1243                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1244         snprintf(buf, buf_size,
1245                  "%-10s%-20s%7s%20s%15s",
1246                  sn->id_str, sn->name,
1247                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1248                  date_buf,
1249                  clock_buf);
1250     }
1251     return buf;
1252 }
1253
1254
1255 /**************************************************************/
1256 /* async I/Os */
1257
1258 typedef struct VectorTranslationAIOCB {
1259     BlockDriverAIOCB common;
1260     QEMUIOVector *iov;
1261     uint8_t *bounce;
1262     int is_write;
1263     BlockDriverAIOCB *aiocb;
1264 } VectorTranslationAIOCB;
1265
1266 static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb)
1267 {
1268     VectorTranslationAIOCB *acb
1269         = container_of(_acb, VectorTranslationAIOCB, common);
1270
1271     bdrv_aio_cancel(acb->aiocb);
1272 }
1273
1274 static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
1275 {
1276     VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque;
1277
1278     if (!s->is_write) {
1279         qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
1280     }
1281     qemu_vfree(s->bounce);
1282     s->common.cb(s->common.opaque, ret);
1283     qemu_aio_release(s);
1284 }
1285
1286 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1287                                             int64_t sector_num,
1288                                             QEMUIOVector *iov,
1289                                             int nb_sectors,
1290                                             BlockDriverCompletionFunc *cb,
1291                                             void *opaque,
1292                                             int is_write)
1293
1294 {
1295     VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs,
1296                                                   cb, opaque);
1297
1298     s->iov = iov;
1299     s->bounce = qemu_memalign(512, nb_sectors * 512);
1300     s->is_write = is_write;
1301     if (is_write) {
1302         qemu_iovec_to_buffer(s->iov, s->bounce);
1303         s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
1304                                   bdrv_aio_rw_vector_cb, s);
1305     } else {
1306         s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
1307                                  bdrv_aio_rw_vector_cb, s);
1308     }
1309     if (!s->aiocb) {
1310         qemu_vfree(s->bounce);
1311         qemu_aio_release(s);
1312         return NULL;
1313     }
1314     return &s->common;
1315 }
1316
1317 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1318                                  QEMUIOVector *iov, int nb_sectors,
1319                                  BlockDriverCompletionFunc *cb, void *opaque)
1320 {
1321     if (bdrv_check_request(bs, sector_num, nb_sectors))
1322         return NULL;
1323
1324     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1325                               cb, opaque, 0);
1326 }
1327
1328 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1329                                   QEMUIOVector *iov, int nb_sectors,
1330                                   BlockDriverCompletionFunc *cb, void *opaque)
1331 {
1332     if (bdrv_check_request(bs, sector_num, nb_sectors))
1333         return NULL;
1334
1335     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1336                               cb, opaque, 1);
1337 }
1338
1339 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
1340                                 uint8_t *buf, int nb_sectors,
1341                                 BlockDriverCompletionFunc *cb, void *opaque)
1342 {
1343     BlockDriver *drv = bs->drv;
1344     BlockDriverAIOCB *ret;
1345
1346     if (!drv)
1347         return NULL;
1348     if (bdrv_check_request(bs, sector_num, nb_sectors))
1349         return NULL;
1350
1351     ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
1352
1353     if (ret) {
1354         /* Update stats even though technically transfer has not happened. */
1355         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1356         bs->rd_ops ++;
1357     }
1358
1359     return ret;
1360 }
1361
1362 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
1363                                  const uint8_t *buf, int nb_sectors,
1364                                  BlockDriverCompletionFunc *cb, void *opaque)
1365 {
1366     BlockDriver *drv = bs->drv;
1367     BlockDriverAIOCB *ret;
1368
1369     if (!drv)
1370         return NULL;
1371     if (bs->read_only)
1372         return NULL;
1373     if (bdrv_check_request(bs, sector_num, nb_sectors))
1374         return NULL;
1375
1376     ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
1377
1378     if (ret) {
1379         /* Update stats even though technically transfer has not happened. */
1380         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1381         bs->wr_ops ++;
1382     }
1383
1384     return ret;
1385 }
1386
1387 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1388 {
1389     acb->pool->cancel(acb);
1390 }
1391
1392
1393 /**************************************************************/
1394 /* async block device emulation */
1395
1396 static void bdrv_aio_bh_cb(void *opaque)
1397 {
1398     BlockDriverAIOCBSync *acb = opaque;
1399     acb->common.cb(acb->common.opaque, acb->ret);
1400     qemu_aio_release(acb);
1401 }
1402
1403 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1404         int64_t sector_num, uint8_t *buf, int nb_sectors,
1405         BlockDriverCompletionFunc *cb, void *opaque)
1406 {
1407     BlockDriverAIOCBSync *acb;
1408     int ret;
1409
1410     acb = qemu_aio_get(bs, cb, opaque);
1411     if (!acb->bh)
1412         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1413     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1414     acb->ret = ret;
1415     qemu_bh_schedule(acb->bh);
1416     return &acb->common;
1417 }
1418
1419 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1420         int64_t sector_num, const uint8_t *buf, int nb_sectors,
1421         BlockDriverCompletionFunc *cb, void *opaque)
1422 {
1423     BlockDriverAIOCBSync *acb;
1424     int ret;
1425
1426     acb = qemu_aio_get(bs, cb, opaque);
1427     if (!acb->bh)
1428         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1429     ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1430     acb->ret = ret;
1431     qemu_bh_schedule(acb->bh);
1432     return &acb->common;
1433 }
1434
1435 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1436 {
1437     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1438     qemu_bh_cancel(acb->bh);
1439     qemu_aio_release(acb);
1440 }
1441
1442 /**************************************************************/
1443 /* sync block device emulation */
1444
/*
 * Completion callback used by the synchronous emulation: 'opaque' points to
 * an int that receives the request's result.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result_slot = opaque;

    *result_slot = ret;
}
1449
1450 #define NOT_DONE 0x7fffffff
1451
1452 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1453                         uint8_t *buf, int nb_sectors)
1454 {
1455     int async_ret;
1456     BlockDriverAIOCB *acb;
1457
1458     async_ret = NOT_DONE;
1459     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1460                         bdrv_rw_em_cb, &async_ret);
1461     if (acb == NULL)
1462         return -1;
1463
1464     while (async_ret == NOT_DONE) {
1465         qemu_aio_wait();
1466     }
1467
1468     return async_ret;
1469 }
1470
1471 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1472                          const uint8_t *buf, int nb_sectors)
1473 {
1474     int async_ret;
1475     BlockDriverAIOCB *acb;
1476
1477     async_ret = NOT_DONE;
1478     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1479                          bdrv_rw_em_cb, &async_ret);
1480     if (acb == NULL)
1481         return -1;
1482     while (async_ret == NOT_DONE) {
1483         qemu_aio_wait();
1484     }
1485     return async_ret;
1486 }
1487
1488 void bdrv_init(void)
1489 {
1490     aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB),
1491                   bdrv_aio_cancel_vector);
1492
1493     bdrv_register(&bdrv_raw);
1494     bdrv_register(&bdrv_host_device);
1495 #ifndef _WIN32
1496     bdrv_register(&bdrv_cow);
1497 #endif
1498     bdrv_register(&bdrv_qcow);
1499     bdrv_register(&bdrv_vmdk);
1500     bdrv_register(&bdrv_cloop);
1501     bdrv_register(&bdrv_dmg);
1502     bdrv_register(&bdrv_bochs);
1503     bdrv_register(&bdrv_vpc);
1504     bdrv_register(&bdrv_vvfat);
1505     bdrv_register(&bdrv_qcow2);
1506     bdrv_register(&bdrv_parallels);
1507     bdrv_register(&bdrv_nbd);
1508 }
1509
1510 void aio_pool_init(AIOPool *pool, int aiocb_size,
1511                    void (*cancel)(BlockDriverAIOCB *acb))
1512 {
1513     pool->aiocb_size = aiocb_size;
1514     pool->cancel = cancel;
1515     pool->free_aiocb = NULL;
1516 }
1517
1518 void *qemu_aio_get_pool(AIOPool *pool, BlockDriverState *bs,
1519                         BlockDriverCompletionFunc *cb, void *opaque)
1520 {
1521     BlockDriverAIOCB *acb;
1522
1523     if (pool->free_aiocb) {
1524         acb = pool->free_aiocb;
1525         pool->free_aiocb = acb->next;
1526     } else {
1527         acb = qemu_mallocz(pool->aiocb_size);
1528         acb->pool = pool;
1529     }
1530     acb->bs = bs;
1531     acb->cb = cb;
1532     acb->opaque = opaque;
1533     return acb;
1534 }
1535
1536 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1537                    void *opaque)
1538 {
1539     return qemu_aio_get_pool(&bs->drv->aio_pool, bs, cb, opaque);
1540 }
1541
1542 void qemu_aio_release(void *p)
1543 {
1544     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1545     AIOPool *pool = acb->pool;
1546     acb->next = pool->free_aiocb;
1547     pool->free_aiocb = acb;
1548 }
1549
1550 /**************************************************************/
1551 /* removable device support */
1552
1553 /**
1554  * Return TRUE if the media is present
1555  */
1556 int bdrv_is_inserted(BlockDriverState *bs)
1557 {
1558     BlockDriver *drv = bs->drv;
1559     int ret;
1560     if (!drv)
1561         return 0;
1562     if (!drv->bdrv_is_inserted)
1563         return 1;
1564     ret = drv->bdrv_is_inserted(bs);
1565     return ret;
1566 }
1567
1568 /**
1569  * Return TRUE if the media changed since the last call to this
1570  * function. It is currently only used for floppy disks
1571  */
1572 int bdrv_media_changed(BlockDriverState *bs)
1573 {
1574     BlockDriver *drv = bs->drv;
1575     int ret;
1576
1577     if (!drv || !drv->bdrv_media_changed)
1578         ret = -ENOTSUP;
1579     else
1580         ret = drv->bdrv_media_changed(bs);
1581     if (ret == -ENOTSUP)
1582         ret = bs->media_changed;
1583     bs->media_changed = 0;
1584     return ret;
1585 }
1586
1587 /**
1588  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1589  */
1590 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1591 {
1592     BlockDriver *drv = bs->drv;
1593     int ret;
1594
1595     if (!drv || !drv->bdrv_eject) {
1596         ret = -ENOTSUP;
1597     } else {
1598         ret = drv->bdrv_eject(bs, eject_flag);
1599     }
1600     if (ret == -ENOTSUP) {
1601         if (eject_flag)
1602             bdrv_close(bs);
1603     }
1604 }
1605
1606 int bdrv_is_locked(BlockDriverState *bs)
1607 {
1608     return bs->locked;
1609 }
1610
1611 /**
1612  * Lock or unlock the media (if it is locked, the user won't be able
1613  * to eject it manually).
1614  */
1615 void bdrv_set_locked(BlockDriverState *bs, int locked)
1616 {
1617     BlockDriver *drv = bs->drv;
1618
1619     bs->locked = locked;
1620     if (drv && drv->bdrv_set_locked) {
1621         drv->bdrv_set_locked(bs, locked);
1622     }
1623 }
1624
1625 /* needed for generic scsi interface */
1626
1627 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1628 {
1629     BlockDriver *drv = bs->drv;
1630
1631     if (drv && drv->bdrv_ioctl)
1632         return drv->bdrv_ioctl(bs, req, buf);
1633     return -ENOTSUP;
1634 }
1635
1636 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1637         unsigned long int req, void *buf,
1638         BlockDriverCompletionFunc *cb, void *opaque)
1639 {
1640     BlockDriver *drv = bs->drv;
1641
1642     if (drv && drv->bdrv_aio_ioctl)
1643         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1644     return NULL;
1645 }