block: support known backing format for image create and open (Uri Lublin)
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33
34 #ifdef HOST_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
/* Sector size used by the helpers in this file: 1 << 9 = 512 bytes. */
47 #define SECTOR_BITS 9
48 #define SECTOR_SIZE (1 << SECTOR_BITS)
49
/* NOTE(review): not referenced in the part of the file visible here;
   presumably used by vectored AIO code further down -- confirm. */
50 static AIOPool vectored_aio_pool;
51
/* AIO control block whose size bdrv_register() installs as aiocb_size when it
   adds the AIO emulation layer to a driver.
   NOTE(review): 'bh' and 'ret' look like the bottom half that delivers the
   completion and the result it reports -- confirm against bdrv_aio_*_em(). */
52 typedef struct BlockDriverAIOCBSync {
53     BlockDriverAIOCB common;
54     QEMUBH *bh;
55     int ret;
56 } BlockDriverAIOCBSync;
57
/* Emulation callbacks that bdrv_register() plugs into drivers lacking either
   the asynchronous or the synchronous entry points. */
58 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
59         int64_t sector_num, uint8_t *buf, int nb_sectors,
60         BlockDriverCompletionFunc *cb, void *opaque);
61 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
62         int64_t sector_num, const uint8_t *buf, int nb_sectors,
63         BlockDriverCompletionFunc *cb, void *opaque);
64 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
65 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
66                         uint8_t *buf, int nb_sectors);
67 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
68                          const uint8_t *buf, int nb_sectors);
69
/* Head of the list of named block devices; bdrv_new() appends to it. */
70 BlockDriverState *bdrv_first;
71
/* Head of the list of registered drivers; bdrv_register() pushes onto it. */
72 static BlockDriver *first_drv;
73
/*
 * Return non-zero when 'path' is absolute.
 *
 * Everything up to and including the first ':' is treated as a prefix and
 * skipped, so "proto:/x" counts as absolute while "proto:x" does not.  On
 * Windows a backslash is accepted as a separator as well, and a path that
 * itself starts with a separator (device names such as "\\.\d:") is
 * absolute regardless of any ':' later on.
 */
int path_is_absolute(const char *path)
{
    const char *colon = strchr(path, ':');
    const char *start = colon ? colon + 1 : path;

#ifdef _WIN32
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
    return (start[0] == '/' || start[0] == '\\');
#else
    return (start[0] == '/');
#endif
}
93
/*
 * Store in 'dest' (capacity 'dest_size' bytes) the path of 'filename'
 * interpreted relative to 'base_path'.
 *
 * An absolute 'filename' is copied as is.  Otherwise the leading part of
 * 'base_path' is kept -- up to and including its last '/' (or '\' on
 * Windows), or at least up to the end of its "proto:" prefix when that
 * reaches further -- and 'filename' is appended to it.  Skipping the prefix
 * is what makes URL-like names work.  The result is truncated to fit, and
 * nothing is written when 'dest_size' is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character after an optional "proto:" prefix */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* first character after the last directory separator */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bsep = strrchr(base_path, '\\');
        if (!sep || bsep > sep) {
            sep = bsep;
        }
    }
#endif
    after_sep = sep ? sep + 1 : base_path;

    /* keep whichever of the two prefixes is longer */
    if (after_sep > after_proto) {
        after_proto = after_sep;
    }
    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
137
138
139 static void bdrv_register(BlockDriver *bdrv)
140 {
141     if (!bdrv->bdrv_aio_read) {
142         /* add AIO emulation layer */
143         bdrv->bdrv_aio_read = bdrv_aio_read_em;
144         bdrv->bdrv_aio_write = bdrv_aio_write_em;
145         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
146         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
147     } else if (!bdrv->bdrv_read) {
148         /* add synchronous IO emulation layer */
149         bdrv->bdrv_read = bdrv_read_em;
150         bdrv->bdrv_write = bdrv_write_em;
151     }
152     aio_pool_init(&bdrv->aio_pool, bdrv->aiocb_size, bdrv->bdrv_aio_cancel);
153     bdrv->next = first_drv;
154     first_drv = bdrv;
155 }
156
157 /* create a new block device (by default it is empty) */
158 BlockDriverState *bdrv_new(const char *device_name)
159 {
160     BlockDriverState **pbs, *bs;
161
162     bs = qemu_mallocz(sizeof(BlockDriverState));
163     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
164     if (device_name[0] != '\0') {
165         /* insert at the end */
166         pbs = &bdrv_first;
167         while (*pbs != NULL)
168             pbs = &(*pbs)->next;
169         *pbs = bs;
170     }
171     return bs;
172 }
173
174 BlockDriver *bdrv_find_format(const char *format_name)
175 {
176     BlockDriver *drv1;
177     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
178         if (!strcmp(drv1->format_name, format_name))
179             return drv1;
180     }
181     return NULL;
182 }
183
184 int bdrv_create2(BlockDriver *drv,
185                 const char *filename, int64_t size_in_sectors,
186                 const char *backing_file, const char *backing_format,
187                 int flags)
188 {
189     if (drv->bdrv_create2)
190         return drv->bdrv_create2(filename, size_in_sectors, backing_file,
191                                  backing_format, flags);
192     if (drv->bdrv_create)
193         return drv->bdrv_create(filename, size_in_sectors, backing_file,
194                                 flags);
195     return -ENOTSUP;
196 }
197
198 int bdrv_create(BlockDriver *drv,
199                 const char *filename, int64_t size_in_sectors,
200                 const char *backing_file, int flags)
201 {
202     if (!drv->bdrv_create)
203         return -ENOTSUP;
204     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
205 }
206
207 #ifdef _WIN32
208 void get_tmp_filename(char *filename, int size)
209 {
210     char temp_dir[MAX_PATH];
211
212     GetTempPath(MAX_PATH, temp_dir);
213     GetTempFileName(temp_dir, "qem", 0, filename);
214 }
215 #else
/*
 * POSIX variant: build "<dir>/vl.XXXXXX" in 'filename' ('size' bytes), where
 * <dir> is $TMPDIR or "/tmp" when that is unset, let mkstemp() fill in the
 * placeholder, and close the descriptor again so only the name is handed
 * back.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *dir = getenv("TMPDIR");
    int fd;

    /* XXX: race condition possible */
    snprintf(filename, size, "%s/vl.XXXXXX", dir ? dir : "/tmp");
    fd = mkstemp(filename);
    close(fd);
}
228 #endif
229
230 #ifdef _WIN32
/*
 * Return 1 when 'filename' starts with a drive specification, i.e. one ASCII
 * letter followed by ':' (as in "c:"), and 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* the second character is only inspected after a letter was seen */
    return is_letter && filename[1] == ':';
}
237
/*
 * Return 1 when 'filename' names a whole Windows drive: either exactly a
 * drive specification ("d:") or a name starting with "\\.\" or "//./".
 * Return 0 otherwise.
 */
static int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
248 #endif
249
250 static BlockDriver *find_protocol(const char *filename)
251 {
252     BlockDriver *drv1;
253     char protocol[128];
254     int len;
255     const char *p;
256
257 #ifdef _WIN32
258     if (is_windows_drive(filename) ||
259         is_windows_drive_prefix(filename))
260         return &bdrv_raw;
261 #endif
262     p = strchr(filename, ':');
263     if (!p)
264         return &bdrv_raw;
265     len = p - filename;
266     if (len > sizeof(protocol) - 1)
267         len = sizeof(protocol) - 1;
268     memcpy(protocol, filename, len);
269     protocol[len] = '\0';
270     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
271         if (drv1->protocol_name &&
272             !strcmp(drv1->protocol_name, protocol))
273             return drv1;
274     }
275     return NULL;
276 }
277
278 /* XXX: force raw format if block or character device ? It would
279    simplify the BSD case */
280 static BlockDriver *find_image_format(const char *filename)
281 {
282     int ret, score, score_max;
283     BlockDriver *drv1, *drv;
284     uint8_t buf[2048];
285     BlockDriverState *bs;
286
287     /* detect host devices. By convention, /dev/cdrom[N] is always
288        recognized as a host CDROM */
289     if (strstart(filename, "/dev/cdrom", NULL))
290         return &bdrv_host_device;
291 #ifdef _WIN32
292     if (is_windows_drive(filename))
293         return &bdrv_host_device;
294 #else
295     {
296         struct stat st;
297         if (stat(filename, &st) >= 0 &&
298             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
299             return &bdrv_host_device;
300         }
301     }
302 #endif
303
304     drv = find_protocol(filename);
305     /* no need to test disk image formats for vvfat */
306     if (drv == &bdrv_vvfat)
307         return drv;
308
309     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
310     if (ret < 0)
311         return NULL;
312     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
313     bdrv_delete(bs);
314     if (ret < 0) {
315         return NULL;
316     }
317
318     score_max = 0;
319     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
320         if (drv1->bdrv_probe) {
321             score = drv1->bdrv_probe(buf, ret, filename);
322             if (score > score_max) {
323                 score_max = score;
324                 drv = drv1;
325             }
326         }
327     }
328     return drv;
329 }
330
331 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
332 {
333     BlockDriverState *bs;
334     int ret;
335
336     bs = bdrv_new("");
337     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
338     if (ret < 0) {
339         bdrv_delete(bs);
340         return ret;
341     }
342     bs->growable = 1;
343     *pbs = bs;
344     return 0;
345 }
346
347 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
348 {
349     return bdrv_open2(bs, filename, flags, NULL);
350 }
351
352 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
353                BlockDriver *drv)
354 {
355     int ret, open_flags;
356     char tmp_filename[PATH_MAX];
357     char backing_filename[PATH_MAX];
358
359     bs->read_only = 0;
360     bs->is_temporary = 0;
361     bs->encrypted = 0;
362     bs->valid_key = 0;
363
364     if (flags & BDRV_O_SNAPSHOT) {
365         BlockDriverState *bs1;
366         int64_t total_size;
367         int is_protocol = 0;
368
369         /* if snapshot, we create a temporary backing file and open it
370            instead of opening 'filename' directly */
371
372         /* if there is a backing file, use it */
373         bs1 = bdrv_new("");
374         ret = bdrv_open2(bs1, filename, 0, drv);
375         if (ret < 0) {
376             bdrv_delete(bs1);
377             return ret;
378         }
379         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
380
381         if (bs1->drv && bs1->drv->protocol_name)
382             is_protocol = 1;
383
384         bdrv_delete(bs1);
385
386         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
387
388         /* Real path is meaningless for protocols */
389         if (is_protocol)
390             snprintf(backing_filename, sizeof(backing_filename),
391                      "%s", filename);
392         else
393             realpath(filename, backing_filename);
394
395         ret = bdrv_create2(&bdrv_qcow2, tmp_filename,
396                            total_size, backing_filename, 
397                            (drv ? drv->format_name : NULL), 0);
398         if (ret < 0) {
399             return ret;
400         }
401         filename = tmp_filename;
402         drv = &bdrv_qcow2;
403         bs->is_temporary = 1;
404     }
405
406     pstrcpy(bs->filename, sizeof(bs->filename), filename);
407     if (flags & BDRV_O_FILE) {
408         drv = find_protocol(filename);
409     } else if (!drv) {
410         drv = find_image_format(filename);
411     }
412     if (!drv) {
413         ret = -ENOENT;
414         goto unlink_and_fail;
415     }
416     bs->drv = drv;
417     bs->opaque = qemu_mallocz(drv->instance_size);
418     /* Note: for compatibility, we open disk image files as RDWR, and
419        RDONLY as fallback */
420     if (!(flags & BDRV_O_FILE))
421         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
422     else
423         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
424     ret = drv->bdrv_open(bs, filename, open_flags);
425     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
426         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
427         bs->read_only = 1;
428     }
429     if (ret < 0) {
430         qemu_free(bs->opaque);
431         bs->opaque = NULL;
432         bs->drv = NULL;
433     unlink_and_fail:
434         if (bs->is_temporary)
435             unlink(filename);
436         return ret;
437     }
438     if (drv->bdrv_getlength) {
439         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
440     }
441 #ifndef _WIN32
442     if (bs->is_temporary) {
443         unlink(filename);
444     }
445 #endif
446     if (bs->backing_file[0] != '\0') {
447         /* if there is a backing file, use it */
448         BlockDriver *back_drv = NULL;
449         bs->backing_hd = bdrv_new("");
450         path_combine(backing_filename, sizeof(backing_filename),
451                      filename, bs->backing_file);
452         if (bs->backing_format[0] != '\0')
453             back_drv = bdrv_find_format(bs->backing_format);
454         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
455                          back_drv);
456         if (ret < 0) {
457             bdrv_close(bs);
458             return ret;
459         }
460     }
461
462     if (!bdrv_key_required(bs)) {
463         /* call the change callback */
464         bs->media_changed = 1;
465         if (bs->change_cb)
466             bs->change_cb(bs->change_opaque);
467     }
468     return 0;
469 }
470
471 void bdrv_close(BlockDriverState *bs)
472 {
473     if (bs->drv) {
474         if (bs->backing_hd)
475             bdrv_delete(bs->backing_hd);
476         bs->drv->bdrv_close(bs);
477         qemu_free(bs->opaque);
478 #ifdef _WIN32
479         if (bs->is_temporary) {
480             unlink(bs->filename);
481         }
482 #endif
483         bs->opaque = NULL;
484         bs->drv = NULL;
485
486         /* call the change callback */
487         bs->media_changed = 1;
488         if (bs->change_cb)
489             bs->change_cb(bs->change_opaque);
490     }
491 }
492
493 void bdrv_delete(BlockDriverState *bs)
494 {
495     BlockDriverState **pbs;
496
497     pbs = &bdrv_first;
498     while (*pbs != bs && *pbs != NULL)
499         pbs = &(*pbs)->next;
500     if (*pbs == bs)
501         *pbs = bs->next;
502
503     bdrv_close(bs);
504     qemu_free(bs);
505 }
506
507 /* commit COW file into the raw image */
508 int bdrv_commit(BlockDriverState *bs)
509 {
510     BlockDriver *drv = bs->drv;
511     int64_t i, total_sectors;
512     int n, j;
513     unsigned char sector[512];
514
515     if (!drv)
516         return -ENOMEDIUM;
517
518     if (bs->read_only) {
519         return -EACCES;
520     }
521
522     if (!bs->backing_hd) {
523         return -ENOTSUP;
524     }
525
526     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
527     for (i = 0; i < total_sectors;) {
528         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
529             for(j = 0; j < n; j++) {
530                 if (bdrv_read(bs, i, sector, 1) != 0) {
531                     return -EIO;
532                 }
533
534                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
535                     return -EIO;
536                 }
537                 i++;
538             }
539         } else {
540             i += n;
541         }
542     }
543
544     if (drv->bdrv_make_empty)
545         return drv->bdrv_make_empty(bs);
546
547     return 0;
548 }
549
550 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
551                                    size_t size)
552 {
553     int64_t len;
554
555     if (!bdrv_is_inserted(bs))
556         return -ENOMEDIUM;
557
558     if (bs->growable)
559         return 0;
560
561     len = bdrv_getlength(bs);
562
563     if ((offset + size) > len)
564         return -EIO;
565
566     return 0;
567 }
568
569 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
570                               int nb_sectors)
571 {
572     int64_t offset;
573
574     /* Deal with byte accesses */
575     if (sector_num < 0)
576         offset = -sector_num;
577     else
578         offset = sector_num * 512;
579
580     return bdrv_check_byte_request(bs, offset, nb_sectors * 512);
581 }
582
583 /* return < 0 if error. See bdrv_write() for the return codes */
584 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
585               uint8_t *buf, int nb_sectors)
586 {
587     BlockDriver *drv = bs->drv;
588
589     if (!drv)
590         return -ENOMEDIUM;
591     if (bdrv_check_request(bs, sector_num, nb_sectors))
592         return -EIO;
593
594     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
595 }
596
597 /* Return < 0 if error. Important errors are:
598   -EIO         generic I/O error (may happen for all errors)
599   -ENOMEDIUM   No media inserted.
600   -EINVAL      Invalid sector number or nb_sectors
601   -EACCES      Trying to write a read-only device
602 */
603 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
604                const uint8_t *buf, int nb_sectors)
605 {
606     BlockDriver *drv = bs->drv;
607     if (!bs->drv)
608         return -ENOMEDIUM;
609     if (bs->read_only)
610         return -EACCES;
611     if (bdrv_check_request(bs, sector_num, nb_sectors))
612         return -EIO;
613
614     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
615 }
616
617 int bdrv_pread(BlockDriverState *bs, int64_t offset,
618                void *buf, int count1)
619 {
620     uint8_t tmp_buf[SECTOR_SIZE];
621     int len, nb_sectors, count;
622     int64_t sector_num;
623
624     count = count1;
625     /* first read to align to sector start */
626     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
627     if (len > count)
628         len = count;
629     sector_num = offset >> SECTOR_BITS;
630     if (len > 0) {
631         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
632             return -EIO;
633         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
634         count -= len;
635         if (count == 0)
636             return count1;
637         sector_num++;
638         buf += len;
639     }
640
641     /* read the sectors "in place" */
642     nb_sectors = count >> SECTOR_BITS;
643     if (nb_sectors > 0) {
644         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
645             return -EIO;
646         sector_num += nb_sectors;
647         len = nb_sectors << SECTOR_BITS;
648         buf += len;
649         count -= len;
650     }
651
652     /* add data from the last sector */
653     if (count > 0) {
654         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
655             return -EIO;
656         memcpy(buf, tmp_buf, count);
657     }
658     return count1;
659 }
660
661 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
662                 const void *buf, int count1)
663 {
664     uint8_t tmp_buf[SECTOR_SIZE];
665     int len, nb_sectors, count;
666     int64_t sector_num;
667
668     count = count1;
669     /* first write to align to sector start */
670     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
671     if (len > count)
672         len = count;
673     sector_num = offset >> SECTOR_BITS;
674     if (len > 0) {
675         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
676             return -EIO;
677         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
678         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
679             return -EIO;
680         count -= len;
681         if (count == 0)
682             return count1;
683         sector_num++;
684         buf += len;
685     }
686
687     /* write the sectors "in place" */
688     nb_sectors = count >> SECTOR_BITS;
689     if (nb_sectors > 0) {
690         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
691             return -EIO;
692         sector_num += nb_sectors;
693         len = nb_sectors << SECTOR_BITS;
694         buf += len;
695         count -= len;
696     }
697
698     /* add data from the last sector */
699     if (count > 0) {
700         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
701             return -EIO;
702         memcpy(tmp_buf, buf, count);
703         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
704             return -EIO;
705     }
706     return count1;
707 }
708
709 /**
710  * Truncate file to 'offset' bytes (needed only for file protocols)
711  */
712 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
713 {
714     BlockDriver *drv = bs->drv;
715     if (!drv)
716         return -ENOMEDIUM;
717     if (!drv->bdrv_truncate)
718         return -ENOTSUP;
719     return drv->bdrv_truncate(bs, offset);
720 }
721
722 /**
723  * Length of a file in bytes. Return < 0 if error or unknown.
724  */
725 int64_t bdrv_getlength(BlockDriverState *bs)
726 {
727     BlockDriver *drv = bs->drv;
728     if (!drv)
729         return -ENOMEDIUM;
730     if (!drv->bdrv_getlength) {
731         /* legacy mode */
732         return bs->total_sectors * SECTOR_SIZE;
733     }
734     return drv->bdrv_getlength(bs);
735 }
736
737 /* return 0 as number of sectors if no device present or error */
738 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
739 {
740     int64_t length;
741     length = bdrv_getlength(bs);
742     if (length < 0)
743         length = 0;
744     else
745         length = length >> SECTOR_BITS;
746     *nb_sectors_ptr = length;
747 }
748
/* One packed 16-byte entry of the MSDOS partition table.
   guess_disk_lchs() overlays four of these on sector 0 starting at byte
   offset 0x1be and reads nr_sects through le32_to_cpu(), i.e. the 32-bit
   fields are little-endian on disk. */
749 struct partition {
750         uint8_t boot_ind;           /* 0x80 - active */
751         uint8_t head;               /* starting head */
752         uint8_t sector;             /* starting sector */
753         uint8_t cyl;                /* starting cylinder */
754         uint8_t sys_ind;            /* What partition type */
755         uint8_t end_head;           /* end head */
756         uint8_t end_sector;         /* end sector */
757         uint8_t end_cyl;            /* end cylinder */
758         uint32_t start_sect;        /* starting sector counting from 0 */
759         uint32_t nr_sects;          /* nr of sectors in partition */
760 } __attribute__((packed));
761
762 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
763 static int guess_disk_lchs(BlockDriverState *bs,
764                            int *pcylinders, int *pheads, int *psectors)
765 {
766     uint8_t buf[512];
767     int ret, i, heads, sectors, cylinders;
768     struct partition *p;
769     uint32_t nr_sects;
770     uint64_t nb_sectors;
771
772     bdrv_get_geometry(bs, &nb_sectors);
773
774     ret = bdrv_read(bs, 0, buf, 1);
775     if (ret < 0)
776         return -1;
777     /* test msdos magic */
778     if (buf[510] != 0x55 || buf[511] != 0xaa)
779         return -1;
780     for(i = 0; i < 4; i++) {
781         p = ((struct partition *)(buf + 0x1be)) + i;
782         nr_sects = le32_to_cpu(p->nr_sects);
783         if (nr_sects && p->end_head) {
784             /* We make the assumption that the partition terminates on
785                a cylinder boundary */
786             heads = p->end_head + 1;
787             sectors = p->end_sector & 63;
788             if (sectors == 0)
789                 continue;
790             cylinders = nb_sectors / (heads * sectors);
791             if (cylinders < 1 || cylinders > 16383)
792                 continue;
793             *pheads = heads;
794             *psectors = sectors;
795             *pcylinders = cylinders;
796 #if 0
797             printf("guessed geometry: LCHS=%d %d %d\n",
798                    cylinders, heads, sectors);
799 #endif
800             return 0;
801         }
802     }
803     return -1;
804 }
805
806 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
807 {
808     int translation, lba_detected = 0;
809     int cylinders, heads, secs;
810     uint64_t nb_sectors;
811
812     /* if a geometry hint is available, use it */
813     bdrv_get_geometry(bs, &nb_sectors);
814     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
815     translation = bdrv_get_translation_hint(bs);
816     if (cylinders != 0) {
817         *pcyls = cylinders;
818         *pheads = heads;
819         *psecs = secs;
820     } else {
821         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
822             if (heads > 16) {
823                 /* if heads > 16, it means that a BIOS LBA
824                    translation was active, so the default
825                    hardware geometry is OK */
826                 lba_detected = 1;
827                 goto default_geometry;
828             } else {
829                 *pcyls = cylinders;
830                 *pheads = heads;
831                 *psecs = secs;
832                 /* disable any translation to be in sync with
833                    the logical geometry */
834                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
835                     bdrv_set_translation_hint(bs,
836                                               BIOS_ATA_TRANSLATION_NONE);
837                 }
838             }
839         } else {
840         default_geometry:
841             /* if no geometry, use a standard physical disk geometry */
842             cylinders = nb_sectors / (16 * 63);
843
844             if (cylinders > 16383)
845                 cylinders = 16383;
846             else if (cylinders < 2)
847                 cylinders = 2;
848             *pcyls = cylinders;
849             *pheads = 16;
850             *psecs = 63;
851             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
852                 if ((*pcyls * *pheads) <= 131072) {
853                     bdrv_set_translation_hint(bs,
854                                               BIOS_ATA_TRANSLATION_LARGE);
855                 } else {
856                     bdrv_set_translation_hint(bs,
857                                               BIOS_ATA_TRANSLATION_LBA);
858                 }
859             }
860         }
861         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
862     }
863 }
864
865 void bdrv_set_geometry_hint(BlockDriverState *bs,
866                             int cyls, int heads, int secs)
867 {
868     bs->cyls = cyls;
869     bs->heads = heads;
870     bs->secs = secs;
871 }
872
873 void bdrv_set_type_hint(BlockDriverState *bs, int type)
874 {
875     bs->type = type;
876     bs->removable = ((type == BDRV_TYPE_CDROM ||
877                       type == BDRV_TYPE_FLOPPY));
878 }
879
880 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
881 {
882     bs->translation = translation;
883 }
884
885 void bdrv_get_geometry_hint(BlockDriverState *bs,
886                             int *pcyls, int *pheads, int *psecs)
887 {
888     *pcyls = bs->cyls;
889     *pheads = bs->heads;
890     *psecs = bs->secs;
891 }
892
893 int bdrv_get_type_hint(BlockDriverState *bs)
894 {
895     return bs->type;
896 }
897
898 int bdrv_get_translation_hint(BlockDriverState *bs)
899 {
900     return bs->translation;
901 }
902
903 int bdrv_is_removable(BlockDriverState *bs)
904 {
905     return bs->removable;
906 }
907
908 int bdrv_is_read_only(BlockDriverState *bs)
909 {
910     return bs->read_only;
911 }
912
913 int bdrv_is_sg(BlockDriverState *bs)
914 {
915     return bs->sg;
916 }
917
918 /* XXX: no longer used */
919 void bdrv_set_change_cb(BlockDriverState *bs,
920                         void (*change_cb)(void *opaque), void *opaque)
921 {
922     bs->change_cb = change_cb;
923     bs->change_opaque = opaque;
924 }
925
926 int bdrv_is_encrypted(BlockDriverState *bs)
927 {
928     if (bs->backing_hd && bs->backing_hd->encrypted)
929         return 1;
930     return bs->encrypted;
931 }
932
933 int bdrv_key_required(BlockDriverState *bs)
934 {
935     BlockDriverState *backing_hd = bs->backing_hd;
936
937     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
938         return 1;
939     return (bs->encrypted && !bs->valid_key);
940 }
941
942 int bdrv_set_key(BlockDriverState *bs, const char *key)
943 {
944     int ret;
945     if (bs->backing_hd && bs->backing_hd->encrypted) {
946         ret = bdrv_set_key(bs->backing_hd, key);
947         if (ret < 0)
948             return ret;
949         if (!bs->encrypted)
950             return 0;
951     }
952     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
953         return -1;
954     ret = bs->drv->bdrv_set_key(bs, key);
955     if (ret < 0) {
956         bs->valid_key = 0;
957     } else if (!bs->valid_key) {
958         bs->valid_key = 1;
959         /* call the change callback now, we skipped it on open */
960         bs->media_changed = 1;
961         if (bs->change_cb)
962             bs->change_cb(bs->change_opaque);
963     }
964     return ret;
965 }
966
967 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
968 {
969     if (!bs->drv) {
970         buf[0] = '\0';
971     } else {
972         pstrcpy(buf, buf_size, bs->drv->format_name);
973     }
974 }
975
976 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
977                          void *opaque)
978 {
979     BlockDriver *drv;
980
981     for (drv = first_drv; drv != NULL; drv = drv->next) {
982         it(opaque, drv->format_name);
983     }
984 }
985
986 BlockDriverState *bdrv_find(const char *name)
987 {
988     BlockDriverState *bs;
989
990     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
991         if (!strcmp(name, bs->device_name))
992             return bs;
993     }
994     return NULL;
995 }
996
997 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
998 {
999     BlockDriverState *bs;
1000
1001     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1002         it(opaque, bs);
1003     }
1004 }
1005
1006 const char *bdrv_get_device_name(BlockDriverState *bs)
1007 {
1008     return bs->device_name;
1009 }
1010
1011 void bdrv_flush(BlockDriverState *bs)
1012 {
1013     if (bs->drv->bdrv_flush)
1014         bs->drv->bdrv_flush(bs);
1015     if (bs->backing_hd)
1016         bdrv_flush(bs->backing_hd);
1017 }
1018
1019 void bdrv_flush_all(void)
1020 {
1021     BlockDriverState *bs;
1022
1023     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1024         if (bs->drv && !bdrv_is_read_only(bs) && 
1025             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1026             bdrv_flush(bs);
1027 }
1028
1029 /*
1030  * Returns true iff the specified sector is present in the disk image. Drivers
1031  * not implementing the functionality are assumed to not support backing files,
1032  * hence all their sectors are reported as allocated.
1033  *
1034  * 'pnum' is set to the number of sectors (including and immediately following
1035  * the specified sector) that are known to be in the same
1036  * allocated/unallocated state.
1037  *
1038  * 'nb_sectors' is the max value 'pnum' should be set to.
1039  */
1040 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1041         int *pnum)
1042 {
1043     int64_t n;
1044     if (!bs->drv->bdrv_is_allocated) {
1045         if (sector_num >= bs->total_sectors) {
1046             *pnum = 0;
1047             return 0;
1048         }
1049         n = bs->total_sectors - sector_num;
1050         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1051         return 1;
1052     }
1053     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1054 }
1055
1056 void bdrv_info(Monitor *mon)
1057 {
1058     BlockDriverState *bs;
1059
1060     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1061         monitor_printf(mon, "%s:", bs->device_name);
1062         monitor_printf(mon, " type=");
1063         switch(bs->type) {
1064         case BDRV_TYPE_HD:
1065             monitor_printf(mon, "hd");
1066             break;
1067         case BDRV_TYPE_CDROM:
1068             monitor_printf(mon, "cdrom");
1069             break;
1070         case BDRV_TYPE_FLOPPY:
1071             monitor_printf(mon, "floppy");
1072             break;
1073         }
1074         monitor_printf(mon, " removable=%d", bs->removable);
1075         if (bs->removable) {
1076             monitor_printf(mon, " locked=%d", bs->locked);
1077         }
1078         if (bs->drv) {
1079             monitor_printf(mon, " file=");
1080             monitor_print_filename(mon, bs->filename);
1081             if (bs->backing_file[0] != '\0') {
1082                 monitor_printf(mon, " backing_file=");
1083                 monitor_print_filename(mon, bs->backing_file);
1084             }
1085             monitor_printf(mon, " ro=%d", bs->read_only);
1086             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1087             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1088         } else {
1089             monitor_printf(mon, " [not inserted]");
1090         }
1091         monitor_printf(mon, "\n");
1092     }
1093 }
1094
1095 /* The "info blockstats" command. */
1096 void bdrv_info_stats(Monitor *mon)
1097 {
1098     BlockDriverState *bs;
1099
1100     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1101         monitor_printf(mon, "%s:"
1102                        " rd_bytes=%" PRIu64
1103                        " wr_bytes=%" PRIu64
1104                        " rd_operations=%" PRIu64
1105                        " wr_operations=%" PRIu64
1106                        "\n",
1107                        bs->device_name,
1108                        bs->rd_bytes, bs->wr_bytes,
1109                        bs->rd_ops, bs->wr_ops);
1110     }
1111 }
1112
1113 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1114 {
1115     if (bs->backing_hd && bs->backing_hd->encrypted)
1116         return bs->backing_file;
1117     else if (bs->encrypted)
1118         return bs->filename;
1119     else
1120         return NULL;
1121 }
1122
1123 void bdrv_get_backing_filename(BlockDriverState *bs,
1124                                char *filename, int filename_size)
1125 {
1126     if (!bs->backing_hd) {
1127         pstrcpy(filename, filename_size, "");
1128     } else {
1129         pstrcpy(filename, filename_size, bs->backing_file);
1130     }
1131 }
1132
1133 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1134                           const uint8_t *buf, int nb_sectors)
1135 {
1136     BlockDriver *drv = bs->drv;
1137     if (!drv)
1138         return -ENOMEDIUM;
1139     if (!drv->bdrv_write_compressed)
1140         return -ENOTSUP;
1141     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1142 }
1143
1144 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1145 {
1146     BlockDriver *drv = bs->drv;
1147     if (!drv)
1148         return -ENOMEDIUM;
1149     if (!drv->bdrv_get_info)
1150         return -ENOTSUP;
1151     memset(bdi, 0, sizeof(*bdi));
1152     return drv->bdrv_get_info(bs, bdi);
1153 }
1154
1155 /**************************************************************/
1156 /* handling of snapshots */
1157
1158 int bdrv_snapshot_create(BlockDriverState *bs,
1159                          QEMUSnapshotInfo *sn_info)
1160 {
1161     BlockDriver *drv = bs->drv;
1162     if (!drv)
1163         return -ENOMEDIUM;
1164     if (!drv->bdrv_snapshot_create)
1165         return -ENOTSUP;
1166     return drv->bdrv_snapshot_create(bs, sn_info);
1167 }
1168
1169 int bdrv_snapshot_goto(BlockDriverState *bs,
1170                        const char *snapshot_id)
1171 {
1172     BlockDriver *drv = bs->drv;
1173     if (!drv)
1174         return -ENOMEDIUM;
1175     if (!drv->bdrv_snapshot_goto)
1176         return -ENOTSUP;
1177     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1178 }
1179
1180 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1181 {
1182     BlockDriver *drv = bs->drv;
1183     if (!drv)
1184         return -ENOMEDIUM;
1185     if (!drv->bdrv_snapshot_delete)
1186         return -ENOTSUP;
1187     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1188 }
1189
1190 int bdrv_snapshot_list(BlockDriverState *bs,
1191                        QEMUSnapshotInfo **psn_info)
1192 {
1193     BlockDriver *drv = bs->drv;
1194     if (!drv)
1195         return -ENOMEDIUM;
1196     if (!drv->bdrv_snapshot_list)
1197         return -ENOTSUP;
1198     return drv->bdrv_snapshot_list(bs, psn_info);
1199 }
1200
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes, via snprintf) in
 * a compact human-readable form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values are
 * scaled by powers of 1024 and suffixed with K, M, G or T: one decimal
 * digit is shown while the scaled value is below 10, otherwise the
 * value is rounded to the nearest integer. Anything too large for the
 * "G" range is expressed in "T".
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        /* The last suffix absorbs every remaining magnitude. */
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
1230
1231 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1232 {
1233     char buf1[128], date_buf[128], clock_buf[128];
1234 #ifdef _WIN32
1235     struct tm *ptm;
1236 #else
1237     struct tm tm;
1238 #endif
1239     time_t ti;
1240     int64_t secs;
1241
1242     if (!sn) {
1243         snprintf(buf, buf_size,
1244                  "%-10s%-20s%7s%20s%15s",
1245                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1246     } else {
1247         ti = sn->date_sec;
1248 #ifdef _WIN32
1249         ptm = localtime(&ti);
1250         strftime(date_buf, sizeof(date_buf),
1251                  "%Y-%m-%d %H:%M:%S", ptm);
1252 #else
1253         localtime_r(&ti, &tm);
1254         strftime(date_buf, sizeof(date_buf),
1255                  "%Y-%m-%d %H:%M:%S", &tm);
1256 #endif
1257         secs = sn->vm_clock_nsec / 1000000000;
1258         snprintf(clock_buf, sizeof(clock_buf),
1259                  "%02d:%02d:%02d.%03d",
1260                  (int)(secs / 3600),
1261                  (int)((secs / 60) % 60),
1262                  (int)(secs % 60),
1263                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1264         snprintf(buf, buf_size,
1265                  "%-10s%-20s%7s%20s%15s",
1266                  sn->id_str, sn->name,
1267                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1268                  date_buf,
1269                  clock_buf);
1270     }
1271     return buf;
1272 }
1273
1274
1275 /**************************************************************/
1276 /* async I/Os */
1277
/*
 * State of one vectored request that is serviced through a linear
 * bounce buffer (see bdrv_aio_rw_vector and bdrv_aio_rw_vector_cb).
 */
typedef struct VectorTranslationAIOCB {
    BlockDriverAIOCB common;    /* generic AIOCB; carries the caller's cb/opaque */
    QEMUIOVector *iov;          /* caller's I/O vector */
    uint8_t *bounce;            /* linear buffer from qemu_memalign, freed with qemu_vfree */
    int is_write;               /* non-zero for a write, zero for a read */
    BlockDriverAIOCB *aiocb;    /* underlying linear request from bdrv_aio_read/write */
} VectorTranslationAIOCB;
1285
1286 static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb)
1287 {
1288     VectorTranslationAIOCB *acb
1289         = container_of(_acb, VectorTranslationAIOCB, common);
1290
1291     bdrv_aio_cancel(acb->aiocb);
1292 }
1293
1294 static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
1295 {
1296     VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque;
1297
1298     if (!s->is_write) {
1299         qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
1300     }
1301     qemu_vfree(s->bounce);
1302     s->common.cb(s->common.opaque, ret);
1303     qemu_aio_release(s);
1304 }
1305
1306 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1307                                             int64_t sector_num,
1308                                             QEMUIOVector *iov,
1309                                             int nb_sectors,
1310                                             BlockDriverCompletionFunc *cb,
1311                                             void *opaque,
1312                                             int is_write)
1313
1314 {
1315     VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs,
1316                                                   cb, opaque);
1317
1318     s->iov = iov;
1319     s->bounce = qemu_memalign(512, nb_sectors * 512);
1320     s->is_write = is_write;
1321     if (is_write) {
1322         qemu_iovec_to_buffer(s->iov, s->bounce);
1323         s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
1324                                   bdrv_aio_rw_vector_cb, s);
1325     } else {
1326         s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
1327                                  bdrv_aio_rw_vector_cb, s);
1328     }
1329     if (!s->aiocb) {
1330         qemu_vfree(s->bounce);
1331         qemu_aio_release(s);
1332         return NULL;
1333     }
1334     return &s->common;
1335 }
1336
1337 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1338                                  QEMUIOVector *iov, int nb_sectors,
1339                                  BlockDriverCompletionFunc *cb, void *opaque)
1340 {
1341     if (bdrv_check_request(bs, sector_num, nb_sectors))
1342         return NULL;
1343
1344     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1345                               cb, opaque, 0);
1346 }
1347
1348 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1349                                   QEMUIOVector *iov, int nb_sectors,
1350                                   BlockDriverCompletionFunc *cb, void *opaque)
1351 {
1352     if (bdrv_check_request(bs, sector_num, nb_sectors))
1353         return NULL;
1354
1355     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1356                               cb, opaque, 1);
1357 }
1358
1359 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
1360                                 uint8_t *buf, int nb_sectors,
1361                                 BlockDriverCompletionFunc *cb, void *opaque)
1362 {
1363     BlockDriver *drv = bs->drv;
1364     BlockDriverAIOCB *ret;
1365
1366     if (!drv)
1367         return NULL;
1368     if (bdrv_check_request(bs, sector_num, nb_sectors))
1369         return NULL;
1370
1371     ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
1372
1373     if (ret) {
1374         /* Update stats even though technically transfer has not happened. */
1375         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1376         bs->rd_ops ++;
1377     }
1378
1379     return ret;
1380 }
1381
1382 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
1383                                  const uint8_t *buf, int nb_sectors,
1384                                  BlockDriverCompletionFunc *cb, void *opaque)
1385 {
1386     BlockDriver *drv = bs->drv;
1387     BlockDriverAIOCB *ret;
1388
1389     if (!drv)
1390         return NULL;
1391     if (bs->read_only)
1392         return NULL;
1393     if (bdrv_check_request(bs, sector_num, nb_sectors))
1394         return NULL;
1395
1396     ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
1397
1398     if (ret) {
1399         /* Update stats even though technically transfer has not happened. */
1400         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1401         bs->wr_ops ++;
1402     }
1403
1404     return ret;
1405 }
1406
1407 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1408 {
1409     acb->pool->cancel(acb);
1410 }
1411
1412
1413 /**************************************************************/
1414 /* async block device emulation */
1415
1416 static void bdrv_aio_bh_cb(void *opaque)
1417 {
1418     BlockDriverAIOCBSync *acb = opaque;
1419     acb->common.cb(acb->common.opaque, acb->ret);
1420     qemu_aio_release(acb);
1421 }
1422
1423 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1424         int64_t sector_num, uint8_t *buf, int nb_sectors,
1425         BlockDriverCompletionFunc *cb, void *opaque)
1426 {
1427     BlockDriverAIOCBSync *acb;
1428     int ret;
1429
1430     acb = qemu_aio_get(bs, cb, opaque);
1431     if (!acb->bh)
1432         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1433     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1434     acb->ret = ret;
1435     qemu_bh_schedule(acb->bh);
1436     return &acb->common;
1437 }
1438
1439 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1440         int64_t sector_num, const uint8_t *buf, int nb_sectors,
1441         BlockDriverCompletionFunc *cb, void *opaque)
1442 {
1443     BlockDriverAIOCBSync *acb;
1444     int ret;
1445
1446     acb = qemu_aio_get(bs, cb, opaque);
1447     if (!acb->bh)
1448         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1449     ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1450     acb->ret = ret;
1451     qemu_bh_schedule(acb->bh);
1452     return &acb->common;
1453 }
1454
1455 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1456 {
1457     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1458     qemu_bh_cancel(acb->bh);
1459     qemu_aio_release(acb);
1460 }
1461
1462 /**************************************************************/
1463 /* sync block device emulation */
1464
/*
 * Completion callback of the synchronous emulation: 'opaque' points to
 * the waiter's int result slot, which receives 'ret'.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1469
1470 #define NOT_DONE 0x7fffffff
1471
1472 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1473                         uint8_t *buf, int nb_sectors)
1474 {
1475     int async_ret;
1476     BlockDriverAIOCB *acb;
1477
1478     async_ret = NOT_DONE;
1479     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1480                         bdrv_rw_em_cb, &async_ret);
1481     if (acb == NULL)
1482         return -1;
1483
1484     while (async_ret == NOT_DONE) {
1485         qemu_aio_wait();
1486     }
1487
1488     return async_ret;
1489 }
1490
1491 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1492                          const uint8_t *buf, int nb_sectors)
1493 {
1494     int async_ret;
1495     BlockDriverAIOCB *acb;
1496
1497     async_ret = NOT_DONE;
1498     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1499                          bdrv_rw_em_cb, &async_ret);
1500     if (acb == NULL)
1501         return -1;
1502     while (async_ret == NOT_DONE) {
1503         qemu_aio_wait();
1504     }
1505     return async_ret;
1506 }
1507
1508 void bdrv_init(void)
1509 {
1510     aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB),
1511                   bdrv_aio_cancel_vector);
1512
1513     bdrv_register(&bdrv_raw);
1514     bdrv_register(&bdrv_host_device);
1515 #ifndef _WIN32
1516     bdrv_register(&bdrv_cow);
1517 #endif
1518     bdrv_register(&bdrv_qcow);
1519     bdrv_register(&bdrv_vmdk);
1520     bdrv_register(&bdrv_cloop);
1521     bdrv_register(&bdrv_dmg);
1522     bdrv_register(&bdrv_bochs);
1523     bdrv_register(&bdrv_vpc);
1524     bdrv_register(&bdrv_vvfat);
1525     bdrv_register(&bdrv_qcow2);
1526     bdrv_register(&bdrv_parallels);
1527     bdrv_register(&bdrv_nbd);
1528 }
1529
1530 void aio_pool_init(AIOPool *pool, int aiocb_size,
1531                    void (*cancel)(BlockDriverAIOCB *acb))
1532 {
1533     pool->aiocb_size = aiocb_size;
1534     pool->cancel = cancel;
1535     pool->free_aiocb = NULL;
1536 }
1537
1538 void *qemu_aio_get_pool(AIOPool *pool, BlockDriverState *bs,
1539                         BlockDriverCompletionFunc *cb, void *opaque)
1540 {
1541     BlockDriverAIOCB *acb;
1542
1543     if (pool->free_aiocb) {
1544         acb = pool->free_aiocb;
1545         pool->free_aiocb = acb->next;
1546     } else {
1547         acb = qemu_mallocz(pool->aiocb_size);
1548         acb->pool = pool;
1549     }
1550     acb->bs = bs;
1551     acb->cb = cb;
1552     acb->opaque = opaque;
1553     return acb;
1554 }
1555
1556 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1557                    void *opaque)
1558 {
1559     return qemu_aio_get_pool(&bs->drv->aio_pool, bs, cb, opaque);
1560 }
1561
1562 void qemu_aio_release(void *p)
1563 {
1564     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1565     AIOPool *pool = acb->pool;
1566     acb->next = pool->free_aiocb;
1567     pool->free_aiocb = acb;
1568 }
1569
1570 /**************************************************************/
1571 /* removable device support */
1572
1573 /**
1574  * Return TRUE if the media is present
1575  */
1576 int bdrv_is_inserted(BlockDriverState *bs)
1577 {
1578     BlockDriver *drv = bs->drv;
1579     int ret;
1580     if (!drv)
1581         return 0;
1582     if (!drv->bdrv_is_inserted)
1583         return 1;
1584     ret = drv->bdrv_is_inserted(bs);
1585     return ret;
1586 }
1587
1588 /**
1589  * Return TRUE if the media changed since the last call to this
1590  * function. It is currently only used for floppy disks
1591  */
1592 int bdrv_media_changed(BlockDriverState *bs)
1593 {
1594     BlockDriver *drv = bs->drv;
1595     int ret;
1596
1597     if (!drv || !drv->bdrv_media_changed)
1598         ret = -ENOTSUP;
1599     else
1600         ret = drv->bdrv_media_changed(bs);
1601     if (ret == -ENOTSUP)
1602         ret = bs->media_changed;
1603     bs->media_changed = 0;
1604     return ret;
1605 }
1606
1607 /**
1608  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1609  */
1610 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1611 {
1612     BlockDriver *drv = bs->drv;
1613     int ret;
1614
1615     if (!drv || !drv->bdrv_eject) {
1616         ret = -ENOTSUP;
1617     } else {
1618         ret = drv->bdrv_eject(bs, eject_flag);
1619     }
1620     if (ret == -ENOTSUP) {
1621         if (eject_flag)
1622             bdrv_close(bs);
1623     }
1624 }
1625
1626 int bdrv_is_locked(BlockDriverState *bs)
1627 {
1628     return bs->locked;
1629 }
1630
1631 /**
1632  * Lock or unlock the media (if it is locked, the user won't be able
1633  * to eject it manually).
1634  */
1635 void bdrv_set_locked(BlockDriverState *bs, int locked)
1636 {
1637     BlockDriver *drv = bs->drv;
1638
1639     bs->locked = locked;
1640     if (drv && drv->bdrv_set_locked) {
1641         drv->bdrv_set_locked(bs, locked);
1642     }
1643 }
1644
1645 /* needed for generic scsi interface */
1646
1647 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1648 {
1649     BlockDriver *drv = bs->drv;
1650
1651     if (drv && drv->bdrv_ioctl)
1652         return drv->bdrv_ioctl(bs, req, buf);
1653     return -ENOTSUP;
1654 }
1655
1656 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1657         unsigned long int req, void *buf,
1658         BlockDriverCompletionFunc *cb, void *opaque)
1659 {
1660     BlockDriver *drv = bs->drv;
1661
1662     if (drv && drv->bdrv_aio_ioctl)
1663         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1664     return NULL;
1665 }