Fix wrong return value
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33
34 #ifdef HOST_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
47 #define SECTOR_BITS 9
48 #define SECTOR_SIZE (1 << SECTOR_BITS)
49
50 static AIOPool vectored_aio_pool;
51
52 typedef struct BlockDriverAIOCBSync {
53     BlockDriverAIOCB common;
54     QEMUBH *bh;
55     int ret;
56 } BlockDriverAIOCBSync;
57
58 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
59         int64_t sector_num, uint8_t *buf, int nb_sectors,
60         BlockDriverCompletionFunc *cb, void *opaque);
61 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
62         int64_t sector_num, const uint8_t *buf, int nb_sectors,
63         BlockDriverCompletionFunc *cb, void *opaque);
64 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
65 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
66                         uint8_t *buf, int nb_sectors);
67 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
68                          const uint8_t *buf, int nb_sectors);
69
70 BlockDriverState *bdrv_first;
71
72 static BlockDriver *first_drv;
73
/*
 * Return 1 if 'path' is absolute, 0 otherwise.
 *
 * Everything up to and including the first ':' (if any) is skipped, and
 * the path is absolute when the next character is '/' (or '\\' on
 * Windows).  On Windows a path that itself starts with '/' or '\\' is
 * always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
93
/*
 * Build in 'dest' (capacity 'dest_size' bytes) the location of 'filename'
 * interpreted relative to 'base_path'.
 *
 * An absolute 'filename' is copied unchanged.  Otherwise the prefix of
 * 'base_path' up to and including its last '/' (or '\\' on Windows), or
 * up to and including its first ':' if that comes later, is kept and
 * 'filename' is appended to it.  Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* end of the "protocol:" part, if any */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* end of the directory part, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix reaches further, clamped to the buffer */
    prefix_len = (after_sep > after_colon ? after_sep : after_colon) - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
137
138
139 static void bdrv_register(BlockDriver *bdrv)
140 {
141     if (!bdrv->bdrv_aio_read) {
142         /* add AIO emulation layer */
143         bdrv->bdrv_aio_read = bdrv_aio_read_em;
144         bdrv->bdrv_aio_write = bdrv_aio_write_em;
145         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
146         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
147     } else if (!bdrv->bdrv_read) {
148         /* add synchronous IO emulation layer */
149         bdrv->bdrv_read = bdrv_read_em;
150         bdrv->bdrv_write = bdrv_write_em;
151     }
152     aio_pool_init(&bdrv->aio_pool, bdrv->aiocb_size, bdrv->bdrv_aio_cancel);
153     bdrv->next = first_drv;
154     first_drv = bdrv;
155 }
156
157 /* create a new block device (by default it is empty) */
158 BlockDriverState *bdrv_new(const char *device_name)
159 {
160     BlockDriverState **pbs, *bs;
161
162     bs = qemu_mallocz(sizeof(BlockDriverState));
163     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
164     if (device_name[0] != '\0') {
165         /* insert at the end */
166         pbs = &bdrv_first;
167         while (*pbs != NULL)
168             pbs = &(*pbs)->next;
169         *pbs = bs;
170     }
171     return bs;
172 }
173
174 BlockDriver *bdrv_find_format(const char *format_name)
175 {
176     BlockDriver *drv1;
177     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
178         if (!strcmp(drv1->format_name, format_name))
179             return drv1;
180     }
181     return NULL;
182 }
183
184 int bdrv_create2(BlockDriver *drv,
185                 const char *filename, int64_t size_in_sectors,
186                 const char *backing_file, const char *backing_format,
187                 int flags)
188 {
189     if (drv->bdrv_create2)
190         return drv->bdrv_create2(filename, size_in_sectors, backing_file,
191                                  backing_format, flags);
192     if (drv->bdrv_create)
193         return drv->bdrv_create(filename, size_in_sectors, backing_file,
194                                 flags);
195     return -ENOTSUP;
196 }
197
198 int bdrv_create(BlockDriver *drv,
199                 const char *filename, int64_t size_in_sectors,
200                 const char *backing_file, int flags)
201 {
202     if (!drv->bdrv_create)
203         return -ENOTSUP;
204     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
205 }
206
#ifdef _WIN32
/*
 * Store the name of a fresh temporary file in 'filename'.
 * NOTE(review): 'size' is not consulted in this variant; the buffer
 * handed to GetTempFileName is presumably expected to hold MAX_PATH
 * characters - confirm against callers.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a fresh temporary file in 'filename' (at most 'size'
 * bytes).  The file is created under $TMPDIR, or /tmp when TMPDIR is not
 * set, and its descriptor is closed again right away.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir = getenv("TMPDIR");

    /* XXX: race condition possible */
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir ? tmpdir : "/tmp");
    close(mkstemp(filename));
}
#endif
229
#ifdef _WIN32
/* Return non-zero if 'filename' starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    const char c = filename[0];
    const int is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

    return is_letter && filename[1] == ':';
}

/*
 * Return 1 if 'filename' names a whole Windows drive: either exactly
 * "<letter>:" or anything starting with "\\.\" or "//./"; 0 otherwise.
 */
static int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
#endif
249
250 static BlockDriver *find_protocol(const char *filename)
251 {
252     BlockDriver *drv1;
253     char protocol[128];
254     int len;
255     const char *p;
256
257 #ifdef _WIN32
258     if (is_windows_drive(filename) ||
259         is_windows_drive_prefix(filename))
260         return &bdrv_raw;
261 #endif
262     p = strchr(filename, ':');
263     if (!p)
264         return &bdrv_raw;
265     len = p - filename;
266     if (len > sizeof(protocol) - 1)
267         len = sizeof(protocol) - 1;
268     memcpy(protocol, filename, len);
269     protocol[len] = '\0';
270     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
271         if (drv1->protocol_name &&
272             !strcmp(drv1->protocol_name, protocol))
273             return drv1;
274     }
275     return NULL;
276 }
277
278 /* XXX: force raw format if block or character device ? It would
279    simplify the BSD case */
280 static BlockDriver *find_image_format(const char *filename)
281 {
282     int ret, score, score_max;
283     BlockDriver *drv1, *drv;
284     uint8_t buf[2048];
285     BlockDriverState *bs;
286
287     /* detect host devices. By convention, /dev/cdrom[N] is always
288        recognized as a host CDROM */
289     if (strstart(filename, "/dev/cdrom", NULL))
290         return &bdrv_host_device;
291 #ifdef _WIN32
292     if (is_windows_drive(filename))
293         return &bdrv_host_device;
294 #else
295     {
296         struct stat st;
297         if (stat(filename, &st) >= 0 &&
298             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
299             return &bdrv_host_device;
300         }
301     }
302 #endif
303
304     drv = find_protocol(filename);
305     /* no need to test disk image formats for vvfat */
306     if (drv == &bdrv_vvfat)
307         return drv;
308
309     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
310     if (ret < 0)
311         return NULL;
312     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
313     bdrv_delete(bs);
314     if (ret < 0) {
315         return NULL;
316     }
317
318     score_max = 0;
319     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
320         if (drv1->bdrv_probe) {
321             score = drv1->bdrv_probe(buf, ret, filename);
322             if (score > score_max) {
323                 score_max = score;
324                 drv = drv1;
325             }
326         }
327     }
328     return drv;
329 }
330
331 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
332 {
333     BlockDriverState *bs;
334     int ret;
335
336     bs = bdrv_new("");
337     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
338     if (ret < 0) {
339         bdrv_delete(bs);
340         return ret;
341     }
342     bs->growable = 1;
343     *pbs = bs;
344     return 0;
345 }
346
347 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
348 {
349     return bdrv_open2(bs, filename, flags, NULL);
350 }
351
352 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
353                BlockDriver *drv)
354 {
355     int ret, open_flags;
356     char tmp_filename[PATH_MAX];
357     char backing_filename[PATH_MAX];
358
359     bs->read_only = 0;
360     bs->is_temporary = 0;
361     bs->encrypted = 0;
362     bs->valid_key = 0;
363
364     if (flags & BDRV_O_SNAPSHOT) {
365         BlockDriverState *bs1;
366         int64_t total_size;
367         int is_protocol = 0;
368
369         /* if snapshot, we create a temporary backing file and open it
370            instead of opening 'filename' directly */
371
372         /* if there is a backing file, use it */
373         bs1 = bdrv_new("");
374         ret = bdrv_open2(bs1, filename, 0, drv);
375         if (ret < 0) {
376             bdrv_delete(bs1);
377             return ret;
378         }
379         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
380
381         if (bs1->drv && bs1->drv->protocol_name)
382             is_protocol = 1;
383
384         bdrv_delete(bs1);
385
386         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
387
388         /* Real path is meaningless for protocols */
389         if (is_protocol)
390             snprintf(backing_filename, sizeof(backing_filename),
391                      "%s", filename);
392         else
393             realpath(filename, backing_filename);
394
395         ret = bdrv_create2(&bdrv_qcow2, tmp_filename,
396                            total_size, backing_filename, 
397                            (drv ? drv->format_name : NULL), 0);
398         if (ret < 0) {
399             return ret;
400         }
401         filename = tmp_filename;
402         drv = &bdrv_qcow2;
403         bs->is_temporary = 1;
404     }
405
406     pstrcpy(bs->filename, sizeof(bs->filename), filename);
407     if (flags & BDRV_O_FILE) {
408         drv = find_protocol(filename);
409     } else if (!drv) {
410         drv = find_image_format(filename);
411     }
412     if (!drv) {
413         ret = -ENOENT;
414         goto unlink_and_fail;
415     }
416     bs->drv = drv;
417     bs->opaque = qemu_mallocz(drv->instance_size);
418     /* Note: for compatibility, we open disk image files as RDWR, and
419        RDONLY as fallback */
420     if (!(flags & BDRV_O_FILE))
421         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
422     else
423         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
424     ret = drv->bdrv_open(bs, filename, open_flags);
425     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
426         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
427         bs->read_only = 1;
428     }
429     if (ret < 0) {
430         qemu_free(bs->opaque);
431         bs->opaque = NULL;
432         bs->drv = NULL;
433     unlink_and_fail:
434         if (bs->is_temporary)
435             unlink(filename);
436         return ret;
437     }
438     if (drv->bdrv_getlength) {
439         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
440     }
441 #ifndef _WIN32
442     if (bs->is_temporary) {
443         unlink(filename);
444     }
445 #endif
446     if (bs->backing_file[0] != '\0') {
447         /* if there is a backing file, use it */
448         BlockDriver *back_drv = NULL;
449         bs->backing_hd = bdrv_new("");
450         path_combine(backing_filename, sizeof(backing_filename),
451                      filename, bs->backing_file);
452         if (bs->backing_format[0] != '\0')
453             back_drv = bdrv_find_format(bs->backing_format);
454         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
455                          back_drv);
456         if (ret < 0) {
457             bdrv_close(bs);
458             return ret;
459         }
460     }
461
462     if (!bdrv_key_required(bs)) {
463         /* call the change callback */
464         bs->media_changed = 1;
465         if (bs->change_cb)
466             bs->change_cb(bs->change_opaque);
467     }
468     return 0;
469 }
470
471 void bdrv_close(BlockDriverState *bs)
472 {
473     if (bs->drv) {
474         if (bs->backing_hd)
475             bdrv_delete(bs->backing_hd);
476         bs->drv->bdrv_close(bs);
477         qemu_free(bs->opaque);
478 #ifdef _WIN32
479         if (bs->is_temporary) {
480             unlink(bs->filename);
481         }
482 #endif
483         bs->opaque = NULL;
484         bs->drv = NULL;
485
486         /* call the change callback */
487         bs->media_changed = 1;
488         if (bs->change_cb)
489             bs->change_cb(bs->change_opaque);
490     }
491 }
492
493 void bdrv_delete(BlockDriverState *bs)
494 {
495     BlockDriverState **pbs;
496
497     pbs = &bdrv_first;
498     while (*pbs != bs && *pbs != NULL)
499         pbs = &(*pbs)->next;
500     if (*pbs == bs)
501         *pbs = bs->next;
502
503     bdrv_close(bs);
504     qemu_free(bs);
505 }
506
507 /* commit COW file into the raw image */
508 int bdrv_commit(BlockDriverState *bs)
509 {
510     BlockDriver *drv = bs->drv;
511     int64_t i, total_sectors;
512     int n, j;
513     unsigned char sector[512];
514
515     if (!drv)
516         return -ENOMEDIUM;
517
518     if (bs->read_only) {
519         return -EACCES;
520     }
521
522     if (!bs->backing_hd) {
523         return -ENOTSUP;
524     }
525
526     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
527     for (i = 0; i < total_sectors;) {
528         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
529             for(j = 0; j < n; j++) {
530                 if (bdrv_read(bs, i, sector, 1) != 0) {
531                     return -EIO;
532                 }
533
534                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
535                     return -EIO;
536                 }
537                 i++;
538             }
539         } else {
540             i += n;
541         }
542     }
543
544     if (drv->bdrv_make_empty)
545         return drv->bdrv_make_empty(bs);
546
547     return 0;
548 }
549
550 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
551                                    size_t size)
552 {
553     int64_t len;
554
555     if (!bdrv_is_inserted(bs))
556         return -ENOMEDIUM;
557
558     if (bs->growable)
559         return 0;
560
561     len = bdrv_getlength(bs);
562
563     if ((offset + size) > len)
564         return -EIO;
565
566     return 0;
567 }
568
569 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
570                               int nb_sectors)
571 {
572     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
573 }
574
575 /* return < 0 if error. See bdrv_write() for the return codes */
576 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
577               uint8_t *buf, int nb_sectors)
578 {
579     BlockDriver *drv = bs->drv;
580
581     if (!drv)
582         return -ENOMEDIUM;
583     if (bdrv_check_request(bs, sector_num, nb_sectors))
584         return -EIO;
585
586     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
587 }
588
589 /* Return < 0 if error. Important errors are:
590   -EIO         generic I/O error (may happen for all errors)
591   -ENOMEDIUM   No media inserted.
592   -EINVAL      Invalid sector number or nb_sectors
593   -EACCES      Trying to write a read-only device
594 */
595 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
596                const uint8_t *buf, int nb_sectors)
597 {
598     BlockDriver *drv = bs->drv;
599     if (!bs->drv)
600         return -ENOMEDIUM;
601     if (bs->read_only)
602         return -EACCES;
603     if (bdrv_check_request(bs, sector_num, nb_sectors))
604         return -EIO;
605
606     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
607 }
608
609 int bdrv_pread(BlockDriverState *bs, int64_t offset,
610                void *buf, int count1)
611 {
612     uint8_t tmp_buf[SECTOR_SIZE];
613     int len, nb_sectors, count;
614     int64_t sector_num;
615
616     count = count1;
617     /* first read to align to sector start */
618     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
619     if (len > count)
620         len = count;
621     sector_num = offset >> SECTOR_BITS;
622     if (len > 0) {
623         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
624             return -EIO;
625         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
626         count -= len;
627         if (count == 0)
628             return count1;
629         sector_num++;
630         buf += len;
631     }
632
633     /* read the sectors "in place" */
634     nb_sectors = count >> SECTOR_BITS;
635     if (nb_sectors > 0) {
636         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
637             return -EIO;
638         sector_num += nb_sectors;
639         len = nb_sectors << SECTOR_BITS;
640         buf += len;
641         count -= len;
642     }
643
644     /* add data from the last sector */
645     if (count > 0) {
646         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
647             return -EIO;
648         memcpy(buf, tmp_buf, count);
649     }
650     return count1;
651 }
652
653 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
654                 const void *buf, int count1)
655 {
656     uint8_t tmp_buf[SECTOR_SIZE];
657     int len, nb_sectors, count;
658     int64_t sector_num;
659
660     count = count1;
661     /* first write to align to sector start */
662     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
663     if (len > count)
664         len = count;
665     sector_num = offset >> SECTOR_BITS;
666     if (len > 0) {
667         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
668             return -EIO;
669         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
670         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
671             return -EIO;
672         count -= len;
673         if (count == 0)
674             return count1;
675         sector_num++;
676         buf += len;
677     }
678
679     /* write the sectors "in place" */
680     nb_sectors = count >> SECTOR_BITS;
681     if (nb_sectors > 0) {
682         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
683             return -EIO;
684         sector_num += nb_sectors;
685         len = nb_sectors << SECTOR_BITS;
686         buf += len;
687         count -= len;
688     }
689
690     /* add data from the last sector */
691     if (count > 0) {
692         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
693             return -EIO;
694         memcpy(tmp_buf, buf, count);
695         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
696             return -EIO;
697     }
698     return count1;
699 }
700
701 /**
702  * Truncate file to 'offset' bytes (needed only for file protocols)
703  */
704 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
705 {
706     BlockDriver *drv = bs->drv;
707     if (!drv)
708         return -ENOMEDIUM;
709     if (!drv->bdrv_truncate)
710         return -ENOTSUP;
711     return drv->bdrv_truncate(bs, offset);
712 }
713
714 /**
715  * Length of a file in bytes. Return < 0 if error or unknown.
716  */
717 int64_t bdrv_getlength(BlockDriverState *bs)
718 {
719     BlockDriver *drv = bs->drv;
720     if (!drv)
721         return -ENOMEDIUM;
722     if (!drv->bdrv_getlength) {
723         /* legacy mode */
724         return bs->total_sectors * SECTOR_SIZE;
725     }
726     return drv->bdrv_getlength(bs);
727 }
728
729 /* return 0 as number of sectors if no device present or error */
730 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
731 {
732     int64_t length;
733     length = bdrv_getlength(bs);
734     if (length < 0)
735         length = 0;
736     else
737         length = length >> SECTOR_BITS;
738     *nb_sectors_ptr = length;
739 }
740
/*
 * One entry of the MSDOS partition table, as read by guess_disk_lchs()
 * from byte 0x1be of the first sector.  Packed: the layout must match
 * the on-disk bytes exactly.
 */
struct partition {
    /* 0x80 - active */
    uint8_t boot_ind;
    /* starting head */
    uint8_t head;
    /* starting sector */
    uint8_t sector;
    /* starting cylinder */
    uint8_t cyl;
    /* What partition type */
    uint8_t sys_ind;
    /* end head */
    uint8_t end_head;
    /* end sector */
    uint8_t end_sector;
    /* end cylinder */
    uint8_t end_cyl;
    /* starting sector counting from 0 */
    uint32_t start_sect;
    /* nr of sectors in partition */
    uint32_t nr_sects;
} __attribute__((packed));
753
754 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
755 static int guess_disk_lchs(BlockDriverState *bs,
756                            int *pcylinders, int *pheads, int *psectors)
757 {
758     uint8_t buf[512];
759     int ret, i, heads, sectors, cylinders;
760     struct partition *p;
761     uint32_t nr_sects;
762     uint64_t nb_sectors;
763
764     bdrv_get_geometry(bs, &nb_sectors);
765
766     ret = bdrv_read(bs, 0, buf, 1);
767     if (ret < 0)
768         return -1;
769     /* test msdos magic */
770     if (buf[510] != 0x55 || buf[511] != 0xaa)
771         return -1;
772     for(i = 0; i < 4; i++) {
773         p = ((struct partition *)(buf + 0x1be)) + i;
774         nr_sects = le32_to_cpu(p->nr_sects);
775         if (nr_sects && p->end_head) {
776             /* We make the assumption that the partition terminates on
777                a cylinder boundary */
778             heads = p->end_head + 1;
779             sectors = p->end_sector & 63;
780             if (sectors == 0)
781                 continue;
782             cylinders = nb_sectors / (heads * sectors);
783             if (cylinders < 1 || cylinders > 16383)
784                 continue;
785             *pheads = heads;
786             *psectors = sectors;
787             *pcylinders = cylinders;
788 #if 0
789             printf("guessed geometry: LCHS=%d %d %d\n",
790                    cylinders, heads, sectors);
791 #endif
792             return 0;
793         }
794     }
795     return -1;
796 }
797
798 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
799 {
800     int translation, lba_detected = 0;
801     int cylinders, heads, secs;
802     uint64_t nb_sectors;
803
804     /* if a geometry hint is available, use it */
805     bdrv_get_geometry(bs, &nb_sectors);
806     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
807     translation = bdrv_get_translation_hint(bs);
808     if (cylinders != 0) {
809         *pcyls = cylinders;
810         *pheads = heads;
811         *psecs = secs;
812     } else {
813         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
814             if (heads > 16) {
815                 /* if heads > 16, it means that a BIOS LBA
816                    translation was active, so the default
817                    hardware geometry is OK */
818                 lba_detected = 1;
819                 goto default_geometry;
820             } else {
821                 *pcyls = cylinders;
822                 *pheads = heads;
823                 *psecs = secs;
824                 /* disable any translation to be in sync with
825                    the logical geometry */
826                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
827                     bdrv_set_translation_hint(bs,
828                                               BIOS_ATA_TRANSLATION_NONE);
829                 }
830             }
831         } else {
832         default_geometry:
833             /* if no geometry, use a standard physical disk geometry */
834             cylinders = nb_sectors / (16 * 63);
835
836             if (cylinders > 16383)
837                 cylinders = 16383;
838             else if (cylinders < 2)
839                 cylinders = 2;
840             *pcyls = cylinders;
841             *pheads = 16;
842             *psecs = 63;
843             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
844                 if ((*pcyls * *pheads) <= 131072) {
845                     bdrv_set_translation_hint(bs,
846                                               BIOS_ATA_TRANSLATION_LARGE);
847                 } else {
848                     bdrv_set_translation_hint(bs,
849                                               BIOS_ATA_TRANSLATION_LBA);
850                 }
851             }
852         }
853         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
854     }
855 }
856
857 void bdrv_set_geometry_hint(BlockDriverState *bs,
858                             int cyls, int heads, int secs)
859 {
860     bs->cyls = cyls;
861     bs->heads = heads;
862     bs->secs = secs;
863 }
864
865 void bdrv_set_type_hint(BlockDriverState *bs, int type)
866 {
867     bs->type = type;
868     bs->removable = ((type == BDRV_TYPE_CDROM ||
869                       type == BDRV_TYPE_FLOPPY));
870 }
871
872 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
873 {
874     bs->translation = translation;
875 }
876
877 void bdrv_get_geometry_hint(BlockDriverState *bs,
878                             int *pcyls, int *pheads, int *psecs)
879 {
880     *pcyls = bs->cyls;
881     *pheads = bs->heads;
882     *psecs = bs->secs;
883 }
884
885 int bdrv_get_type_hint(BlockDriverState *bs)
886 {
887     return bs->type;
888 }
889
890 int bdrv_get_translation_hint(BlockDriverState *bs)
891 {
892     return bs->translation;
893 }
894
895 int bdrv_is_removable(BlockDriverState *bs)
896 {
897     return bs->removable;
898 }
899
900 int bdrv_is_read_only(BlockDriverState *bs)
901 {
902     return bs->read_only;
903 }
904
905 int bdrv_is_sg(BlockDriverState *bs)
906 {
907     return bs->sg;
908 }
909
910 /* XXX: no longer used */
911 void bdrv_set_change_cb(BlockDriverState *bs,
912                         void (*change_cb)(void *opaque), void *opaque)
913 {
914     bs->change_cb = change_cb;
915     bs->change_opaque = opaque;
916 }
917
918 int bdrv_is_encrypted(BlockDriverState *bs)
919 {
920     if (bs->backing_hd && bs->backing_hd->encrypted)
921         return 1;
922     return bs->encrypted;
923 }
924
925 int bdrv_key_required(BlockDriverState *bs)
926 {
927     BlockDriverState *backing_hd = bs->backing_hd;
928
929     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
930         return 1;
931     return (bs->encrypted && !bs->valid_key);
932 }
933
934 int bdrv_set_key(BlockDriverState *bs, const char *key)
935 {
936     int ret;
937     if (bs->backing_hd && bs->backing_hd->encrypted) {
938         ret = bdrv_set_key(bs->backing_hd, key);
939         if (ret < 0)
940             return ret;
941         if (!bs->encrypted)
942             return 0;
943     }
944     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
945         return -1;
946     ret = bs->drv->bdrv_set_key(bs, key);
947     if (ret < 0) {
948         bs->valid_key = 0;
949     } else if (!bs->valid_key) {
950         bs->valid_key = 1;
951         /* call the change callback now, we skipped it on open */
952         bs->media_changed = 1;
953         if (bs->change_cb)
954             bs->change_cb(bs->change_opaque);
955     }
956     return ret;
957 }
958
959 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
960 {
961     if (!bs->drv) {
962         buf[0] = '\0';
963     } else {
964         pstrcpy(buf, buf_size, bs->drv->format_name);
965     }
966 }
967
968 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
969                          void *opaque)
970 {
971     BlockDriver *drv;
972
973     for (drv = first_drv; drv != NULL; drv = drv->next) {
974         it(opaque, drv->format_name);
975     }
976 }
977
978 BlockDriverState *bdrv_find(const char *name)
979 {
980     BlockDriverState *bs;
981
982     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
983         if (!strcmp(name, bs->device_name))
984             return bs;
985     }
986     return NULL;
987 }
988
989 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
990 {
991     BlockDriverState *bs;
992
993     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
994         it(opaque, bs);
995     }
996 }
997
998 const char *bdrv_get_device_name(BlockDriverState *bs)
999 {
1000     return bs->device_name;
1001 }
1002
1003 void bdrv_flush(BlockDriverState *bs)
1004 {
1005     if (!bs->drv)
1006         return;
1007     if (bs->drv->bdrv_flush)
1008         bs->drv->bdrv_flush(bs);
1009     if (bs->backing_hd)
1010         bdrv_flush(bs->backing_hd);
1011 }
1012
1013 void bdrv_flush_all(void)
1014 {
1015     BlockDriverState *bs;
1016
1017     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1018         if (bs->drv && !bdrv_is_read_only(bs) && 
1019             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1020             bdrv_flush(bs);
1021 }
1022
1023 /*
1024  * Returns true iff the specified sector is present in the disk image. Drivers
1025  * not implementing the functionality are assumed to not support backing files,
1026  * hence all their sectors are reported as allocated.
1027  *
1028  * 'pnum' is set to the number of sectors (including and immediately following
1029  * the specified sector) that are known to be in the same
1030  * allocated/unallocated state.
1031  *
1032  * 'nb_sectors' is the max value 'pnum' should be set to.
1033  */
1034 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1035         int *pnum)
1036 {
1037     int64_t n;
1038     if (!bs->drv->bdrv_is_allocated) {
1039         if (sector_num >= bs->total_sectors) {
1040             *pnum = 0;
1041             return 0;
1042         }
1043         n = bs->total_sectors - sector_num;
1044         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1045         return 1;
1046     }
1047     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1048 }
1049
1050 void bdrv_info(Monitor *mon)
1051 {
1052     BlockDriverState *bs;
1053
1054     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1055         monitor_printf(mon, "%s:", bs->device_name);
1056         monitor_printf(mon, " type=");
1057         switch(bs->type) {
1058         case BDRV_TYPE_HD:
1059             monitor_printf(mon, "hd");
1060             break;
1061         case BDRV_TYPE_CDROM:
1062             monitor_printf(mon, "cdrom");
1063             break;
1064         case BDRV_TYPE_FLOPPY:
1065             monitor_printf(mon, "floppy");
1066             break;
1067         }
1068         monitor_printf(mon, " removable=%d", bs->removable);
1069         if (bs->removable) {
1070             monitor_printf(mon, " locked=%d", bs->locked);
1071         }
1072         if (bs->drv) {
1073             monitor_printf(mon, " file=");
1074             monitor_print_filename(mon, bs->filename);
1075             if (bs->backing_file[0] != '\0') {
1076                 monitor_printf(mon, " backing_file=");
1077                 monitor_print_filename(mon, bs->backing_file);
1078             }
1079             monitor_printf(mon, " ro=%d", bs->read_only);
1080             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1081             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1082         } else {
1083             monitor_printf(mon, " [not inserted]");
1084         }
1085         monitor_printf(mon, "\n");
1086     }
1087 }
1088
1089 /* The "info blockstats" command. */
1090 void bdrv_info_stats(Monitor *mon)
1091 {
1092     BlockDriverState *bs;
1093
1094     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1095         monitor_printf(mon, "%s:"
1096                        " rd_bytes=%" PRIu64
1097                        " wr_bytes=%" PRIu64
1098                        " rd_operations=%" PRIu64
1099                        " wr_operations=%" PRIu64
1100                        "\n",
1101                        bs->device_name,
1102                        bs->rd_bytes, bs->wr_bytes,
1103                        bs->rd_ops, bs->wr_ops);
1104     }
1105 }
1106
1107 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1108 {
1109     if (bs->backing_hd && bs->backing_hd->encrypted)
1110         return bs->backing_file;
1111     else if (bs->encrypted)
1112         return bs->filename;
1113     else
1114         return NULL;
1115 }
1116
1117 void bdrv_get_backing_filename(BlockDriverState *bs,
1118                                char *filename, int filename_size)
1119 {
1120     if (!bs->backing_hd) {
1121         pstrcpy(filename, filename_size, "");
1122     } else {
1123         pstrcpy(filename, filename_size, bs->backing_file);
1124     }
1125 }
1126
1127 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1128                           const uint8_t *buf, int nb_sectors)
1129 {
1130     BlockDriver *drv = bs->drv;
1131     if (!drv)
1132         return -ENOMEDIUM;
1133     if (!drv->bdrv_write_compressed)
1134         return -ENOTSUP;
1135     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1136 }
1137
1138 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1139 {
1140     BlockDriver *drv = bs->drv;
1141     if (!drv)
1142         return -ENOMEDIUM;
1143     if (!drv->bdrv_get_info)
1144         return -ENOTSUP;
1145     memset(bdi, 0, sizeof(*bdi));
1146     return drv->bdrv_get_info(bs, bdi);
1147 }
1148
1149 /**************************************************************/
1150 /* handling of snapshots */
1151
1152 int bdrv_snapshot_create(BlockDriverState *bs,
1153                          QEMUSnapshotInfo *sn_info)
1154 {
1155     BlockDriver *drv = bs->drv;
1156     if (!drv)
1157         return -ENOMEDIUM;
1158     if (!drv->bdrv_snapshot_create)
1159         return -ENOTSUP;
1160     return drv->bdrv_snapshot_create(bs, sn_info);
1161 }
1162
1163 int bdrv_snapshot_goto(BlockDriverState *bs,
1164                        const char *snapshot_id)
1165 {
1166     BlockDriver *drv = bs->drv;
1167     if (!drv)
1168         return -ENOMEDIUM;
1169     if (!drv->bdrv_snapshot_goto)
1170         return -ENOTSUP;
1171     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1172 }
1173
1174 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1175 {
1176     BlockDriver *drv = bs->drv;
1177     if (!drv)
1178         return -ENOMEDIUM;
1179     if (!drv->bdrv_snapshot_delete)
1180         return -ENOTSUP;
1181     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1182 }
1183
1184 int bdrv_snapshot_list(BlockDriverState *bs,
1185                        QEMUSnapshotInfo **psn_info)
1186 {
1187     BlockDriver *drv = bs->drv;
1188     if (!drv)
1189         return -ENOMEDIUM;
1190     if (!drv->bdrv_snapshot_list)
1191         return -ENOTSUP;
1192     return drv->bdrv_snapshot_list(bs, psn_info);
1193 }
1194
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes) in a short
 * human-readable form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled by
 * powers of 1024 and tagged K, M, G or T: one decimal is printed while the
 * scaled value is below 10, a rounded integer otherwise. Values too large for
 * the T range are still printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    /* Exactly NB_SUFFIXES letters; the array is not NUL-terminated. */
    static const char unit_letters[NB_SUFFIXES] = "KMGT";
    int64_t scale = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, scale *= 1024) {
        if (size < (10 * scale)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / scale,
                     unit_letters[idx]);
            return buf;
        }
        /* The last unit absorbs everything that did not fit earlier. */
        if (size < (1000 * scale) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (scale >> 1)) / scale),
                     unit_letters[idx]);
            return buf;
        }
    }
    return buf;
}
1224
1225 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1226 {
1227     char buf1[128], date_buf[128], clock_buf[128];
1228 #ifdef _WIN32
1229     struct tm *ptm;
1230 #else
1231     struct tm tm;
1232 #endif
1233     time_t ti;
1234     int64_t secs;
1235
1236     if (!sn) {
1237         snprintf(buf, buf_size,
1238                  "%-10s%-20s%7s%20s%15s",
1239                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1240     } else {
1241         ti = sn->date_sec;
1242 #ifdef _WIN32
1243         ptm = localtime(&ti);
1244         strftime(date_buf, sizeof(date_buf),
1245                  "%Y-%m-%d %H:%M:%S", ptm);
1246 #else
1247         localtime_r(&ti, &tm);
1248         strftime(date_buf, sizeof(date_buf),
1249                  "%Y-%m-%d %H:%M:%S", &tm);
1250 #endif
1251         secs = sn->vm_clock_nsec / 1000000000;
1252         snprintf(clock_buf, sizeof(clock_buf),
1253                  "%02d:%02d:%02d.%03d",
1254                  (int)(secs / 3600),
1255                  (int)((secs / 60) % 60),
1256                  (int)(secs % 60),
1257                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1258         snprintf(buf, buf_size,
1259                  "%-10s%-20s%7s%20s%15s",
1260                  sn->id_str, sn->name,
1261                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1262                  date_buf,
1263                  clock_buf);
1264     }
1265     return buf;
1266 }
1267
1268
1269 /**************************************************************/
1270 /* async I/Os */
1271
1272 typedef struct VectorTranslationAIOCB {
1273     BlockDriverAIOCB common;
1274     QEMUIOVector *iov;
1275     uint8_t *bounce;
1276     int is_write;
1277     BlockDriverAIOCB *aiocb;
1278 } VectorTranslationAIOCB;
1279
1280 static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb)
1281 {
1282     VectorTranslationAIOCB *acb
1283         = container_of(_acb, VectorTranslationAIOCB, common);
1284
1285     bdrv_aio_cancel(acb->aiocb);
1286 }
1287
1288 static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
1289 {
1290     VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque;
1291
1292     if (!s->is_write) {
1293         qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
1294     }
1295     qemu_vfree(s->bounce);
1296     s->common.cb(s->common.opaque, ret);
1297     qemu_aio_release(s);
1298 }
1299
1300 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1301                                             int64_t sector_num,
1302                                             QEMUIOVector *iov,
1303                                             int nb_sectors,
1304                                             BlockDriverCompletionFunc *cb,
1305                                             void *opaque,
1306                                             int is_write)
1307
1308 {
1309     VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs,
1310                                                   cb, opaque);
1311
1312     s->iov = iov;
1313     s->bounce = qemu_memalign(512, nb_sectors * 512);
1314     s->is_write = is_write;
1315     if (is_write) {
1316         qemu_iovec_to_buffer(s->iov, s->bounce);
1317         s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
1318                                   bdrv_aio_rw_vector_cb, s);
1319     } else {
1320         s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
1321                                  bdrv_aio_rw_vector_cb, s);
1322     }
1323     if (!s->aiocb) {
1324         qemu_vfree(s->bounce);
1325         qemu_aio_release(s);
1326         return NULL;
1327     }
1328     return &s->common;
1329 }
1330
1331 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1332                                  QEMUIOVector *iov, int nb_sectors,
1333                                  BlockDriverCompletionFunc *cb, void *opaque)
1334 {
1335     if (bdrv_check_request(bs, sector_num, nb_sectors))
1336         return NULL;
1337
1338     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1339                               cb, opaque, 0);
1340 }
1341
1342 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1343                                   QEMUIOVector *iov, int nb_sectors,
1344                                   BlockDriverCompletionFunc *cb, void *opaque)
1345 {
1346     if (bdrv_check_request(bs, sector_num, nb_sectors))
1347         return NULL;
1348
1349     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1350                               cb, opaque, 1);
1351 }
1352
1353 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
1354                                 uint8_t *buf, int nb_sectors,
1355                                 BlockDriverCompletionFunc *cb, void *opaque)
1356 {
1357     BlockDriver *drv = bs->drv;
1358     BlockDriverAIOCB *ret;
1359
1360     if (!drv)
1361         return NULL;
1362     if (bdrv_check_request(bs, sector_num, nb_sectors))
1363         return NULL;
1364
1365     ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
1366
1367     if (ret) {
1368         /* Update stats even though technically transfer has not happened. */
1369         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1370         bs->rd_ops ++;
1371     }
1372
1373     return ret;
1374 }
1375
1376 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
1377                                  const uint8_t *buf, int nb_sectors,
1378                                  BlockDriverCompletionFunc *cb, void *opaque)
1379 {
1380     BlockDriver *drv = bs->drv;
1381     BlockDriverAIOCB *ret;
1382
1383     if (!drv)
1384         return NULL;
1385     if (bs->read_only)
1386         return NULL;
1387     if (bdrv_check_request(bs, sector_num, nb_sectors))
1388         return NULL;
1389
1390     ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
1391
1392     if (ret) {
1393         /* Update stats even though technically transfer has not happened. */
1394         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1395         bs->wr_ops ++;
1396     }
1397
1398     return ret;
1399 }
1400
1401 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1402 {
1403     acb->pool->cancel(acb);
1404 }
1405
1406
1407 /**************************************************************/
1408 /* async block device emulation */
1409
1410 static void bdrv_aio_bh_cb(void *opaque)
1411 {
1412     BlockDriverAIOCBSync *acb = opaque;
1413     acb->common.cb(acb->common.opaque, acb->ret);
1414     qemu_aio_release(acb);
1415 }
1416
1417 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1418         int64_t sector_num, uint8_t *buf, int nb_sectors,
1419         BlockDriverCompletionFunc *cb, void *opaque)
1420 {
1421     BlockDriverAIOCBSync *acb;
1422     int ret;
1423
1424     acb = qemu_aio_get(bs, cb, opaque);
1425     if (!acb->bh)
1426         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1427     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1428     acb->ret = ret;
1429     qemu_bh_schedule(acb->bh);
1430     return &acb->common;
1431 }
1432
1433 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1434         int64_t sector_num, const uint8_t *buf, int nb_sectors,
1435         BlockDriverCompletionFunc *cb, void *opaque)
1436 {
1437     BlockDriverAIOCBSync *acb;
1438     int ret;
1439
1440     acb = qemu_aio_get(bs, cb, opaque);
1441     if (!acb->bh)
1442         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1443     ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1444     acb->ret = ret;
1445     qemu_bh_schedule(acb->bh);
1446     return &acb->common;
1447 }
1448
1449 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1450 {
1451     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1452     qemu_bh_cancel(acb->bh);
1453     qemu_aio_release(acb);
1454 }
1455
1456 /**************************************************************/
1457 /* sync block device emulation */
1458
/*
 * Completion callback for emulated synchronous I/O: stores 'ret' into the
 * int that 'opaque' points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1463
1464 #define NOT_DONE 0x7fffffff
1465
1466 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1467                         uint8_t *buf, int nb_sectors)
1468 {
1469     int async_ret;
1470     BlockDriverAIOCB *acb;
1471
1472     async_ret = NOT_DONE;
1473     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1474                         bdrv_rw_em_cb, &async_ret);
1475     if (acb == NULL)
1476         return -1;
1477
1478     while (async_ret == NOT_DONE) {
1479         qemu_aio_wait();
1480     }
1481
1482     return async_ret;
1483 }
1484
1485 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1486                          const uint8_t *buf, int nb_sectors)
1487 {
1488     int async_ret;
1489     BlockDriverAIOCB *acb;
1490
1491     async_ret = NOT_DONE;
1492     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1493                          bdrv_rw_em_cb, &async_ret);
1494     if (acb == NULL)
1495         return -1;
1496     while (async_ret == NOT_DONE) {
1497         qemu_aio_wait();
1498     }
1499     return async_ret;
1500 }
1501
1502 void bdrv_init(void)
1503 {
1504     aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB),
1505                   bdrv_aio_cancel_vector);
1506
1507     bdrv_register(&bdrv_raw);
1508     bdrv_register(&bdrv_host_device);
1509 #ifndef _WIN32
1510     bdrv_register(&bdrv_cow);
1511 #endif
1512     bdrv_register(&bdrv_qcow);
1513     bdrv_register(&bdrv_vmdk);
1514     bdrv_register(&bdrv_cloop);
1515     bdrv_register(&bdrv_dmg);
1516     bdrv_register(&bdrv_bochs);
1517     bdrv_register(&bdrv_vpc);
1518     bdrv_register(&bdrv_vvfat);
1519     bdrv_register(&bdrv_qcow2);
1520     bdrv_register(&bdrv_parallels);
1521     bdrv_register(&bdrv_nbd);
1522 }
1523
1524 void aio_pool_init(AIOPool *pool, int aiocb_size,
1525                    void (*cancel)(BlockDriverAIOCB *acb))
1526 {
1527     pool->aiocb_size = aiocb_size;
1528     pool->cancel = cancel;
1529     pool->free_aiocb = NULL;
1530 }
1531
1532 void *qemu_aio_get_pool(AIOPool *pool, BlockDriverState *bs,
1533                         BlockDriverCompletionFunc *cb, void *opaque)
1534 {
1535     BlockDriverAIOCB *acb;
1536
1537     if (pool->free_aiocb) {
1538         acb = pool->free_aiocb;
1539         pool->free_aiocb = acb->next;
1540     } else {
1541         acb = qemu_mallocz(pool->aiocb_size);
1542         acb->pool = pool;
1543     }
1544     acb->bs = bs;
1545     acb->cb = cb;
1546     acb->opaque = opaque;
1547     return acb;
1548 }
1549
1550 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1551                    void *opaque)
1552 {
1553     return qemu_aio_get_pool(&bs->drv->aio_pool, bs, cb, opaque);
1554 }
1555
1556 void qemu_aio_release(void *p)
1557 {
1558     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1559     AIOPool *pool = acb->pool;
1560     acb->next = pool->free_aiocb;
1561     pool->free_aiocb = acb;
1562 }
1563
1564 /**************************************************************/
1565 /* removable device support */
1566
1567 /**
1568  * Return TRUE if the media is present
1569  */
1570 int bdrv_is_inserted(BlockDriverState *bs)
1571 {
1572     BlockDriver *drv = bs->drv;
1573     int ret;
1574     if (!drv)
1575         return 0;
1576     if (!drv->bdrv_is_inserted)
1577         return 1;
1578     ret = drv->bdrv_is_inserted(bs);
1579     return ret;
1580 }
1581
1582 /**
1583  * Return TRUE if the media changed since the last call to this
1584  * function. It is currently only used for floppy disks
1585  */
1586 int bdrv_media_changed(BlockDriverState *bs)
1587 {
1588     BlockDriver *drv = bs->drv;
1589     int ret;
1590
1591     if (!drv || !drv->bdrv_media_changed)
1592         ret = -ENOTSUP;
1593     else
1594         ret = drv->bdrv_media_changed(bs);
1595     if (ret == -ENOTSUP)
1596         ret = bs->media_changed;
1597     bs->media_changed = 0;
1598     return ret;
1599 }
1600
1601 /**
1602  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1603  */
1604 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1605 {
1606     BlockDriver *drv = bs->drv;
1607     int ret;
1608
1609     if (!drv || !drv->bdrv_eject) {
1610         ret = -ENOTSUP;
1611     } else {
1612         ret = drv->bdrv_eject(bs, eject_flag);
1613     }
1614     if (ret == -ENOTSUP) {
1615         if (eject_flag)
1616             bdrv_close(bs);
1617     }
1618 }
1619
1620 int bdrv_is_locked(BlockDriverState *bs)
1621 {
1622     return bs->locked;
1623 }
1624
1625 /**
1626  * Lock or unlock the media (if it is locked, the user won't be able
1627  * to eject it manually).
1628  */
1629 void bdrv_set_locked(BlockDriverState *bs, int locked)
1630 {
1631     BlockDriver *drv = bs->drv;
1632
1633     bs->locked = locked;
1634     if (drv && drv->bdrv_set_locked) {
1635         drv->bdrv_set_locked(bs, locked);
1636     }
1637 }
1638
1639 /* needed for generic scsi interface */
1640
1641 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1642 {
1643     BlockDriver *drv = bs->drv;
1644
1645     if (drv && drv->bdrv_ioctl)
1646         return drv->bdrv_ioctl(bs, req, buf);
1647     return -ENOTSUP;
1648 }
1649
1650 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1651         unsigned long int req, void *buf,
1652         BlockDriverCompletionFunc *cb, void *opaque)
1653 {
1654     BlockDriver *drv = bs->drv;
1655
1656     if (drv && drv->bdrv_aio_ioctl)
1657         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1658     return NULL;
1659 }