36de3b6183f8644049824b87a96b75926647ccd2
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33
34 #ifdef HOST_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
/* Sector size used throughout this file: 1 << SECTOR_BITS = 512 bytes. */
#define SECTOR_BITS 9
#define SECTOR_SIZE (1 << SECTOR_BITS)

/*
 * AIO control block used by the AIO emulation layer: bdrv_register()
 * sets a driver's aiocb_size to sizeof(BlockDriverAIOCBSync) when it
 * installs the bdrv_aio_*_em callbacks.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;     /* presumably the bottom half used for completion - TODO confirm */
    int ret;        /* presumably the result of the emulated request - TODO confirm */
} BlockDriverAIOCBSync;

/*
 * Forward declarations of the emulation helpers that bdrv_register()
 * installs into drivers lacking a native AIO or synchronous interface.
 */
static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);

/* Head of the list of named block devices (see bdrv_new()). */
BlockDriverState *bdrv_first;

/* Head of the list of registered drivers (see bdrv_register()). */
static BlockDriver *first_drv;
67
/*
 * Return 1 if 'path' is absolute, 0 otherwise.
 *
 * Everything up to and including the first ':' (if any) is skipped
 * before the test, so "proto:/x" counts as absolute.  On Windows a
 * backslash is accepted as a separator as well.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    rest = colon ? colon + 1 : path;
#ifdef _WIN32
    if (rest[0] == '\\') {
        return 1;
    }
#endif
    return rest[0] == '/';
}
87
/*
 * Build in 'dest' (at most dest_size bytes, always terminated when
 * dest_size > 0) the path designated by 'filename'.
 *
 * An absolute 'filename' is copied as is.  Otherwise it is appended to
 * the directory part of 'base_path', i.e. everything up to the last
 * path separator or, failing that, up to the first ':' so that
 * "protocol:" prefixes are kept.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of the base, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator of the base */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep the longer of the two prefixes */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
131
132
133 static void bdrv_register(BlockDriver *bdrv)
134 {
135     if (!bdrv->bdrv_aio_read) {
136         /* add AIO emulation layer */
137         bdrv->bdrv_aio_read = bdrv_aio_read_em;
138         bdrv->bdrv_aio_write = bdrv_aio_write_em;
139         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
140         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
141     } else if (!bdrv->bdrv_read && !bdrv->bdrv_pread) {
142         /* add synchronous IO emulation layer */
143         bdrv->bdrv_read = bdrv_read_em;
144         bdrv->bdrv_write = bdrv_write_em;
145     }
146     bdrv->next = first_drv;
147     first_drv = bdrv;
148 }
149
150 /* create a new block device (by default it is empty) */
151 BlockDriverState *bdrv_new(const char *device_name)
152 {
153     BlockDriverState **pbs, *bs;
154
155     bs = qemu_mallocz(sizeof(BlockDriverState));
156     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
157     if (device_name[0] != '\0') {
158         /* insert at the end */
159         pbs = &bdrv_first;
160         while (*pbs != NULL)
161             pbs = &(*pbs)->next;
162         *pbs = bs;
163     }
164     return bs;
165 }
166
167 BlockDriver *bdrv_find_format(const char *format_name)
168 {
169     BlockDriver *drv1;
170     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
171         if (!strcmp(drv1->format_name, format_name))
172             return drv1;
173     }
174     return NULL;
175 }
176
177 int bdrv_create(BlockDriver *drv,
178                 const char *filename, int64_t size_in_sectors,
179                 const char *backing_file, int flags)
180 {
181     if (!drv->bdrv_create)
182         return -ENOTSUP;
183     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
184 }
185
#ifdef _WIN32
/*
 * Create an empty temporary file and store its name in 'filename'.
 *
 * GetTempFileName() needs a buffer of at least MAX_PATH characters, so
 * a smaller 'size' is rejected.  On any failure 'filename' is set to
 * the empty string (when size > 0) so that the caller's subsequent
 * open/create fails instead of using garbage.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    DWORD dir_len;

    if (size < MAX_PATH)
        goto fail;
    dir_len = GetTempPath(MAX_PATH, temp_dir);
    if (dir_len == 0 || dir_len >= MAX_PATH)
        goto fail;
    if (GetTempFileName(temp_dir, "qem", 0, filename) == 0)
        goto fail;
    return;

fail:
    if (size > 0)
        filename[0] = '\0';
}
#else
/*
 * Create an empty temporary file in $TMPDIR (default "/tmp") and store
 * its name in 'filename' (buffer of 'size' bytes).
 *
 * If the file cannot be created (including the case where the template
 * was truncated by a too small buffer), 'filename' is set to the empty
 * string so that the caller's subsequent open/create fails cleanly
 * rather than operating on a predictable, non-existent name.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        if (size > 0)
            filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
208
209 #ifdef _WIN32
/*
 * Return 1 if 'filename' starts with an ASCII letter followed by ':'
 * (a drive prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
216
217 static int is_windows_drive(const char *filename)
218 {
219     if (is_windows_drive_prefix(filename) &&
220         filename[2] == '\0')
221         return 1;
222     if (strstart(filename, "\\\\.\\", NULL) ||
223         strstart(filename, "//./", NULL))
224         return 1;
225     return 0;
226 }
227 #endif
228
229 static BlockDriver *find_protocol(const char *filename)
230 {
231     BlockDriver *drv1;
232     char protocol[128];
233     int len;
234     const char *p;
235
236 #ifdef _WIN32
237     if (is_windows_drive(filename) ||
238         is_windows_drive_prefix(filename))
239         return &bdrv_raw;
240 #endif
241     p = strchr(filename, ':');
242     if (!p)
243         return &bdrv_raw;
244     len = p - filename;
245     if (len > sizeof(protocol) - 1)
246         len = sizeof(protocol) - 1;
247     memcpy(protocol, filename, len);
248     protocol[len] = '\0';
249     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
250         if (drv1->protocol_name &&
251             !strcmp(drv1->protocol_name, protocol))
252             return drv1;
253     }
254     return NULL;
255 }
256
257 /* XXX: force raw format if block or character device ? It would
258    simplify the BSD case */
259 static BlockDriver *find_image_format(const char *filename)
260 {
261     int ret, score, score_max;
262     BlockDriver *drv1, *drv;
263     uint8_t buf[2048];
264     BlockDriverState *bs;
265
266     /* detect host devices. By convention, /dev/cdrom[N] is always
267        recognized as a host CDROM */
268     if (strstart(filename, "/dev/cdrom", NULL))
269         return &bdrv_host_device;
270 #ifdef _WIN32
271     if (is_windows_drive(filename))
272         return &bdrv_host_device;
273 #else
274     {
275         struct stat st;
276         if (stat(filename, &st) >= 0 &&
277             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
278             return &bdrv_host_device;
279         }
280     }
281 #endif
282
283     drv = find_protocol(filename);
284     /* no need to test disk image formats for vvfat */
285     if (drv == &bdrv_vvfat)
286         return drv;
287
288     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
289     if (ret < 0)
290         return NULL;
291     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
292     bdrv_delete(bs);
293     if (ret < 0) {
294         return NULL;
295     }
296
297     score_max = 0;
298     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
299         if (drv1->bdrv_probe) {
300             score = drv1->bdrv_probe(buf, ret, filename);
301             if (score > score_max) {
302                 score_max = score;
303                 drv = drv1;
304             }
305         }
306     }
307     return drv;
308 }
309
310 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
311 {
312     BlockDriverState *bs;
313     int ret;
314
315     bs = bdrv_new("");
316     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
317     if (ret < 0) {
318         bdrv_delete(bs);
319         return ret;
320     }
321     bs->growable = 1;
322     *pbs = bs;
323     return 0;
324 }
325
326 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
327 {
328     return bdrv_open2(bs, filename, flags, NULL);
329 }
330
331 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
332                BlockDriver *drv)
333 {
334     int ret, open_flags;
335     char tmp_filename[PATH_MAX];
336     char backing_filename[PATH_MAX];
337
338     bs->read_only = 0;
339     bs->is_temporary = 0;
340     bs->encrypted = 0;
341     bs->valid_key = 0;
342
343     if (flags & BDRV_O_SNAPSHOT) {
344         BlockDriverState *bs1;
345         int64_t total_size;
346         int is_protocol = 0;
347
348         /* if snapshot, we create a temporary backing file and open it
349            instead of opening 'filename' directly */
350
351         /* if there is a backing file, use it */
352         bs1 = bdrv_new("");
353         ret = bdrv_open(bs1, filename, 0);
354         if (ret < 0) {
355             bdrv_delete(bs1);
356             return ret;
357         }
358         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
359
360         if (bs1->drv && bs1->drv->protocol_name)
361             is_protocol = 1;
362
363         bdrv_delete(bs1);
364
365         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
366
367         /* Real path is meaningless for protocols */
368         if (is_protocol)
369             snprintf(backing_filename, sizeof(backing_filename),
370                      "%s", filename);
371         else
372             realpath(filename, backing_filename);
373
374         ret = bdrv_create(&bdrv_qcow2, tmp_filename,
375                           total_size, backing_filename, 0);
376         if (ret < 0) {
377             return ret;
378         }
379         filename = tmp_filename;
380         bs->is_temporary = 1;
381     }
382
383     pstrcpy(bs->filename, sizeof(bs->filename), filename);
384     if (flags & BDRV_O_FILE) {
385         drv = find_protocol(filename);
386     } else if (!drv) {
387         drv = find_image_format(filename);
388     }
389     if (!drv) {
390         ret = -ENOENT;
391         goto unlink_and_fail;
392     }
393     bs->drv = drv;
394     bs->opaque = qemu_mallocz(drv->instance_size);
395     /* Note: for compatibility, we open disk image files as RDWR, and
396        RDONLY as fallback */
397     if (!(flags & BDRV_O_FILE))
398         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
399     else
400         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
401     ret = drv->bdrv_open(bs, filename, open_flags);
402     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
403         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
404         bs->read_only = 1;
405     }
406     if (ret < 0) {
407         qemu_free(bs->opaque);
408         bs->opaque = NULL;
409         bs->drv = NULL;
410     unlink_and_fail:
411         if (bs->is_temporary)
412             unlink(filename);
413         return ret;
414     }
415     if (drv->bdrv_getlength) {
416         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
417     }
418 #ifndef _WIN32
419     if (bs->is_temporary) {
420         unlink(filename);
421     }
422 #endif
423     if (bs->backing_file[0] != '\0') {
424         /* if there is a backing file, use it */
425         bs->backing_hd = bdrv_new("");
426         path_combine(backing_filename, sizeof(backing_filename),
427                      filename, bs->backing_file);
428         ret = bdrv_open(bs->backing_hd, backing_filename, open_flags);
429         if (ret < 0) {
430             bdrv_close(bs);
431             return ret;
432         }
433     }
434
435     if (!bdrv_key_required(bs)) {
436         /* call the change callback */
437         bs->media_changed = 1;
438         if (bs->change_cb)
439             bs->change_cb(bs->change_opaque);
440     }
441     return 0;
442 }
443
444 void bdrv_close(BlockDriverState *bs)
445 {
446     if (bs->drv) {
447         if (bs->backing_hd)
448             bdrv_delete(bs->backing_hd);
449         bs->drv->bdrv_close(bs);
450         qemu_free(bs->opaque);
451 #ifdef _WIN32
452         if (bs->is_temporary) {
453             unlink(bs->filename);
454         }
455 #endif
456         bs->opaque = NULL;
457         bs->drv = NULL;
458
459         /* call the change callback */
460         bs->media_changed = 1;
461         if (bs->change_cb)
462             bs->change_cb(bs->change_opaque);
463     }
464 }
465
466 void bdrv_delete(BlockDriverState *bs)
467 {
468     BlockDriverState **pbs;
469
470     pbs = &bdrv_first;
471     while (*pbs != bs && *pbs != NULL)
472         pbs = &(*pbs)->next;
473     if (*pbs == bs)
474         *pbs = bs->next;
475
476     bdrv_close(bs);
477     qemu_free(bs);
478 }
479
480 /* commit COW file into the raw image */
481 int bdrv_commit(BlockDriverState *bs)
482 {
483     BlockDriver *drv = bs->drv;
484     int64_t i, total_sectors;
485     int n, j;
486     unsigned char sector[512];
487
488     if (!drv)
489         return -ENOMEDIUM;
490
491     if (bs->read_only) {
492         return -EACCES;
493     }
494
495     if (!bs->backing_hd) {
496         return -ENOTSUP;
497     }
498
499     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
500     for (i = 0; i < total_sectors;) {
501         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
502             for(j = 0; j < n; j++) {
503                 if (bdrv_read(bs, i, sector, 1) != 0) {
504                     return -EIO;
505                 }
506
507                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
508                     return -EIO;
509                 }
510                 i++;
511             }
512         } else {
513             i += n;
514         }
515     }
516
517     if (drv->bdrv_make_empty)
518         return drv->bdrv_make_empty(bs);
519
520     return 0;
521 }
522
523 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
524                                    size_t size)
525 {
526     int64_t len;
527
528     if (!bdrv_is_inserted(bs))
529         return -ENOMEDIUM;
530
531     if (bs->growable)
532         return 0;
533
534     len = bdrv_getlength(bs);
535
536     if ((offset + size) > len)
537         return -EIO;
538
539     return 0;
540 }
541
542 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
543                               int nb_sectors)
544 {
545     int64_t offset;
546
547     /* Deal with byte accesses */
548     if (sector_num < 0)
549         offset = -sector_num;
550     else
551         offset = sector_num * 512;
552
553     return bdrv_check_byte_request(bs, offset, nb_sectors * 512);
554 }
555
556 /* return < 0 if error. See bdrv_write() for the return codes */
557 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
558               uint8_t *buf, int nb_sectors)
559 {
560     BlockDriver *drv = bs->drv;
561
562     if (!drv)
563         return -ENOMEDIUM;
564     if (bdrv_check_request(bs, sector_num, nb_sectors))
565         return -EIO;
566
567     if (drv->bdrv_pread) {
568         int ret, len;
569         len = nb_sectors * 512;
570         ret = drv->bdrv_pread(bs, sector_num * 512, buf, len);
571         if (ret < 0)
572             return ret;
573         else if (ret != len)
574             return -EINVAL;
575         else {
576             bs->rd_bytes += (unsigned) len;
577             bs->rd_ops ++;
578             return 0;
579         }
580     } else {
581         return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
582     }
583 }
584
585 /* Return < 0 if error. Important errors are:
586   -EIO         generic I/O error (may happen for all errors)
587   -ENOMEDIUM   No media inserted.
588   -EINVAL      Invalid sector number or nb_sectors
589   -EACCES      Trying to write a read-only device
590 */
591 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
592                const uint8_t *buf, int nb_sectors)
593 {
594     BlockDriver *drv = bs->drv;
595     if (!bs->drv)
596         return -ENOMEDIUM;
597     if (bs->read_only)
598         return -EACCES;
599     if (bdrv_check_request(bs, sector_num, nb_sectors))
600         return -EIO;
601
602     if (drv->bdrv_pwrite) {
603         int ret, len, count = 0;
604         len = nb_sectors * 512;
605         do {
606             ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len - count);
607             if (ret < 0) {
608                 printf("bdrv_write ret=%d\n", ret);
609                 return ret;
610             }
611             count += ret;
612             buf += ret;
613         } while (count != len);
614         bs->wr_bytes += (unsigned) len;
615         bs->wr_ops ++;
616         return 0;
617     }
618     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
619 }
620
621 static int bdrv_pread_em(BlockDriverState *bs, int64_t offset,
622                          uint8_t *buf, int count1)
623 {
624     uint8_t tmp_buf[SECTOR_SIZE];
625     int len, nb_sectors, count;
626     int64_t sector_num;
627
628     count = count1;
629     /* first read to align to sector start */
630     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
631     if (len > count)
632         len = count;
633     sector_num = offset >> SECTOR_BITS;
634     if (len > 0) {
635         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
636             return -EIO;
637         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
638         count -= len;
639         if (count == 0)
640             return count1;
641         sector_num++;
642         buf += len;
643     }
644
645     /* read the sectors "in place" */
646     nb_sectors = count >> SECTOR_BITS;
647     if (nb_sectors > 0) {
648         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
649             return -EIO;
650         sector_num += nb_sectors;
651         len = nb_sectors << SECTOR_BITS;
652         buf += len;
653         count -= len;
654     }
655
656     /* add data from the last sector */
657     if (count > 0) {
658         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
659             return -EIO;
660         memcpy(buf, tmp_buf, count);
661     }
662     return count1;
663 }
664
665 static int bdrv_pwrite_em(BlockDriverState *bs, int64_t offset,
666                           const uint8_t *buf, int count1)
667 {
668     uint8_t tmp_buf[SECTOR_SIZE];
669     int len, nb_sectors, count;
670     int64_t sector_num;
671
672     count = count1;
673     /* first write to align to sector start */
674     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
675     if (len > count)
676         len = count;
677     sector_num = offset >> SECTOR_BITS;
678     if (len > 0) {
679         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
680             return -EIO;
681         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
682         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
683             return -EIO;
684         count -= len;
685         if (count == 0)
686             return count1;
687         sector_num++;
688         buf += len;
689     }
690
691     /* write the sectors "in place" */
692     nb_sectors = count >> SECTOR_BITS;
693     if (nb_sectors > 0) {
694         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
695             return -EIO;
696         sector_num += nb_sectors;
697         len = nb_sectors << SECTOR_BITS;
698         buf += len;
699         count -= len;
700     }
701
702     /* add data from the last sector */
703     if (count > 0) {
704         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
705             return -EIO;
706         memcpy(tmp_buf, buf, count);
707         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
708             return -EIO;
709     }
710     return count1;
711 }
712
713 /**
714  * Read with byte offsets (needed only for file protocols)
715  */
716 int bdrv_pread(BlockDriverState *bs, int64_t offset,
717                void *buf1, int count1)
718 {
719     BlockDriver *drv = bs->drv;
720
721     if (!drv)
722         return -ENOMEDIUM;
723     if (bdrv_check_byte_request(bs, offset, count1))
724         return -EIO;
725
726     if (!drv->bdrv_pread)
727         return bdrv_pread_em(bs, offset, buf1, count1);
728     return drv->bdrv_pread(bs, offset, buf1, count1);
729 }
730
731 /**
732  * Write with byte offsets (needed only for file protocols)
733  */
734 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
735                 const void *buf1, int count1)
736 {
737     BlockDriver *drv = bs->drv;
738
739     if (!drv)
740         return -ENOMEDIUM;
741     if (bdrv_check_byte_request(bs, offset, count1))
742         return -EIO;
743
744     if (!drv->bdrv_pwrite)
745         return bdrv_pwrite_em(bs, offset, buf1, count1);
746     return drv->bdrv_pwrite(bs, offset, buf1, count1);
747 }
748
749 /**
750  * Truncate file to 'offset' bytes (needed only for file protocols)
751  */
752 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
753 {
754     BlockDriver *drv = bs->drv;
755     if (!drv)
756         return -ENOMEDIUM;
757     if (!drv->bdrv_truncate)
758         return -ENOTSUP;
759     return drv->bdrv_truncate(bs, offset);
760 }
761
762 /**
763  * Length of a file in bytes. Return < 0 if error or unknown.
764  */
765 int64_t bdrv_getlength(BlockDriverState *bs)
766 {
767     BlockDriver *drv = bs->drv;
768     if (!drv)
769         return -ENOMEDIUM;
770     if (!drv->bdrv_getlength) {
771         /* legacy mode */
772         return bs->total_sectors * SECTOR_SIZE;
773     }
774     return drv->bdrv_getlength(bs);
775 }
776
777 /* return 0 as number of sectors if no device present or error */
778 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
779 {
780     int64_t length;
781     length = bdrv_getlength(bs);
782     if (length < 0)
783         length = 0;
784     else
785         length = length >> SECTOR_BITS;
786     *nb_sectors_ptr = length;
787 }
788
/*
 * One entry of the MSDOS partition table, as read by guess_disk_lchs()
 * from offset 0x1be of the first sector.  The structure is packed so
 * that it can be overlaid on the raw sector bytes; the 32-bit fields
 * are converted with le32_to_cpu() by their reader.
 */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
801
802 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
803 static int guess_disk_lchs(BlockDriverState *bs,
804                            int *pcylinders, int *pheads, int *psectors)
805 {
806     uint8_t buf[512];
807     int ret, i, heads, sectors, cylinders;
808     struct partition *p;
809     uint32_t nr_sects;
810     uint64_t nb_sectors;
811
812     bdrv_get_geometry(bs, &nb_sectors);
813
814     ret = bdrv_read(bs, 0, buf, 1);
815     if (ret < 0)
816         return -1;
817     /* test msdos magic */
818     if (buf[510] != 0x55 || buf[511] != 0xaa)
819         return -1;
820     for(i = 0; i < 4; i++) {
821         p = ((struct partition *)(buf + 0x1be)) + i;
822         nr_sects = le32_to_cpu(p->nr_sects);
823         if (nr_sects && p->end_head) {
824             /* We make the assumption that the partition terminates on
825                a cylinder boundary */
826             heads = p->end_head + 1;
827             sectors = p->end_sector & 63;
828             if (sectors == 0)
829                 continue;
830             cylinders = nb_sectors / (heads * sectors);
831             if (cylinders < 1 || cylinders > 16383)
832                 continue;
833             *pheads = heads;
834             *psectors = sectors;
835             *pcylinders = cylinders;
836 #if 0
837             printf("guessed geometry: LCHS=%d %d %d\n",
838                    cylinders, heads, sectors);
839 #endif
840             return 0;
841         }
842     }
843     return -1;
844 }
845
846 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
847 {
848     int translation, lba_detected = 0;
849     int cylinders, heads, secs;
850     uint64_t nb_sectors;
851
852     /* if a geometry hint is available, use it */
853     bdrv_get_geometry(bs, &nb_sectors);
854     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
855     translation = bdrv_get_translation_hint(bs);
856     if (cylinders != 0) {
857         *pcyls = cylinders;
858         *pheads = heads;
859         *psecs = secs;
860     } else {
861         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
862             if (heads > 16) {
863                 /* if heads > 16, it means that a BIOS LBA
864                    translation was active, so the default
865                    hardware geometry is OK */
866                 lba_detected = 1;
867                 goto default_geometry;
868             } else {
869                 *pcyls = cylinders;
870                 *pheads = heads;
871                 *psecs = secs;
872                 /* disable any translation to be in sync with
873                    the logical geometry */
874                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
875                     bdrv_set_translation_hint(bs,
876                                               BIOS_ATA_TRANSLATION_NONE);
877                 }
878             }
879         } else {
880         default_geometry:
881             /* if no geometry, use a standard physical disk geometry */
882             cylinders = nb_sectors / (16 * 63);
883
884             if (cylinders > 16383)
885                 cylinders = 16383;
886             else if (cylinders < 2)
887                 cylinders = 2;
888             *pcyls = cylinders;
889             *pheads = 16;
890             *psecs = 63;
891             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
892                 if ((*pcyls * *pheads) <= 131072) {
893                     bdrv_set_translation_hint(bs,
894                                               BIOS_ATA_TRANSLATION_LARGE);
895                 } else {
896                     bdrv_set_translation_hint(bs,
897                                               BIOS_ATA_TRANSLATION_LBA);
898                 }
899             }
900         }
901         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
902     }
903 }
904
905 void bdrv_set_geometry_hint(BlockDriverState *bs,
906                             int cyls, int heads, int secs)
907 {
908     bs->cyls = cyls;
909     bs->heads = heads;
910     bs->secs = secs;
911 }
912
913 void bdrv_set_type_hint(BlockDriverState *bs, int type)
914 {
915     bs->type = type;
916     bs->removable = ((type == BDRV_TYPE_CDROM ||
917                       type == BDRV_TYPE_FLOPPY));
918 }
919
920 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
921 {
922     bs->translation = translation;
923 }
924
925 void bdrv_get_geometry_hint(BlockDriverState *bs,
926                             int *pcyls, int *pheads, int *psecs)
927 {
928     *pcyls = bs->cyls;
929     *pheads = bs->heads;
930     *psecs = bs->secs;
931 }
932
933 int bdrv_get_type_hint(BlockDriverState *bs)
934 {
935     return bs->type;
936 }
937
938 int bdrv_get_translation_hint(BlockDriverState *bs)
939 {
940     return bs->translation;
941 }
942
943 int bdrv_is_removable(BlockDriverState *bs)
944 {
945     return bs->removable;
946 }
947
948 int bdrv_is_read_only(BlockDriverState *bs)
949 {
950     return bs->read_only;
951 }
952
953 int bdrv_is_sg(BlockDriverState *bs)
954 {
955     return bs->sg;
956 }
957
958 /* XXX: no longer used */
959 void bdrv_set_change_cb(BlockDriverState *bs,
960                         void (*change_cb)(void *opaque), void *opaque)
961 {
962     bs->change_cb = change_cb;
963     bs->change_opaque = opaque;
964 }
965
966 int bdrv_is_encrypted(BlockDriverState *bs)
967 {
968     if (bs->backing_hd && bs->backing_hd->encrypted)
969         return 1;
970     return bs->encrypted;
971 }
972
973 int bdrv_key_required(BlockDriverState *bs)
974 {
975     BlockDriverState *backing_hd = bs->backing_hd;
976
977     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
978         return 1;
979     return (bs->encrypted && !bs->valid_key);
980 }
981
982 int bdrv_set_key(BlockDriverState *bs, const char *key)
983 {
984     int ret;
985     if (bs->backing_hd && bs->backing_hd->encrypted) {
986         ret = bdrv_set_key(bs->backing_hd, key);
987         if (ret < 0)
988             return ret;
989         if (!bs->encrypted)
990             return 0;
991     }
992     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
993         return -1;
994     ret = bs->drv->bdrv_set_key(bs, key);
995     if (ret < 0) {
996         bs->valid_key = 0;
997     } else if (!bs->valid_key) {
998         bs->valid_key = 1;
999         /* call the change callback now, we skipped it on open */
1000         bs->media_changed = 1;
1001         if (bs->change_cb)
1002             bs->change_cb(bs->change_opaque);
1003     }
1004     return ret;
1005 }
1006
1007 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1008 {
1009     if (!bs->drv) {
1010         buf[0] = '\0';
1011     } else {
1012         pstrcpy(buf, buf_size, bs->drv->format_name);
1013     }
1014 }
1015
1016 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1017                          void *opaque)
1018 {
1019     BlockDriver *drv;
1020
1021     for (drv = first_drv; drv != NULL; drv = drv->next) {
1022         it(opaque, drv->format_name);
1023     }
1024 }
1025
1026 BlockDriverState *bdrv_find(const char *name)
1027 {
1028     BlockDriverState *bs;
1029
1030     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1031         if (!strcmp(name, bs->device_name))
1032             return bs;
1033     }
1034     return NULL;
1035 }
1036
1037 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1038 {
1039     BlockDriverState *bs;
1040
1041     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1042         it(opaque, bs);
1043     }
1044 }
1045
1046 const char *bdrv_get_device_name(BlockDriverState *bs)
1047 {
1048     return bs->device_name;
1049 }
1050
1051 void bdrv_flush(BlockDriverState *bs)
1052 {
1053     if (bs->drv->bdrv_flush)
1054         bs->drv->bdrv_flush(bs);
1055     if (bs->backing_hd)
1056         bdrv_flush(bs->backing_hd);
1057 }
1058
1059 void bdrv_flush_all(void)
1060 {
1061     BlockDriverState *bs;
1062
1063     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1064         if (bs->drv && !bdrv_is_read_only(bs) && 
1065             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1066             bdrv_flush(bs);
1067 }
1068
1069 /*
1070  * Returns true iff the specified sector is present in the disk image. Drivers
1071  * not implementing the functionality are assumed to not support backing files,
1072  * hence all their sectors are reported as allocated.
1073  *
1074  * 'pnum' is set to the number of sectors (including and immediately following
1075  * the specified sector) that are known to be in the same
1076  * allocated/unallocated state.
1077  *
1078  * 'nb_sectors' is the max value 'pnum' should be set to.
1079  */
1080 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1081         int *pnum)
1082 {
1083     int64_t n;
1084     if (!bs->drv->bdrv_is_allocated) {
1085         if (sector_num >= bs->total_sectors) {
1086             *pnum = 0;
1087             return 0;
1088         }
1089         n = bs->total_sectors - sector_num;
1090         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1091         return 1;
1092     }
1093     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1094 }
1095
1096 void bdrv_info(Monitor *mon)
1097 {
1098     BlockDriverState *bs;
1099
1100     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1101         monitor_printf(mon, "%s:", bs->device_name);
1102         monitor_printf(mon, " type=");
1103         switch(bs->type) {
1104         case BDRV_TYPE_HD:
1105             monitor_printf(mon, "hd");
1106             break;
1107         case BDRV_TYPE_CDROM:
1108             monitor_printf(mon, "cdrom");
1109             break;
1110         case BDRV_TYPE_FLOPPY:
1111             monitor_printf(mon, "floppy");
1112             break;
1113         }
1114         monitor_printf(mon, " removable=%d", bs->removable);
1115         if (bs->removable) {
1116             monitor_printf(mon, " locked=%d", bs->locked);
1117         }
1118         if (bs->drv) {
1119             monitor_printf(mon, " file=");
1120             monitor_print_filename(mon, bs->filename);
1121             if (bs->backing_file[0] != '\0') {
1122                 monitor_printf(mon, " backing_file=");
1123                 monitor_print_filename(mon, bs->backing_file);
1124             }
1125             monitor_printf(mon, " ro=%d", bs->read_only);
1126             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1127             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1128         } else {
1129             monitor_printf(mon, " [not inserted]");
1130         }
1131         monitor_printf(mon, "\n");
1132     }
1133 }
1134
1135 /* The "info blockstats" command. */
1136 void bdrv_info_stats(Monitor *mon)
1137 {
1138     BlockDriverState *bs;
1139     BlockDriverInfo bdi;
1140
1141     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1142         monitor_printf(mon, "%s:"
1143                        " rd_bytes=%" PRIu64
1144                        " wr_bytes=%" PRIu64
1145                        " rd_operations=%" PRIu64
1146                        " wr_operations=%" PRIu64
1147                        ,
1148                        bs->device_name,
1149                        bs->rd_bytes, bs->wr_bytes,
1150                        bs->rd_ops, bs->wr_ops);
1151         if (bdrv_get_info(bs, &bdi) == 0)
1152             monitor_printf(mon, " high=%" PRId64
1153                            " bytes_free=%" PRId64,
1154                            bdi.highest_alloc, bdi.num_free_bytes);
1155         monitor_printf(mon, "\n");
1156     }
1157 }
1158
1159 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1160 {
1161     if (bs->backing_hd && bs->backing_hd->encrypted)
1162         return bs->backing_file;
1163     else if (bs->encrypted)
1164         return bs->filename;
1165     else
1166         return NULL;
1167 }
1168
1169 void bdrv_get_backing_filename(BlockDriverState *bs,
1170                                char *filename, int filename_size)
1171 {
1172     if (!bs->backing_hd) {
1173         pstrcpy(filename, filename_size, "");
1174     } else {
1175         pstrcpy(filename, filename_size, bs->backing_file);
1176     }
1177 }
1178
1179 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1180                           const uint8_t *buf, int nb_sectors)
1181 {
1182     BlockDriver *drv = bs->drv;
1183     if (!drv)
1184         return -ENOMEDIUM;
1185     if (!drv->bdrv_write_compressed)
1186         return -ENOTSUP;
1187     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1188 }
1189
1190 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1191 {
1192     BlockDriver *drv = bs->drv;
1193     if (!drv)
1194         return -ENOMEDIUM;
1195     if (!drv->bdrv_get_info)
1196         return -ENOTSUP;
1197     memset(bdi, 0, sizeof(*bdi));
1198     return drv->bdrv_get_info(bs, bdi);
1199 }
1200
1201 /**************************************************************/
1202 /* handling of snapshots */
1203
1204 int bdrv_snapshot_create(BlockDriverState *bs,
1205                          QEMUSnapshotInfo *sn_info)
1206 {
1207     BlockDriver *drv = bs->drv;
1208     if (!drv)
1209         return -ENOMEDIUM;
1210     if (!drv->bdrv_snapshot_create)
1211         return -ENOTSUP;
1212     return drv->bdrv_snapshot_create(bs, sn_info);
1213 }
1214
1215 int bdrv_snapshot_goto(BlockDriverState *bs,
1216                        const char *snapshot_id)
1217 {
1218     BlockDriver *drv = bs->drv;
1219     if (!drv)
1220         return -ENOMEDIUM;
1221     if (!drv->bdrv_snapshot_goto)
1222         return -ENOTSUP;
1223     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1224 }
1225
1226 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1227 {
1228     BlockDriver *drv = bs->drv;
1229     if (!drv)
1230         return -ENOMEDIUM;
1231     if (!drv->bdrv_snapshot_delete)
1232         return -ENOTSUP;
1233     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1234 }
1235
1236 int bdrv_snapshot_list(BlockDriverState *bs,
1237                        QEMUSnapshotInfo **psn_info)
1238 {
1239     BlockDriver *drv = bs->drv;
1240     if (!drv)
1241         return -ENOMEDIUM;
1242     if (!drv->bdrv_snapshot_list)
1243         return -ENOTSUP;
1244     return drv->bdrv_snapshot_list(bs, psn_info);
1245 }
1246
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes) in a compact,
 * human-readable form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and tagged with K, M, G or T: one decimal place while
 * the value is below 10 units, a rounded integer otherwise.  Values beyond
 * the T range are still printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            /* small multiples keep one fractional digit */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit,
                     suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
1276
1277 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1278 {
1279     char buf1[128], date_buf[128], clock_buf[128];
1280 #ifdef _WIN32
1281     struct tm *ptm;
1282 #else
1283     struct tm tm;
1284 #endif
1285     time_t ti;
1286     int64_t secs;
1287
1288     if (!sn) {
1289         snprintf(buf, buf_size,
1290                  "%-10s%-20s%7s%20s%15s",
1291                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1292     } else {
1293         ti = sn->date_sec;
1294 #ifdef _WIN32
1295         ptm = localtime(&ti);
1296         strftime(date_buf, sizeof(date_buf),
1297                  "%Y-%m-%d %H:%M:%S", ptm);
1298 #else
1299         localtime_r(&ti, &tm);
1300         strftime(date_buf, sizeof(date_buf),
1301                  "%Y-%m-%d %H:%M:%S", &tm);
1302 #endif
1303         secs = sn->vm_clock_nsec / 1000000000;
1304         snprintf(clock_buf, sizeof(clock_buf),
1305                  "%02d:%02d:%02d.%03d",
1306                  (int)(secs / 3600),
1307                  (int)((secs / 60) % 60),
1308                  (int)(secs % 60),
1309                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1310         snprintf(buf, buf_size,
1311                  "%-10s%-20s%7s%20s%15s",
1312                  sn->id_str, sn->name,
1313                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1314                  date_buf,
1315                  clock_buf);
1316     }
1317     return buf;
1318 }
1319
1320
1321 /**************************************************************/
1322 /* async I/Os */
1323
1324 typedef struct VectorTranslationState {
1325     QEMUIOVector *iov;
1326     uint8_t *bounce;
1327     int is_write;
1328     BlockDriverAIOCB *aiocb;
1329     BlockDriverAIOCB *this_aiocb;
1330 } VectorTranslationState;
1331
1332 static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
1333 {
1334     VectorTranslationState *s = opaque;
1335
1336     if (!s->is_write) {
1337         qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
1338     }
1339     qemu_vfree(s->bounce);
1340     s->this_aiocb->cb(s->this_aiocb->opaque, ret);
1341     qemu_aio_release(s->this_aiocb);
1342 }
1343
1344 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1345                                             int64_t sector_num,
1346                                             QEMUIOVector *iov,
1347                                             int nb_sectors,
1348                                             BlockDriverCompletionFunc *cb,
1349                                             void *opaque,
1350                                             int is_write)
1351
1352 {
1353     VectorTranslationState *s = qemu_mallocz(sizeof(*s));
1354     BlockDriverAIOCB *aiocb = qemu_aio_get(bs, cb, opaque);
1355
1356     s->this_aiocb = aiocb;
1357     s->iov = iov;
1358     s->bounce = qemu_memalign(512, nb_sectors * 512);
1359     s->is_write = is_write;
1360     if (is_write) {
1361         qemu_iovec_to_buffer(s->iov, s->bounce);
1362         s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
1363                                   bdrv_aio_rw_vector_cb, s);
1364     } else {
1365         s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
1366                                  bdrv_aio_rw_vector_cb, s);
1367     }
1368     return aiocb;
1369 }
1370
1371 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1372                                  QEMUIOVector *iov, int nb_sectors,
1373                                  BlockDriverCompletionFunc *cb, void *opaque)
1374 {
1375     if (bdrv_check_request(bs, sector_num, nb_sectors))
1376         return NULL;
1377
1378     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1379                               cb, opaque, 0);
1380 }
1381
1382 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1383                                   QEMUIOVector *iov, int nb_sectors,
1384                                   BlockDriverCompletionFunc *cb, void *opaque)
1385 {
1386     if (bdrv_check_request(bs, sector_num, nb_sectors))
1387         return NULL;
1388
1389     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1390                               cb, opaque, 1);
1391 }
1392
1393 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
1394                                 uint8_t *buf, int nb_sectors,
1395                                 BlockDriverCompletionFunc *cb, void *opaque)
1396 {
1397     BlockDriver *drv = bs->drv;
1398     BlockDriverAIOCB *ret;
1399
1400     if (!drv)
1401         return NULL;
1402     if (bdrv_check_request(bs, sector_num, nb_sectors))
1403         return NULL;
1404
1405     ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
1406
1407     if (ret) {
1408         /* Update stats even though technically transfer has not happened. */
1409         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1410         bs->rd_ops ++;
1411     }
1412
1413     return ret;
1414 }
1415
1416 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
1417                                  const uint8_t *buf, int nb_sectors,
1418                                  BlockDriverCompletionFunc *cb, void *opaque)
1419 {
1420     BlockDriver *drv = bs->drv;
1421     BlockDriverAIOCB *ret;
1422
1423     if (!drv)
1424         return NULL;
1425     if (bs->read_only)
1426         return NULL;
1427     if (bdrv_check_request(bs, sector_num, nb_sectors))
1428         return NULL;
1429
1430     ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
1431
1432     if (ret) {
1433         /* Update stats even though technically transfer has not happened. */
1434         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1435         bs->wr_ops ++;
1436     }
1437
1438     return ret;
1439 }
1440
1441 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1442 {
1443     BlockDriver *drv = acb->bs->drv;
1444
1445     if (acb->cb == bdrv_aio_rw_vector_cb) {
1446         VectorTranslationState *s = acb->opaque;
1447         acb = s->aiocb;
1448     }
1449
1450     drv->bdrv_aio_cancel(acb);
1451 }
1452
1453
1454 /**************************************************************/
1455 /* async block device emulation */
1456
1457 static void bdrv_aio_bh_cb(void *opaque)
1458 {
1459     BlockDriverAIOCBSync *acb = opaque;
1460     acb->common.cb(acb->common.opaque, acb->ret);
1461     qemu_aio_release(acb);
1462 }
1463
1464 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1465         int64_t sector_num, uint8_t *buf, int nb_sectors,
1466         BlockDriverCompletionFunc *cb, void *opaque)
1467 {
1468     BlockDriverAIOCBSync *acb;
1469     int ret;
1470
1471     acb = qemu_aio_get(bs, cb, opaque);
1472     if (!acb->bh)
1473         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1474     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1475     acb->ret = ret;
1476     qemu_bh_schedule(acb->bh);
1477     return &acb->common;
1478 }
1479
1480 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1481         int64_t sector_num, const uint8_t *buf, int nb_sectors,
1482         BlockDriverCompletionFunc *cb, void *opaque)
1483 {
1484     BlockDriverAIOCBSync *acb;
1485     int ret;
1486
1487     acb = qemu_aio_get(bs, cb, opaque);
1488     if (!acb->bh)
1489         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1490     ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1491     acb->ret = ret;
1492     qemu_bh_schedule(acb->bh);
1493     return &acb->common;
1494 }
1495
1496 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1497 {
1498     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1499     qemu_bh_cancel(acb->bh);
1500     qemu_aio_release(acb);
1501 }
1502
1503 /**************************************************************/
1504 /* sync block device emulation */
1505
/*
 * Completion callback used by the synchronous emulation: 'opaque' points
 * to an int that receives the request's result 'ret'.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1510
1511 #define NOT_DONE 0x7fffffff
1512
1513 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1514                         uint8_t *buf, int nb_sectors)
1515 {
1516     int async_ret;
1517     BlockDriverAIOCB *acb;
1518
1519     async_ret = NOT_DONE;
1520     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1521                         bdrv_rw_em_cb, &async_ret);
1522     if (acb == NULL)
1523         return -1;
1524
1525     while (async_ret == NOT_DONE) {
1526         qemu_aio_wait();
1527     }
1528
1529     return async_ret;
1530 }
1531
1532 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1533                          const uint8_t *buf, int nb_sectors)
1534 {
1535     int async_ret;
1536     BlockDriverAIOCB *acb;
1537
1538     async_ret = NOT_DONE;
1539     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1540                          bdrv_rw_em_cb, &async_ret);
1541     if (acb == NULL)
1542         return -1;
1543     while (async_ret == NOT_DONE) {
1544         qemu_aio_wait();
1545     }
1546     return async_ret;
1547 }
1548
1549 void bdrv_init(void)
1550 {
1551     bdrv_register(&bdrv_raw);
1552     bdrv_register(&bdrv_host_device);
1553 #ifndef _WIN32
1554     bdrv_register(&bdrv_cow);
1555 #endif
1556     bdrv_register(&bdrv_qcow);
1557     bdrv_register(&bdrv_vmdk);
1558     bdrv_register(&bdrv_cloop);
1559     bdrv_register(&bdrv_dmg);
1560     bdrv_register(&bdrv_bochs);
1561     bdrv_register(&bdrv_vpc);
1562     bdrv_register(&bdrv_vvfat);
1563     bdrv_register(&bdrv_qcow2);
1564     bdrv_register(&bdrv_parallels);
1565     bdrv_register(&bdrv_nbd);
1566 }
1567
1568 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1569                    void *opaque)
1570 {
1571     BlockDriver *drv;
1572     BlockDriverAIOCB *acb;
1573
1574     drv = bs->drv;
1575     if (drv->free_aiocb) {
1576         acb = drv->free_aiocb;
1577         drv->free_aiocb = acb->next;
1578     } else {
1579         acb = qemu_mallocz(drv->aiocb_size);
1580     }
1581     acb->bs = bs;
1582     acb->cb = cb;
1583     acb->opaque = opaque;
1584     return acb;
1585 }
1586
1587 void qemu_aio_release(void *p)
1588 {
1589     BlockDriverAIOCB *acb = p;
1590     BlockDriver *drv = acb->bs->drv;
1591     acb->next = drv->free_aiocb;
1592     drv->free_aiocb = acb;
1593 }
1594
1595 /**************************************************************/
1596 /* removable device support */
1597
1598 /**
1599  * Return TRUE if the media is present
1600  */
1601 int bdrv_is_inserted(BlockDriverState *bs)
1602 {
1603     BlockDriver *drv = bs->drv;
1604     int ret;
1605     if (!drv)
1606         return 0;
1607     if (!drv->bdrv_is_inserted)
1608         return 1;
1609     ret = drv->bdrv_is_inserted(bs);
1610     return ret;
1611 }
1612
1613 /**
1614  * Return TRUE if the media changed since the last call to this
1615  * function. It is currently only used for floppy disks
1616  */
1617 int bdrv_media_changed(BlockDriverState *bs)
1618 {
1619     BlockDriver *drv = bs->drv;
1620     int ret;
1621
1622     if (!drv || !drv->bdrv_media_changed)
1623         ret = -ENOTSUP;
1624     else
1625         ret = drv->bdrv_media_changed(bs);
1626     if (ret == -ENOTSUP)
1627         ret = bs->media_changed;
1628     bs->media_changed = 0;
1629     return ret;
1630 }
1631
1632 /**
1633  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1634  */
1635 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1636 {
1637     BlockDriver *drv = bs->drv;
1638     int ret;
1639
1640     if (!drv || !drv->bdrv_eject) {
1641         ret = -ENOTSUP;
1642     } else {
1643         ret = drv->bdrv_eject(bs, eject_flag);
1644     }
1645     if (ret == -ENOTSUP) {
1646         if (eject_flag)
1647             bdrv_close(bs);
1648     }
1649 }
1650
1651 int bdrv_is_locked(BlockDriverState *bs)
1652 {
1653     return bs->locked;
1654 }
1655
1656 /**
1657  * Lock or unlock the media (if it is locked, the user won't be able
1658  * to eject it manually).
1659  */
1660 void bdrv_set_locked(BlockDriverState *bs, int locked)
1661 {
1662     BlockDriver *drv = bs->drv;
1663
1664     bs->locked = locked;
1665     if (drv && drv->bdrv_set_locked) {
1666         drv->bdrv_set_locked(bs, locked);
1667     }
1668 }
1669
1670 /* needed for generic scsi interface */
1671
1672 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1673 {
1674     BlockDriver *drv = bs->drv;
1675
1676     if (drv && drv->bdrv_ioctl)
1677         return drv->bdrv_ioctl(bs, req, buf);
1678     return -ENOTSUP;
1679 }