linux-user: initialize mmap_mutex properly
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33 #include "module.h"
34
35 #ifdef HOST_BSD
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/ioctl.h>
39 #ifndef __DragonFly__
40 #include <sys/disk.h>
41 #endif
42 #endif
43
44 #ifdef _WIN32
45 #include <windows.h>
46 #endif
47
48 #define SECTOR_BITS 9
49 #define SECTOR_SIZE (1 << SECTOR_BITS)
50
51 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
52         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53         BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
55         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
56         BlockDriverCompletionFunc *cb, void *opaque);
57 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
58                         uint8_t *buf, int nb_sectors);
59 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
60                          const uint8_t *buf, int nb_sectors);
61
62 BlockDriverState *bdrv_first;
63
64 static BlockDriver *first_drv;
65
/*
 * Return non-zero if 'path' is absolute.
 *
 * Anything up to and including the first ':' is skipped before the test,
 * so "proto:/x" counts as absolute. On Windows both '/' and '\' are
 * accepted as separators, and a path that starts with either one is
 * absolute regardless of any later ':'.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
85
/*
 * Build in 'dest' (capacity 'dest_size' bytes) the location of 'filename'.
 *
 * An absolute filename is copied as is. Otherwise it is appended to the
 * leading part of 'base_path' that ends just after the last path separator
 * or just after the first ':', whichever comes later, so URL-like prefixes
 * are kept. Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' (or start of string if none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last separator (or start of string if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    /* copy the directory prefix, truncated to fit, then append the name */
    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
129
130 void bdrv_register(BlockDriver *bdrv)
131 {
132     if (!bdrv->bdrv_aio_readv) {
133         /* add AIO emulation layer */
134         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
135         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
136     } else if (!bdrv->bdrv_read) {
137         /* add synchronous IO emulation layer */
138         bdrv->bdrv_read = bdrv_read_em;
139         bdrv->bdrv_write = bdrv_write_em;
140     }
141     bdrv->next = first_drv;
142     first_drv = bdrv;
143 }
144
145 /* create a new block device (by default it is empty) */
146 BlockDriverState *bdrv_new(const char *device_name)
147 {
148     BlockDriverState **pbs, *bs;
149
150     bs = qemu_mallocz(sizeof(BlockDriverState));
151     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
152     if (device_name[0] != '\0') {
153         /* insert at the end */
154         pbs = &bdrv_first;
155         while (*pbs != NULL)
156             pbs = &(*pbs)->next;
157         *pbs = bs;
158     }
159     return bs;
160 }
161
162 BlockDriver *bdrv_find_format(const char *format_name)
163 {
164     BlockDriver *drv1;
165     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
166         if (!strcmp(drv1->format_name, format_name))
167             return drv1;
168     }
169     return NULL;
170 }
171
172 int bdrv_create(BlockDriver *drv, const char* filename,
173     QEMUOptionParameter *options)
174 {
175     if (!drv->bdrv_create)
176         return -ENOTSUP;
177
178     return drv->bdrv_create(filename, options);
179 }
180
#ifdef _WIN32
/*
 * Create an empty temporary file and store its name in 'filename'
 * (capacity 'size' bytes).
 *
 * GetTempFileName() always writes into a MAX_PATH sized buffer, so the
 * name is produced in a local buffer and then copied with truncation;
 * this keeps the call from overrunning a smaller caller buffer.
 * On failure 'filename' is set to the empty string.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_name[MAX_PATH];
    DWORD dir_len;

    if (size <= 0)
        return;
    filename[0] = '\0';

    dir_len = GetTempPath(MAX_PATH, temp_dir);
    if (dir_len == 0 || dir_len > MAX_PATH)
        return;
    if (GetTempFileName(temp_dir, "qem", 0, temp_name) == 0)
        return;
    pstrcpy(filename, size, temp_name);
}
#else
/*
 * Create an empty temporary file under $TMPDIR (or /tmp when TMPDIR is
 * unset) and store its name in 'filename' (capacity 'size' bytes).
 *
 * On failure 'filename' is set to the empty string, so the caller never
 * goes on to use the unexpanded "vl.XXXXXX" template as a file name.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0)
        return;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    /* a truncated template no longer ends in XXXXXX and makes mkstemp fail */
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
203
#ifdef _WIN32
/* Return non-zero if 'filename' starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    char first = filename[0];
    int is_letter = (first >= 'a' && first <= 'z') ||
                    (first >= 'A' && first <= 'Z');

    /* filename[1] is only read when filename[0] was a letter */
    return (is_letter && filename[1] == ':');
}

/*
 * Return 1 if 'filename' is exactly a drive specification such as "d:",
 * or starts with the prefix "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
223
224 static BlockDriver *find_protocol(const char *filename)
225 {
226     BlockDriver *drv1;
227     char protocol[128];
228     int len;
229     const char *p;
230
231 #ifdef _WIN32
232     if (is_windows_drive(filename) ||
233         is_windows_drive_prefix(filename))
234         return bdrv_find_format("raw");
235 #endif
236     p = strchr(filename, ':');
237     if (!p)
238         return bdrv_find_format("raw");
239     len = p - filename;
240     if (len > sizeof(protocol) - 1)
241         len = sizeof(protocol) - 1;
242     memcpy(protocol, filename, len);
243     protocol[len] = '\0';
244     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
245         if (drv1->protocol_name &&
246             !strcmp(drv1->protocol_name, protocol))
247             return drv1;
248     }
249     return NULL;
250 }
251
252 /*
253  * Detect host devices. By convention, /dev/cdrom[N] is always
254  * recognized as a host CDROM.
255  */
256 static BlockDriver *find_hdev_driver(const char *filename)
257 {
258     int score_max = 0, score;
259     BlockDriver *drv = NULL, *d;
260
261     for (d = first_drv; d; d = d->next) {
262         if (d->bdrv_probe_device) {
263             score = d->bdrv_probe_device(filename);
264             if (score > score_max) {
265                 score_max = score;
266                 drv = d;
267             }
268         }
269     }
270
271     return drv;
272 }
273
274 static BlockDriver *find_image_format(const char *filename)
275 {
276     int ret, score, score_max;
277     BlockDriver *drv1, *drv;
278     uint8_t buf[2048];
279     BlockDriverState *bs;
280
281     drv = find_protocol(filename);
282     /* no need to test disk image formats for vvfat */
283     if (drv && strcmp(drv->format_name, "vvfat") == 0)
284         return drv;
285
286     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
287     if (ret < 0)
288         return NULL;
289     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
290     bdrv_delete(bs);
291     if (ret < 0) {
292         return NULL;
293     }
294
295     score_max = 0;
296     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
297         if (drv1->bdrv_probe) {
298             score = drv1->bdrv_probe(buf, ret, filename);
299             if (score > score_max) {
300                 score_max = score;
301                 drv = drv1;
302             }
303         }
304     }
305     return drv;
306 }
307
308 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
309 {
310     BlockDriverState *bs;
311     int ret;
312
313     bs = bdrv_new("");
314     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
315     if (ret < 0) {
316         bdrv_delete(bs);
317         return ret;
318     }
319     bs->growable = 1;
320     *pbs = bs;
321     return 0;
322 }
323
324 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
325 {
326     return bdrv_open2(bs, filename, flags, NULL);
327 }
328
329 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
330                BlockDriver *drv)
331 {
332     int ret, open_flags;
333     char tmp_filename[PATH_MAX];
334     char backing_filename[PATH_MAX];
335
336     bs->read_only = 0;
337     bs->is_temporary = 0;
338     bs->encrypted = 0;
339     bs->valid_key = 0;
340     /* buffer_alignment defaulted to 512, drivers can change this value */
341     bs->buffer_alignment = 512;
342
343     if (flags & BDRV_O_SNAPSHOT) {
344         BlockDriverState *bs1;
345         int64_t total_size;
346         int is_protocol = 0;
347         BlockDriver *bdrv_qcow2;
348         QEMUOptionParameter *options;
349
350         /* if snapshot, we create a temporary backing file and open it
351            instead of opening 'filename' directly */
352
353         /* if there is a backing file, use it */
354         bs1 = bdrv_new("");
355         ret = bdrv_open2(bs1, filename, 0, drv);
356         if (ret < 0) {
357             bdrv_delete(bs1);
358             return ret;
359         }
360         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
361
362         if (bs1->drv && bs1->drv->protocol_name)
363             is_protocol = 1;
364
365         bdrv_delete(bs1);
366
367         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
368
369         /* Real path is meaningless for protocols */
370         if (is_protocol)
371             snprintf(backing_filename, sizeof(backing_filename),
372                      "%s", filename);
373         else
374             realpath(filename, backing_filename);
375
376         bdrv_qcow2 = bdrv_find_format("qcow2");
377         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
378
379         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
380         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
381         if (drv) {
382             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
383                 drv->format_name);
384         }
385
386         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
387         if (ret < 0) {
388             return ret;
389         }
390
391         filename = tmp_filename;
392         drv = bdrv_qcow2;
393         bs->is_temporary = 1;
394     }
395
396     pstrcpy(bs->filename, sizeof(bs->filename), filename);
397     if (flags & BDRV_O_FILE) {
398         drv = find_protocol(filename);
399     } else if (!drv) {
400         drv = find_hdev_driver(filename);
401         if (!drv) {
402             drv = find_image_format(filename);
403         }
404     }
405     if (!drv) {
406         ret = -ENOENT;
407         goto unlink_and_fail;
408     }
409     bs->drv = drv;
410     bs->opaque = qemu_mallocz(drv->instance_size);
411     /* Note: for compatibility, we open disk image files as RDWR, and
412        RDONLY as fallback */
413     if (!(flags & BDRV_O_FILE))
414         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
415     else
416         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
417     ret = drv->bdrv_open(bs, filename, open_flags);
418     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
419         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
420         bs->read_only = 1;
421     }
422     if (ret < 0) {
423         qemu_free(bs->opaque);
424         bs->opaque = NULL;
425         bs->drv = NULL;
426     unlink_and_fail:
427         if (bs->is_temporary)
428             unlink(filename);
429         return ret;
430     }
431     if (drv->bdrv_getlength) {
432         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
433     }
434 #ifndef _WIN32
435     if (bs->is_temporary) {
436         unlink(filename);
437     }
438 #endif
439     if (bs->backing_file[0] != '\0') {
440         /* if there is a backing file, use it */
441         BlockDriver *back_drv = NULL;
442         bs->backing_hd = bdrv_new("");
443         path_combine(backing_filename, sizeof(backing_filename),
444                      filename, bs->backing_file);
445         if (bs->backing_format[0] != '\0')
446             back_drv = bdrv_find_format(bs->backing_format);
447         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
448                          back_drv);
449         if (ret < 0) {
450             bdrv_close(bs);
451             return ret;
452         }
453     }
454
455     if (!bdrv_key_required(bs)) {
456         /* call the change callback */
457         bs->media_changed = 1;
458         if (bs->change_cb)
459             bs->change_cb(bs->change_opaque);
460     }
461     return 0;
462 }
463
464 void bdrv_close(BlockDriverState *bs)
465 {
466     if (bs->drv) {
467         if (bs->backing_hd)
468             bdrv_delete(bs->backing_hd);
469         bs->drv->bdrv_close(bs);
470         qemu_free(bs->opaque);
471 #ifdef _WIN32
472         if (bs->is_temporary) {
473             unlink(bs->filename);
474         }
475 #endif
476         bs->opaque = NULL;
477         bs->drv = NULL;
478
479         /* call the change callback */
480         bs->media_changed = 1;
481         if (bs->change_cb)
482             bs->change_cb(bs->change_opaque);
483     }
484 }
485
486 void bdrv_delete(BlockDriverState *bs)
487 {
488     BlockDriverState **pbs;
489
490     pbs = &bdrv_first;
491     while (*pbs != bs && *pbs != NULL)
492         pbs = &(*pbs)->next;
493     if (*pbs == bs)
494         *pbs = bs->next;
495
496     bdrv_close(bs);
497     qemu_free(bs);
498 }
499
500 /*
501  * Run consistency checks on an image
502  *
503  * Returns the number of errors or -errno when an internal error occurs
504  */
505 int bdrv_check(BlockDriverState *bs)
506 {
507     if (bs->drv->bdrv_check == NULL) {
508         return -ENOTSUP;
509     }
510
511     return bs->drv->bdrv_check(bs);
512 }
513
514 /* commit COW file into the raw image */
515 int bdrv_commit(BlockDriverState *bs)
516 {
517     BlockDriver *drv = bs->drv;
518     int64_t i, total_sectors;
519     int n, j;
520     unsigned char sector[512];
521
522     if (!drv)
523         return -ENOMEDIUM;
524
525     if (bs->read_only) {
526         return -EACCES;
527     }
528
529     if (!bs->backing_hd) {
530         return -ENOTSUP;
531     }
532
533     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
534     for (i = 0; i < total_sectors;) {
535         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
536             for(j = 0; j < n; j++) {
537                 if (bdrv_read(bs, i, sector, 1) != 0) {
538                     return -EIO;
539                 }
540
541                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
542                     return -EIO;
543                 }
544                 i++;
545             }
546         } else {
547             i += n;
548         }
549     }
550
551     if (drv->bdrv_make_empty)
552         return drv->bdrv_make_empty(bs);
553
554     return 0;
555 }
556
557 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
558                                    size_t size)
559 {
560     int64_t len;
561
562     if (!bdrv_is_inserted(bs))
563         return -ENOMEDIUM;
564
565     if (bs->growable)
566         return 0;
567
568     len = bdrv_getlength(bs);
569
570     if (offset < 0)
571         return -EIO;
572
573     if ((offset > len) || (len - offset < size))
574         return -EIO;
575
576     return 0;
577 }
578
579 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
580                               int nb_sectors)
581 {
582     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
583 }
584
585 /* return < 0 if error. See bdrv_write() for the return codes */
586 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
587               uint8_t *buf, int nb_sectors)
588 {
589     BlockDriver *drv = bs->drv;
590
591     if (!drv)
592         return -ENOMEDIUM;
593     if (bdrv_check_request(bs, sector_num, nb_sectors))
594         return -EIO;
595
596     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
597 }
598
599 /* Return < 0 if error. Important errors are:
600   -EIO         generic I/O error (may happen for all errors)
601   -ENOMEDIUM   No media inserted.
602   -EINVAL      Invalid sector number or nb_sectors
603   -EACCES      Trying to write a read-only device
604 */
605 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
606                const uint8_t *buf, int nb_sectors)
607 {
608     BlockDriver *drv = bs->drv;
609     if (!bs->drv)
610         return -ENOMEDIUM;
611     if (bs->read_only)
612         return -EACCES;
613     if (bdrv_check_request(bs, sector_num, nb_sectors))
614         return -EIO;
615
616     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
617 }
618
619 int bdrv_pread(BlockDriverState *bs, int64_t offset,
620                void *buf, int count1)
621 {
622     uint8_t tmp_buf[SECTOR_SIZE];
623     int len, nb_sectors, count;
624     int64_t sector_num;
625
626     count = count1;
627     /* first read to align to sector start */
628     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
629     if (len > count)
630         len = count;
631     sector_num = offset >> SECTOR_BITS;
632     if (len > 0) {
633         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
634             return -EIO;
635         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
636         count -= len;
637         if (count == 0)
638             return count1;
639         sector_num++;
640         buf += len;
641     }
642
643     /* read the sectors "in place" */
644     nb_sectors = count >> SECTOR_BITS;
645     if (nb_sectors > 0) {
646         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
647             return -EIO;
648         sector_num += nb_sectors;
649         len = nb_sectors << SECTOR_BITS;
650         buf += len;
651         count -= len;
652     }
653
654     /* add data from the last sector */
655     if (count > 0) {
656         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
657             return -EIO;
658         memcpy(buf, tmp_buf, count);
659     }
660     return count1;
661 }
662
663 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
664                 const void *buf, int count1)
665 {
666     uint8_t tmp_buf[SECTOR_SIZE];
667     int len, nb_sectors, count;
668     int64_t sector_num;
669
670     count = count1;
671     /* first write to align to sector start */
672     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
673     if (len > count)
674         len = count;
675     sector_num = offset >> SECTOR_BITS;
676     if (len > 0) {
677         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
678             return -EIO;
679         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
680         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
681             return -EIO;
682         count -= len;
683         if (count == 0)
684             return count1;
685         sector_num++;
686         buf += len;
687     }
688
689     /* write the sectors "in place" */
690     nb_sectors = count >> SECTOR_BITS;
691     if (nb_sectors > 0) {
692         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
693             return -EIO;
694         sector_num += nb_sectors;
695         len = nb_sectors << SECTOR_BITS;
696         buf += len;
697         count -= len;
698     }
699
700     /* add data from the last sector */
701     if (count > 0) {
702         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
703             return -EIO;
704         memcpy(tmp_buf, buf, count);
705         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
706             return -EIO;
707     }
708     return count1;
709 }
710
711 /**
712  * Truncate file to 'offset' bytes (needed only for file protocols)
713  */
714 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
715 {
716     BlockDriver *drv = bs->drv;
717     if (!drv)
718         return -ENOMEDIUM;
719     if (!drv->bdrv_truncate)
720         return -ENOTSUP;
721     return drv->bdrv_truncate(bs, offset);
722 }
723
724 /**
725  * Length of a file in bytes. Return < 0 if error or unknown.
726  */
727 int64_t bdrv_getlength(BlockDriverState *bs)
728 {
729     BlockDriver *drv = bs->drv;
730     if (!drv)
731         return -ENOMEDIUM;
732     if (!drv->bdrv_getlength) {
733         /* legacy mode */
734         return bs->total_sectors * SECTOR_SIZE;
735     }
736     return drv->bdrv_getlength(bs);
737 }
738
739 /* return 0 as number of sectors if no device present or error */
740 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
741 {
742     int64_t length;
743     length = bdrv_getlength(bs);
744     if (length < 0)
745         length = 0;
746     else
747         length = length >> SECTOR_BITS;
748     *nb_sectors_ptr = length;
749 }
750
/* One 16-byte entry of an MSDOS partition table, as stored on disk. */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
763
764 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
765 static int guess_disk_lchs(BlockDriverState *bs,
766                            int *pcylinders, int *pheads, int *psectors)
767 {
768     uint8_t buf[512];
769     int ret, i, heads, sectors, cylinders;
770     struct partition *p;
771     uint32_t nr_sects;
772     uint64_t nb_sectors;
773
774     bdrv_get_geometry(bs, &nb_sectors);
775
776     ret = bdrv_read(bs, 0, buf, 1);
777     if (ret < 0)
778         return -1;
779     /* test msdos magic */
780     if (buf[510] != 0x55 || buf[511] != 0xaa)
781         return -1;
782     for(i = 0; i < 4; i++) {
783         p = ((struct partition *)(buf + 0x1be)) + i;
784         nr_sects = le32_to_cpu(p->nr_sects);
785         if (nr_sects && p->end_head) {
786             /* We make the assumption that the partition terminates on
787                a cylinder boundary */
788             heads = p->end_head + 1;
789             sectors = p->end_sector & 63;
790             if (sectors == 0)
791                 continue;
792             cylinders = nb_sectors / (heads * sectors);
793             if (cylinders < 1 || cylinders > 16383)
794                 continue;
795             *pheads = heads;
796             *psectors = sectors;
797             *pcylinders = cylinders;
798 #if 0
799             printf("guessed geometry: LCHS=%d %d %d\n",
800                    cylinders, heads, sectors);
801 #endif
802             return 0;
803         }
804     }
805     return -1;
806 }
807
808 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
809 {
810     int translation, lba_detected = 0;
811     int cylinders, heads, secs;
812     uint64_t nb_sectors;
813
814     /* if a geometry hint is available, use it */
815     bdrv_get_geometry(bs, &nb_sectors);
816     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
817     translation = bdrv_get_translation_hint(bs);
818     if (cylinders != 0) {
819         *pcyls = cylinders;
820         *pheads = heads;
821         *psecs = secs;
822     } else {
823         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
824             if (heads > 16) {
825                 /* if heads > 16, it means that a BIOS LBA
826                    translation was active, so the default
827                    hardware geometry is OK */
828                 lba_detected = 1;
829                 goto default_geometry;
830             } else {
831                 *pcyls = cylinders;
832                 *pheads = heads;
833                 *psecs = secs;
834                 /* disable any translation to be in sync with
835                    the logical geometry */
836                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
837                     bdrv_set_translation_hint(bs,
838                                               BIOS_ATA_TRANSLATION_NONE);
839                 }
840             }
841         } else {
842         default_geometry:
843             /* if no geometry, use a standard physical disk geometry */
844             cylinders = nb_sectors / (16 * 63);
845
846             if (cylinders > 16383)
847                 cylinders = 16383;
848             else if (cylinders < 2)
849                 cylinders = 2;
850             *pcyls = cylinders;
851             *pheads = 16;
852             *psecs = 63;
853             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
854                 if ((*pcyls * *pheads) <= 131072) {
855                     bdrv_set_translation_hint(bs,
856                                               BIOS_ATA_TRANSLATION_LARGE);
857                 } else {
858                     bdrv_set_translation_hint(bs,
859                                               BIOS_ATA_TRANSLATION_LBA);
860                 }
861             }
862         }
863         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
864     }
865 }
866
867 void bdrv_set_geometry_hint(BlockDriverState *bs,
868                             int cyls, int heads, int secs)
869 {
870     bs->cyls = cyls;
871     bs->heads = heads;
872     bs->secs = secs;
873 }
874
875 void bdrv_set_type_hint(BlockDriverState *bs, int type)
876 {
877     bs->type = type;
878     bs->removable = ((type == BDRV_TYPE_CDROM ||
879                       type == BDRV_TYPE_FLOPPY));
880 }
881
882 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
883 {
884     bs->translation = translation;
885 }
886
887 void bdrv_get_geometry_hint(BlockDriverState *bs,
888                             int *pcyls, int *pheads, int *psecs)
889 {
890     *pcyls = bs->cyls;
891     *pheads = bs->heads;
892     *psecs = bs->secs;
893 }
894
895 int bdrv_get_type_hint(BlockDriverState *bs)
896 {
897     return bs->type;
898 }
899
900 int bdrv_get_translation_hint(BlockDriverState *bs)
901 {
902     return bs->translation;
903 }
904
905 int bdrv_is_removable(BlockDriverState *bs)
906 {
907     return bs->removable;
908 }
909
910 int bdrv_is_read_only(BlockDriverState *bs)
911 {
912     return bs->read_only;
913 }
914
915 int bdrv_is_sg(BlockDriverState *bs)
916 {
917     return bs->sg;
918 }
919
920 /* XXX: no longer used */
921 void bdrv_set_change_cb(BlockDriverState *bs,
922                         void (*change_cb)(void *opaque), void *opaque)
923 {
924     bs->change_cb = change_cb;
925     bs->change_opaque = opaque;
926 }
927
928 int bdrv_is_encrypted(BlockDriverState *bs)
929 {
930     if (bs->backing_hd && bs->backing_hd->encrypted)
931         return 1;
932     return bs->encrypted;
933 }
934
935 int bdrv_key_required(BlockDriverState *bs)
936 {
937     BlockDriverState *backing_hd = bs->backing_hd;
938
939     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
940         return 1;
941     return (bs->encrypted && !bs->valid_key);
942 }
943
944 int bdrv_set_key(BlockDriverState *bs, const char *key)
945 {
946     int ret;
947     if (bs->backing_hd && bs->backing_hd->encrypted) {
948         ret = bdrv_set_key(bs->backing_hd, key);
949         if (ret < 0)
950             return ret;
951         if (!bs->encrypted)
952             return 0;
953     }
954     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
955         return -1;
956     ret = bs->drv->bdrv_set_key(bs, key);
957     if (ret < 0) {
958         bs->valid_key = 0;
959     } else if (!bs->valid_key) {
960         bs->valid_key = 1;
961         /* call the change callback now, we skipped it on open */
962         bs->media_changed = 1;
963         if (bs->change_cb)
964             bs->change_cb(bs->change_opaque);
965     }
966     return ret;
967 }
968
969 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
970 {
971     if (!bs->drv) {
972         buf[0] = '\0';
973     } else {
974         pstrcpy(buf, buf_size, bs->drv->format_name);
975     }
976 }
977
978 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
979                          void *opaque)
980 {
981     BlockDriver *drv;
982
983     for (drv = first_drv; drv != NULL; drv = drv->next) {
984         it(opaque, drv->format_name);
985     }
986 }
987
988 BlockDriverState *bdrv_find(const char *name)
989 {
990     BlockDriverState *bs;
991
992     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
993         if (!strcmp(name, bs->device_name))
994             return bs;
995     }
996     return NULL;
997 }
998
999 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1000 {
1001     BlockDriverState *bs;
1002
1003     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1004         it(opaque, bs);
1005     }
1006 }
1007
1008 const char *bdrv_get_device_name(BlockDriverState *bs)
1009 {
1010     return bs->device_name;
1011 }
1012
1013 void bdrv_flush(BlockDriverState *bs)
1014 {
1015     if (!bs->drv)
1016         return;
1017     if (bs->drv->bdrv_flush)
1018         bs->drv->bdrv_flush(bs);
1019     if (bs->backing_hd)
1020         bdrv_flush(bs->backing_hd);
1021 }
1022
1023 void bdrv_flush_all(void)
1024 {
1025     BlockDriverState *bs;
1026
1027     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1028         if (bs->drv && !bdrv_is_read_only(bs) && 
1029             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1030             bdrv_flush(bs);
1031 }
1032
1033 /*
1034  * Returns true iff the specified sector is present in the disk image. Drivers
1035  * not implementing the functionality are assumed to not support backing files,
1036  * hence all their sectors are reported as allocated.
1037  *
1038  * 'pnum' is set to the number of sectors (including and immediately following
1039  * the specified sector) that are known to be in the same
1040  * allocated/unallocated state.
1041  *
1042  * 'nb_sectors' is the max value 'pnum' should be set to.
1043  */
1044 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1045         int *pnum)
1046 {
1047     int64_t n;
1048     if (!bs->drv->bdrv_is_allocated) {
1049         if (sector_num >= bs->total_sectors) {
1050             *pnum = 0;
1051             return 0;
1052         }
1053         n = bs->total_sectors - sector_num;
1054         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1055         return 1;
1056     }
1057     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1058 }
1059
1060 void bdrv_info(Monitor *mon)
1061 {
1062     BlockDriverState *bs;
1063
1064     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1065         monitor_printf(mon, "%s:", bs->device_name);
1066         monitor_printf(mon, " type=");
1067         switch(bs->type) {
1068         case BDRV_TYPE_HD:
1069             monitor_printf(mon, "hd");
1070             break;
1071         case BDRV_TYPE_CDROM:
1072             monitor_printf(mon, "cdrom");
1073             break;
1074         case BDRV_TYPE_FLOPPY:
1075             monitor_printf(mon, "floppy");
1076             break;
1077         }
1078         monitor_printf(mon, " removable=%d", bs->removable);
1079         if (bs->removable) {
1080             monitor_printf(mon, " locked=%d", bs->locked);
1081         }
1082         if (bs->drv) {
1083             monitor_printf(mon, " file=");
1084             monitor_print_filename(mon, bs->filename);
1085             if (bs->backing_file[0] != '\0') {
1086                 monitor_printf(mon, " backing_file=");
1087                 monitor_print_filename(mon, bs->backing_file);
1088             }
1089             monitor_printf(mon, " ro=%d", bs->read_only);
1090             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1091             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1092         } else {
1093             monitor_printf(mon, " [not inserted]");
1094         }
1095         monitor_printf(mon, "\n");
1096     }
1097 }
1098
1099 /* The "info blockstats" command. */
1100 void bdrv_info_stats(Monitor *mon)
1101 {
1102     BlockDriverState *bs;
1103
1104     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1105         monitor_printf(mon, "%s:"
1106                        " rd_bytes=%" PRIu64
1107                        " wr_bytes=%" PRIu64
1108                        " rd_operations=%" PRIu64
1109                        " wr_operations=%" PRIu64
1110                        "\n",
1111                        bs->device_name,
1112                        bs->rd_bytes, bs->wr_bytes,
1113                        bs->rd_ops, bs->wr_ops);
1114     }
1115 }
1116
1117 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1118 {
1119     if (bs->backing_hd && bs->backing_hd->encrypted)
1120         return bs->backing_file;
1121     else if (bs->encrypted)
1122         return bs->filename;
1123     else
1124         return NULL;
1125 }
1126
1127 void bdrv_get_backing_filename(BlockDriverState *bs,
1128                                char *filename, int filename_size)
1129 {
1130     if (!bs->backing_hd) {
1131         pstrcpy(filename, filename_size, "");
1132     } else {
1133         pstrcpy(filename, filename_size, bs->backing_file);
1134     }
1135 }
1136
1137 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1138                           const uint8_t *buf, int nb_sectors)
1139 {
1140     BlockDriver *drv = bs->drv;
1141     if (!drv)
1142         return -ENOMEDIUM;
1143     if (!drv->bdrv_write_compressed)
1144         return -ENOTSUP;
1145     if (bdrv_check_request(bs, sector_num, nb_sectors))
1146         return -EIO;
1147     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1148 }
1149
1150 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1151 {
1152     BlockDriver *drv = bs->drv;
1153     if (!drv)
1154         return -ENOMEDIUM;
1155     if (!drv->bdrv_get_info)
1156         return -ENOTSUP;
1157     memset(bdi, 0, sizeof(*bdi));
1158     return drv->bdrv_get_info(bs, bdi);
1159 }
1160
1161 int bdrv_put_buffer(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size)
1162 {
1163     BlockDriver *drv = bs->drv;
1164     if (!drv)
1165         return -ENOMEDIUM;
1166     if (!drv->bdrv_put_buffer)
1167         return -ENOTSUP;
1168     return drv->bdrv_put_buffer(bs, buf, pos, size);
1169 }
1170
1171 int bdrv_get_buffer(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size)
1172 {
1173     BlockDriver *drv = bs->drv;
1174     if (!drv)
1175         return -ENOMEDIUM;
1176     if (!drv->bdrv_get_buffer)
1177         return -ENOTSUP;
1178     return drv->bdrv_get_buffer(bs, buf, pos, size);
1179 }
1180
1181 /**************************************************************/
1182 /* handling of snapshots */
1183
1184 int bdrv_snapshot_create(BlockDriverState *bs,
1185                          QEMUSnapshotInfo *sn_info)
1186 {
1187     BlockDriver *drv = bs->drv;
1188     if (!drv)
1189         return -ENOMEDIUM;
1190     if (!drv->bdrv_snapshot_create)
1191         return -ENOTSUP;
1192     return drv->bdrv_snapshot_create(bs, sn_info);
1193 }
1194
1195 int bdrv_snapshot_goto(BlockDriverState *bs,
1196                        const char *snapshot_id)
1197 {
1198     BlockDriver *drv = bs->drv;
1199     if (!drv)
1200         return -ENOMEDIUM;
1201     if (!drv->bdrv_snapshot_goto)
1202         return -ENOTSUP;
1203     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1204 }
1205
1206 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1207 {
1208     BlockDriver *drv = bs->drv;
1209     if (!drv)
1210         return -ENOMEDIUM;
1211     if (!drv->bdrv_snapshot_delete)
1212         return -ENOTSUP;
1213     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1214 }
1215
1216 int bdrv_snapshot_list(BlockDriverState *bs,
1217                        QEMUSnapshotInfo **psn_info)
1218 {
1219     BlockDriver *drv = bs->drv;
1220     if (!drv)
1221         return -ENOMEDIUM;
1222     if (!drv->bdrv_snapshot_list)
1223         return -ENOTSUP;
1224     return drv->bdrv_snapshot_list(bs, psn_info);
1225 }
1226
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes, via snprintf) as a
 * short human-readable string and return 'buf'.
 *
 * Values up to 999 are printed as a plain integer. Larger values are
 * scaled by powers of 1024 and get a K/M/G/T suffix: one decimal place
 * while the scaled value is below 10, otherwise a rounded integer. The
 * last suffix (T) is used for everything that does not fit a smaller one.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = { 'K', 'M', 'G', 'T' };
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    while (idx < NB_SUFFIXES) {
        if (size < (10 * unit)) {
            /* fewer than 10 units: keep one fractional digit */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            /* round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
        unit = unit * 1024;
        idx++;
    }
    return buf;
}
1256
1257 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1258 {
1259     char buf1[128], date_buf[128], clock_buf[128];
1260 #ifdef _WIN32
1261     struct tm *ptm;
1262 #else
1263     struct tm tm;
1264 #endif
1265     time_t ti;
1266     int64_t secs;
1267
1268     if (!sn) {
1269         snprintf(buf, buf_size,
1270                  "%-10s%-20s%7s%20s%15s",
1271                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1272     } else {
1273         ti = sn->date_sec;
1274 #ifdef _WIN32
1275         ptm = localtime(&ti);
1276         strftime(date_buf, sizeof(date_buf),
1277                  "%Y-%m-%d %H:%M:%S", ptm);
1278 #else
1279         localtime_r(&ti, &tm);
1280         strftime(date_buf, sizeof(date_buf),
1281                  "%Y-%m-%d %H:%M:%S", &tm);
1282 #endif
1283         secs = sn->vm_clock_nsec / 1000000000;
1284         snprintf(clock_buf, sizeof(clock_buf),
1285                  "%02d:%02d:%02d.%03d",
1286                  (int)(secs / 3600),
1287                  (int)((secs / 60) % 60),
1288                  (int)(secs % 60),
1289                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1290         snprintf(buf, buf_size,
1291                  "%-10s%-20s%7s%20s%15s",
1292                  sn->id_str, sn->name,
1293                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1294                  date_buf,
1295                  clock_buf);
1296     }
1297     return buf;
1298 }
1299
1300
1301 /**************************************************************/
1302 /* async I/Os */
1303
1304 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1305                                  QEMUIOVector *qiov, int nb_sectors,
1306                                  BlockDriverCompletionFunc *cb, void *opaque)
1307 {
1308     BlockDriver *drv = bs->drv;
1309     BlockDriverAIOCB *ret;
1310
1311     if (!drv)
1312         return NULL;
1313     if (bdrv_check_request(bs, sector_num, nb_sectors))
1314         return NULL;
1315
1316     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1317                               cb, opaque);
1318
1319     if (ret) {
1320         /* Update stats even though technically transfer has not happened. */
1321         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1322         bs->rd_ops ++;
1323     }
1324
1325     return ret;
1326 }
1327
1328 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1329                                   QEMUIOVector *qiov, int nb_sectors,
1330                                   BlockDriverCompletionFunc *cb, void *opaque)
1331 {
1332     BlockDriver *drv = bs->drv;
1333     BlockDriverAIOCB *ret;
1334
1335     if (!drv)
1336         return NULL;
1337     if (bs->read_only)
1338         return NULL;
1339     if (bdrv_check_request(bs, sector_num, nb_sectors))
1340         return NULL;
1341
1342     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1343                                cb, opaque);
1344
1345     if (ret) {
1346         /* Update stats even though technically transfer has not happened. */
1347         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1348         bs->wr_ops ++;
1349     }
1350
1351     return ret;
1352 }
1353
1354 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1355 {
1356     acb->pool->cancel(acb);
1357 }
1358
1359
1360 /**************************************************************/
1361 /* async block device emulation */
1362
1363 typedef struct BlockDriverAIOCBSync {
1364     BlockDriverAIOCB common;
1365     QEMUBH *bh;
1366     int ret;
1367     /* vector translation state */
1368     QEMUIOVector *qiov;
1369     uint8_t *bounce;
1370     int is_write;
1371 } BlockDriverAIOCBSync;
1372
1373 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1374 {
1375     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1376     qemu_bh_cancel(acb->bh);
1377     qemu_aio_release(acb);
1378 }
1379
1380 static AIOPool bdrv_em_aio_pool = {
1381     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
1382     .cancel             = bdrv_aio_cancel_em,
1383 };
1384
1385 static void bdrv_aio_bh_cb(void *opaque)
1386 {
1387     BlockDriverAIOCBSync *acb = opaque;
1388
1389     if (!acb->is_write)
1390         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
1391     qemu_vfree(acb->bounce);
1392     acb->common.cb(acb->common.opaque, acb->ret);
1393
1394     qemu_aio_release(acb);
1395 }
1396
1397 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1398                                             int64_t sector_num,
1399                                             QEMUIOVector *qiov,
1400                                             int nb_sectors,
1401                                             BlockDriverCompletionFunc *cb,
1402                                             void *opaque,
1403                                             int is_write)
1404
1405 {
1406     BlockDriverAIOCBSync *acb;
1407
1408     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1409     acb->is_write = is_write;
1410     acb->qiov = qiov;
1411     acb->bounce = qemu_blockalign(bs, qiov->size);
1412
1413     if (!acb->bh)
1414         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1415
1416     if (is_write) {
1417         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1418         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
1419     } else {
1420         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
1421     }
1422
1423     qemu_bh_schedule(acb->bh);
1424
1425     return &acb->common;
1426 }
1427
1428 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
1429         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1430         BlockDriverCompletionFunc *cb, void *opaque)
1431 {
1432     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
1433 }
1434
1435 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
1436         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1437         BlockDriverCompletionFunc *cb, void *opaque)
1438 {
1439     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
1440 }
1441
1442 /**************************************************************/
1443 /* sync block device emulation */
1444
/*
 * Completion callback for the synchronous emulation: stores 'ret' in the
 * int that 'opaque' points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1449
1450 #define NOT_DONE 0x7fffffff
1451
1452 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1453                         uint8_t *buf, int nb_sectors)
1454 {
1455     int async_ret;
1456     BlockDriverAIOCB *acb;
1457     struct iovec iov;
1458     QEMUIOVector qiov;
1459
1460     async_ret = NOT_DONE;
1461     iov.iov_base = (void *)buf;
1462     iov.iov_len = nb_sectors * 512;
1463     qemu_iovec_init_external(&qiov, &iov, 1);
1464     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
1465         bdrv_rw_em_cb, &async_ret);
1466     if (acb == NULL)
1467         return -1;
1468
1469     while (async_ret == NOT_DONE) {
1470         qemu_aio_wait();
1471     }
1472
1473     return async_ret;
1474 }
1475
1476 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1477                          const uint8_t *buf, int nb_sectors)
1478 {
1479     int async_ret;
1480     BlockDriverAIOCB *acb;
1481     struct iovec iov;
1482     QEMUIOVector qiov;
1483
1484     async_ret = NOT_DONE;
1485     iov.iov_base = (void *)buf;
1486     iov.iov_len = nb_sectors * 512;
1487     qemu_iovec_init_external(&qiov, &iov, 1);
1488     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
1489         bdrv_rw_em_cb, &async_ret);
1490     if (acb == NULL)
1491         return -1;
1492     while (async_ret == NOT_DONE) {
1493         qemu_aio_wait();
1494     }
1495     return async_ret;
1496 }
1497
1498 void bdrv_init(void)
1499 {
1500     module_call_init(MODULE_INIT_BLOCK);
1501 }
1502
1503 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
1504                    BlockDriverCompletionFunc *cb, void *opaque)
1505 {
1506     BlockDriverAIOCB *acb;
1507
1508     if (pool->free_aiocb) {
1509         acb = pool->free_aiocb;
1510         pool->free_aiocb = acb->next;
1511     } else {
1512         acb = qemu_mallocz(pool->aiocb_size);
1513         acb->pool = pool;
1514     }
1515     acb->bs = bs;
1516     acb->cb = cb;
1517     acb->opaque = opaque;
1518     return acb;
1519 }
1520
1521 void qemu_aio_release(void *p)
1522 {
1523     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1524     AIOPool *pool = acb->pool;
1525     acb->next = pool->free_aiocb;
1526     pool->free_aiocb = acb;
1527 }
1528
1529 /**************************************************************/
1530 /* removable device support */
1531
1532 /**
1533  * Return TRUE if the media is present
1534  */
1535 int bdrv_is_inserted(BlockDriverState *bs)
1536 {
1537     BlockDriver *drv = bs->drv;
1538     int ret;
1539     if (!drv)
1540         return 0;
1541     if (!drv->bdrv_is_inserted)
1542         return 1;
1543     ret = drv->bdrv_is_inserted(bs);
1544     return ret;
1545 }
1546
1547 /**
1548  * Return TRUE if the media changed since the last call to this
1549  * function. It is currently only used for floppy disks
1550  */
1551 int bdrv_media_changed(BlockDriverState *bs)
1552 {
1553     BlockDriver *drv = bs->drv;
1554     int ret;
1555
1556     if (!drv || !drv->bdrv_media_changed)
1557         ret = -ENOTSUP;
1558     else
1559         ret = drv->bdrv_media_changed(bs);
1560     if (ret == -ENOTSUP)
1561         ret = bs->media_changed;
1562     bs->media_changed = 0;
1563     return ret;
1564 }
1565
1566 /**
1567  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1568  */
1569 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1570 {
1571     BlockDriver *drv = bs->drv;
1572     int ret;
1573
1574     if (!drv || !drv->bdrv_eject) {
1575         ret = -ENOTSUP;
1576     } else {
1577         ret = drv->bdrv_eject(bs, eject_flag);
1578     }
1579     if (ret == -ENOTSUP) {
1580         if (eject_flag)
1581             bdrv_close(bs);
1582     }
1583 }
1584
1585 int bdrv_is_locked(BlockDriverState *bs)
1586 {
1587     return bs->locked;
1588 }
1589
1590 /**
1591  * Lock or unlock the media (if it is locked, the user won't be able
1592  * to eject it manually).
1593  */
1594 void bdrv_set_locked(BlockDriverState *bs, int locked)
1595 {
1596     BlockDriver *drv = bs->drv;
1597
1598     bs->locked = locked;
1599     if (drv && drv->bdrv_set_locked) {
1600         drv->bdrv_set_locked(bs, locked);
1601     }
1602 }
1603
1604 /* needed for generic scsi interface */
1605
1606 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1607 {
1608     BlockDriver *drv = bs->drv;
1609
1610     if (drv && drv->bdrv_ioctl)
1611         return drv->bdrv_ioctl(bs, req, buf);
1612     return -ENOTSUP;
1613 }
1614
1615 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1616         unsigned long int req, void *buf,
1617         BlockDriverCompletionFunc *cb, void *opaque)
1618 {
1619     BlockDriver *drv = bs->drv;
1620
1621     if (drv && drv->bdrv_aio_ioctl)
1622         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1623     return NULL;
1624 }
1625
1626 void *qemu_blockalign(BlockDriverState *bs, size_t size)
1627 {
1628     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
1629 }