fix raw_aio_remove (Stefano Stabellini)
[qemu] / block.c
diff --git a/block.c b/block.c
index 0730954..4f4bf7c 100644 (file)
--- a/block.c
+++ b/block.c
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include "config-host.h"
+#ifdef _BSD
+/* include native header before sys-queue.h */
+#include <sys/queue.h>
+#endif
+
 #include "qemu-common.h"
-#ifndef QEMU_IMG
 #include "console.h"
-#endif
 #include "block_int.h"
 
 #ifdef _BSD
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
-#include <sys/queue.h>
 #include <sys/disk.h>
 #endif
 
@@ -57,6 +60,7 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors);
 
 BlockDriverState *bdrv_first;
+
 static BlockDriver *first_drv;
 
 int path_is_absolute(const char *path)
@@ -147,8 +151,6 @@ BlockDriverState *bdrv_new(const char *device_name)
     BlockDriverState **pbs, *bs;
 
     bs = qemu_mallocz(sizeof(BlockDriverState));
-    if(!bs)
-        return NULL;
     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
     if (device_name[0] != '\0') {
         /* insert at the end */
@@ -191,7 +193,7 @@ void get_tmp_filename(char *filename, int size)
 void get_tmp_filename(char *filename, int size)
 {
     int fd;
-    char *tmpdir;
+    const char *tmpdir;
     /* XXX: race condition possible */
     tmpdir = getenv("TMPDIR");
     if (!tmpdir)
@@ -339,6 +341,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
     if (flags & BDRV_O_SNAPSHOT) {
         BlockDriverState *bs1;
         int64_t total_size;
+        int is_protocol = 0;
 
         /* if snapshot, we create a temporary backing file and open it
            instead of opening 'filename' directly */
@@ -353,10 +356,21 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
             return -1;
         }
         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
+
+        if (bs1->drv && bs1->drv->protocol_name)
+            is_protocol = 1;
+
         bdrv_delete(bs1);
 
         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
-        realpath(filename, backing_filename);
+
+        /* Real path is meaningless for protocols */
+        if (is_protocol)
+            snprintf(backing_filename, sizeof(backing_filename),
+                     "%s", filename);
+        else
+            realpath(filename, backing_filename);
+
         if (bdrv_create(&bdrv_qcow2, tmp_filename,
                         total_size, backing_filename, 0) < 0) {
             return -1;
@@ -379,17 +393,15 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
     }
     bs->drv = drv;
     bs->opaque = qemu_mallocz(drv->instance_size);
-    if (bs->opaque == NULL && drv->instance_size > 0)
-        return -1;
     /* Note: for compatibility, we open disk image files as RDWR, and
        RDONLY as fallback */
     if (!(flags & BDRV_O_FILE))
-        open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT);
+        open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
     else
         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
     ret = drv->bdrv_open(bs, filename, open_flags);
-    if (ret == -EACCES && !(flags & BDRV_O_FILE)) {
-        ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY);
+    if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
+        ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
         bs->read_only = 1;
     }
     if (ret < 0) {
@@ -416,7 +428,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
         }
         path_combine(backing_filename, sizeof(backing_filename),
                      filename, bs->backing_file);
-        if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
+        if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0)
             goto fail;
     }
 
@@ -452,7 +464,14 @@ void bdrv_close(BlockDriverState *bs)
 
 void bdrv_delete(BlockDriverState *bs)
 {
-    /* XXX: remove the driver list */
+    BlockDriverState **pbs;
+
+    pbs = &bdrv_first;
+    while (*pbs != bs && *pbs != NULL)
+        pbs = &(*pbs)->next;
+    if (*pbs == bs)
+        *pbs = bs->next;
+
     bdrv_close(bs);
     qemu_free(bs);
 }
@@ -509,14 +528,6 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
     if (!drv)
         return -ENOMEDIUM;
 
-    if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
-            memcpy(buf, bs->boot_sector_data, 512);
-        sector_num++;
-        nb_sectors--;
-        buf += 512;
-        if (nb_sectors == 0)
-            return 0;
-    }
     if (drv->bdrv_pread) {
         int ret, len;
         len = nb_sectors * 512;
@@ -549,25 +560,23 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
         return -ENOMEDIUM;
     if (bs->read_only)
         return -EACCES;
-    if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
-        memcpy(bs->boot_sector_data, buf, 512);
-    }
     if (drv->bdrv_pwrite) {
-        int ret, len;
+        int ret, len, count = 0;
         len = nb_sectors * 512;
-        ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len);
-        if (ret < 0)
-            return ret;
-        else if (ret != len)
-            return -EIO;
-        else {
-           bs->wr_bytes += (unsigned) len;
-           bs->wr_ops ++;
-            return 0;
-       }
-    } else {
-        return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
+        do {
+            ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len - count);
+            if (ret < 0) {
+                printf("bdrv_write ret=%d\n", ret);
+                return ret;
+            }
+            count += ret;
+            buf += ret;
+        } while (count != len);
+        bs->wr_bytes += (unsigned) len;
+        bs->wr_ops ++;
+        return 0;
     }
+    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
 }
 
 static int bdrv_pread_em(BlockDriverState *bs, int64_t offset,
@@ -732,14 +741,120 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
     *nb_sectors_ptr = length;
 }
 
-/* force a given boot sector. */
-void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size)
+struct partition {
+        uint8_t boot_ind;           /* 0x80 - active */
+        uint8_t head;               /* starting head */
+        uint8_t sector;             /* starting sector */
+        uint8_t cyl;                /* starting cylinder */
+        uint8_t sys_ind;            /* What partition type */
+        uint8_t end_head;           /* end head */
+        uint8_t end_sector;         /* end sector */
+        uint8_t end_cyl;            /* end cylinder */
+        uint32_t start_sect;        /* starting sector counting from 0 */
+        uint32_t nr_sects;          /* nr of sectors in partition */
+} __attribute__((packed));
+
+/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
+static int guess_disk_lchs(BlockDriverState *bs,
+                           int *pcylinders, int *pheads, int *psectors)
+{
+    uint8_t buf[512];
+    int ret, i, heads, sectors, cylinders;
+    struct partition *p;
+    uint32_t nr_sects;
+    uint64_t nb_sectors;
+
+    bdrv_get_geometry(bs, &nb_sectors);
+
+    ret = bdrv_read(bs, 0, buf, 1);
+    if (ret < 0)
+        return -1;
+    /* test msdos magic */
+    if (buf[510] != 0x55 || buf[511] != 0xaa)
+        return -1;
+    for(i = 0; i < 4; i++) {
+        p = ((struct partition *)(buf + 0x1be)) + i;
+        nr_sects = le32_to_cpu(p->nr_sects);
+        if (nr_sects && p->end_head) {
+            /* We make the assumption that the partition terminates on
+               a cylinder boundary */
+            heads = p->end_head + 1;
+            sectors = p->end_sector & 63;
+            if (sectors == 0)
+                continue;
+            cylinders = nb_sectors / (heads * sectors);
+            if (cylinders < 1 || cylinders > 16383)
+                continue;
+            *pheads = heads;
+            *psectors = sectors;
+            *pcylinders = cylinders;
+#if 0
+            printf("guessed geometry: LCHS=%d %d %d\n",
+                   cylinders, heads, sectors);
+#endif
+            return 0;
+        }
+    }
+    return -1;
+}
+
+void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
 {
-    bs->boot_sector_enabled = 1;
-    if (size > 512)
-        size = 512;
-    memcpy(bs->boot_sector_data, data, size);
-    memset(bs->boot_sector_data + size, 0, 512 - size);
+    int translation, lba_detected = 0;
+    int cylinders, heads, secs;
+    uint64_t nb_sectors;
+
+    /* if a geometry hint is available, use it */
+    bdrv_get_geometry(bs, &nb_sectors);
+    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
+    translation = bdrv_get_translation_hint(bs);
+    if (cylinders != 0) {
+        *pcyls = cylinders;
+        *pheads = heads;
+        *psecs = secs;
+    } else {
+        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
+            if (heads > 16) {
+                /* if heads > 16, it means that a BIOS LBA
+                   translation was active, so the default
+                   hardware geometry is OK */
+                lba_detected = 1;
+                goto default_geometry;
+            } else {
+                *pcyls = cylinders;
+                *pheads = heads;
+                *psecs = secs;
+                /* disable any translation to be in sync with
+                   the logical geometry */
+                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
+                    bdrv_set_translation_hint(bs,
+                                              BIOS_ATA_TRANSLATION_NONE);
+                }
+            }
+        } else {
+        default_geometry:
+            /* if no geometry, use a standard physical disk geometry */
+            cylinders = nb_sectors / (16 * 63);
+
+            if (cylinders > 16383)
+                cylinders = 16383;
+            else if (cylinders < 2)
+                cylinders = 2;
+            *pcyls = cylinders;
+            *pheads = 16;
+            *psecs = 63;
+            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
+                if ((*pcyls * *pheads) <= 131072) {
+                    bdrv_set_translation_hint(bs,
+                                              BIOS_ATA_TRANSLATION_LARGE);
+                } else {
+                    bdrv_set_translation_hint(bs,
+                                              BIOS_ATA_TRANSLATION_LBA);
+                }
+            }
+        }
+        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
+    }
 }
 
 void bdrv_set_geometry_hint(BlockDriverState *bs,
@@ -877,7 +992,43 @@ void bdrv_flush(BlockDriverState *bs)
         bdrv_flush(bs->backing_hd);
 }
 
-#ifndef QEMU_IMG
+void bdrv_flush_all(void)
+{
+    BlockDriverState *bs;
+
+    for (bs = bdrv_first; bs != NULL; bs = bs->next)
+        if (bs->drv && !bdrv_is_read_only(bs) && 
+            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
+            bdrv_flush(bs);
+}
+
+/*
+ * Returns true iff the specified sector is present in the disk image. Drivers
+ * not implementing the functionality are assumed to not support backing files,
+ * hence all their sectors are reported as allocated.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to.
+ */
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+       int *pnum)
+{
+    int64_t n;
+    if (!bs->drv->bdrv_is_allocated) {
+        if (sector_num >= bs->total_sectors) {
+            *pnum = 0;
+            return 0;
+        }
+        n = bs->total_sectors - sector_num;
+        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
+        return 1;
+    }
+    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
+}
+
 void bdrv_info(void)
 {
     BlockDriverState *bs;
@@ -922,6 +1073,7 @@ void bdrv_info(void)
 void bdrv_info_stats (void)
 {
     BlockDriverState *bs;
+    BlockDriverInfo bdi;
 
     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
        term_printf ("%s:"
@@ -929,13 +1081,17 @@ void bdrv_info_stats (void)
                     " wr_bytes=%" PRIu64
                     " rd_operations=%" PRIu64
                     " wr_operations=%" PRIu64
-                    "\n",
+                     ,
                     bs->device_name,
                     bs->rd_bytes, bs->wr_bytes,
                     bs->rd_ops, bs->wr_ops);
+        if (bdrv_get_info(bs, &bdi) == 0)
+            term_printf(" high=%" PRId64
+                        " bytes_free=%" PRId64,
+                        bdi.highest_alloc, bdi.num_free_bytes);
+        term_printf("\n");
     }
 }
-#endif
 
 void bdrv_get_backing_filename(BlockDriverState *bs,
                                char *filename, int filename_size)
@@ -1092,6 +1248,69 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
 /**************************************************************/
 /* async I/Os */
 
+typedef struct VectorTranslationState {
+    QEMUIOVector *iov;
+    uint8_t *bounce;
+    int is_write;
+    BlockDriverAIOCB *aiocb;
+    BlockDriverAIOCB *this_aiocb;
+} VectorTranslationState;
+
+static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
+{
+    VectorTranslationState *s = opaque;
+
+    if (!s->is_write) {
+        qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
+    }
+    qemu_free(s->bounce);
+    s->this_aiocb->cb(s->this_aiocb->opaque, ret);
+    qemu_aio_release(s->this_aiocb);
+}
+
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *iov,
+                                            int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque,
+                                            int is_write)
+
+{
+    VectorTranslationState *s = qemu_mallocz(sizeof(*s));
+    BlockDriverAIOCB *aiocb = qemu_aio_get(bs, cb, opaque);
+
+    s->this_aiocb = aiocb;
+    s->iov = iov;
+    s->bounce = qemu_memalign(512, nb_sectors * 512);
+    s->is_write = is_write;
+    if (is_write) {
+        qemu_iovec_to_buffer(s->iov, s->bounce);
+        s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
+                                  bdrv_aio_rw_vector_cb, s);
+    } else {
+        s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
+                                 bdrv_aio_rw_vector_cb, s);
+    }
+    return aiocb;
+}
+
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                                 QEMUIOVector *iov, int nb_sectors,
+                                 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
+                              cb, opaque, 0);
+}
+
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                                  QEMUIOVector *iov, int nb_sectors,
+                                  BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
+                              cb, opaque, 1);
+}
+
 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
                                 uint8_t *buf, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
@@ -1102,14 +1321,6 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
     if (!drv)
         return NULL;
 
-    /* XXX: we assume that nb_sectors == 0 is suppored by the async read */
-    if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
-        memcpy(buf, bs->boot_sector_data, 512);
-        sector_num++;
-        nb_sectors--;
-        buf += 512;
-    }
-
     ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
 
     if (ret) {
@@ -1132,9 +1343,6 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
         return NULL;
     if (bs->read_only)
         return NULL;
-    if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
-        memcpy(bs->boot_sector_data, buf, 512);
-    }
 
     ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
 
@@ -1151,6 +1359,11 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 {
     BlockDriver *drv = acb->bs->drv;
 
+    if (acb->cb == bdrv_aio_rw_vector_cb) {
+        VectorTranslationState *s = acb->opaque;
+        acb = s->aiocb;
+    }
+
     drv->bdrv_aio_cancel(acb);
 }
 
@@ -1158,31 +1371,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 /**************************************************************/
 /* async block device emulation */
 
-#ifdef QEMU_IMG
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    int ret;
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    cb(opaque, ret);
-    return NULL;
-}
-
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    int ret;
-    ret = bdrv_write(bs, sector_num, buf, nb_sectors);
-    cb(opaque, ret);
-    return NULL;
-}
-
-static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb)
-{
-}
-#else
 static void bdrv_aio_bh_cb(void *opaque)
 {
     BlockDriverAIOCBSync *acb = opaque;
@@ -1228,7 +1416,6 @@ static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
     qemu_bh_cancel(acb->bh);
     qemu_aio_release(acb);
 }
-#endif /* !QEMU_IMG */
 
 /**************************************************************/
 /* sync block device emulation */
@@ -1247,17 +1434,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
     BlockDriverAIOCB *acb;
 
     async_ret = NOT_DONE;
-    qemu_aio_wait_start();
     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
                         bdrv_rw_em_cb, &async_ret);
-    if (acb == NULL) {
-        qemu_aio_wait_end();
+    if (acb == NULL)
         return -1;
-    }
+
     while (async_ret == NOT_DONE) {
         qemu_aio_wait();
     }
-    qemu_aio_wait_end();
+
     return async_ret;
 }
 
@@ -1268,17 +1453,13 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
     BlockDriverAIOCB *acb;
 
     async_ret = NOT_DONE;
-    qemu_aio_wait_start();
     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
                          bdrv_rw_em_cb, &async_ret);
-    if (acb == NULL) {
-        qemu_aio_wait_end();
+    if (acb == NULL)
         return -1;
-    }
     while (async_ret == NOT_DONE) {
         qemu_aio_wait();
     }
-    qemu_aio_wait_end();
     return async_ret;
 }
 
@@ -1298,6 +1479,7 @@ void bdrv_init(void)
     bdrv_register(&bdrv_vvfat);
     bdrv_register(&bdrv_qcow2);
     bdrv_register(&bdrv_parallels);
+    bdrv_register(&bdrv_nbd);
 }
 
 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
@@ -1312,8 +1494,6 @@ void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
         drv->free_aiocb = acb->next;
     } else {
         acb = qemu_mallocz(drv->aiocb_size);
-        if (!acb)
-            return NULL;
     }
     acb->bs = bs;
     acb->cb = cb;