Suppress type mismatch warnings in VDE code.
[qemu] / block-qcow2.c
index d8ac647..1f33125 100644 (file)
@@ -45,6 +45,7 @@
 
 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
+//#define DEBUG_EXT
 
 #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
 #define QCOW_VERSION 2
@@ -77,6 +78,15 @@ typedef struct QCowHeader {
     uint64_t snapshots_offset;
 } QCowHeader;
 
+
+typedef struct {
+    uint32_t magic;
+    uint32_t len;
+} QCowExtension;
+#define  QCOW_EXT_MAGIC_END 0
+#define  QCOW_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
+
+
 typedef struct __attribute__((packed)) QCowSnapshotHeader {
     /* header is 8 byte aligned */
     uint64_t l1_table_offset;
@@ -167,9 +177,7 @@ static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
 static int64_t alloc_bytes(BlockDriverState *bs, int size);
 static void free_clusters(BlockDriverState *bs,
                           int64_t offset, int64_t size);
-#ifdef DEBUG_ALLOC
-static void check_refcounts(BlockDriverState *bs);
-#endif
+static int check_refcounts(BlockDriverState *bs);
 
 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
@@ -183,11 +191,84 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
         return 0;
 }
 
+
+/* 
+ * read qcow2 extension and fill bs
+ * start reading from start_offset
+ * finish reading upon magic of value 0 or when end_offset reached
+ * unknown magic is skipped (future extension this version knows nothing about)
+ * return 0 upon success, non-0 otherwise
+ */
+static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
+                                uint64_t end_offset)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowExtension ext;
+    uint64_t offset;
+
+#ifdef DEBUG_EXT
+    printf("qcow_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
+#endif
+    offset = start_offset;
+    while (offset < end_offset) {
+
+#ifdef DEBUG_EXT
+        /* Sanity check */
+        if (offset > s->cluster_size)
+            printf("qcow_handle_extension: suspicious offset %lu\n", offset);
+
+        printf("attemting to read extended header in offset %lu\n", offset);
+#endif
+
+        if (bdrv_pread(s->hd, offset, &ext, sizeof(ext)) != sizeof(ext)) {
+            fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n",
+                    (unsigned long long)offset);
+            return 1;
+        }
+        be32_to_cpus(&ext.magic);
+        be32_to_cpus(&ext.len);
+        offset += sizeof(ext);
+#ifdef DEBUG_EXT
+        printf("ext.magic = 0x%x\n", ext.magic);
+#endif
+        switch (ext.magic) {
+        case QCOW_EXT_MAGIC_END:
+            return 0;
+
+        case QCOW_EXT_MAGIC_BACKING_FORMAT:
+            if (ext.len >= sizeof(bs->backing_format)) {
+                fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
+                        " (>=%zu)\n",
+                        ext.len, sizeof(bs->backing_format));
+                return 2;
+            }
+            if (bdrv_pread(s->hd, offset , bs->backing_format,
+                           ext.len) != ext.len)
+                return 3;
+            bs->backing_format[ext.len] = '\0';
+#ifdef DEBUG_EXT
+            printf("Qcow2: Got format extension %s\n", bs->backing_format);
+#endif
+            offset += ((ext.len + 7) & ~7);
+            break;
+
+        default:
+            /* unknown magic -- just skip it */
+            offset += ((ext.len + 7) & ~7);
+            break;
+        }
+    }
+
+    return 0;
+}
+
+
 static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVQcowState *s = bs->opaque;
     int len, i, shift, ret;
     QCowHeader header;
+    uint64_t ext_end;
 
     /* Performance is terrible right now with cache=writethrough due mainly
      * to reference count updates.  If the user does not explicitly specify
@@ -253,8 +334,6 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
         goto fail;
     s->l1_table_offset = header.l1_table_offset;
     s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
-    if (!s->l1_table)
-        goto fail;
     if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
         s->l1_size * sizeof(uint64_t))
         goto fail;
@@ -263,21 +342,23 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
     }
     /* alloc L2 cache */
     s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    if (!s->l2_cache)
-        goto fail;
     s->cluster_cache = qemu_malloc(s->cluster_size);
-    if (!s->cluster_cache)
-        goto fail;
     /* one more sector for decompressed data alignment */
     s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
                                   + 512);
-    if (!s->cluster_data)
-        goto fail;
     s->cluster_cache_offset = -1;
 
     if (refcount_init(bs) < 0)
         goto fail;
 
+    /* read qcow2 extensions */
+    if (header.backing_file_offset)
+        ext_end = header.backing_file_offset;
+    else
+        ext_end = s->cluster_size;
+    if (qcow_read_extensions(bs, sizeof(header), ext_end))
+        goto fail;
+
     /* read the backing file name */
     if (header.backing_file_offset != 0) {
         len = header.backing_file_size;
@@ -451,8 +532,6 @@ static int grow_l1_table(BlockDriverState *bs, int min_size)
 
     new_l1_size2 = sizeof(uint64_t) * new_l1_size;
     new_l1_table = qemu_mallocz(new_l1_size2);
-    if (!new_l1_table)
-        return -ENOMEM;
     memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
 
     /* write new table (align to cluster) */
@@ -615,7 +694,7 @@ static int size_to_clusters(BDRVQcowState *s, int64_t size)
 }
 
 static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
-        uint64_t *l2_table, uint64_t mask)
+        uint64_t *l2_table, uint64_t start, uint64_t mask)
 {
     int i;
     uint64_t offset = be64_to_cpu(l2_table[0]) & ~mask;
@@ -623,11 +702,11 @@ static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
     if (!offset)
         return 0;
 
-    for (i = 0; i < nb_clusters; i++)
+    for (i = start; i < start + nb_clusters; i++)
         if (offset + i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask))
             break;
 
-       return i;
+       return (i - start);
 }
 
 static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
@@ -680,6 +759,10 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
 
     nb_available = (nb_available >> 9) + index_in_cluster;
 
+    if (nb_needed > nb_available) {
+        nb_needed = nb_available;
+    }
+
     cluster_offset = 0;
 
     /* seek the the l2 offset in the l1 table */
@@ -714,7 +797,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
     } else {
         /* how many allocated clusters ? */
         c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], QCOW_OFLAG_COPIED);
+                &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
     }
 
    nb_available = (c * s->cluster_sectors);
@@ -885,8 +968,7 @@ static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
     if (m->nb_clusters == 0)
         return 0;
 
-    if (!(old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t))))
-        return -ENOMEM;
+    old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t));
 
     /* copy content of unmodified sectors */
     start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
@@ -923,7 +1005,7 @@ static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
         goto err;
 
     for (i = 0; i < j; i++)
-        free_any_clusters(bs, old_cluster[i], 1);
+        free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1);
 
     ret = 0;
 err:
@@ -968,7 +1050,7 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
 
     if (cluster_offset & QCOW_OFLAG_COPIED) {
         nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], 0);
+                &l2_table[l2_index], 0, 0);
 
         cluster_offset &= ~QCOW_OFLAG_COPIED;
         m->nb_clusters = 0;
@@ -985,7 +1067,7 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
 
     while (i < nb_clusters) {
         i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
-                &l2_table[l2_index + i], 0);
+                &l2_table[l2_index], i, 0);
 
         if(be64_to_cpu(l2_table[l2_index + i]))
             break;
@@ -1180,12 +1262,16 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
 typedef struct QCowAIOCB {
     BlockDriverAIOCB common;
     int64_t sector_num;
+    QEMUIOVector *qiov;
     uint8_t *buf;
+    void *orig_buf;
     int nb_sectors;
     int n;
     uint64_t cluster_offset;
     uint8_t *cluster_data;
     BlockDriverAIOCB *hd_aiocb;
+    struct iovec hd_iov;
+    QEMUIOVector hd_qiov;
     QEMUBH *bh;
     QCowL2Meta l2meta;
 } QCowAIOCB;
@@ -1221,12 +1307,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     int index_in_cluster, n1;
 
     acb->hd_aiocb = NULL;
-    if (ret < 0) {
-fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     /* post process the read buffer */
     if (!acb->cluster_offset) {
@@ -1247,9 +1329,8 @@ fail:
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     /* prepare next AIO request */
@@ -1263,47 +1344,64 @@ fail:
             n1 = backing_read1(bs->backing_hd, acb->sector_num,
                                acb->buf, acb->n);
             if (n1 > 0) {
-                acb->hd_aiocb = bdrv_aio_read(bs->backing_hd, acb->sector_num,
-                                    acb->buf, acb->n, qcow_aio_read_cb, acb);
+                acb->hd_iov.iov_base = (void *)acb->buf;
+                acb->hd_iov.iov_len = acb->n * 512;
+                qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+                acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
+                                    &acb->hd_qiov, acb->n,
+                                   qcow_aio_read_cb, acb);
                 if (acb->hd_aiocb == NULL)
-                    goto fail;
+                    goto done;
             } else {
                 ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
                 if (ret < 0)
-                    goto fail;
+                    goto done;
             }
         } else {
             /* Note: in this case, no need to wait */
             memset(acb->buf, 0, 512 * acb->n);
             ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
             if (ret < 0)
-                goto fail;
+                goto done;
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         if (decompress_cluster(s, acb->cluster_offset) < 0)
-            goto fail;
+            goto done;
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
         ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
         if (ret < 0)
-            goto fail;
+            goto done;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
             ret = -EIO;
-            goto fail;
+            goto done;
         }
-        acb->hd_aiocb = bdrv_aio_read(s->hd,
+
+        acb->hd_iov.iov_base = (void *)acb->buf;
+        acb->hd_iov.iov_len = acb->n * 512;
+        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+        acb->hd_aiocb = bdrv_aio_readv(s->hd,
                             (acb->cluster_offset >> 9) + index_in_cluster,
-                            acb->buf, acb->n, qcow_aio_read_cb, acb);
+                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
         if (acb->hd_aiocb == NULL)
-            goto fail;
+            goto done;
     }
+
+    return;
+done:
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
+    }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
 {
     QCowAIOCB *acb;
 
@@ -1312,7 +1410,14 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
         return NULL;
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
-    acb->buf = buf;
+    acb->qiov = qiov;
+    if (qiov->niov > 1) {
+        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
+        if (is_write)
+            qemu_iovec_to_buffer(qiov, acb->buf);
+    } else {
+        acb->buf = (uint8_t *)qiov->iov->iov_base;
+    }
     acb->nb_sectors = nb_sectors;
     acb->n = 0;
     acb->cluster_offset = 0;
@@ -1320,13 +1425,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
     return acb;
 }
 
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     QCowAIOCB *acb;
 
-    acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
     if (!acb)
         return NULL;
 
@@ -1345,16 +1450,12 @@ static void qcow_aio_write_cb(void *opaque, int ret)
 
     acb->hd_aiocb = NULL;
 
-    if (ret < 0) {
-    fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
         free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
-        goto fail;
+        goto done;
     }
 
     acb->nb_sectors -= acb->n;
@@ -1363,9 +1464,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -1379,16 +1479,12 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                           n_end, &acb->n, &acb->l2meta);
     if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
         ret = -EIO;
-        goto fail;
+        goto done;
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
             acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS *
                                              s->cluster_size);
-            if (!acb->cluster_data) {
-                ret = -ENOMEM;
-                goto fail;
-            }
         }
         encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
                         acb->n, 1, &s->aes_encrypt_key);
@@ -1396,16 +1492,27 @@ static void qcow_aio_write_cb(void *opaque, int ret)
     } else {
         src_buf = acb->buf;
     }
-    acb->hd_aiocb = bdrv_aio_write(s->hd,
-                                   (acb->cluster_offset >> 9) + index_in_cluster,
-                                   src_buf, acb->n,
-                                   qcow_aio_write_cb, acb);
+    acb->hd_iov.iov_base = (void *)src_buf;
+    acb->hd_iov.iov_len = acb->n * 512;
+    qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+    acb->hd_aiocb = bdrv_aio_writev(s->hd,
+                                    (acb->cluster_offset >> 9) + index_in_cluster,
+                                    &acb->hd_qiov, acb->n,
+                                    qcow_aio_write_cb, acb);
     if (acb->hd_aiocb == NULL)
-        goto fail;
+        goto done;
+
+    return;
+
+done:
+    if (acb->qiov->niov > 1)
+        qemu_vfree(acb->orig_buf);
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1413,7 +1520,7 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
-    acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
     if (!acb)
         return NULL;
 
@@ -1469,13 +1576,18 @@ static void create_refcount_update(QCowCreateState *s,
     }
 }
 
-static int qcow_create(const char *filename, int64_t total_size,
-                      const char *backing_file, int flags)
+static int qcow_create2(const char *filename, int64_t total_size,
+                        const char *backing_file, const char *backing_format,
+                        int flags)
 {
+
     int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
+    int ref_clusters, backing_format_len = 0;
     QCowHeader header;
     uint64_t tmp, offset;
     QCowCreateState s1, *s = &s1;
+    QCowExtension ext_bf = {0, 0};
+
 
     memset(s, 0, sizeof(*s));
 
@@ -1489,6 +1601,12 @@ static int qcow_create(const char *filename, int64_t total_size,
     header_size = sizeof(header);
     backing_filename_len = 0;
     if (backing_file) {
+        if (backing_format) {
+            ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT;
+            backing_format_len = strlen(backing_format);
+            ext_bf.len = (backing_format_len + 7) & ~7;
+            header_size += ((sizeof(ext_bf) + ext_bf.len + 7) & ~7);
+        }
         header.backing_file_offset = cpu_to_be64(header_size);
         backing_filename_len = strlen(backing_file);
         header.backing_file_size = cpu_to_be32(backing_filename_len);
@@ -1513,30 +1631,45 @@ static int qcow_create(const char *filename, int64_t total_size,
     offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
 
     s->refcount_table = qemu_mallocz(s->cluster_size);
-    if (!s->refcount_table)
-        goto fail;
-    s->refcount_block = qemu_mallocz(s->cluster_size);
-    if (!s->refcount_block)
-        goto fail;
 
     s->refcount_table_offset = offset;
     header.refcount_table_offset = cpu_to_be64(offset);
     header.refcount_table_clusters = cpu_to_be32(1);
     offset += s->cluster_size;
-
-    s->refcount_table[0] = cpu_to_be64(offset);
     s->refcount_block_offset = offset;
-    offset += s->cluster_size;
+
+    /* count how many refcount blocks needed */
+    tmp = offset >> s->cluster_bits;
+    ref_clusters = (tmp >> (s->cluster_bits - REFCOUNT_SHIFT)) + 1;
+    for (i=0; i < ref_clusters; i++) {
+        s->refcount_table[i] = cpu_to_be64(offset);
+        offset += s->cluster_size;
+    }
+
+    s->refcount_block = qemu_mallocz(ref_clusters * s->cluster_size);
 
     /* update refcounts */
     create_refcount_update(s, 0, header_size);
     create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t));
     create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
-    create_refcount_update(s, s->refcount_block_offset, s->cluster_size);
+    create_refcount_update(s, s->refcount_block_offset, ref_clusters * s->cluster_size);
 
     /* write all the data */
     write(fd, &header, sizeof(header));
     if (backing_file) {
+        if (backing_format_len) {
+            char zero[16];
+            int d = ext_bf.len - backing_format_len;
+
+            memset(zero, 0, sizeof(zero));
+            cpu_to_be32s(&ext_bf.magic);
+            cpu_to_be32s(&ext_bf.len);
+            write(fd, &ext_bf, sizeof(ext_bf));
+            write(fd, backing_format, backing_format_len);
+            if (d>0) {
+                write(fd, zero, d);
+            }
+        }
         write(fd, backing_file, backing_filename_len);
     }
     lseek(fd, s->l1_table_offset, SEEK_SET);
@@ -1548,17 +1681,18 @@ static int qcow_create(const char *filename, int64_t total_size,
     write(fd, s->refcount_table, s->cluster_size);
 
     lseek(fd, s->refcount_block_offset, SEEK_SET);
-    write(fd, s->refcount_block, s->cluster_size);
+    write(fd, s->refcount_block, ref_clusters * s->cluster_size);
 
     qemu_free(s->refcount_table);
     qemu_free(s->refcount_block);
     close(fd);
     return 0;
- fail:
-    qemu_free(s->refcount_table);
-    qemu_free(s->refcount_block);
-    close(fd);
-    return -ENOMEM;
+}
+
+static int qcow_create(const char *filename, int64_t total_size,
+                       const char *backing_file, int flags)
+{
+    return qcow_create2(filename, total_size, backing_file, NULL, flags);
 }
 
 static int qcow_make_empty(BlockDriverState *bs)
@@ -1605,8 +1739,6 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
         return -EINVAL;
 
     out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-    if (!out_buf)
-        return -ENOMEM;
 
     /* best compression, small window, no zlib header */
     memset(&strm, 0, sizeof(strm));
@@ -1689,8 +1821,6 @@ static int update_snapshot_refcount(BlockDriverState *bs,
     l1_allocated = 0;
     if (l1_table_offset != s->l1_table_offset) {
         l1_table = qemu_malloc(l1_size2);
-        if (!l1_table)
-            goto fail;
         l1_allocated = 1;
         if (bdrv_pread(s->hd, l1_table_offset,
                        l1_table, l1_size2) != l1_size2)
@@ -1705,8 +1835,6 @@ static int update_snapshot_refcount(BlockDriverState *bs,
 
     l2_size = s->l2_size * sizeof(uint64_t);
     l2_table = qemu_malloc(l2_size);
-    if (!l2_table)
-        goto fail;
     l1_modified = 0;
     for(i = 0; i < l1_size; i++) {
         l2_offset = l1_table[i];
@@ -1809,10 +1937,14 @@ static int qcow_read_snapshots(BlockDriverState *bs)
     int64_t offset;
     uint32_t extra_data_size;
 
+    if (!s->nb_snapshots) {
+        s->snapshots = NULL;
+        s->snapshots_size = 0;
+        return 0;
+    }
+
     offset = s->snapshots_offset;
     s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot));
-    if (!s->snapshots)
-        goto fail;
     for(i = 0; i < s->nb_snapshots; i++) {
         offset = align_offset(offset, 8);
         if (bdrv_pread(s->hd, offset, &h, sizeof(h)) != sizeof(h))
@@ -1833,16 +1965,12 @@ static int qcow_read_snapshots(BlockDriverState *bs)
         offset += extra_data_size;
 
         sn->id_str = qemu_malloc(id_str_size + 1);
-        if (!sn->id_str)
-            goto fail;
         if (bdrv_pread(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
             goto fail;
         offset += id_str_size;
         sn->id_str[id_str_size] = '\0';
 
         sn->name = qemu_malloc(name_size + 1);
-        if (!sn->name)
-            goto fail;
         if (bdrv_pread(s->hd, offset, sn->name, name_size) != name_size)
             goto fail;
         offset += name_size;
@@ -2008,8 +2136,6 @@ static int qcow_snapshot_create(BlockDriverState *bs,
     sn->l1_size = s->l1_size;
 
     l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
-    if (!l1_table)
-        goto fail;
     for(i = 0; i < s->l1_size; i++) {
         l1_table[i] = cpu_to_be64(s->l1_table[i]);
     }
@@ -2021,9 +2147,10 @@ static int qcow_snapshot_create(BlockDriverState *bs,
     l1_table = NULL;
 
     snapshots1 = qemu_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
-    if (!snapshots1)
-        goto fail;
-    memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot));
+    if (s->snapshots) {
+        memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot));
+        qemu_free(s->snapshots);
+    }
     s->snapshots = snapshots1;
     s->snapshots[s->nb_snapshots++] = *sn;
 
@@ -2126,8 +2253,6 @@ static int qcow_snapshot_list(BlockDriverState *bs,
     int i;
 
     sn_tab = qemu_mallocz(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
-    if (!sn_tab)
-        goto fail;
     for(i = 0; i < s->nb_snapshots; i++) {
         sn_info = sn_tab + i;
         sn = s->snapshots + i;
@@ -2142,10 +2267,6 @@ static int qcow_snapshot_list(BlockDriverState *bs,
     }
     *psn_tab = sn_tab;
     return s->nb_snapshots;
- fail:
-    qemu_free(sn_tab);
-    *psn_tab = NULL;
-    return -ENOMEM;
 }
 
 /*********************************************************/
@@ -2157,12 +2278,8 @@ static int refcount_init(BlockDriverState *bs)
     int ret, refcount_table_size2, i;
 
     s->refcount_block_cache = qemu_malloc(s->cluster_size);
-    if (!s->refcount_block_cache)
-        goto fail;
     refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
     s->refcount_table = qemu_malloc(refcount_table_size2);
-    if (!s->refcount_table)
-        goto fail;
     if (s->refcount_table_size > 0) {
         ret = bdrv_pread(s->hd, s->refcount_table_offset,
                          s->refcount_table, refcount_table_size2);
@@ -2326,8 +2443,6 @@ static int grow_refcount_table(BlockDriverState *bs, int min_size)
 #endif
     new_table_size2 = new_table_size * sizeof(uint64_t);
     new_table = qemu_mallocz(new_table_size2);
-    if (!new_table)
-        return -ENOMEM;
     memcpy(new_table, s->refcount_table,
            s->refcount_table_size * sizeof(uint64_t));
     for(i = 0; i < s->refcount_table_size; i++)
@@ -2447,8 +2562,14 @@ static void update_refcount(BlockDriverState *bs,
     }
 }
 
-#ifdef DEBUG_ALLOC
-static void inc_refcounts(BlockDriverState *bs,
+/*
+ * Increases the refcount for a range of clusters in a given refcount table.
+ * This is used to construct a temporary refcount table out of L1 and L2 tables
+ * which can be compared the the refcount table saved in the image.
+ *
+ * Returns the number of errors in the image that were found
+ */
+static int inc_refcounts(BlockDriverState *bs,
                           uint16_t *refcount_table,
                           int refcount_table_size,
                           int64_t offset, int64_t size)
@@ -2456,9 +2577,10 @@ static void inc_refcounts(BlockDriverState *bs,
     BDRVQcowState *s = bs->opaque;
     int64_t start, last, cluster_offset;
     int k;
+    int errors = 0;
 
     if (size <= 0)
-        return;
+        return 0;
 
     start = offset & ~(s->cluster_size - 1);
     last = (offset + size - 1) & ~(s->cluster_size - 1);
@@ -2466,15 +2588,112 @@ static void inc_refcounts(BlockDriverState *bs,
         cluster_offset += s->cluster_size) {
         k = cluster_offset >> s->cluster_bits;
         if (k < 0 || k >= refcount_table_size) {
-            printf("ERROR: invalid cluster offset=0x%llx\n", cluster_offset);
+            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
+                cluster_offset);
+            errors++;
         } else {
             if (++refcount_table[k] == 0) {
-                printf("ERROR: overflow cluster offset=0x%llx\n", cluster_offset);
+                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
+                    "\n", cluster_offset);
+                errors++;
+            }
+        }
+    }
+
+    return errors;
+}
+
+/*
+ * Increases the refcount in the given refcount table for the all clusters
+ * referenced in the L2 table. While doing so, performs some checks on L2
+ * entries.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
+static int check_refcounts_l2(BlockDriverState *bs,
+    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
+    int check_copied)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l2_table, offset;
+    int i, l2_size, nb_csectors, refcount;
+    int errors = 0;
+
+    /* Read L2 table from disk */
+    l2_size = s->l2_size * sizeof(uint64_t);
+    l2_table = qemu_malloc(l2_size);
+
+    if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
+        goto fail;
+
+    /* Do the actual checks */
+    for(i = 0; i < s->l2_size; i++) {
+        offset = be64_to_cpu(l2_table[i]);
+        if (offset != 0) {
+            if (offset & QCOW_OFLAG_COMPRESSED) {
+                /* Compressed clusters don't have QCOW_OFLAG_COPIED */
+                if (offset & QCOW_OFLAG_COPIED) {
+                    fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+                        "copied flag must never be set for compressed "
+                        "clusters\n", offset >> s->cluster_bits);
+                    offset &= ~QCOW_OFLAG_COPIED;
+                    errors++;
+                }
+
+                /* Mark cluster as used */
+                nb_csectors = ((offset >> s->csize_shift) &
+                               s->csize_mask) + 1;
+                offset &= s->cluster_offset_mask;
+                errors += inc_refcounts(bs, refcount_table,
+                              refcount_table_size,
+                              offset & ~511, nb_csectors * 512);
+            } else {
+                /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+                if (check_copied) {
+                    uint64_t entry = offset;
+                    offset &= ~QCOW_OFLAG_COPIED;
+                    refcount = get_refcount(bs, offset >> s->cluster_bits);
+                    if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
+                        fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
+                            PRIx64 " refcount=%d\n", entry, refcount);
+                        errors++;
+                    }
+                }
+
+                /* Mark cluster as used */
+                offset &= ~QCOW_OFLAG_COPIED;
+                errors += inc_refcounts(bs, refcount_table,
+                              refcount_table_size,
+                              offset, s->cluster_size);
+
+                /* Correct offsets are cluster aligned */
+                if (offset & (s->cluster_size - 1)) {
+                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
+                        "properly aligned; L2 entry corrupted.\n", offset);
+                    errors++;
+                }
             }
         }
     }
+
+    qemu_free(l2_table);
+    return errors;
+
+fail:
+    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+    qemu_free(l2_table);
+    return -EIO;
 }
 
+/*
+ * Increases the refcount for the L1 table, its L2 tables and all referenced
+ * clusters in the given refcount table. While doing so, performs some checks
+ * on L1 and L2 entries.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
 static int check_refcounts_l1(BlockDriverState *bs,
                               uint16_t *refcount_table,
                               int refcount_table_size,
@@ -2482,105 +2701,101 @@ static int check_refcounts_l1(BlockDriverState *bs,
                               int check_copied)
 {
     BDRVQcowState *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
-    int l2_size, i, j, nb_csectors, refcount;
+    uint64_t *l1_table, l2_offset, l1_size2;
+    int i, refcount, ret;
+    int errors = 0;
 
-    l2_table = NULL;
     l1_size2 = l1_size * sizeof(uint64_t);
 
-    inc_refcounts(bs, refcount_table, refcount_table_size,
+    /* Mark L1 table as used */
+    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
                   l1_table_offset, l1_size2);
 
+    /* Read L1 table entries from disk */
     l1_table = qemu_malloc(l1_size2);
-    if (!l1_table)
-        goto fail;
     if (bdrv_pread(s->hd, l1_table_offset,
                    l1_table, l1_size2) != l1_size2)
         goto fail;
     for(i = 0;i < l1_size; i++)
         be64_to_cpus(&l1_table[i]);
 
-    l2_size = s->l2_size * sizeof(uint64_t);
-    l2_table = qemu_malloc(l2_size);
-    if (!l2_table)
-        goto fail;
+    /* Do the actual checks */
     for(i = 0; i < l1_size; i++) {
         l2_offset = l1_table[i];
         if (l2_offset) {
+            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
             if (check_copied) {
-                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits);
+                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
+                    >> s->cluster_bits);
                 if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
-                    printf("ERROR OFLAG_COPIED: l2_offset=%llx refcount=%d\n",
-                           l2_offset, refcount);
+                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
+                        " refcount=%d\n", l2_offset, refcount);
+                    errors++;
                 }
             }
+
+            /* Mark L2 table as used */
             l2_offset &= ~QCOW_OFLAG_COPIED;
-            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
-                goto fail;
-            for(j = 0; j < s->l2_size; j++) {
-                offset = be64_to_cpu(l2_table[j]);
-                if (offset != 0) {
-                    if (offset & QCOW_OFLAG_COMPRESSED) {
-                        if (offset & QCOW_OFLAG_COPIED) {
-                            printf("ERROR: cluster %lld: copied flag must never be set for compressed clusters\n",
-                                   offset >> s->cluster_bits);
-                            offset &= ~QCOW_OFLAG_COPIED;
-                        }
-                        nb_csectors = ((offset >> s->csize_shift) &
-                                       s->csize_mask) + 1;
-                        offset &= s->cluster_offset_mask;
-                        inc_refcounts(bs, refcount_table,
-                                      refcount_table_size,
-                                      offset & ~511, nb_csectors * 512);
-                    } else {
-                        if (check_copied) {
-                            refcount = get_refcount(bs, (offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits);
-                            if ((refcount == 1) != ((offset & QCOW_OFLAG_COPIED) != 0)) {
-                                printf("ERROR OFLAG_COPIED: offset=%llx refcount=%d\n",
-                                       offset, refcount);
-                            }
-                        }
-                        offset &= ~QCOW_OFLAG_COPIED;
-                        inc_refcounts(bs, refcount_table,
-                                      refcount_table_size,
-                                      offset, s->cluster_size);
-                    }
-                }
-            }
-            inc_refcounts(bs, refcount_table,
+            errors += inc_refcounts(bs, refcount_table,
                           refcount_table_size,
                           l2_offset,
                           s->cluster_size);
+
+            /* L2 tables are cluster aligned */
+            if (l2_offset & (s->cluster_size - 1)) {
+                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
+                    "cluster aligned; L1 entry corrupted\n", l2_offset);
+                errors++;
+            }
+
+            /* Process and check L2 entries */
+            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
+                l2_offset, check_copied);
+            if (ret < 0) {
+                goto fail;
+            }
+            errors += ret;
         }
     }
     qemu_free(l1_table);
-    qemu_free(l2_table);
-    return 0;
- fail:
-    printf("ERROR: I/O error in check_refcounts_l1\n");
+    return errors;
+
+fail:
+    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
     qemu_free(l1_table);
-    qemu_free(l2_table);
     return -EIO;
 }
 
-static void check_refcounts(BlockDriverState *bs)
+/*
+ * Checks an image for refcount consistency.
+ *
+ * Returns 0 if no errors are found, the number of errors in case the image is
+ * detected as corrupted, and -errno when an internal error occured.
+ */
+static int check_refcounts(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     int64_t size;
     int nb_clusters, refcount1, refcount2, i;
     QCowSnapshot *sn;
     uint16_t *refcount_table;
+    int ret, errors = 0;
 
     size = bdrv_getlength(s->hd);
     nb_clusters = size_to_clusters(s, size);
     refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
 
     /* header */
-    inc_refcounts(bs, refcount_table, nb_clusters,
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   0, s->cluster_size);
 
-    check_refcounts_l1(bs, refcount_table, nb_clusters,
+    /* current L1 table */
+    ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
                        s->l1_table_offset, s->l1_size, 1);
+    if (ret < 0) {
+        return ret;
+    }
+    errors += ret;
 
     /* snapshots */
     for(i = 0; i < s->nb_snapshots; i++) {
@@ -2588,18 +2803,18 @@ static void check_refcounts(BlockDriverState *bs)
         check_refcounts_l1(bs, refcount_table, nb_clusters,
                            sn->l1_table_offset, sn->l1_size, 0);
     }
-    inc_refcounts(bs, refcount_table, nb_clusters,
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   s->snapshots_offset, s->snapshots_size);
 
     /* refcount data */
-    inc_refcounts(bs, refcount_table, nb_clusters,
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   s->refcount_table_offset,
                   s->refcount_table_size * sizeof(uint64_t));
     for(i = 0; i < s->refcount_table_size; i++) {
         int64_t offset;
         offset = s->refcount_table[i];
         if (offset != 0) {
-            inc_refcounts(bs, refcount_table, nb_clusters,
+            errors += inc_refcounts(bs, refcount_table, nb_clusters,
                           offset, s->cluster_size);
         }
     }
@@ -2608,12 +2823,21 @@ static void check_refcounts(BlockDriverState *bs)
     for(i = 0; i < nb_clusters; i++) {
         refcount1 = get_refcount(bs, i);
         refcount2 = refcount_table[i];
-        if (refcount1 != refcount2)
-            printf("ERROR cluster %d refcount=%d reference=%d\n",
+        if (refcount1 != refcount2) {
+            fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
                    i, refcount1, refcount2);
+            errors++;
+        }
     }
 
     qemu_free(refcount_table);
+
+    return errors;
+}
+
+static int qcow_check(BlockDriverState *bs)
+{
+    return check_refcounts(bs);
 }
 
 #if 0
@@ -2635,31 +2859,59 @@ static void dump_refcounts(BlockDriverState *bs)
     }
 }
 #endif
-#endif
+
+static int qcow_put_buffer(BlockDriverState *bs, const uint8_t *buf,
+                           int64_t pos, int size)
+{
+    int growable = bs->growable;
+
+    bs->growable = 1;
+    bdrv_pwrite(bs, pos, buf, size);
+    bs->growable = growable;
+
+    return size;
+}
+
+static int qcow_get_buffer(BlockDriverState *bs, uint8_t *buf,
+                           int64_t pos, int size)
+{
+    int growable = bs->growable;
+    int ret;
+
+    bs->growable = 1;
+    ret = bdrv_pread(bs, pos, buf, size);
+    bs->growable = growable;
+
+    return ret;
+}
 
 BlockDriver bdrv_qcow2 = {
-    "qcow2",
-    sizeof(BDRVQcowState),
-    qcow_probe,
-    qcow_open,
-    NULL,
-    NULL,
-    qcow_close,
-    qcow_create,
-    qcow_flush,
-    qcow_is_allocated,
-    qcow_set_key,
-    qcow_make_empty,
-
-    .bdrv_aio_read = qcow_aio_read,
-    .bdrv_aio_write = qcow_aio_write,
-    .bdrv_aio_cancel = qcow_aio_cancel,
-    .aiocb_size = sizeof(QCowAIOCB),
+    .format_name       = "qcow2",
+    .instance_size     = sizeof(BDRVQcowState),
+    .bdrv_probe                = qcow_probe,
+    .bdrv_open         = qcow_open,
+    .bdrv_close                = qcow_close,
+    .bdrv_create       = qcow_create,
+    .bdrv_flush                = qcow_flush,
+    .bdrv_is_allocated = qcow_is_allocated,
+    .bdrv_set_key      = qcow_set_key,
+    .bdrv_make_empty   = qcow_make_empty,
+
+    .bdrv_aio_readv    = qcow_aio_readv,
+    .bdrv_aio_writev   = qcow_aio_writev,
+    .bdrv_aio_cancel   = qcow_aio_cancel,
+    .aiocb_size                = sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
 
     .bdrv_snapshot_create = qcow_snapshot_create,
-    .bdrv_snapshot_goto = qcow_snapshot_goto,
+    .bdrv_snapshot_goto        = qcow_snapshot_goto,
     .bdrv_snapshot_delete = qcow_snapshot_delete,
-    .bdrv_snapshot_list = qcow_snapshot_list,
-    .bdrv_get_info = qcow_get_info,
+    .bdrv_snapshot_list        = qcow_snapshot_list,
+    .bdrv_get_info     = qcow_get_info,
+
+    .bdrv_put_buffer    = qcow_put_buffer,
+    .bdrv_get_buffer    = qcow_get_buffer,
+
+    .bdrv_create2 = qcow_create2,
+    .bdrv_check = qcow_check,
 };