2 * QEMU posix-aio emulation
4 * Copyright IBM, Corp. 2008
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include <sys/ioctl.h>
23 #include "qemu-common.h"
25 #include "posix-aio-compat.h"
/*
 * File-scope state used by the visible code: a mutex and condition variable
 * (waited on together in aio_thread), the pthread handle and attributes
 * passed to thread_create(), worker-thread counters, and the request queue.
 * NOTE(review): which of these are protected by `lock` cannot be confirmed
 * from this excerpt, because the locking calls are not all visible.
 */
27 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
28 static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
/* Passed by address to thread_create() from spawn_thread(). */
29 static pthread_t thread_id;
/* Initialised and set to PTHREAD_CREATE_DETACHED in qemu_paio_init(). */
30 static pthread_attr_t attr;
/* Upper bound that cur_threads is compared against in qemu_paio_submit(). */
31 static int max_threads = 64;
32 static int cur_threads = 0;
33 static int idle_threads = 0;
/* Requests are appended at the tail on submit and taken from the head by aio_thread(). */
34 static TAILQ_HEAD(, qemu_paiocb) request_list;
/*
 * Report a failed call by printing "<what> failed: <strerror(err)>" to stderr.
 * err:  errno-style error code (the pthread wrappers pass the call's return value).
 * what: name of the operation that failed.
 * NOTE(review): the remainder of the body is not visible in this excerpt;
 * presumably it terminates the process -- confirm.
 */
36 static void die2(int err, const char *what)
38 fprintf(stderr, "%s failed: %s\n", what, strerror(err));
/*
 * Failure reporter taking only a description string; aio_thread() calls it
 * after sigfillset/sigprocmask/kill fail.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * forwards the current errno to die2() -- confirm.
 */
42 static void die(const char *what)
/*
 * Lock `mutex` with pthread_mutex_lock(); a non-zero return value is handed
 * to die2() together with the name of the failing call.
 */
47 static void mutex_lock(pthread_mutex_t *mutex)
49 int ret = pthread_mutex_lock(mutex);
50 if (ret) die2(ret, "pthread_mutex_lock");
/*
 * Unlock `mutex` with pthread_mutex_unlock(); a non-zero return value is
 * handed to die2() together with the name of the failing call.
 */
53 static void mutex_unlock(pthread_mutex_t *mutex)
55 int ret = pthread_mutex_unlock(mutex);
56 if (ret) die2(ret, "pthread_mutex_unlock");
/*
 * Wrapper around pthread_cond_timedwait() on `cond` / `mutex`.
 * The timeout argument `ts` is declared on a line not visible in this excerpt.
 * ETIMEDOUT is treated as a normal outcome; any other non-zero result is
 * passed to die2().
 * NOTE(review): the return statement is not visible; aio_thread() compares
 * the result with ETIMEDOUT, so it presumably returns `ret` -- confirm.
 */
59 static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
62 int ret = pthread_cond_timedwait(cond, mutex, ts);
63 if (ret && ret != ETIMEDOUT) die2(ret, "pthread_cond_timedwait");
/*
 * Signal `cond` with pthread_cond_signal(); a non-zero return value is
 * handed to die2() together with the name of the failing call.
 */
67 static void cond_signal(pthread_cond_t *cond)
69 int ret = pthread_cond_signal(cond);
70 if (ret) die2(ret, "pthread_cond_signal");
/*
 * Create a thread with pthread_create(), forwarding all four arguments
 * unchanged; a non-zero return value is handed to die2().
 * thread:        receives the new thread's handle.
 * attr:          creation attributes.
 * start_routine: thread entry point.
 * arg:           opaque argument passed to start_routine.
 */
73 static void thread_create(pthread_t *thread, pthread_attr_t *attr,
74 void *(*start_routine)(void*), void *arg)
76 int ret = pthread_create(thread, attr, start_routine, arg);
77 if (ret) die2(ret, "pthread_create");
/*
 * Execute an ioctl request: calls ioctl() on aiocb->aio_fildes with the
 * command and buffer stored in aio_ioctl_cmd / aio_ioctl_buf.
 * NOTE(review): the function is declared to return size_t while ioctl()
 * reports failure as -1; how `ret` is declared and converted into the
 * return value is not visible in this excerpt -- confirm.
 */
80 static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
84 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
91 * Check if we need to copy the data in the aiocb into a new
92 * properly aligned buffer.
/*
 * Inspects the request's flags and the base address of each iovec segment.
 * NOTE(review): the values returned from each branch are not visible in this
 * excerpt; handle_aiocb_rw() treats a zero result as "no copy needed".
 */
94 static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
/* Alignment is only examined for requests flagged QEMU_AIO_SECTOR_ALIGNED. */
96 if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
99 for (i = 0; i < aiocb->aio_niov; i++)
/* Tests whether the segment's base address is not a multiple of 512. */
100 if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
/*
 * Transfer a request through one contiguous buffer `buf`.
 * Repeats pwrite() (for QEMU_PAIO_WRITE) or pread() until `offset` reaches
 * aiocb->aio_nbytes; each call covers the remaining byte count and targets
 * file position aio_offset + offset.
 * NOTE(review): the declarations, the bodies of the error branches, the
 * offset update and the return statement are not visible in this excerpt.
 */
107 static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
112 while (offset < aiocb->aio_nbytes) {
113 if (aiocb->aio_type == QEMU_PAIO_WRITE)
114 len = pwrite(aiocb->aio_fildes,
115 (const char *)buf + offset,
116 aiocb->aio_nbytes - offset,
117 aiocb->aio_offset + offset);
119 len = pread(aiocb->aio_fildes,
121 aiocb->aio_nbytes - offset,
122 aiocb->aio_offset + offset);
/* An interrupted call (EINTR) is distinguished from other failures. */
124 if (len == -1 && errno == EINTR)
126 else if (len == -1) {
/*
 * Execute a read or write request described by an iovec array.
 * A single segment that needs no copy goes straight to
 * handle_aiocb_rw_linear(); otherwise the data passes through a temporary
 * buffer from qemu_memalign(512, aio_nbytes): gathered from the segments
 * before a write, scattered back to the segments after a non-write request.
 * NOTE(review): release of the temporary buffer and the return statement
 * are not visible in this excerpt.
 */
138 static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
143 if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) {
145 * If there is just a single buffer, and it is properly aligned
146 * we can just use plain pread/pwrite without any problems.
148 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
152 * Ok, we have to do it the hard way, copy all segments into
153 * a single aligned buffer.
155 buf = qemu_memalign(512, aiocb->aio_nbytes);
156 if (aiocb->aio_type == QEMU_PAIO_WRITE) {
/* Gather: append every segment to the temporary buffer in order. */
160 for (i = 0; i < aiocb->aio_niov; ++i) {
161 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
162 p += aiocb->aio_iov[i].iov_len;
166 nbytes = handle_aiocb_rw_linear(aiocb, buf);
167 if (aiocb->aio_type != QEMU_PAIO_WRITE) {
169 size_t count = aiocb->aio_nbytes, copy;
/* Scatter: stop once `count` reaches zero or the segments run out. */
172 for (i = 0; i < aiocb->aio_niov && count; ++i) {
/* Never copy more than the current segment can hold. */
174 if (copy > aiocb->aio_iov[i].iov_len)
175 copy = aiocb->aio_iov[i].iov_len;
176 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
/*
 * Worker thread entry point (started via thread_create() in spawn_thread()).
 * Blocks all signals, then takes requests from the head of request_list,
 * dispatches them by aio_type and sends the request's ev_signo signal
 * with kill().
 * NOTE(review): the loop construct, locking, counter updates, storing of
 * the result and the origin of `pid` are not visible in this excerpt.
 */
186 static void *aio_thread(void *unused)
193 /* block all signals */
194 if (sigfillset(&set)) die("sigfillset");
/*
 * NOTE(review): POSIX leaves sigprocmask() unspecified in a multi-threaded
 * process; pthread_sigmask() is the per-thread interface -- consider it.
 */
195 if (sigprocmask(SIG_BLOCK, &set, NULL)) die("sigprocmask");
198 struct qemu_paiocb *aiocb;
/* Wait deadline: current time plus 10 seconds. */
203 qemu_gettimeofday(&tv);
204 ts.tv_sec = tv.tv_sec + 10;
/* Wait until a request is queued or the wait reports ETIMEDOUT. */
209 while (TAILQ_EMPTY(&request_list) &&
210 !(ret == ETIMEDOUT)) {
211 ret = cond_timedwait(&cond, &lock, &ts);
/* Still nothing queued after waiting; the action taken is not visible here. */
214 if (TAILQ_EMPTY(&request_list))
217 aiocb = TAILQ_FIRST(&request_list);
218 TAILQ_REMOVE(&request_list, aiocb, node);
223 switch (aiocb->aio_type) {
225 case QEMU_PAIO_WRITE:
226 ret = handle_aiocb_rw(aiocb);
228 case QEMU_PAIO_IOCTL:
229 ret = handle_aiocb_ioctl(aiocb);
232 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
/* Send the request's ev_signo to process `pid`. */
242 if (kill(pid, aiocb->ev_signo)) die("kill failed");
/*
 * Start one more worker running aio_thread(), using the file-scope `attr`
 * and storing the handle in the file-scope `thread_id`.
 * NOTE(review): any counter updates in this function are not visible in
 * this excerpt.
 */
252 static void spawn_thread(void)
256 thread_create(&thread_id, &attr, aio_thread, NULL);
/*
 * One-time setup: initialises the shared thread attributes, marks them
 * detached, and initialises the empty request queue. pthread failures go
 * to die2().
 * aioinit: not referenced by any line visible in this excerpt.
 * NOTE(review): the return statement is not visible here.
 */
259 int qemu_paio_init(struct qemu_paioinit *aioinit)
263 ret = pthread_attr_init(&attr);
264 if (ret) die2(ret, "pthread_attr_init");
266 ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
267 if (ret) die2(ret, "pthread_attr_setdetachstate");
269 TAILQ_INIT(&request_list);
/*
 * Queue a request: records the request type, marks the result as
 * -EINPROGRESS and appends the request to the tail of request_list.
 * aiocb: request to queue.
 * type:  value stored in aiocb->aio_type (the public wrappers pass
 *        QEMU_PAIO_READ / QEMU_PAIO_WRITE / QEMU_PAIO_IOCTL).
 * NOTE(review): locking, the statement guarded by the thread-count check
 * (presumably spawn_thread()), any wake-up of a worker and the return value
 * are not visible in this excerpt.
 */
274 static int qemu_paio_submit(struct qemu_paiocb *aiocb, int type)
276 aiocb->aio_type = type;
277 aiocb->ret = -EINPROGRESS;
/* True when no worker is idle and the thread limit has not been reached. */
280 if (idle_threads == 0 && cur_threads < max_threads)
282 TAILQ_INSERT_TAIL(&request_list, aiocb, node);
/* Submit `aiocb` as a read request; returns whatever qemu_paio_submit() returns. */
289 int qemu_paio_read(struct qemu_paiocb *aiocb)
291 return qemu_paio_submit(aiocb, QEMU_PAIO_READ);
/* Submit `aiocb` as a write request; returns whatever qemu_paio_submit() returns. */
294 int qemu_paio_write(struct qemu_paiocb *aiocb)
296 return qemu_paio_submit(aiocb, QEMU_PAIO_WRITE);
/* Submit `aiocb` as an ioctl request; returns whatever qemu_paio_submit() returns. */
299 int qemu_paio_ioctl(struct qemu_paiocb *aiocb)
301 return qemu_paio_submit(aiocb, QEMU_PAIO_IOCTL);
/*
 * Returns an ssize_t for the given request; qemu_paio_error() is built on it.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * reads aiocb->ret -- confirm.
 */
304 ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
/*
 * Derives an int status for `aiocb` from qemu_paio_return().
 * NOTE(review): the conversion of `ret` into the returned value is not
 * visible in this excerpt.
 */
315 int qemu_paio_error(struct qemu_paiocb *aiocb)
317 ssize_t ret = qemu_paio_return(aiocb);
/*
 * Try to cancel a request.
 *  - not yet active: removed from request_list, result set to -ECANCELED,
 *    outcome QEMU_PAIO_CANCELED;
 *  - active and still -EINPROGRESS: outcome QEMU_PAIO_NOTCANCELED;
 *  - otherwise: outcome QEMU_PAIO_ALLDONE.
 * fd: not referenced by any line visible in this excerpt.
 * NOTE(review): locking and the return statement are not visible here; the
 * function may continue past the end of this excerpt.
 */
327 int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb)
332 if (!aiocb->active) {
333 TAILQ_REMOVE(&request_list, aiocb, node);
334 aiocb->ret = -ECANCELED;
335 ret = QEMU_PAIO_CANCELED;
336 } else if (aiocb->ret == -EINPROGRESS)
337 ret = QEMU_PAIO_NOTCANCELED;
339 ret = QEMU_PAIO_ALLDONE;