diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 9bd0ba87553ff059a9273527eed356f5f1ce072f..92346b5d9f5b088b34363d3d5707107734fd902f 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -182,12 +182,18 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 	struct uring_cache *cache;
 
 	cache = io_uring_async_get(req);
-	if (cache) {
-		memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx));
-		ioucmd->sqe = req->async_data;
+	if (unlikely(!cache))
+		return -ENOMEM;
+
+	if (!(req->flags & REQ_F_FORCE_ASYNC)) {
+		/* defer memcpy until we need it */
+		ioucmd->sqe = sqe;
 		return 0;
 	}
-	return -ENOMEM;
+
+	memcpy(req->async_data, sqe, uring_sqe_size(req->ctx));
+	ioucmd->sqe = req->async_data;
+	return 0;
 }
 
 int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -245,8 +251,15 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 	}
 
 	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
-	if (ret == -EAGAIN || ret == -EIOCBQUEUED)
-		return ret;
+	if (ret == -EAGAIN) {
+		struct uring_cache *cache = req->async_data;
+
+		if (ioucmd->sqe != (void *) cache)
+			memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx));
+		return -EAGAIN;
+	} else if (ret == -EIOCBQUEUED) {
+		return -EIOCBQUEUED;
+	}
 
 	if (ret < 0)
 		req_set_fail(req);