1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause
2a6dbe372Spaul luse * Copyright (C) 2017 Intel Corporation.
307fe6a43SSeth Howell * All rights reserved.
407fe6a43SSeth Howell */
507fe6a43SSeth Howell
607fe6a43SSeth Howell #include "spdk/stdinc.h"
707fe6a43SSeth Howell
807fe6a43SSeth Howell #include "bdev_rbd.h"
907fe6a43SSeth Howell
1007fe6a43SSeth Howell #include <rbd/librbd.h>
1107fe6a43SSeth Howell #include <rados/librados.h>
1207fe6a43SSeth Howell
1307fe6a43SSeth Howell #include "spdk/env.h"
1407fe6a43SSeth Howell #include "spdk/bdev.h"
1507fe6a43SSeth Howell #include "spdk/thread.h"
1607fe6a43SSeth Howell #include "spdk/json.h"
1707fe6a43SSeth Howell #include "spdk/string.h"
1807fe6a43SSeth Howell #include "spdk/util.h"
19e6993554SZiye Yang #include "spdk/likely.h"
2007fe6a43SSeth Howell
2107fe6a43SSeth Howell #include "spdk/bdev_module.h"
224e8e97c8STomasz Zawadzki #include "spdk/log.h"
2307fe6a43SSeth Howell
/* Module-wide counter, zero at start-up. NOTE(review): its users are not
 * visible in this part of the file - confirm what it counts. */
static int bdev_rbd_count;
2507fe6a43SSeth Howell
261738550bSZiye Yang struct bdev_rbd_pool_ctx {
271738550bSZiye Yang rados_t *cluster_p;
281738550bSZiye Yang char *name;
291738550bSZiye Yang rados_ioctx_t io_ctx;
301738550bSZiye Yang uint32_t ref;
311738550bSZiye Yang STAILQ_ENTRY(bdev_rbd_pool_ctx) link;
321738550bSZiye Yang };
331738550bSZiye Yang
/* List of all live pool contexts. The get/put helpers assert that they run
 * on the app thread, and no lock is taken around this list. */
static STAILQ_HEAD(, bdev_rbd_pool_ctx) g_map_bdev_rbd_pool_ctx =
	STAILQ_HEAD_INITIALIZER(g_map_bdev_rbd_pool_ctx);
361738550bSZiye Yang
3707fe6a43SSeth Howell struct bdev_rbd {
3807fe6a43SSeth Howell struct spdk_bdev disk;
3907fe6a43SSeth Howell char *rbd_name;
4007fe6a43SSeth Howell char *user_id;
4107fe6a43SSeth Howell char *pool_name;
4207fe6a43SSeth Howell char **config;
43e1e73440SZiye Yang
44b940247bSZiye Yang rados_t cluster;
455c016026SZiye Yang rados_t *cluster_p;
465c016026SZiye Yang char *cluster_name;
47e1e73440SZiye Yang
48215a6e4cSZiye Yang union rbd_ctx {
49215a6e4cSZiye Yang rados_ioctx_t io_ctx;
501738550bSZiye Yang struct bdev_rbd_pool_ctx *ctx;
51215a6e4cSZiye Yang } rados_ctx;
52215a6e4cSZiye Yang
53e1e73440SZiye Yang rbd_image_t image;
54e1e73440SZiye Yang
5507fe6a43SSeth Howell rbd_image_info_t info;
56abb4baefSJim Harris struct spdk_thread *destruct_td;
57e1e73440SZiye Yang
5807fe6a43SSeth Howell TAILQ_ENTRY(bdev_rbd) tailq;
5907fe6a43SSeth Howell struct spdk_poller *reset_timer;
6007fe6a43SSeth Howell struct spdk_bdev_io *reset_bdev_io;
61cfc9dcdfSPhilipp Riederer
62cfc9dcdfSPhilipp Riederer uint64_t rbd_watch_handle;
6307fe6a43SSeth Howell };
6407fe6a43SSeth Howell
/* Per-channel context: the owning bdev_rbd plus a group channel.
 * NOTE(review): the code that fills these fields is outside this chunk. */
struct bdev_rbd_io_channel {
	struct bdev_rbd *disk;
	struct spdk_io_channel *group_ch;
};
6907fe6a43SSeth Howell
7007fe6a43SSeth Howell struct bdev_rbd_io {
71e1e73440SZiye Yang struct spdk_thread *submit_td;
72e1e73440SZiye Yang enum spdk_bdev_io_status status;
730d90cda0SZiye Yang rbd_completion_t comp;
745d097aa7SZiye Yang size_t total_len;
7507fe6a43SSeth Howell };
7607fe6a43SSeth Howell
77e18eaee2SZiye Yang struct bdev_rbd_cluster {
78e18eaee2SZiye Yang char *name;
79e18eaee2SZiye Yang char *user_id;
80e18eaee2SZiye Yang char **config_param;
81e18eaee2SZiye Yang char *config_file;
8220c8a3b8STan Long char *key_file;
83b618f056SYue-Zhu char *core_mask;
84e18eaee2SZiye Yang rados_t cluster;
85e18eaee2SZiye Yang uint32_t ref;
86e18eaee2SZiye Yang STAILQ_ENTRY(bdev_rbd_cluster) link;
87e18eaee2SZiye Yang };
88e18eaee2SZiye Yang
/* Registry of shared clusters. The get/put helpers take
 * g_map_bdev_rbd_cluster_mutex around every walk of this list. */
static STAILQ_HEAD(, bdev_rbd_cluster) g_map_bdev_rbd_cluster =
	STAILQ_HEAD_INITIALIZER(g_map_bdev_rbd_cluster);
static pthread_mutex_t g_map_bdev_rbd_cluster_mutex = PTHREAD_MUTEX_INITIALIZER;
92e18eaee2SZiye Yang
/* Forward declaration; the definition is not part of this chunk. */
static struct spdk_io_channel *bdev_rbd_get_io_channel(void *ctx);
94cfc9dcdfSPhilipp Riederer
95cfc9dcdfSPhilipp Riederer static void
_rbd_update_callback(void * arg)96cfc9dcdfSPhilipp Riederer _rbd_update_callback(void *arg)
97cfc9dcdfSPhilipp Riederer {
98cfc9dcdfSPhilipp Riederer struct bdev_rbd *rbd = arg;
99cfc9dcdfSPhilipp Riederer uint64_t current_size_in_bytes = 0;
100cfc9dcdfSPhilipp Riederer int rc;
101cfc9dcdfSPhilipp Riederer
102cfc9dcdfSPhilipp Riederer rc = rbd_get_size(rbd->image, ¤t_size_in_bytes);
103cfc9dcdfSPhilipp Riederer if (rc < 0) {
104cfc9dcdfSPhilipp Riederer SPDK_ERRLOG("Failed getting size %d\n", rc);
105cfc9dcdfSPhilipp Riederer return;
106cfc9dcdfSPhilipp Riederer }
107cfc9dcdfSPhilipp Riederer
108cfc9dcdfSPhilipp Riederer rc = spdk_bdev_notify_blockcnt_change(&rbd->disk, current_size_in_bytes / rbd->disk.blocklen);
109cfc9dcdfSPhilipp Riederer if (rc != 0) {
110cfc9dcdfSPhilipp Riederer SPDK_ERRLOG("failed to notify block cnt change.\n");
111cfc9dcdfSPhilipp Riederer }
112cfc9dcdfSPhilipp Riederer }
113cfc9dcdfSPhilipp Riederer
114cfc9dcdfSPhilipp Riederer static void
rbd_update_callback(void * arg)115cfc9dcdfSPhilipp Riederer rbd_update_callback(void *arg)
116cfc9dcdfSPhilipp Riederer {
117cfc9dcdfSPhilipp Riederer spdk_thread_send_msg(spdk_thread_get_app_thread(), _rbd_update_callback, arg);
118cfc9dcdfSPhilipp Riederer }
119cfc9dcdfSPhilipp Riederer
120e18eaee2SZiye Yang static void
bdev_rbd_cluster_free(struct bdev_rbd_cluster * entry)121e18eaee2SZiye Yang bdev_rbd_cluster_free(struct bdev_rbd_cluster *entry)
122e18eaee2SZiye Yang {
123e18eaee2SZiye Yang assert(entry != NULL);
124e18eaee2SZiye Yang
125e18eaee2SZiye Yang bdev_rbd_free_config(entry->config_param);
126e18eaee2SZiye Yang free(entry->config_file);
12720c8a3b8STan Long free(entry->key_file);
128e18eaee2SZiye Yang free(entry->user_id);
129e18eaee2SZiye Yang free(entry->name);
130b618f056SYue-Zhu free(entry->core_mask);
131e18eaee2SZiye Yang free(entry);
132e18eaee2SZiye Yang }
133e18eaee2SZiye Yang
13407fe6a43SSeth Howell static void
bdev_rbd_put_cluster(rados_t ** cluster)1355c016026SZiye Yang bdev_rbd_put_cluster(rados_t **cluster)
1365c016026SZiye Yang {
1375c016026SZiye Yang struct bdev_rbd_cluster *entry;
1385c016026SZiye Yang
1395c016026SZiye Yang assert(cluster != NULL);
1405c016026SZiye Yang
1415c016026SZiye Yang /* No need go through the map if *cluster equals to NULL */
1425c016026SZiye Yang if (*cluster == NULL) {
1435c016026SZiye Yang return;
1445c016026SZiye Yang }
1455c016026SZiye Yang
1465c016026SZiye Yang pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
1475c016026SZiye Yang STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
1485c016026SZiye Yang if (*cluster != &entry->cluster) {
1495c016026SZiye Yang continue;
1505c016026SZiye Yang }
1515c016026SZiye Yang
1525c016026SZiye Yang assert(entry->ref > 0);
1535c016026SZiye Yang entry->ref--;
1545c016026SZiye Yang *cluster = NULL;
1555c016026SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1565c016026SZiye Yang return;
1575c016026SZiye Yang }
1585c016026SZiye Yang
1595c016026SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1605c016026SZiye Yang SPDK_ERRLOG("Cannot find the entry for cluster=%p\n", cluster);
1615c016026SZiye Yang }
1625c016026SZiye Yang
1635c016026SZiye Yang static void
bdev_rbd_put_pool_ctx(struct bdev_rbd_pool_ctx * entry)1641738550bSZiye Yang bdev_rbd_put_pool_ctx(struct bdev_rbd_pool_ctx *entry)
1651738550bSZiye Yang {
1661738550bSZiye Yang assert(spdk_get_thread() == spdk_thread_get_app_thread());
1671738550bSZiye Yang
1681738550bSZiye Yang assert(entry != NULL);
1691738550bSZiye Yang assert(entry->ref > 0);
1701738550bSZiye Yang entry->ref--;
1711738550bSZiye Yang if (entry->ref == 0) {
1721738550bSZiye Yang STAILQ_REMOVE(&g_map_bdev_rbd_pool_ctx, entry, bdev_rbd_pool_ctx, link);
1731738550bSZiye Yang rados_ioctx_destroy(entry->io_ctx);
1741738550bSZiye Yang free(entry->name);
1751738550bSZiye Yang free(entry);
1761738550bSZiye Yang }
1771738550bSZiye Yang }
1781738550bSZiye Yang
1791738550bSZiye Yang static void
bdev_rbd_free(struct bdev_rbd * rbd)18007fe6a43SSeth Howell bdev_rbd_free(struct bdev_rbd *rbd)
18107fe6a43SSeth Howell {
18207fe6a43SSeth Howell if (!rbd) {
18307fe6a43SSeth Howell return;
18407fe6a43SSeth Howell }
18507fe6a43SSeth Howell
186c441e270SZiye Yang if (rbd->image) {
187cfc9dcdfSPhilipp Riederer rbd_update_unwatch(rbd->image, rbd->rbd_watch_handle);
188c441e270SZiye Yang rbd_flush(rbd->image);
189c441e270SZiye Yang rbd_close(rbd->image);
190c441e270SZiye Yang }
191c441e270SZiye Yang
19207fe6a43SSeth Howell free(rbd->disk.name);
19307fe6a43SSeth Howell free(rbd->rbd_name);
19407fe6a43SSeth Howell free(rbd->user_id);
19507fe6a43SSeth Howell free(rbd->pool_name);
196289e0f59SSeth Howell bdev_rbd_free_config(rbd->config);
1975c016026SZiye Yang
198215a6e4cSZiye Yang if (rbd->cluster_name) {
1991738550bSZiye Yang /* When rbd is destructed by bdev_rbd_destruct, it will not enter here
2001738550bSZiye Yang * because the ctx will already freed by bdev_rbd_free_cb in async manner.
2011738550bSZiye Yang * This path only happens during the rbd initialization procedure of rbd */
202215a6e4cSZiye Yang if (rbd->rados_ctx.ctx) {
203215a6e4cSZiye Yang bdev_rbd_put_pool_ctx(rbd->rados_ctx.ctx);
204215a6e4cSZiye Yang rbd->rados_ctx.ctx = NULL;
205e1e73440SZiye Yang }
206e1e73440SZiye Yang
2075c016026SZiye Yang bdev_rbd_put_cluster(&rbd->cluster_p);
2085c016026SZiye Yang free(rbd->cluster_name);
2095c016026SZiye Yang } else if (rbd->cluster) {
210215a6e4cSZiye Yang if (rbd->rados_ctx.io_ctx) {
211215a6e4cSZiye Yang rados_ioctx_destroy(rbd->rados_ctx.io_ctx);
212215a6e4cSZiye Yang }
2135c016026SZiye Yang rados_shutdown(rbd->cluster);
2145c016026SZiye Yang }
2155c016026SZiye Yang
21607fe6a43SSeth Howell free(rbd);
21707fe6a43SSeth Howell }
21807fe6a43SSeth Howell
/*
 * Free a NULL-terminated array of heap-allocated strings, then the array
 * itself. A NULL config is a no-op.
 */
void
bdev_rbd_free_config(char **config)
{
	size_t i;

	if (config == NULL) {
		return;
	}

	for (i = 0; config[i] != NULL; i++) {
		free(config[i]);
	}
	free(config);
}
23107fe6a43SSeth Howell
/*
 * Deep-copy a NULL-terminated string array.
 *
 * Returns a newly allocated, NULL-terminated copy to be released with
 * bdev_rbd_free_config(). Returns NULL when config is NULL or when any
 * allocation fails; no partial copy is leaked.
 */
char **
bdev_rbd_dup_config(const char *const *config)
{
	char **dup;
	size_t n = 0;
	size_t i;

	if (config == NULL) {
		return NULL;
	}

	while (config[n] != NULL) {
		n++;
	}

	/* calloc keeps the array NULL-terminated even if copying stops early. */
	dup = calloc(n + 1, sizeof(*dup));
	if (dup == NULL) {
		return NULL;
	}

	for (i = 0; i < n; i++) {
		dup[i] = strdup(config[i]);
		if (dup[i] == NULL) {
			bdev_rbd_free_config(dup);
			return NULL;
		}
	}

	return dup;
}
25407fe6a43SSeth Howell
25507fe6a43SSeth Howell static int
bdev_rados_cluster_init(const char * user_id,const char * const * config,rados_t * cluster)256b940247bSZiye Yang bdev_rados_cluster_init(const char *user_id, const char *const *config,
257b940247bSZiye Yang rados_t *cluster)
25807fe6a43SSeth Howell {
25907fe6a43SSeth Howell int ret;
26007fe6a43SSeth Howell
26107fe6a43SSeth Howell ret = rados_create(cluster, user_id);
26207fe6a43SSeth Howell if (ret < 0) {
26307fe6a43SSeth Howell SPDK_ERRLOG("Failed to create rados_t struct\n");
26407fe6a43SSeth Howell return -1;
26507fe6a43SSeth Howell }
26607fe6a43SSeth Howell
26707fe6a43SSeth Howell if (config) {
26807fe6a43SSeth Howell const char *const *entry = config;
26907fe6a43SSeth Howell while (*entry) {
27007fe6a43SSeth Howell ret = rados_conf_set(*cluster, entry[0], entry[1]);
27107fe6a43SSeth Howell if (ret < 0) {
27207fe6a43SSeth Howell SPDK_ERRLOG("Failed to set %s = %s\n", entry[0], entry[1]);
27307fe6a43SSeth Howell rados_shutdown(*cluster);
2743c5a8ddcSLukas Stockner *cluster = NULL;
27507fe6a43SSeth Howell return -1;
27607fe6a43SSeth Howell }
27707fe6a43SSeth Howell entry += 2;
27807fe6a43SSeth Howell }
27907fe6a43SSeth Howell } else {
28007fe6a43SSeth Howell ret = rados_conf_read_file(*cluster, NULL);
28107fe6a43SSeth Howell if (ret < 0) {
28207fe6a43SSeth Howell SPDK_ERRLOG("Failed to read conf file\n");
28307fe6a43SSeth Howell rados_shutdown(*cluster);
2843c5a8ddcSLukas Stockner *cluster = NULL;
28507fe6a43SSeth Howell return -1;
28607fe6a43SSeth Howell }
28707fe6a43SSeth Howell }
28807fe6a43SSeth Howell
28907fe6a43SSeth Howell ret = rados_connect(*cluster);
29007fe6a43SSeth Howell if (ret < 0) {
29107fe6a43SSeth Howell SPDK_ERRLOG("Failed to connect to rbd_pool\n");
29207fe6a43SSeth Howell rados_shutdown(*cluster);
2933c5a8ddcSLukas Stockner *cluster = NULL;
29407fe6a43SSeth Howell return -1;
29507fe6a43SSeth Howell }
29607fe6a43SSeth Howell
297b940247bSZiye Yang return 0;
29807fe6a43SSeth Howell }
29907fe6a43SSeth Howell
3005c016026SZiye Yang static int
bdev_rbd_get_cluster(const char * cluster_name,rados_t ** cluster)3015c016026SZiye Yang bdev_rbd_get_cluster(const char *cluster_name, rados_t **cluster)
3025c016026SZiye Yang {
3035c016026SZiye Yang struct bdev_rbd_cluster *entry;
3045c016026SZiye Yang
3055c016026SZiye Yang if (cluster == NULL) {
3065c016026SZiye Yang SPDK_ERRLOG("cluster should not be NULL\n");
3075c016026SZiye Yang return -1;
3085c016026SZiye Yang }
3095c016026SZiye Yang
3105c016026SZiye Yang pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
3115c016026SZiye Yang STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
312d33f4486SZiye Yang if (strcmp(cluster_name, entry->name) == 0) {
3135c016026SZiye Yang entry->ref++;
3145c016026SZiye Yang *cluster = &entry->cluster;
3155c016026SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
3165c016026SZiye Yang return 0;
3175c016026SZiye Yang }
3185c016026SZiye Yang }
3195c016026SZiye Yang
3205c016026SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
3215c016026SZiye Yang return -1;
3225c016026SZiye Yang }
3235c016026SZiye Yang
3245c016026SZiye Yang static int
bdev_rbd_shared_cluster_init(const char * cluster_name,rados_t ** cluster)3255c016026SZiye Yang bdev_rbd_shared_cluster_init(const char *cluster_name, rados_t **cluster)
3265c016026SZiye Yang {
3275c016026SZiye Yang int ret;
3285c016026SZiye Yang
3295c016026SZiye Yang ret = bdev_rbd_get_cluster(cluster_name, cluster);
3305c016026SZiye Yang if (ret < 0) {
3315c016026SZiye Yang SPDK_ERRLOG("Failed to create rados_t struct\n");
3325c016026SZiye Yang return -1;
3335c016026SZiye Yang }
3345c016026SZiye Yang
3355c016026SZiye Yang return ret;
3365c016026SZiye Yang }
3375c016026SZiye Yang
338b940247bSZiye Yang static void *
bdev_rbd_cluster_handle(void * arg)339b940247bSZiye Yang bdev_rbd_cluster_handle(void *arg)
340b940247bSZiye Yang {
341b940247bSZiye Yang void *ret = arg;
3425c016026SZiye Yang struct bdev_rbd *rbd = arg;
343b940247bSZiye Yang int rc;
344b940247bSZiye Yang
345b940247bSZiye Yang rc = bdev_rados_cluster_init(rbd->user_id, (const char *const *)rbd->config,
346b940247bSZiye Yang &rbd->cluster);
347b940247bSZiye Yang if (rc < 0) {
348b940247bSZiye Yang SPDK_ERRLOG("Failed to create rados cluster for user_id=%s and rbd_pool=%s\n",
349b940247bSZiye Yang rbd->user_id ? rbd->user_id : "admin (the default)", rbd->pool_name);
350b940247bSZiye Yang ret = NULL;
351b940247bSZiye Yang }
352b940247bSZiye Yang
353b940247bSZiye Yang return ret;
35407fe6a43SSeth Howell }
35507fe6a43SSeth Howell
3561738550bSZiye Yang static int
bdev_rbd_get_pool_ctx(rados_t * cluster_p,const char * name,struct bdev_rbd_pool_ctx ** ctx)3571738550bSZiye Yang bdev_rbd_get_pool_ctx(rados_t *cluster_p, const char *name, struct bdev_rbd_pool_ctx **ctx)
3581738550bSZiye Yang {
3591738550bSZiye Yang struct bdev_rbd_pool_ctx *entry;
3601738550bSZiye Yang
3611738550bSZiye Yang assert(spdk_get_thread() == spdk_thread_get_app_thread());
3621738550bSZiye Yang
3631738550bSZiye Yang if (name == NULL || ctx == NULL) {
3641738550bSZiye Yang return -1;
3651738550bSZiye Yang }
3661738550bSZiye Yang
3671738550bSZiye Yang STAILQ_FOREACH(entry, &g_map_bdev_rbd_pool_ctx, link) {
3681738550bSZiye Yang if (strcmp(name, entry->name) == 0 && cluster_p == entry->cluster_p) {
3691738550bSZiye Yang entry->ref++;
3701738550bSZiye Yang *ctx = entry;
3711738550bSZiye Yang return 0;
3721738550bSZiye Yang }
3731738550bSZiye Yang }
3741738550bSZiye Yang
3751738550bSZiye Yang entry = calloc(1, sizeof(*entry));
3761738550bSZiye Yang if (!entry) {
3771738550bSZiye Yang SPDK_ERRLOG("Cannot allocate an entry for name=%s\n", name);
3781738550bSZiye Yang return -1;
3791738550bSZiye Yang }
3801738550bSZiye Yang
3811738550bSZiye Yang entry->name = strdup(name);
3821738550bSZiye Yang if (entry->name == NULL) {
3831738550bSZiye Yang SPDK_ERRLOG("Failed to allocate the name =%s space on entry =%p\n", name, entry);
3841738550bSZiye Yang goto err_handle;
3851738550bSZiye Yang }
3861738550bSZiye Yang
3871738550bSZiye Yang if (rados_ioctx_create(*cluster_p, name, &entry->io_ctx) < 0) {
3881738550bSZiye Yang goto err_handle1;
3891738550bSZiye Yang }
3901738550bSZiye Yang
3911738550bSZiye Yang entry->cluster_p = cluster_p;
3921738550bSZiye Yang entry->ref = 1;
3931738550bSZiye Yang *ctx = entry;
3941738550bSZiye Yang STAILQ_INSERT_TAIL(&g_map_bdev_rbd_pool_ctx, entry, link);
3951738550bSZiye Yang
3961738550bSZiye Yang return 0;
3971738550bSZiye Yang
3981738550bSZiye Yang err_handle1:
3991738550bSZiye Yang free(entry->name);
4001738550bSZiye Yang err_handle:
4011738550bSZiye Yang free(entry);
4021738550bSZiye Yang
4031738550bSZiye Yang return -1;
4041738550bSZiye Yang }
4051738550bSZiye Yang
406e1e73440SZiye Yang static void *
bdev_rbd_init_context(void * arg)407e1e73440SZiye Yang bdev_rbd_init_context(void *arg)
408e1e73440SZiye Yang {
409e1e73440SZiye Yang struct bdev_rbd *rbd = arg;
410e1e73440SZiye Yang int rc;
411215a6e4cSZiye Yang rados_ioctx_t *io_ctx = NULL;
412e1e73440SZiye Yang
413215a6e4cSZiye Yang if (rbd->cluster_name) {
414215a6e4cSZiye Yang if (bdev_rbd_get_pool_ctx(rbd->cluster_p, rbd->pool_name, &rbd->rados_ctx.ctx) < 0) {
415215a6e4cSZiye Yang SPDK_ERRLOG("Failed to create ioctx on rbd=%p with cluster_name=%s\n",
416215a6e4cSZiye Yang rbd, rbd->cluster_name);
417215a6e4cSZiye Yang return NULL;
418215a6e4cSZiye Yang }
419215a6e4cSZiye Yang io_ctx = &rbd->rados_ctx.ctx->io_ctx;
420215a6e4cSZiye Yang } else {
421215a6e4cSZiye Yang if (rados_ioctx_create(*(rbd->cluster_p), rbd->pool_name, &rbd->rados_ctx.io_ctx) < 0) {
422e1e73440SZiye Yang SPDK_ERRLOG("Failed to create ioctx on rbd=%p\n", rbd);
423e1e73440SZiye Yang return NULL;
424e1e73440SZiye Yang }
425215a6e4cSZiye Yang io_ctx = &rbd->rados_ctx.io_ctx;
426215a6e4cSZiye Yang }
427e1e73440SZiye Yang
428215a6e4cSZiye Yang assert(io_ctx != NULL);
429215a6e4cSZiye Yang rc = rbd_open(*io_ctx, rbd->rbd_name, &rbd->image, NULL);
430e1e73440SZiye Yang if (rc < 0) {
431e1e73440SZiye Yang SPDK_ERRLOG("Failed to open specified rbd device\n");
432e1e73440SZiye Yang return NULL;
433e1e73440SZiye Yang }
434e1e73440SZiye Yang
435cfc9dcdfSPhilipp Riederer rc = rbd_update_watch(rbd->image, &rbd->rbd_watch_handle, rbd_update_callback, (void *)rbd);
436cfc9dcdfSPhilipp Riederer if (rc < 0) {
437cfc9dcdfSPhilipp Riederer SPDK_ERRLOG("Failed to set up watch %d\n", rc);
438cfc9dcdfSPhilipp Riederer }
439cfc9dcdfSPhilipp Riederer
440e1e73440SZiye Yang rc = rbd_stat(rbd->image, &rbd->info, sizeof(rbd->info));
441e1e73440SZiye Yang if (rc < 0) {
442e1e73440SZiye Yang SPDK_ERRLOG("Failed to stat specified rbd device\n");
443e1e73440SZiye Yang return NULL;
444e1e73440SZiye Yang }
445e1e73440SZiye Yang
446e1e73440SZiye Yang return arg;
447e1e73440SZiye Yang }
448e1e73440SZiye Yang
44907fe6a43SSeth Howell static int
bdev_rbd_init(struct bdev_rbd * rbd)450467148fcSZiye Yang bdev_rbd_init(struct bdev_rbd *rbd)
45107fe6a43SSeth Howell {
452741abc3cSZiye Yang int ret = 0;
45307fe6a43SSeth Howell
4545c016026SZiye Yang if (!rbd->cluster_name) {
4555c016026SZiye Yang rbd->cluster_p = &rbd->cluster;
456b940247bSZiye Yang /* Cluster should be created in non-SPDK thread to avoid conflict between
457b940247bSZiye Yang * Rados and SPDK thread */
458b940247bSZiye Yang if (spdk_call_unaffinitized(bdev_rbd_cluster_handle, rbd) == NULL) {
459b940247bSZiye Yang SPDK_ERRLOG("Cannot create the rados object on rbd=%p\n", rbd);
460b940247bSZiye Yang return -1;
461b940247bSZiye Yang }
4625c016026SZiye Yang } else {
4635c016026SZiye Yang ret = bdev_rbd_shared_cluster_init(rbd->cluster_name, &rbd->cluster_p);
4645c016026SZiye Yang if (ret < 0) {
4655c016026SZiye Yang SPDK_ERRLOG("Failed to create rados object for rbd =%p on cluster_name=%s\n",
4665c016026SZiye Yang rbd, rbd->cluster_name);
4675c016026SZiye Yang return -1;
4685c016026SZiye Yang }
4695c016026SZiye Yang }
470b940247bSZiye Yang
471e1e73440SZiye Yang if (spdk_call_unaffinitized(bdev_rbd_init_context, rbd) == NULL) {
472e1e73440SZiye Yang SPDK_ERRLOG("Cannot init rbd context for rbd=%p\n", rbd);
473c6824c79SYifan Bian return -1;
47407fe6a43SSeth Howell }
47507fe6a43SSeth Howell
476c441e270SZiye Yang return ret;
47707fe6a43SSeth Howell }
47807fe6a43SSeth Howell
47907fe6a43SSeth Howell static void
_bdev_rbd_io_complete(void * _rbd_io)480e1e73440SZiye Yang _bdev_rbd_io_complete(void *_rbd_io)
481e1e73440SZiye Yang {
482e1e73440SZiye Yang struct bdev_rbd_io *rbd_io = _rbd_io;
483e1e73440SZiye Yang
484e1e73440SZiye Yang spdk_bdev_io_complete(spdk_bdev_io_from_ctx(rbd_io), rbd_io->status);
485e1e73440SZiye Yang }
486e1e73440SZiye Yang
487e1e73440SZiye Yang static void
bdev_rbd_io_complete(struct spdk_bdev_io * bdev_io,enum spdk_bdev_io_status status)488e1e73440SZiye Yang bdev_rbd_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
489e1e73440SZiye Yang {
490e1e73440SZiye Yang struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
49170e2e5d9SZiye Yang struct spdk_thread *current_thread = spdk_get_thread();
492e1e73440SZiye Yang
493e1e73440SZiye Yang rbd_io->status = status;
49470e2e5d9SZiye Yang assert(rbd_io->submit_td != NULL);
49570e2e5d9SZiye Yang if (rbd_io->submit_td != current_thread) {
496e1e73440SZiye Yang spdk_thread_send_msg(rbd_io->submit_td, _bdev_rbd_io_complete, rbd_io);
497e1e73440SZiye Yang } else {
498e1e73440SZiye Yang _bdev_rbd_io_complete(rbd_io);
499e1e73440SZiye Yang }
500e1e73440SZiye Yang }
501e1e73440SZiye Yang
502e1e73440SZiye Yang static void
bdev_rbd_finish_aiocb(rbd_completion_t cb,void * arg)50362210effSZiye Yang bdev_rbd_finish_aiocb(rbd_completion_t cb, void *arg)
50462210effSZiye Yang {
50562210effSZiye Yang int io_status;
50662210effSZiye Yang struct spdk_bdev_io *bdev_io;
50762210effSZiye Yang struct bdev_rbd_io *rbd_io;
50862210effSZiye Yang enum spdk_bdev_io_status bio_status;
50962210effSZiye Yang
51062210effSZiye Yang bdev_io = rbd_aio_get_arg(cb);
51162210effSZiye Yang rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
51262210effSZiye Yang io_status = rbd_aio_get_return_value(cb);
51362210effSZiye Yang bio_status = SPDK_BDEV_IO_STATUS_SUCCESS;
51462210effSZiye Yang
51562210effSZiye Yang if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
51662210effSZiye Yang if ((int)rbd_io->total_len != io_status) {
51762210effSZiye Yang bio_status = SPDK_BDEV_IO_STATUS_FAILED;
51862210effSZiye Yang }
5199fe36c0dSJonas Pfefferle #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC
5209fe36c0dSJonas Pfefferle } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE && io_status == -EILSEQ) {
5219fe36c0dSJonas Pfefferle bio_status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
5229fe36c0dSJonas Pfefferle #endif
5239fe36c0dSJonas Pfefferle } else if (io_status != 0) { /* For others, 0 means success */
52462210effSZiye Yang bio_status = SPDK_BDEV_IO_STATUS_FAILED;
52562210effSZiye Yang }
52662210effSZiye Yang
52762210effSZiye Yang rbd_aio_release(cb);
52862210effSZiye Yang
52962210effSZiye Yang bdev_rbd_io_complete(bdev_io, bio_status);
53062210effSZiye Yang }
53162210effSZiye Yang
53262210effSZiye Yang static void
_bdev_rbd_start_aio(struct bdev_rbd * disk,struct spdk_bdev_io * bdev_io,struct iovec * iov,int iovcnt,uint64_t offset,size_t len)533533470b2SKonrad Sztyber _bdev_rbd_start_aio(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io,
5345d097aa7SZiye Yang struct iovec *iov, int iovcnt, uint64_t offset, size_t len)
53507fe6a43SSeth Howell {
53607fe6a43SSeth Howell int ret;
5370d90cda0SZiye Yang struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
538e1e73440SZiye Yang rbd_image_t image = disk->image;
539d4229b4dSZiye Yang
54007fe6a43SSeth Howell ret = rbd_aio_create_completion(bdev_io, bdev_rbd_finish_aiocb,
5410d90cda0SZiye Yang &rbd_io->comp);
54207fe6a43SSeth Howell if (ret < 0) {
5430731c227SZiye Yang goto err;
54407fe6a43SSeth Howell }
54507fe6a43SSeth Howell
5466cc5fd1bSJonas Pfefferle switch (bdev_io->type) {
5476cc5fd1bSJonas Pfefferle case SPDK_BDEV_IO_TYPE_READ:
5485d097aa7SZiye Yang rbd_io->total_len = len;
549e6993554SZiye Yang if (spdk_likely(iovcnt == 1)) {
5506cc5fd1bSJonas Pfefferle ret = rbd_aio_read(image, offset, iov[0].iov_len, iov[0].iov_base,
5516cc5fd1bSJonas Pfefferle rbd_io->comp);
552e6993554SZiye Yang } else {
5530d90cda0SZiye Yang ret = rbd_aio_readv(image, iov, iovcnt, offset, rbd_io->comp);
554e6993554SZiye Yang }
5556cc5fd1bSJonas Pfefferle break;
5566cc5fd1bSJonas Pfefferle case SPDK_BDEV_IO_TYPE_WRITE:
557e6993554SZiye Yang if (spdk_likely(iovcnt == 1)) {
5586cc5fd1bSJonas Pfefferle ret = rbd_aio_write(image, offset, iov[0].iov_len, iov[0].iov_base,
5596cc5fd1bSJonas Pfefferle rbd_io->comp);
560e6993554SZiye Yang } else {
5610d90cda0SZiye Yang ret = rbd_aio_writev(image, iov, iovcnt, offset, rbd_io->comp);
562e6993554SZiye Yang }
5636cc5fd1bSJonas Pfefferle break;
5646cc5fd1bSJonas Pfefferle case SPDK_BDEV_IO_TYPE_UNMAP:
5656d9822c1SJonas Pfefferle ret = rbd_aio_discard(image, offset, len, rbd_io->comp);
5666cc5fd1bSJonas Pfefferle break;
5676cc5fd1bSJonas Pfefferle case SPDK_BDEV_IO_TYPE_FLUSH:
5680d90cda0SZiye Yang ret = rbd_aio_flush(image, rbd_io->comp);
5696cc5fd1bSJonas Pfefferle break;
5706cc5fd1bSJonas Pfefferle case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
5716cc5fd1bSJonas Pfefferle ret = rbd_aio_write_zeroes(image, offset, len, rbd_io->comp, /* zero_flags */ 0,
5726cc5fd1bSJonas Pfefferle /* op_flags */ 0);
5736cc5fd1bSJonas Pfefferle break;
5749fe36c0dSJonas Pfefferle #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC
5759fe36c0dSJonas Pfefferle case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
5769fe36c0dSJonas Pfefferle ret = rbd_aio_compare_and_writev(image, offset, iov /* cmp */, iovcnt,
5779fe36c0dSJonas Pfefferle bdev_io->u.bdev.fused_iovs /* write */,
5789fe36c0dSJonas Pfefferle bdev_io->u.bdev.fused_iovcnt,
5799fe36c0dSJonas Pfefferle rbd_io->comp, NULL,
5809fe36c0dSJonas Pfefferle /* op_flags */ 0);
5819fe36c0dSJonas Pfefferle break;
5829fe36c0dSJonas Pfefferle #endif
5836cc5fd1bSJonas Pfefferle default:
5846cc5fd1bSJonas Pfefferle /* This should not happen.
5856cc5fd1bSJonas Pfefferle * Function should only be called with supported io types in bdev_rbd_submit_request
5866cc5fd1bSJonas Pfefferle */
5876cc5fd1bSJonas Pfefferle SPDK_ERRLOG("Unsupported IO type =%d\n", bdev_io->type);
5886cc5fd1bSJonas Pfefferle ret = -ENOTSUP;
5896cc5fd1bSJonas Pfefferle break;
59007fe6a43SSeth Howell }
59107fe6a43SSeth Howell
59207fe6a43SSeth Howell if (ret < 0) {
5930d90cda0SZiye Yang rbd_aio_release(rbd_io->comp);
5940731c227SZiye Yang goto err;
59507fe6a43SSeth Howell }
59607fe6a43SSeth Howell
5970731c227SZiye Yang return;
5980731c227SZiye Yang
5990731c227SZiye Yang err:
600e1e73440SZiye Yang bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
60107fe6a43SSeth Howell }
60207fe6a43SSeth Howell
603533470b2SKonrad Sztyber static void
bdev_rbd_start_aio(void * ctx)604533470b2SKonrad Sztyber bdev_rbd_start_aio(void *ctx)
605533470b2SKonrad Sztyber {
606533470b2SKonrad Sztyber struct spdk_bdev_io *bdev_io = ctx;
607533470b2SKonrad Sztyber struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;
608533470b2SKonrad Sztyber
609533470b2SKonrad Sztyber _bdev_rbd_start_aio(disk,
610533470b2SKonrad Sztyber bdev_io,
611533470b2SKonrad Sztyber bdev_io->u.bdev.iovs,
612533470b2SKonrad Sztyber bdev_io->u.bdev.iovcnt,
613533470b2SKonrad Sztyber bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen,
614533470b2SKonrad Sztyber bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
615533470b2SKonrad Sztyber }
616533470b2SKonrad Sztyber
/* Module init/fini hooks referenced by rbd_if; defined outside this chunk. */
static int bdev_rbd_library_init(void);
static void bdev_rbd_library_fini(void);
6196cbbc682SZiye Yang
62007fe6a43SSeth Howell static int
bdev_rbd_get_ctx_size(void)62107fe6a43SSeth Howell bdev_rbd_get_ctx_size(void)
62207fe6a43SSeth Howell {
62307fe6a43SSeth Howell return sizeof(struct bdev_rbd_io);
62407fe6a43SSeth Howell }
62507fe6a43SSeth Howell
62607fe6a43SSeth Howell static struct spdk_bdev_module rbd_if = {
62707fe6a43SSeth Howell .name = "rbd",
62807fe6a43SSeth Howell .module_init = bdev_rbd_library_init,
6296cbbc682SZiye Yang .module_fini = bdev_rbd_library_fini,
63007fe6a43SSeth Howell .get_ctx_size = bdev_rbd_get_ctx_size,
63107fe6a43SSeth Howell
63207fe6a43SSeth Howell };
63307fe6a43SSeth Howell SPDK_BDEV_MODULE_REGISTER(rbd, &rbd_if)
63407fe6a43SSeth Howell
/* Forward declaration: bdev_rbd_check_outstanding_ios() re-arms this poller. */
static int bdev_rbd_reset_timer(void *arg);
63672cb5297SShuhei Matsumoto
63772cb5297SShuhei Matsumoto static void
bdev_rbd_check_outstanding_ios(struct spdk_bdev * bdev,uint64_t current_qd,void * cb_arg,int rc)63872cb5297SShuhei Matsumoto bdev_rbd_check_outstanding_ios(struct spdk_bdev *bdev, uint64_t current_qd,
63972cb5297SShuhei Matsumoto void *cb_arg, int rc)
64072cb5297SShuhei Matsumoto {
64172cb5297SShuhei Matsumoto struct bdev_rbd *disk = cb_arg;
64272cb5297SShuhei Matsumoto enum spdk_bdev_io_status bio_status;
64372cb5297SShuhei Matsumoto
64472cb5297SShuhei Matsumoto if (rc == 0 && current_qd > 0) {
64572cb5297SShuhei Matsumoto disk->reset_timer = SPDK_POLLER_REGISTER(bdev_rbd_reset_timer, disk, 1000);
64672cb5297SShuhei Matsumoto return;
64772cb5297SShuhei Matsumoto }
64872cb5297SShuhei Matsumoto
64972cb5297SShuhei Matsumoto if (rc != 0) {
65072cb5297SShuhei Matsumoto bio_status = SPDK_BDEV_IO_STATUS_FAILED;
65172cb5297SShuhei Matsumoto } else {
65272cb5297SShuhei Matsumoto bio_status = SPDK_BDEV_IO_STATUS_SUCCESS;
65372cb5297SShuhei Matsumoto }
65472cb5297SShuhei Matsumoto
65572cb5297SShuhei Matsumoto bdev_rbd_io_complete(disk->reset_bdev_io, bio_status);
65672cb5297SShuhei Matsumoto disk->reset_bdev_io = NULL;
65772cb5297SShuhei Matsumoto }
65872cb5297SShuhei Matsumoto
65907fe6a43SSeth Howell static int
bdev_rbd_reset_timer(void * arg)66007fe6a43SSeth Howell bdev_rbd_reset_timer(void *arg)
66107fe6a43SSeth Howell {
66207fe6a43SSeth Howell struct bdev_rbd *disk = arg;
66307fe6a43SSeth Howell
66407fe6a43SSeth Howell spdk_poller_unregister(&disk->reset_timer);
66572cb5297SShuhei Matsumoto
66672cb5297SShuhei Matsumoto spdk_bdev_get_current_qd(&disk->disk, bdev_rbd_check_outstanding_ios, disk);
66707fe6a43SSeth Howell
668eb05cbd6SMaciej Szwed return SPDK_POLLER_BUSY;
66907fe6a43SSeth Howell }
67007fe6a43SSeth Howell
6710731c227SZiye Yang static void
bdev_rbd_reset(void * ctx)672533470b2SKonrad Sztyber bdev_rbd_reset(void *ctx)
67307fe6a43SSeth Howell {
674533470b2SKonrad Sztyber struct spdk_bdev_io *bdev_io = ctx;
675533470b2SKonrad Sztyber struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;
676533470b2SKonrad Sztyber
67707fe6a43SSeth Howell /*
67807fe6a43SSeth Howell * HACK: Since librbd doesn't provide any way to cancel outstanding aio, just kick off a
67972cb5297SShuhei Matsumoto * poller to wait for in-flight I/O to complete.
68007fe6a43SSeth Howell */
68107fe6a43SSeth Howell assert(disk->reset_bdev_io == NULL);
68207fe6a43SSeth Howell disk->reset_bdev_io = bdev_io;
68372cb5297SShuhei Matsumoto
68472cb5297SShuhei Matsumoto bdev_rbd_reset_timer(disk);
68507fe6a43SSeth Howell }
68607fe6a43SSeth Howell
687b940247bSZiye Yang static void
_bdev_rbd_destruct_done(void * io_device)688abb4baefSJim Harris _bdev_rbd_destruct_done(void *io_device)
689b940247bSZiye Yang {
690b940247bSZiye Yang struct bdev_rbd *rbd = io_device;
691b940247bSZiye Yang
692b940247bSZiye Yang assert(rbd != NULL);
693b940247bSZiye Yang
694abb4baefSJim Harris spdk_bdev_destruct_done(&rbd->disk, 0);
695a70aae46SJim Harris bdev_rbd_free(rbd);
696b940247bSZiye Yang }
697a70aae46SJim Harris
698a70aae46SJim Harris static void
bdev_rbd_free_cb(void * io_device)699abb4baefSJim Harris bdev_rbd_free_cb(void *io_device)
700abb4baefSJim Harris {
701abb4baefSJim Harris struct bdev_rbd *rbd = io_device;
702abb4baefSJim Harris
7031738550bSZiye Yang assert(spdk_get_thread() == spdk_thread_get_app_thread());
7041738550bSZiye Yang
7051738550bSZiye Yang /* free the ctx */
706215a6e4cSZiye Yang if (rbd->cluster_name && rbd->rados_ctx.ctx) {
707215a6e4cSZiye Yang bdev_rbd_put_pool_ctx(rbd->rados_ctx.ctx);
708215a6e4cSZiye Yang rbd->rados_ctx.ctx = NULL;
7091738550bSZiye Yang }
7101738550bSZiye Yang
711abb4baefSJim Harris /* The io device has been unregistered. Send a message back to the
712abb4baefSJim Harris * original thread that started the destruct operation, so that the
713abb4baefSJim Harris * bdev unregister callback is invoked on the same thread that started
714abb4baefSJim Harris * this whole process.
715abb4baefSJim Harris */
716abb4baefSJim Harris spdk_thread_send_msg(rbd->destruct_td, _bdev_rbd_destruct_done, rbd);
717abb4baefSJim Harris }
718abb4baefSJim Harris
719abb4baefSJim Harris static void
_bdev_rbd_destruct(void * ctx)720a70aae46SJim Harris _bdev_rbd_destruct(void *ctx)
721a70aae46SJim Harris {
722a70aae46SJim Harris struct bdev_rbd *rbd = ctx;
723a70aae46SJim Harris
724a70aae46SJim Harris spdk_io_device_unregister(rbd, bdev_rbd_free_cb);
725e1e73440SZiye Yang }
726b940247bSZiye Yang
72707fe6a43SSeth Howell static int
bdev_rbd_destruct(void * ctx)72807fe6a43SSeth Howell bdev_rbd_destruct(void *ctx)
72907fe6a43SSeth Howell {
73007fe6a43SSeth Howell struct bdev_rbd *rbd = ctx;
731a70aae46SJim Harris
732a70aae46SJim Harris /* Start the destruct operation on the rbd bdev's
733a70aae46SJim Harris * main thread. This guarantees it will only start
734a70aae46SJim Harris * executing after any messages related to channel
735a70aae46SJim Harris * deletions have finished completing. *Always*
736a70aae46SJim Harris * send a message, even if this function gets called
737a70aae46SJim Harris * from the main thread, in case there are pending
738a70aae46SJim Harris * channel delete messages in flight to this thread.
739a70aae46SJim Harris */
740abb4baefSJim Harris assert(rbd->destruct_td == NULL);
741d9603a2fSZiye Yang rbd->destruct_td = spdk_get_thread();
742d9603a2fSZiye Yang spdk_thread_send_msg(spdk_thread_get_app_thread(), _bdev_rbd_destruct, rbd);
74307fe6a43SSeth Howell
744abb4baefSJim Harris /* Return 1 to indicate the destruct path is asynchronous. */
745abb4baefSJim Harris return 1;
74607fe6a43SSeth Howell }
74707fe6a43SSeth Howell
74807fe6a43SSeth Howell static void
bdev_rbd_get_buf_cb(struct spdk_io_channel * ch,struct spdk_bdev_io * bdev_io,bool success)74907fe6a43SSeth Howell bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
75007fe6a43SSeth Howell bool success)
75107fe6a43SSeth Howell {
75207fe6a43SSeth Howell if (!success) {
753e1e73440SZiye Yang bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
75407fe6a43SSeth Howell return;
75507fe6a43SSeth Howell }
75607fe6a43SSeth Howell
757c441e270SZiye Yang bdev_rbd_start_aio(bdev_io);
75807fe6a43SSeth Howell }
75907fe6a43SSeth Howell
760e1e73440SZiye Yang static void
bdev_rbd_submit_request(struct spdk_io_channel * ch,struct spdk_bdev_io * bdev_io)761e1e73440SZiye Yang bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
762e1e73440SZiye Yang {
763e1e73440SZiye Yang struct spdk_thread *submit_td = spdk_io_channel_get_thread(ch);
764e1e73440SZiye Yang struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
765e1e73440SZiye Yang
766e1e73440SZiye Yang rbd_io->submit_td = submit_td;
767533470b2SKonrad Sztyber switch (bdev_io->type) {
768533470b2SKonrad Sztyber case SPDK_BDEV_IO_TYPE_READ:
769533470b2SKonrad Sztyber spdk_bdev_io_get_buf(bdev_io, bdev_rbd_get_buf_cb,
770533470b2SKonrad Sztyber bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
771533470b2SKonrad Sztyber break;
772533470b2SKonrad Sztyber
773533470b2SKonrad Sztyber case SPDK_BDEV_IO_TYPE_WRITE:
774533470b2SKonrad Sztyber case SPDK_BDEV_IO_TYPE_UNMAP:
775533470b2SKonrad Sztyber case SPDK_BDEV_IO_TYPE_FLUSH:
776533470b2SKonrad Sztyber case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
7779fe36c0dSJonas Pfefferle #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC
7789fe36c0dSJonas Pfefferle case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
7799fe36c0dSJonas Pfefferle #endif
780c441e270SZiye Yang bdev_rbd_start_aio(bdev_io);
781533470b2SKonrad Sztyber break;
782533470b2SKonrad Sztyber
783533470b2SKonrad Sztyber case SPDK_BDEV_IO_TYPE_RESET:
784d9603a2fSZiye Yang spdk_thread_exec_msg(spdk_thread_get_app_thread(), bdev_rbd_reset, bdev_io);
785533470b2SKonrad Sztyber break;
786533470b2SKonrad Sztyber
787533470b2SKonrad Sztyber default:
788533470b2SKonrad Sztyber SPDK_ERRLOG("Unsupported IO type =%d\n", bdev_io->type);
789533470b2SKonrad Sztyber bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
790533470b2SKonrad Sztyber break;
791e1e73440SZiye Yang }
792e1e73440SZiye Yang }
793e1e73440SZiye Yang
79407fe6a43SSeth Howell static bool
bdev_rbd_io_type_supported(void * ctx,enum spdk_bdev_io_type io_type)79507fe6a43SSeth Howell bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
79607fe6a43SSeth Howell {
79707fe6a43SSeth Howell switch (io_type) {
79807fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_READ:
79907fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_WRITE:
8006d9822c1SJonas Pfefferle case SPDK_BDEV_IO_TYPE_UNMAP:
80107fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_FLUSH:
80207fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_RESET:
8038ccdd956SJonas Pfefferle case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
8049fe36c0dSJonas Pfefferle #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC
8059fe36c0dSJonas Pfefferle case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
8069fe36c0dSJonas Pfefferle #endif
80707fe6a43SSeth Howell return true;
80807fe6a43SSeth Howell
80907fe6a43SSeth Howell default:
81007fe6a43SSeth Howell return false;
81107fe6a43SSeth Howell }
81207fe6a43SSeth Howell }
81307fe6a43SSeth Howell
814e1e73440SZiye Yang static int
bdev_rbd_create_cb(void * io_device,void * ctx_buf)815e1e73440SZiye Yang bdev_rbd_create_cb(void *io_device, void *ctx_buf)
816e1e73440SZiye Yang {
817e1e73440SZiye Yang struct bdev_rbd_io_channel *ch = ctx_buf;
818e1e73440SZiye Yang struct bdev_rbd *disk = io_device;
819e1e73440SZiye Yang
820e1e73440SZiye Yang ch->disk = disk;
821c441e270SZiye Yang ch->group_ch = spdk_get_io_channel(&rbd_if);
822c441e270SZiye Yang assert(ch->group_ch != NULL);
823e1e73440SZiye Yang
824e1e73440SZiye Yang return 0;
825e1e73440SZiye Yang }
826e1e73440SZiye Yang
827e1e73440SZiye Yang static void
bdev_rbd_destroy_cb(void * io_device,void * ctx_buf)82807fe6a43SSeth Howell bdev_rbd_destroy_cb(void *io_device, void *ctx_buf)
82907fe6a43SSeth Howell {
830c441e270SZiye Yang struct bdev_rbd_io_channel *ch = ctx_buf;
8316cbbc682SZiye Yang
832c441e270SZiye Yang spdk_put_io_channel(ch->group_ch);
83307fe6a43SSeth Howell }
83407fe6a43SSeth Howell
/*
 * fn_table get_io_channel entry point.
 *
 * ctx is the struct bdev_rbd, which is also the io device the channel is
 * requested for. Returns whatever spdk_get_io_channel() returns.
 */
static struct spdk_io_channel *
bdev_rbd_get_io_channel(void *ctx)
{
	struct bdev_rbd *disk = ctx;

	return spdk_get_io_channel(disk);
}
84207fe6a43SSeth Howell
8435c016026SZiye Yang static void
bdev_rbd_cluster_dump_entry(const char * cluster_name,struct spdk_json_write_ctx * w)8445c016026SZiye Yang bdev_rbd_cluster_dump_entry(const char *cluster_name, struct spdk_json_write_ctx *w)
8455c016026SZiye Yang {
8465c016026SZiye Yang struct bdev_rbd_cluster *entry;
8475c016026SZiye Yang
8485c016026SZiye Yang pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
8495c016026SZiye Yang STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
850d33f4486SZiye Yang if (strcmp(cluster_name, entry->name)) {
8515c016026SZiye Yang continue;
8525c016026SZiye Yang }
8535c016026SZiye Yang if (entry->user_id) {
8545c016026SZiye Yang spdk_json_write_named_string(w, "user_id", entry->user_id);
8555c016026SZiye Yang }
8565c016026SZiye Yang
8575c016026SZiye Yang if (entry->config_param) {
8585c016026SZiye Yang char **config_entry = entry->config_param;
8595c016026SZiye Yang
8605c016026SZiye Yang spdk_json_write_named_object_begin(w, "config_param");
8615c016026SZiye Yang while (*config_entry) {
8625c016026SZiye Yang spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
8635c016026SZiye Yang config_entry += 2;
8645c016026SZiye Yang }
8655c016026SZiye Yang spdk_json_write_object_end(w);
866a79af5e7STan Long }
867a79af5e7STan Long if (entry->config_file) {
8685c016026SZiye Yang spdk_json_write_named_string(w, "config_file", entry->config_file);
8695c016026SZiye Yang }
87020c8a3b8STan Long if (entry->key_file) {
87120c8a3b8STan Long spdk_json_write_named_string(w, "key_file", entry->key_file);
87220c8a3b8STan Long }
8735c016026SZiye Yang
8745c016026SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
8755c016026SZiye Yang return;
8765c016026SZiye Yang }
8775c016026SZiye Yang
8785c016026SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
8795c016026SZiye Yang }
8805c016026SZiye Yang
88107fe6a43SSeth Howell static int
bdev_rbd_dump_info_json(void * ctx,struct spdk_json_write_ctx * w)88207fe6a43SSeth Howell bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
88307fe6a43SSeth Howell {
88407fe6a43SSeth Howell struct bdev_rbd *rbd_bdev = ctx;
88507fe6a43SSeth Howell
88607fe6a43SSeth Howell spdk_json_write_named_object_begin(w, "rbd");
88707fe6a43SSeth Howell
88807fe6a43SSeth Howell spdk_json_write_named_string(w, "pool_name", rbd_bdev->pool_name);
88907fe6a43SSeth Howell
89007fe6a43SSeth Howell spdk_json_write_named_string(w, "rbd_name", rbd_bdev->rbd_name);
89107fe6a43SSeth Howell
8925c016026SZiye Yang if (rbd_bdev->cluster_name) {
8935c016026SZiye Yang bdev_rbd_cluster_dump_entry(rbd_bdev->cluster_name, w);
8945c016026SZiye Yang goto end;
8955c016026SZiye Yang }
8965c016026SZiye Yang
89707fe6a43SSeth Howell if (rbd_bdev->user_id) {
89807fe6a43SSeth Howell spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id);
89907fe6a43SSeth Howell }
90007fe6a43SSeth Howell
90107fe6a43SSeth Howell if (rbd_bdev->config) {
90207fe6a43SSeth Howell char **entry = rbd_bdev->config;
90307fe6a43SSeth Howell
90407fe6a43SSeth Howell spdk_json_write_named_object_begin(w, "config");
90507fe6a43SSeth Howell while (*entry) {
90607fe6a43SSeth Howell spdk_json_write_named_string(w, entry[0], entry[1]);
90707fe6a43SSeth Howell entry += 2;
90807fe6a43SSeth Howell }
90907fe6a43SSeth Howell spdk_json_write_object_end(w);
91007fe6a43SSeth Howell }
91107fe6a43SSeth Howell
9125c016026SZiye Yang end:
91307fe6a43SSeth Howell spdk_json_write_object_end(w);
91407fe6a43SSeth Howell
91507fe6a43SSeth Howell return 0;
91607fe6a43SSeth Howell }
91707fe6a43SSeth Howell
91807fe6a43SSeth Howell static void
bdev_rbd_write_config_json(struct spdk_bdev * bdev,struct spdk_json_write_ctx * w)91907fe6a43SSeth Howell bdev_rbd_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
92007fe6a43SSeth Howell {
92107fe6a43SSeth Howell struct bdev_rbd *rbd = bdev->ctxt;
92207fe6a43SSeth Howell
92307fe6a43SSeth Howell spdk_json_write_object_begin(w);
92407fe6a43SSeth Howell
9258ac7cad4SMaciej Wawryk spdk_json_write_named_string(w, "method", "bdev_rbd_create");
92607fe6a43SSeth Howell
92707fe6a43SSeth Howell spdk_json_write_named_object_begin(w, "params");
92807fe6a43SSeth Howell spdk_json_write_named_string(w, "name", bdev->name);
92907fe6a43SSeth Howell spdk_json_write_named_string(w, "pool_name", rbd->pool_name);
93007fe6a43SSeth Howell spdk_json_write_named_string(w, "rbd_name", rbd->rbd_name);
93107fe6a43SSeth Howell spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
93207fe6a43SSeth Howell if (rbd->user_id) {
93307fe6a43SSeth Howell spdk_json_write_named_string(w, "user_id", rbd->user_id);
93407fe6a43SSeth Howell }
93507fe6a43SSeth Howell
93607fe6a43SSeth Howell if (rbd->config) {
93707fe6a43SSeth Howell char **entry = rbd->config;
93807fe6a43SSeth Howell
93907fe6a43SSeth Howell spdk_json_write_named_object_begin(w, "config");
94007fe6a43SSeth Howell while (*entry) {
94107fe6a43SSeth Howell spdk_json_write_named_string(w, entry[0], entry[1]);
94207fe6a43SSeth Howell entry += 2;
94307fe6a43SSeth Howell }
94407fe6a43SSeth Howell spdk_json_write_object_end(w);
94507fe6a43SSeth Howell }
94607fe6a43SSeth Howell
94769f9c9acSJim Harris spdk_json_write_named_uuid(w, "uuid", &bdev->uuid);
9488a154b2cStanlong
94907fe6a43SSeth Howell spdk_json_write_object_end(w);
95007fe6a43SSeth Howell
95107fe6a43SSeth Howell spdk_json_write_object_end(w);
95207fe6a43SSeth Howell }
95307fe6a43SSeth Howell
954c3cf9ec2SZiye Yang static void
dump_single_cluster_entry(struct bdev_rbd_cluster * entry,struct spdk_json_write_ctx * w)955c3cf9ec2SZiye Yang dump_single_cluster_entry(struct bdev_rbd_cluster *entry, struct spdk_json_write_ctx *w)
956c3cf9ec2SZiye Yang {
957c3cf9ec2SZiye Yang assert(entry != NULL);
958c3cf9ec2SZiye Yang
959c3cf9ec2SZiye Yang spdk_json_write_object_begin(w);
960c3cf9ec2SZiye Yang spdk_json_write_named_string(w, "cluster_name", entry->name);
961c3cf9ec2SZiye Yang
962c3cf9ec2SZiye Yang if (entry->user_id) {
963c3cf9ec2SZiye Yang spdk_json_write_named_string(w, "user_id", entry->user_id);
964c3cf9ec2SZiye Yang }
965c3cf9ec2SZiye Yang
966c3cf9ec2SZiye Yang if (entry->config_param) {
967c3cf9ec2SZiye Yang char **config_entry = entry->config_param;
968c3cf9ec2SZiye Yang
969c3cf9ec2SZiye Yang spdk_json_write_named_object_begin(w, "config_param");
970c3cf9ec2SZiye Yang while (*config_entry) {
971c3cf9ec2SZiye Yang spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
972c3cf9ec2SZiye Yang config_entry += 2;
973c3cf9ec2SZiye Yang }
974c3cf9ec2SZiye Yang spdk_json_write_object_end(w);
975a79af5e7STan Long }
976a79af5e7STan Long if (entry->config_file) {
977c3cf9ec2SZiye Yang spdk_json_write_named_string(w, "config_file", entry->config_file);
978c3cf9ec2SZiye Yang }
97920c8a3b8STan Long if (entry->key_file) {
98020c8a3b8STan Long spdk_json_write_named_string(w, "key_file", entry->key_file);
98120c8a3b8STan Long }
982c3cf9ec2SZiye Yang
983b618f056SYue-Zhu if (entry->core_mask) {
984b618f056SYue-Zhu spdk_json_write_named_string(w, "core_mask", entry->core_mask);
985b618f056SYue-Zhu }
986b618f056SYue-Zhu
987c3cf9ec2SZiye Yang spdk_json_write_object_end(w);
988c3cf9ec2SZiye Yang }
989c3cf9ec2SZiye Yang
990c3cf9ec2SZiye Yang int
bdev_rbd_get_clusters_info(struct spdk_jsonrpc_request * request,const char * name)991c3cf9ec2SZiye Yang bdev_rbd_get_clusters_info(struct spdk_jsonrpc_request *request, const char *name)
992c3cf9ec2SZiye Yang {
993c3cf9ec2SZiye Yang struct bdev_rbd_cluster *entry;
994c3cf9ec2SZiye Yang struct spdk_json_write_ctx *w;
995c3cf9ec2SZiye Yang
996c3cf9ec2SZiye Yang pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
997c3cf9ec2SZiye Yang
998c3cf9ec2SZiye Yang if (STAILQ_EMPTY(&g_map_bdev_rbd_cluster)) {
999c3cf9ec2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1000c3cf9ec2SZiye Yang return -ENOENT;
1001c3cf9ec2SZiye Yang }
1002c3cf9ec2SZiye Yang
1003c3cf9ec2SZiye Yang /* If cluster name is provided */
1004c3cf9ec2SZiye Yang if (name) {
1005c3cf9ec2SZiye Yang STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
1006c3cf9ec2SZiye Yang if (strcmp(name, entry->name) == 0) {
1007c3cf9ec2SZiye Yang w = spdk_jsonrpc_begin_result(request);
1008c3cf9ec2SZiye Yang dump_single_cluster_entry(entry, w);
1009c3cf9ec2SZiye Yang spdk_jsonrpc_end_result(request, w);
1010c3cf9ec2SZiye Yang
1011c3cf9ec2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1012c3cf9ec2SZiye Yang return 0;
1013c3cf9ec2SZiye Yang }
1014c3cf9ec2SZiye Yang }
1015c3cf9ec2SZiye Yang
1016c3cf9ec2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1017c3cf9ec2SZiye Yang return -ENOENT;
1018c3cf9ec2SZiye Yang }
1019c3cf9ec2SZiye Yang
1020c3cf9ec2SZiye Yang w = spdk_jsonrpc_begin_result(request);
1021c3cf9ec2SZiye Yang spdk_json_write_array_begin(w);
1022c3cf9ec2SZiye Yang STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
1023c3cf9ec2SZiye Yang dump_single_cluster_entry(entry, w);
1024c3cf9ec2SZiye Yang }
1025c3cf9ec2SZiye Yang spdk_json_write_array_end(w);
1026c3cf9ec2SZiye Yang spdk_jsonrpc_end_result(request, w);
1027c3cf9ec2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1028c3cf9ec2SZiye Yang
1029c3cf9ec2SZiye Yang return 0;
1030c3cf9ec2SZiye Yang }
1031c3cf9ec2SZiye Yang
103207fe6a43SSeth Howell static const struct spdk_bdev_fn_table rbd_fn_table = {
103307fe6a43SSeth Howell .destruct = bdev_rbd_destruct,
103407fe6a43SSeth Howell .submit_request = bdev_rbd_submit_request,
103507fe6a43SSeth Howell .io_type_supported = bdev_rbd_io_type_supported,
103607fe6a43SSeth Howell .get_io_channel = bdev_rbd_get_io_channel,
103707fe6a43SSeth Howell .dump_info_json = bdev_rbd_dump_info_json,
103807fe6a43SSeth Howell .write_config_json = bdev_rbd_write_config_json,
103907fe6a43SSeth Howell };
104007fe6a43SSeth Howell
1041e18eaee2SZiye Yang static int
rbd_thread_set_cpumask(struct spdk_cpuset * set)1042b618f056SYue-Zhu rbd_thread_set_cpumask(struct spdk_cpuset *set)
1043b618f056SYue-Zhu {
1044b618f056SYue-Zhu #ifdef __linux__
1045b618f056SYue-Zhu uint32_t lcore;
1046b618f056SYue-Zhu cpu_set_t mask;
1047b618f056SYue-Zhu
1048b618f056SYue-Zhu assert(set != NULL);
1049b618f056SYue-Zhu CPU_ZERO(&mask);
1050b618f056SYue-Zhu
1051b618f056SYue-Zhu /* get the core id on current spdk_cpuset and set to cpu_set_t */
1052b618f056SYue-Zhu for (lcore = 0; lcore < SPDK_CPUSET_SIZE; lcore++) {
1053b618f056SYue-Zhu if (spdk_cpuset_get_cpu(set, lcore)) {
1054b618f056SYue-Zhu CPU_SET(lcore, &mask);
1055b618f056SYue-Zhu }
1056b618f056SYue-Zhu }
1057b618f056SYue-Zhu
1058b618f056SYue-Zhu /* change current thread core mask */
1059b618f056SYue-Zhu if (sched_setaffinity(0, sizeof(mask), &mask) < 0) {
1060b618f056SYue-Zhu SPDK_ERRLOG("Set non SPDK thread cpu mask error (errno=%d)\n", errno);
1061b618f056SYue-Zhu return -1;
1062b618f056SYue-Zhu }
1063b618f056SYue-Zhu
1064b618f056SYue-Zhu return 0;
1065b618f056SYue-Zhu #else
1066b618f056SYue-Zhu SPDK_ERRLOG("SPDK non spdk thread cpumask setup supports only Linux platform now.\n");
1067b618f056SYue-Zhu return -ENOTSUP;
1068b618f056SYue-Zhu #endif
1069b618f056SYue-Zhu }
1070b618f056SYue-Zhu
1071b618f056SYue-Zhu
1072b618f056SYue-Zhu static int
rbd_register_cluster(const char * name,const char * user_id,const char * const * config_param,const char * config_file,const char * key_file,const char * core_mask)1073e18eaee2SZiye Yang rbd_register_cluster(const char *name, const char *user_id, const char *const *config_param,
1074b618f056SYue-Zhu const char *config_file, const char *key_file, const char *core_mask)
1075e18eaee2SZiye Yang {
1076e18eaee2SZiye Yang struct bdev_rbd_cluster *entry;
1077b618f056SYue-Zhu struct spdk_cpuset rbd_core_mask = {};
1078e18eaee2SZiye Yang int rc;
1079e18eaee2SZiye Yang
1080e18eaee2SZiye Yang pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
1081e18eaee2SZiye Yang STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
1082d33f4486SZiye Yang if (strcmp(name, entry->name) == 0) {
1083e18eaee2SZiye Yang SPDK_ERRLOG("Cluster name=%s already exists\n", name);
1084e18eaee2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1085e18eaee2SZiye Yang return -1;
1086e18eaee2SZiye Yang }
1087e18eaee2SZiye Yang }
1088e18eaee2SZiye Yang
1089e18eaee2SZiye Yang entry = calloc(1, sizeof(*entry));
1090e18eaee2SZiye Yang if (!entry) {
1091e18eaee2SZiye Yang SPDK_ERRLOG("Cannot allocate an entry for name=%s\n", name);
1092e18eaee2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1093e18eaee2SZiye Yang return -1;
1094e18eaee2SZiye Yang }
1095e18eaee2SZiye Yang
1096e18eaee2SZiye Yang entry->name = strdup(name);
1097e18eaee2SZiye Yang if (entry->name == NULL) {
1098e18eaee2SZiye Yang SPDK_ERRLOG("Failed to save the name =%s on entry =%p\n", name, entry);
1099e18eaee2SZiye Yang goto err_handle;
1100e18eaee2SZiye Yang }
1101e18eaee2SZiye Yang
1102e18eaee2SZiye Yang if (user_id) {
1103e18eaee2SZiye Yang entry->user_id = strdup(user_id);
1104e18eaee2SZiye Yang if (entry->user_id == NULL) {
1105e18eaee2SZiye Yang SPDK_ERRLOG("Failed to save the str =%s on entry =%p\n", user_id, entry);
1106e18eaee2SZiye Yang goto err_handle;
1107e18eaee2SZiye Yang }
1108e18eaee2SZiye Yang }
1109e18eaee2SZiye Yang
1110a79af5e7STan Long /* Support specify config_param or config_file separately, or both of them. */
1111e18eaee2SZiye Yang if (config_param) {
1112e18eaee2SZiye Yang entry->config_param = bdev_rbd_dup_config(config_param);
1113e18eaee2SZiye Yang if (entry->config_param == NULL) {
1114e18eaee2SZiye Yang SPDK_ERRLOG("Failed to save the config_param=%p on entry = %p\n", config_param, entry);
1115e18eaee2SZiye Yang goto err_handle;
1116e18eaee2SZiye Yang }
1117a79af5e7STan Long }
1118a79af5e7STan Long
1119a79af5e7STan Long if (config_file) {
1120e18eaee2SZiye Yang entry->config_file = strdup(config_file);
1121e18eaee2SZiye Yang if (entry->config_file == NULL) {
1122e18eaee2SZiye Yang SPDK_ERRLOG("Failed to save the config_file=%s on entry = %p\n", config_file, entry);
1123e18eaee2SZiye Yang goto err_handle;
1124e18eaee2SZiye Yang }
1125e18eaee2SZiye Yang }
1126e18eaee2SZiye Yang
112720c8a3b8STan Long if (key_file) {
112820c8a3b8STan Long entry->key_file = strdup(key_file);
112920c8a3b8STan Long if (entry->key_file == NULL) {
113020c8a3b8STan Long SPDK_ERRLOG("Failed to save the key_file=%s on entry = %p\n", key_file, entry);
113120c8a3b8STan Long goto err_handle;
113220c8a3b8STan Long }
113320c8a3b8STan Long }
113420c8a3b8STan Long
1135b618f056SYue-Zhu if (core_mask) {
1136b618f056SYue-Zhu entry->core_mask = strdup(core_mask);
1137b618f056SYue-Zhu if (entry->core_mask == NULL) {
1138b618f056SYue-Zhu SPDK_ERRLOG("Core_mask=%s allocation failed on entry = %p\n", core_mask, entry);
1139b618f056SYue-Zhu goto err_handle;
1140b618f056SYue-Zhu }
1141b618f056SYue-Zhu
1142b618f056SYue-Zhu if (spdk_cpuset_parse(&rbd_core_mask, entry->core_mask) < 0) {
1143b618f056SYue-Zhu SPDK_ERRLOG("Invalid cpumask=%s on entry = %p\n", entry->core_mask, entry);
1144b618f056SYue-Zhu goto err_handle;
1145b618f056SYue-Zhu }
1146b618f056SYue-Zhu
1147b618f056SYue-Zhu if (rbd_thread_set_cpumask(&rbd_core_mask) < 0) {
1148b618f056SYue-Zhu SPDK_ERRLOG("Failed to change rbd threads to core_mask %s on entry = %p\n", core_mask, entry);
1149b618f056SYue-Zhu goto err_handle;
1150b618f056SYue-Zhu }
1151b618f056SYue-Zhu }
1152b618f056SYue-Zhu
1153b618f056SYue-Zhu
1154b618f056SYue-Zhu /* If rbd thread core mask is given, rados_create() must execute with
1155b618f056SYue-Zhu * the affinity set by rbd_thread_set_cpumask(). The affinity set
1156b618f056SYue-Zhu * by rbd_thread_set_cpumask() will be reverted once rbd_register_cluster() returns
1157b618f056SYue-Zhu * and when we leave the spdk_call_unaffinitized context. */
1158e18eaee2SZiye Yang rc = rados_create(&entry->cluster, user_id);
1159e18eaee2SZiye Yang if (rc < 0) {
1160e18eaee2SZiye Yang SPDK_ERRLOG("Failed to create rados_t struct\n");
1161e18eaee2SZiye Yang goto err_handle;
1162e18eaee2SZiye Yang }
1163e18eaee2SZiye Yang
1164a79af5e7STan Long /* Try default location when entry->config_file is NULL, but ignore failure when it is NULL */
1165a79af5e7STan Long rc = rados_conf_read_file(entry->cluster, entry->config_file);
1166a79af5e7STan Long if (entry->config_file && rc < 0) {
1167a79af5e7STan Long SPDK_ERRLOG("Failed to read conf file %s\n", entry->config_file);
1168a79af5e7STan Long rados_shutdown(entry->cluster);
1169a79af5e7STan Long goto err_handle;
1170a79af5e7STan Long }
1171a79af5e7STan Long
1172e18eaee2SZiye Yang if (config_param) {
1173e18eaee2SZiye Yang const char *const *config_entry = config_param;
1174e18eaee2SZiye Yang while (*config_entry) {
1175e18eaee2SZiye Yang rc = rados_conf_set(entry->cluster, config_entry[0], config_entry[1]);
1176e18eaee2SZiye Yang if (rc < 0) {
1177e18eaee2SZiye Yang SPDK_ERRLOG("Failed to set %s = %s\n", config_entry[0], config_entry[1]);
1178e18eaee2SZiye Yang rados_shutdown(entry->cluster);
1179e18eaee2SZiye Yang goto err_handle;
1180e18eaee2SZiye Yang }
1181e18eaee2SZiye Yang config_entry += 2;
1182e18eaee2SZiye Yang }
1183e18eaee2SZiye Yang }
1184e18eaee2SZiye Yang
118520c8a3b8STan Long if (key_file) {
118620c8a3b8STan Long rc = rados_conf_set(entry->cluster, "keyring", key_file);
118720c8a3b8STan Long if (rc < 0) {
118820c8a3b8STan Long SPDK_ERRLOG("Failed to set keyring = %s\n", key_file);
118920c8a3b8STan Long rados_shutdown(entry->cluster);
119020c8a3b8STan Long goto err_handle;
119120c8a3b8STan Long }
119220c8a3b8STan Long }
119320c8a3b8STan Long
1194e18eaee2SZiye Yang rc = rados_connect(entry->cluster);
1195e18eaee2SZiye Yang if (rc < 0) {
1196e18eaee2SZiye Yang SPDK_ERRLOG("Failed to connect to rbd_pool on cluster=%p\n", entry->cluster);
1197e18eaee2SZiye Yang rados_shutdown(entry->cluster);
1198e18eaee2SZiye Yang goto err_handle;
1199e18eaee2SZiye Yang }
1200e18eaee2SZiye Yang
1201e18eaee2SZiye Yang STAILQ_INSERT_TAIL(&g_map_bdev_rbd_cluster, entry, link);
1202e18eaee2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1203e18eaee2SZiye Yang
1204e18eaee2SZiye Yang return 0;
1205e18eaee2SZiye Yang
1206e18eaee2SZiye Yang err_handle:
1207e18eaee2SZiye Yang bdev_rbd_cluster_free(entry);
1208e18eaee2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1209e18eaee2SZiye Yang return -1;
1210e18eaee2SZiye Yang }
1211e18eaee2SZiye Yang
1212e18eaee2SZiye Yang int
bdev_rbd_unregister_cluster(const char * name)1213e18eaee2SZiye Yang bdev_rbd_unregister_cluster(const char *name)
1214e18eaee2SZiye Yang {
1215e18eaee2SZiye Yang struct bdev_rbd_cluster *entry;
1216e18eaee2SZiye Yang int rc = 0;
1217e18eaee2SZiye Yang
1218e18eaee2SZiye Yang if (name == NULL) {
1219e18eaee2SZiye Yang return -1;
1220e18eaee2SZiye Yang }
1221e18eaee2SZiye Yang
1222e18eaee2SZiye Yang pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
1223e18eaee2SZiye Yang STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
1224d33f4486SZiye Yang if (strcmp(name, entry->name) == 0) {
1225e18eaee2SZiye Yang if (entry->ref == 0) {
1226e18eaee2SZiye Yang STAILQ_REMOVE(&g_map_bdev_rbd_cluster, entry, bdev_rbd_cluster, link);
1227e18eaee2SZiye Yang rados_shutdown(entry->cluster);
1228e18eaee2SZiye Yang bdev_rbd_cluster_free(entry);
1229e18eaee2SZiye Yang } else {
1230e18eaee2SZiye Yang SPDK_ERRLOG("Cluster with name=%p is still used and we cannot delete it\n",
1231e18eaee2SZiye Yang entry->name);
1232e18eaee2SZiye Yang rc = -1;
1233e18eaee2SZiye Yang }
1234e18eaee2SZiye Yang
1235e18eaee2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1236e18eaee2SZiye Yang return rc;
1237e18eaee2SZiye Yang }
1238e18eaee2SZiye Yang }
1239e18eaee2SZiye Yang
1240e18eaee2SZiye Yang pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
1241e18eaee2SZiye Yang
1242e18eaee2SZiye Yang SPDK_ERRLOG("Could not find the cluster name =%p\n", name);
1243e18eaee2SZiye Yang
1244e18eaee2SZiye Yang return -1;
1245e18eaee2SZiye Yang }
1246e18eaee2SZiye Yang
1247e18eaee2SZiye Yang static void *
_bdev_rbd_register_cluster(void * arg)1248e18eaee2SZiye Yang _bdev_rbd_register_cluster(void *arg)
1249e18eaee2SZiye Yang {
1250e18eaee2SZiye Yang struct cluster_register_info *info = arg;
1251e18eaee2SZiye Yang void *ret = arg;
1252e18eaee2SZiye Yang int rc;
1253e18eaee2SZiye Yang
1254e18eaee2SZiye Yang rc = rbd_register_cluster((const char *)info->name, (const char *)info->user_id,
125520c8a3b8STan Long (const char *const *)info->config_param, (const char *)info->config_file,
1256b618f056SYue-Zhu (const char *)info->key_file, info->core_mask);
1257e18eaee2SZiye Yang if (rc) {
1258e18eaee2SZiye Yang ret = NULL;
1259e18eaee2SZiye Yang }
1260e18eaee2SZiye Yang
1261e18eaee2SZiye Yang return ret;
1262e18eaee2SZiye Yang }
1263e18eaee2SZiye Yang
1264e18eaee2SZiye Yang int
bdev_rbd_register_cluster(struct cluster_register_info * info)1265e18eaee2SZiye Yang bdev_rbd_register_cluster(struct cluster_register_info *info)
1266e18eaee2SZiye Yang {
1267e18eaee2SZiye Yang assert(info != NULL);
1268e18eaee2SZiye Yang
1269e18eaee2SZiye Yang /* Rados cluster info need to be created in non SPDK-thread to avoid CPU
1270e18eaee2SZiye Yang * resource contention */
1271e18eaee2SZiye Yang if (spdk_call_unaffinitized(_bdev_rbd_register_cluster, info) == NULL) {
1272e18eaee2SZiye Yang return -1;
1273e18eaee2SZiye Yang }
1274e18eaee2SZiye Yang
1275e18eaee2SZiye Yang return 0;
1276e18eaee2SZiye Yang }
1277e18eaee2SZiye Yang
127807fe6a43SSeth Howell int
bdev_rbd_create(struct spdk_bdev ** bdev,const char * name,const char * user_id,const char * pool_name,const char * const * config,const char * rbd_name,uint32_t block_size,const char * cluster_name,const struct spdk_uuid * uuid)1279289e0f59SSeth Howell bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
128007fe6a43SSeth Howell const char *pool_name,
128107fe6a43SSeth Howell const char *const *config,
128207fe6a43SSeth Howell const char *rbd_name,
12835c016026SZiye Yang uint32_t block_size,
12848a154b2cStanlong const char *cluster_name,
12858a154b2cStanlong const struct spdk_uuid *uuid)
128607fe6a43SSeth Howell {
128707fe6a43SSeth Howell struct bdev_rbd *rbd;
128807fe6a43SSeth Howell int ret;
128907fe6a43SSeth Howell
1290*b0a5cb96SSebastian Brzezinka if ((pool_name == NULL) || (rbd_name == NULL) || (block_size == 0)) {
129107fe6a43SSeth Howell return -EINVAL;
129207fe6a43SSeth Howell }
129307fe6a43SSeth Howell
129407fe6a43SSeth Howell rbd = calloc(1, sizeof(struct bdev_rbd));
129507fe6a43SSeth Howell if (rbd == NULL) {
129607fe6a43SSeth Howell SPDK_ERRLOG("Failed to allocate bdev_rbd struct\n");
129707fe6a43SSeth Howell return -ENOMEM;
129807fe6a43SSeth Howell }
129907fe6a43SSeth Howell
130007fe6a43SSeth Howell rbd->rbd_name = strdup(rbd_name);
130107fe6a43SSeth Howell if (!rbd->rbd_name) {
130207fe6a43SSeth Howell bdev_rbd_free(rbd);
130307fe6a43SSeth Howell return -ENOMEM;
130407fe6a43SSeth Howell }
130507fe6a43SSeth Howell
130607fe6a43SSeth Howell if (user_id) {
130707fe6a43SSeth Howell rbd->user_id = strdup(user_id);
130807fe6a43SSeth Howell if (!rbd->user_id) {
130907fe6a43SSeth Howell bdev_rbd_free(rbd);
131007fe6a43SSeth Howell return -ENOMEM;
131107fe6a43SSeth Howell }
131207fe6a43SSeth Howell }
131307fe6a43SSeth Howell
13145c016026SZiye Yang if (cluster_name) {
13155c016026SZiye Yang rbd->cluster_name = strdup(cluster_name);
13165c016026SZiye Yang if (!rbd->cluster_name) {
13175c016026SZiye Yang bdev_rbd_free(rbd);
13185c016026SZiye Yang return -ENOMEM;
13195c016026SZiye Yang }
13205c016026SZiye Yang }
132107fe6a43SSeth Howell rbd->pool_name = strdup(pool_name);
132207fe6a43SSeth Howell if (!rbd->pool_name) {
132307fe6a43SSeth Howell bdev_rbd_free(rbd);
132407fe6a43SSeth Howell return -ENOMEM;
132507fe6a43SSeth Howell }
132607fe6a43SSeth Howell
1327289e0f59SSeth Howell if (config && !(rbd->config = bdev_rbd_dup_config(config))) {
132807fe6a43SSeth Howell bdev_rbd_free(rbd);
132907fe6a43SSeth Howell return -ENOMEM;
133007fe6a43SSeth Howell }
133107fe6a43SSeth Howell
1332467148fcSZiye Yang ret = bdev_rbd_init(rbd);
133307fe6a43SSeth Howell if (ret < 0) {
133407fe6a43SSeth Howell bdev_rbd_free(rbd);
133507fe6a43SSeth Howell SPDK_ERRLOG("Failed to init rbd device\n");
133607fe6a43SSeth Howell return ret;
133707fe6a43SSeth Howell }
133807fe6a43SSeth Howell
13398a154b2cStanlong rbd->disk.uuid = *uuid;
134007fe6a43SSeth Howell if (name) {
134107fe6a43SSeth Howell rbd->disk.name = strdup(name);
134207fe6a43SSeth Howell } else {
134307fe6a43SSeth Howell rbd->disk.name = spdk_sprintf_alloc("Ceph%d", bdev_rbd_count);
134407fe6a43SSeth Howell }
134507fe6a43SSeth Howell if (!rbd->disk.name) {
134607fe6a43SSeth Howell bdev_rbd_free(rbd);
134707fe6a43SSeth Howell return -ENOMEM;
134807fe6a43SSeth Howell }
134907fe6a43SSeth Howell rbd->disk.product_name = "Ceph Rbd Disk";
135007fe6a43SSeth Howell bdev_rbd_count++;
135107fe6a43SSeth Howell
135207fe6a43SSeth Howell rbd->disk.write_cache = 0;
135307fe6a43SSeth Howell rbd->disk.blocklen = block_size;
135407fe6a43SSeth Howell rbd->disk.blockcnt = rbd->info.size / rbd->disk.blocklen;
135507fe6a43SSeth Howell rbd->disk.ctxt = rbd;
135607fe6a43SSeth Howell rbd->disk.fn_table = &rbd_fn_table;
135707fe6a43SSeth Howell rbd->disk.module = &rbd_if;
135807fe6a43SSeth Howell
135907fe6a43SSeth Howell SPDK_NOTICELOG("Add %s rbd disk to lun\n", rbd->disk.name);
136007fe6a43SSeth Howell
136107fe6a43SSeth Howell spdk_io_device_register(rbd, bdev_rbd_create_cb,
136207fe6a43SSeth Howell bdev_rbd_destroy_cb,
136307fe6a43SSeth Howell sizeof(struct bdev_rbd_io_channel),
136407fe6a43SSeth Howell rbd_name);
136507fe6a43SSeth Howell ret = spdk_bdev_register(&rbd->disk);
136607fe6a43SSeth Howell if (ret) {
136707fe6a43SSeth Howell spdk_io_device_unregister(rbd, NULL);
136807fe6a43SSeth Howell bdev_rbd_free(rbd);
136907fe6a43SSeth Howell return ret;
137007fe6a43SSeth Howell }
137107fe6a43SSeth Howell
137207fe6a43SSeth Howell *bdev = &(rbd->disk);
137307fe6a43SSeth Howell
137407fe6a43SSeth Howell return ret;
137507fe6a43SSeth Howell }
137607fe6a43SSeth Howell
137707fe6a43SSeth Howell void
bdev_rbd_delete(const char * name,spdk_delete_rbd_complete cb_fn,void * cb_arg)13784573e4ccSShuhei Matsumoto bdev_rbd_delete(const char *name, spdk_delete_rbd_complete cb_fn, void *cb_arg)
137907fe6a43SSeth Howell {
13804573e4ccSShuhei Matsumoto int rc;
138107fe6a43SSeth Howell
13824573e4ccSShuhei Matsumoto rc = spdk_bdev_unregister_by_name(name, &rbd_if, cb_fn, cb_arg);
13834573e4ccSShuhei Matsumoto if (rc != 0) {
13844573e4ccSShuhei Matsumoto cb_fn(cb_arg, rc);
13854573e4ccSShuhei Matsumoto }
138607fe6a43SSeth Howell }
138707fe6a43SSeth Howell
1388e3584976SShuhei Matsumoto static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type,struct spdk_bdev * bdev,void * ctx)1389e3584976SShuhei Matsumoto dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
13906a29c6a9SLiang Yan {
1391e3584976SShuhei Matsumoto }
1392e3584976SShuhei Matsumoto
1393e3584976SShuhei Matsumoto int
bdev_rbd_resize(const char * name,const uint64_t new_size_in_mb)1394e3584976SShuhei Matsumoto bdev_rbd_resize(const char *name, const uint64_t new_size_in_mb)
1395e3584976SShuhei Matsumoto {
1396e3584976SShuhei Matsumoto struct spdk_bdev_desc *desc;
1397e3584976SShuhei Matsumoto struct spdk_bdev *bdev;
1398a7cfb63aSKonrad Sztyber struct bdev_rbd *rbd;
1399e3584976SShuhei Matsumoto int rc = 0;
14006a29c6a9SLiang Yan uint64_t new_size_in_byte;
14016a29c6a9SLiang Yan uint64_t current_size_in_mb;
14026a29c6a9SLiang Yan
1403e3584976SShuhei Matsumoto rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
1404e3584976SShuhei Matsumoto if (rc != 0) {
1405e3584976SShuhei Matsumoto return rc;
1406e3584976SShuhei Matsumoto }
1407e3584976SShuhei Matsumoto
1408e3584976SShuhei Matsumoto bdev = spdk_bdev_desc_get_bdev(desc);
1409e3584976SShuhei Matsumoto
14106a29c6a9SLiang Yan if (bdev->module != &rbd_if) {
1411e3584976SShuhei Matsumoto rc = -EINVAL;
1412e3584976SShuhei Matsumoto goto exit;
14136a29c6a9SLiang Yan }
14146a29c6a9SLiang Yan
14156a29c6a9SLiang Yan current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
14166a29c6a9SLiang Yan if (current_size_in_mb > new_size_in_mb) {
1417e3584976SShuhei Matsumoto SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n");
1418e3584976SShuhei Matsumoto rc = -EINVAL;
1419e3584976SShuhei Matsumoto goto exit;
14206a29c6a9SLiang Yan }
14216a29c6a9SLiang Yan
1422a7cfb63aSKonrad Sztyber rbd = SPDK_CONTAINEROF(bdev, struct bdev_rbd, disk);
14236a29c6a9SLiang Yan new_size_in_byte = new_size_in_mb * 1024 * 1024;
1424a7cfb63aSKonrad Sztyber rc = rbd_resize(rbd->image, new_size_in_byte);
14256a29c6a9SLiang Yan if (rc != 0) {
14266a29c6a9SLiang Yan SPDK_ERRLOG("failed to resize the ceph bdev.\n");
1427e3584976SShuhei Matsumoto goto exit;
14286a29c6a9SLiang Yan }
14296a29c6a9SLiang Yan
14306a29c6a9SLiang Yan rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
14316a29c6a9SLiang Yan if (rc != 0) {
14326a29c6a9SLiang Yan SPDK_ERRLOG("failed to notify block cnt change.\n");
14336a29c6a9SLiang Yan }
14346a29c6a9SLiang Yan
1435e3584976SShuhei Matsumoto exit:
1436e3584976SShuhei Matsumoto spdk_bdev_close(desc);
14376a29c6a9SLiang Yan return rc;
14386a29c6a9SLiang Yan }
14396a29c6a9SLiang Yan
/*
 * Create callback for the rbd_if io_device registered in
 * bdev_rbd_library_init(). It does no setup and always returns 0.
 */
static int
bdev_rbd_group_create_cb(void *io_device, void *ctx_buf)
{
	(void)io_device;
	(void)ctx_buf;

	return 0;
}
14456cbbc682SZiye Yang
/*
 * Destroy callback for the rbd_if io_device registered in
 * bdev_rbd_library_init(). It does nothing.
 */
static void
bdev_rbd_group_destroy_cb(void *io_device, void *ctx_buf)
{
	/* Intentionally empty. */
	(void)io_device;
	(void)ctx_buf;
}
14506cbbc682SZiye Yang
14516cbbc682SZiye Yang static int
bdev_rbd_library_init(void)145207fe6a43SSeth Howell bdev_rbd_library_init(void)
145307fe6a43SSeth Howell {
14546cbbc682SZiye Yang spdk_io_device_register(&rbd_if, bdev_rbd_group_create_cb, bdev_rbd_group_destroy_cb,
145562210effSZiye Yang 0, "bdev_rbd_poll_groups");
1456269efef8STomasz Zawadzki return 0;
145707fe6a43SSeth Howell }
145807fe6a43SSeth Howell
14596cbbc682SZiye Yang static void
bdev_rbd_library_fini(void)14606cbbc682SZiye Yang bdev_rbd_library_fini(void)
14616cbbc682SZiye Yang {
14626cbbc682SZiye Yang spdk_io_device_unregister(&rbd_if, NULL);
14636cbbc682SZiye Yang }
14646cbbc682SZiye Yang
/* Register "bdev_rbd" as a log component; the macro presumably comes from spdk/log.h, included above. */
14652172c432STomasz Zawadzki SPDK_LOG_REGISTER_COMPONENT(bdev_rbd)
1466