diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 24598ab68cf..61f07a46a1d 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -884,14 +884,62 @@ enum { }; static bool -rebuild_is_stoppable(struct rebuild_global_pool_tracker *rgt, bool force) +rebuild_is_stoppable(struct rebuild_global_pool_tracker *rgt, bool force, int *rcp) { - if ((rgt->rgt_opc == RB_OP_REBUILD) || (rgt->rgt_opc == RB_OP_UPGRADE)) + /* NAK if nothing is rebuilding */ + if (rgt == NULL) { + *rcp = -DER_NONEXIST; + return false; + } + + /* NAK if another rebuild is queued for the same pool (it would run after this one stopped) + */ + if (!d_list_empty(&rebuild_gst.rg_queue_list)) { + struct rebuild_task *task; + + d_list_for_each_entry(task, &rebuild_gst.rg_queue_list, dst_list) { + if (uuid_compare(task->dst_pool_uuid, rgt->rgt_pool_uuid) == 0) { + *rcp = -DER_NO_PERM; + return false; + } + } + } + + if ((rgt->rgt_opc == RB_OP_REBUILD) || (rgt->rgt_opc == RB_OP_UPGRADE)) { + *rcp = 0; return true; + } - if ((rgt->rgt_opc == RB_OP_FAIL_RECLAIM) && force && (rgt->rgt_num_op_freclaim_fail > 0)) + /* Defer stop for many Fail_reclaim cases (until after it finishes). Do not return errors. 
+ * Only allow force-stop of repeating failures in Fail_reclaim + */ + if (rgt->rgt_opc == RB_OP_FAIL_RECLAIM && force) { + if (rgt->rgt_num_op_freclaim_fail == 0) { + D_INFO(DF_RB + ": cannot force-stop op:Fail_reclaim with 0 failures - defer stop " + "until after it finishes\n", + DP_RB_RGT(rgt)); + *rcp = 0; + return false; + } + D_INFO(DF_RB ": force-stop in op:Fail_reclaim after %u failures\n", DP_RB_RGT(rgt), + rgt->rgt_num_op_freclaim_fail); + *rcp = 0; return true; + } else if (rgt->rgt_opc == RB_OP_FAIL_RECLAIM) { + D_INFO(DF_RB ": defer stop until after op:Fail_reclaim finishes\n", DP_RB_RGT(rgt)); + *rcp = 0; + return false; + } + + /* NAK if this rebuild is Reclaim (i.e., it's effectively done) */ + if (rgt->rgt_opc == RB_OP_RECLAIM) { + *rcp = -DER_BUSY; + return false; + } + /* Not expected */ + *rcp = -DER_MISC; return false; } @@ -900,34 +948,35 @@ int ds_rebuild_admin_stop(struct ds_pool *pool, uint32_t force) { struct rebuild_global_pool_tracker *rgt; + int rc = 0; /* look up the running rebuild and mark it as aborted (and by the administrator) */ rgt = rebuild_global_pool_tracker_lookup(pool->sp_uuid, -1 /* ver */, -1 /* gen */); - if (rgt == NULL) { - /* nothing running, make it a no-op */ - D_INFO(DF_UUID ": received request to stop rebuild - but nothing found to stop\n", - DP_UUID(pool->sp_uuid)); - return 0; - } - /* admin stop command does not terminate reclaim/fail_reclaim jobs (unless forced) */ - if (rebuild_is_stoppable(rgt, force)) { + /* admin stop command only for specific cases (and force option for failing op:Fail_reclaim) + */ + if (rebuild_is_stoppable(rgt, force, &rc)) { D_INFO(DF_RB ": stopping rebuild force=%u opc %u(%s)\n", DP_RB_RGT(rgt), force, rgt->rgt_opc, RB_OP_STR(rgt->rgt_opc)); rgt->rgt_abort = 1; rgt->rgt_status.rs_errno = -DER_OP_CANCELED; } else { - D_INFO(DF_RB ": NOT stopping rebuild during opc %u(%s)\n", DP_RB_RGT(rgt), - rgt->rgt_opc, RB_OP_STR(rgt->rgt_opc)); + if (rgt) { + D_INFO(DF_RB ": NOT stopping rebuild 
force=%u opc %u(%s), rc=%d\n", + DP_RB_RGT(rgt), force, rgt->rgt_opc, RB_OP_STR(rgt->rgt_opc), rc); + } else { + DL_INFO(rc, DF_UUID ": nothing found to stop", DP_UUID(pool->sp_uuid)); + return rc; + } } - /* admin stop command does not terminate op:Fail_reclaim, but it is remembered to avoid - * retrying the original op:Rebuild. + /* admin stop command does not usually terminate op:Fail_reclaim, but it is always + * remembered to avoid retrying the original op:Rebuild. */ if (rgt->rgt_abort || (rgt->rgt_opc == RB_OP_FAIL_RECLAIM)) rgt->rgt_stop_admin = 1; rgt_put(rgt); - return 0; + return rc; } /* diff --git a/src/tests/suite/daos_rebuild_common.c b/src/tests/suite/daos_rebuild_common.c index 8407d4af5b0..b0817efdb15 100644 --- a/src/tests/suite/daos_rebuild_common.c +++ b/src/tests/suite/daos_rebuild_common.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1246,8 +1246,7 @@ rebuild_stop_with_dmg_internal(const char *cfg, const uuid_t uuid, const char *g rc = dmg_pool_rebuild_stop(cfg, uuid, grp, force); print_message("dmg pool rebuild stop " DF_UUID ", force=%d, rc=%d\n", DP_UUID(uuid), force, rc); - assert_rc_equal(rc, 0); - return 0; + return rc; } /* stop an in-progress rebuild with dmg pool rebuild stop command */ @@ -1255,14 +1254,18 @@ int rebuild_stop_with_dmg(void *data) { test_arg_t *arg = data; + int rc; - print_message("(before stopping) wait for rebuild to start for pool " DF_UUID "\n", - DP_UUID(arg->pool.pool_uuid)); - test_rebuild_wait_to_start(&arg, 1); - sleep(4); - - return rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, arg->group, - false); + /* Rebuild might be only queued (not yet launched) */ + while (true) { + rc = rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, + arg->group, false); + if (rc 
!= -DER_NONEXIST) + break; + print_message("waiting for stop command to run during active rebuild ...\n"); + sleep(1); + } + return rc; } /* stop an in-progress rebuild with dmg pool rebuild stop command (force stop option) */ @@ -1270,14 +1273,18 @@ int rebuild_force_stop_with_dmg(void *data) { test_arg_t *arg = data; + int rc; - print_message("(before stopping) wait for rebuild to start for pool " DF_UUID "\n", - DP_UUID(arg->pool.pool_uuid)); - test_rebuild_wait_to_start(&arg, 1); - sleep(5); - - return rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, arg->group, - true); + /* Rebuild might be only queued (not yet launched) */ + while (true) { + rc = rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, + arg->group, true); + if (rc != -DER_NONEXIST) + break; + print_message("waiting for force-stop command to run during active rebuild ...\n"); + sleep(1); + } + return rc; } /* start/reesume a stopped rebuild with dmg pool rebuild start command */ @@ -1323,7 +1330,7 @@ rebuild_resume_wait_to_start(void *data) rc = rebuild_start_with_dmg(data); assert_rc_equal(rc, 0); - /* Verify that the rebuild is no longer stopped (has been restarted). */ + /* Verify that the current rebuild is no longer stopped (has been restarted). 
*/ test_rebuild_wait_to_start(&arg, 1); return 0; diff --git a/src/tests/suite/daos_rebuild_interactive.c b/src/tests/suite/daos_rebuild_interactive.c index ea4dc200ffb..dd2607eb583 100644 --- a/src/tests/suite/daos_rebuild_interactive.c +++ b/src/tests/suite/daos_rebuild_interactive.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -101,7 +101,6 @@ int_rebuild_snap_update_recs(void **state) ioreq_fini(&req); /* insert rebuild stop|start into the exclude rebuild execution */ - arg->interactive_rebuild = 1; arg->rebuild_cb = rebuild_stop_with_dmg; arg->rebuild_post_cb = rebuild_resume_wait; rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); @@ -190,30 +189,28 @@ rebuild_wait_error_reset_fail_cb(void *data) test_arg_t *arg = data; int rc; - print_message("wait until rebuild errors (and starts Fail_reclaim)\n"); + print_message("wait until rebuild starts erroring\n"); test_rebuild_wait_to_error(&arg, 1); - print_message("check rebuild errored, rs_errno=%d (expecting -DER_IO=%d)\n", + print_message("rebuild version %u erroring, check rs_errno=%d (expecting -DER_IO=%d)\n", + arg->pool.pool_info.pi_rebuild_st.rs_version, arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_IO); assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_IO); - print_message("rebuild error code check passed\n"); print_message("clearing fault injection on all engines\n"); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 0, 0, NULL); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_NUM, 0, 0, NULL); - /* Give time for transition from op:Rebuild into op:Fail_reclaim */ - sleep(2); + print_message("wait until Fail_reclaim starts\n"); + test_rebuild_wait_to_start_lower(&arg, 1); print_message( "send rebuild stop --force request during 
first/only Fail_reclaim operation\n"); rc = rebuild_force_stop_with_dmg(data); - if (rc != 0) - print_message("rebuild_force_stop_with_dmg failed, rc=%d\n", rc); + assert_rc_equal(rc, 0); - print_message("wait for rebuild to be stopped\n"); - test_rebuild_wait(&arg, 1); - /* Verifying rs_state/rs_errno will happen in post_cb rebuild_resume_wait() */ + /* Wait for stop, verify rs_state/rs_errno happens in rebuild_post_cb rebuild_resume_wait() + */ return rc; } @@ -231,7 +228,6 @@ int_rebuild_many_objects_with_failure(void **state) return; T_BEGIN(); - arg->interactive_rebuild = 1; D_ALLOC_ARRAY(oids, NUM_OBJS); for (i = 0; i < NUM_OBJS; i++) { char buffer[256]; @@ -257,11 +253,9 @@ int_rebuild_many_objects_with_failure(void **state) } /* For interactive rebuild, we need: - * 1. trigger rebuild (which will fail), query pool reubild state until op:Rebuild fails - * and op:Fail_reclaim begins. See test_rebuild_wait_to_error(). - * 2. Then, while rebuild is in op:Fail_reclaim, issue dmg system stop to test that you - * can't stop during Fail_reclaim (though the command will take effect by not retrying - * rebuild). + * 1. trigger rebuild (which will fail), wait until op:Fail_reclaim begins. + * 2. During op:Fail_reclaim, issue dmg system stop (test that stop does not interrupt + * reclaim, but takes effect by not retrying the rebuild). 
*/ arg->rebuild_cb = rebuild_wait_error_reset_fail_cb; arg->rebuild_post_cb = rebuild_resume_wait; @@ -370,24 +364,30 @@ int_drain_fail_and_retry_objects(void **state) arg->no_rebuild = 1; drain_single_pool_rank(arg, ranks_to_kill[0], false); + arg->no_rebuild = 0; print_message("wait drain to fail and exit\n"); /* NB: could be better to wait (in drain_single_pool_rank or test_rebuild_wait), but that * requires new logic in rebuild_task_complete_schedule() to update state after * Fail_reclaim */ - print_message("wait for op:Reclaim to get -DER_IO\n"); + print_message("wait for drain reubild to get -DER_IO\n"); test_rebuild_wait_to_error(&arg, 1); - print_message("sleep for op:Fail_reclaim to run\n"); - sleep(30); - arg->no_rebuild = 0; + print_message("wait for op:Fail_reclaim to start\n"); + test_rebuild_wait_to_start_lower(&arg, 1); + print_message("clear fault injection on all engines and wait for retry rebuild\n"); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + test_rebuild_wait_to_start_next(&arg, 1); + print_message("drain rebuild retry started, version=%u\n", + arg->pool.pool_info.pi_rebuild_st.rs_version); rebuild_io_validate(arg, oids, OBJ_NR); arg->interactive_rebuild = 1; arg->rebuild_cb = reintegrate_inflight_io; arg->rebuild_cb_arg = &oids[OBJ_NR - 1]; + print_message("inflight IO during drain (that will be stopped/restarted)\n"); drain_single_pool_rank(arg, ranks_to_kill[0], false); + print_message("final data verification\n"); rebuild_io_validate(arg, oids, OBJ_NR); reintegrate_inflight_io_verify(arg); T_END(); @@ -421,8 +421,10 @@ int_extend_drain_cb_internal(void *arg) test_arg->interactive_rebuild ? 
"stop rebuild before " : "", opc, extend_drain_opstrs[opc]); - if (test_arg->interactive_rebuild) - rebuild_stop_with_dmg(arg); + if (test_arg->interactive_rebuild) { + rc = rebuild_stop_with_dmg(arg); + assert_rc_equal(rc, 0); + } /* Kill another rank during extend */ switch (opc) { @@ -526,19 +528,18 @@ int_extend_cb_internal(void *arg) daos_anchor_t anchor = {0}; bool do_stop = (!cb_arg->kill && test_arg->interactive_rebuild); const char *pre_op = (cb_arg->kill ? "kill" : "extend"); - daos_pool_info_t pinfo = {0}; int rc; int i; - /* get rebuild version for first extend, so we can wait for second rebuild to start - * (by waiting for an in-progress rebuild with version > pinfo.pi_rebuild_st.rs_version) + /* wait for first extend, and (as post-effect) get rebuild version so we can wait for + * the second rebuild to start (by waiting for a rebuild with version > first rs_version) */ - pinfo.pi_bits = DPI_REBUILD_STATUS; - rc = test_pool_get_info(test_arg, &pinfo, NULL /* engine_ranks */); - assert_rc_equal(rc, 0); - + print_message("before waiting for rebuild to start, pmap_ver=%u, rs_version=%u\n", + test_arg->pool.pool_info.pi_map_ver, + test_arg->pool.pool_info.pi_rebuild_st.rs_version); + test_rebuild_wait_to_start_next(&test_arg, 1); print_message("Extending (rs_version=%u), sleep 10, %s rank %u, %sand start op %d (%s)\n", - pinfo.pi_rebuild_st.rs_version, pre_op, cb_arg->rank, + test_arg->pool.pool_info.pi_rebuild_st.rs_version, pre_op, cb_arg->rank, do_stop ? 
"stop rebuild, " : "", opc, extend_opstrs[opc]); sleep(10); @@ -558,10 +559,14 @@ int_extend_cb_internal(void *arg) if (do_stop) { daos_debug_set_params(test_arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); - test_rebuild_wait_to_start_after_ver( - &test_arg, 1, - pinfo.pi_rebuild_st.rs_version /* original extend rebuild version */); - rebuild_stop_with_dmg(arg); /* then stop the new rebuild */ + print_message("before waiting for rebuild to start, pmap_ver=%u, rs_version=%u\n", + test_arg->pool.pool_info.pi_map_ver, + test_arg->pool.pool_info.pi_rebuild_st.rs_version); + test_rebuild_wait_to_start_next(&test_arg, 1); + print_message("rebuild version=%u running\n", + test_arg->pool.pool_info.pi_rebuild_st.rs_version); + rc = rebuild_stop_with_dmg(arg); + assert_rc_equal(rc, 0); test_rebuild_wait_to_error(&test_arg, 1); } @@ -639,11 +644,8 @@ static void int_rebuild_dkeys_stop_failing(void **state) { test_arg_t *arg = *state; - daos_pool_info_t pinfo = {0}; d_rank_t kill_rank = 0; int kill_rank_nr; - uint32_t excl_rebuild_ver; - uint32_t reclaim_rebuild_ver; daos_obj_id_t oid; struct ioreq req; int i; @@ -677,6 +679,11 @@ int_rebuild_dkeys_stop_failing(void **state) insert_recxs(key, "a_key_1M", 1, DAOS_TX_NONE, &recx, 1, data, DATA_SIZE, &req); } + /* Quick check that rebuild stop will return -DER_NONEXIST if nothing is rebuilding */ + rc = dmg_pool_rebuild_stop(arg->dmg_config, arg->pool.pool_uuid, arg->group, + false /* force */); + assert_int_equal(rc, -DER_NONEXIST); + get_killing_rank_by_oid(arg, oid, 1, 0, &kill_rank, &kill_rank_nr); ioreq_fini(&req); @@ -687,32 +694,28 @@ int_rebuild_dkeys_stop_failing(void **state) DAOS_REBUILD_OBJ_FAIL | DAOS_FAIL_ALWAYS, 0, NULL); } - /* Trigger exclude and rebuild, fail twice, force-stop it during the second Fail_reclaim */ + /* Trigger exclude and rebuild, fail twice, force-stop command during second Fail_reclaim + * NB: stop will be deferred until after Fail_reclaim (since it did not fail). 
+ */ arg->no_rebuild = 1; rebuild_single_pool_target(arg, kill_rank, -1, false); arg->no_rebuild = 0; + print_message("before waiting for rebuild to start, pmap_ver=%u, rs_version=%u\n", + arg->pool.pool_info.pi_map_ver, arg->pool.pool_info.pi_rebuild_st.rs_version); test_rebuild_wait_to_start(&arg, 1); - pinfo.pi_bits = DPI_REBUILD_STATUS; - rc = test_pool_get_info(arg, &pinfo, NULL /* engine_ranks */); - assert_rc_equal(rc, 0); - excl_rebuild_ver = pinfo.pi_rebuild_st.rs_version; print_message("Wait for exclude rebuild ver %u to fail (and start Fail_reclaim)\n", - excl_rebuild_ver); - test_rebuild_wait_to_start_before_ver(&arg, 1, excl_rebuild_ver); - rc = test_pool_get_info(arg, &pinfo, NULL /* engine_ranks */); - assert_rc_equal(rc, 0); - reclaim_rebuild_ver = pinfo.pi_rebuild_st.rs_version; - + arg->pool.pool_info.pi_rebuild_st.rs_version); + test_rebuild_wait_to_start_lower(&arg, 1); print_message("Wait for Fail_reclaim to finish (and start retry of exclude rebuild)\n"); - test_rebuild_wait_to_start_after_ver(&arg, 1, reclaim_rebuild_ver); + test_rebuild_wait_to_start_next(&arg, 1); print_message("Wait for second exclude rebuild to fail (and start Fail_reclaim)\n"); - test_rebuild_wait_to_start_before_ver(&arg, 1, excl_rebuild_ver); - sleep(2); + test_rebuild_wait_to_start_lower(&arg, 1); print_message("Force-stop runaway failing exclude rebuild retries\n"); rc = rebuild_force_stop_with_dmg(arg); assert_rc_equal(rc, 0); + print_message("Waiting for exclude rebuild to stop\n"); test_rebuild_wait(&arg, 1); assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_state, DRS_NOT_STARTED); assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_OP_CANCELED); @@ -723,8 +726,7 @@ int_rebuild_dkeys_stop_failing(void **state) /* Do not restart the rebuild ; instead, go directly to reintegrate the rank */ reintegrate_with_inflight_io(arg, &oid, kill_rank, -1); rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); 
+ assert_rc_equal(rc, 0); T_END(); } diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index 002c54e9473..e3608c16be7 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -421,9 +421,9 @@ void test_rebuild_wait(test_arg_t **args, int args_cnt); void test_rebuild_wait_to_start(test_arg_t **args, int args_cnt); void -test_rebuild_wait_to_start_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version); +test_rebuild_wait_to_start_next(test_arg_t **args, int args_cnt); void -test_rebuild_wait_to_start_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version); +test_rebuild_wait_to_start_lower(test_arg_t **args, int args_cnt); void test_rebuild_wait_to_error(test_arg_t **args, int args_cnt); int daos_pool_set_prop(const uuid_t pool_uuid, const char *name, diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index 7f823126552..143ffb4105e 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -803,7 +803,13 @@ rebuild_pool_started_after_ver(test_arg_t *arg, uint32_t rs_version) "(waiting for > %d)\n", DP_UUID(arg->pool.pool_uuid), in_progress ? "" : "not yet ", rst->rs_version, rs_version); - return in_progress && (rst->rs_version > rs_version); + if (in_progress && (rst->rs_version > rs_version)) { + /* save final pool query info to be able to inspect rebuild status */ + memcpy(&arg->pool.pool_info, &pinfo, sizeof(pinfo)); + + return true; + } + return false; } } @@ -825,11 +831,17 @@ rebuild_pool_started_before_ver(test_arg_t *arg, uint32_t rs_version) return false; } else { bool in_progress = (rst->rs_state == DRS_IN_PROGRESS); + print_message("rebuild for pool " DF_UUIDF "has %sstarted, rs_version=%u " "(waiting for < %d)\n", DP_UUID(arg->pool.pool_uuid), in_progress ? 
"" : "not yet ", rst->rs_version, rs_version); - return in_progress && (rst->rs_version < rs_version); + if (in_progress && (rst->rs_version < rs_version)) { + /* save final pool query info to be able to inspect rebuild status */ + memcpy(&arg->pool.pool_info, &pinfo, sizeof(pinfo)); + return true; + } + return false; } } @@ -935,8 +947,8 @@ test_get_last_svr_rank(test_arg_t *arg) return arg->srv_nnodes - disable_nodes - 1; } -bool -test_rebuild_started_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +static bool +test_rebuild_started_before(test_arg_t **args, int args_cnt, uint32_t *cur_versions) { bool all_started = true; int i; @@ -945,7 +957,7 @@ test_rebuild_started_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_vers bool started = true; if (!args[i]->pool.destroyed) - started = rebuild_pool_started_after_ver(args[i], rs_version); + started = rebuild_pool_started_before_ver(args[i], cur_versions[i]); if (!started) all_started = false; @@ -953,8 +965,8 @@ test_rebuild_started_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_vers return all_started; } -bool -test_rebuild_started_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +static bool +test_rebuild_started_after(test_arg_t **args, int args_cnt, uint32_t *cur_versions) { bool all_started = true; int i; @@ -963,7 +975,7 @@ test_rebuild_started_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_ver bool started = true; if (!args[i]->pool.destroyed) - started = rebuild_pool_started_before_ver(args[i], rs_version); + started = rebuild_pool_started_after_ver(args[i], cur_versions[i]); if (!started) all_started = false; @@ -971,25 +983,67 @@ test_rebuild_started_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_ver return all_started; } +/* wait until pools start rebuilds with rs_version < current (e.g.,. 
expecting op:Fail_reclaim) */ void -test_rebuild_wait_to_start(test_arg_t **args, int args_cnt) +test_rebuild_wait_to_start_lower(test_arg_t **args, int args_cnt) { - while (!test_rebuild_started_after_ver(args, args_cnt, 0 /* don't care rs_version */)) + uint32_t *cur_versions; + int i; + + D_ALLOC_ARRAY(cur_versions, args_cnt); + assert_true(cur_versions != NULL); + for (i = 0; i < args_cnt; i++) + cur_versions[i] = args[i]->pool.pool_info.pi_rebuild_st.rs_version; + + while (!test_rebuild_started_before(args, args_cnt, cur_versions)) sleep(2); + + /* NB: when control reaches here, each pool's current rs_version has been updated + * (for subsequent calls that will rely on it as a baseline) + */ + D_FREE(cur_versions); } +/* wait until pools start rebuilds with rs_version > current (e.g., expecting op:Rebuild) */ void -test_rebuild_wait_to_start_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +test_rebuild_wait_to_start_next(test_arg_t **args, int args_cnt) { - while (!test_rebuild_started_after_ver(args, args_cnt, rs_version)) + uint32_t *cur_versions; + int i; + + D_ALLOC_ARRAY(cur_versions, args_cnt); + assert_true(cur_versions != NULL); + for (i = 0; i < args_cnt; i++) + cur_versions[i] = args[i]->pool.pool_info.pi_rebuild_st.rs_version; + + while (!test_rebuild_started_after(args, args_cnt, cur_versions)) sleep(2); + + /* NB: when control reaches here, each pool's current rs_version has been updated + * (for subsequent calls that will rely on it as a baseline) + */ + D_FREE(cur_versions); } +/* wait until pools start rebuilds with any rs_version > 0 (whatever is current) */ void -test_rebuild_wait_to_start_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +test_rebuild_wait_to_start(test_arg_t **args, int args_cnt) { - while (!test_rebuild_started_before_ver(args, args_cnt, rs_version)) + uint32_t *cur_versions; + int i; + + D_ALLOC_ARRAY(cur_versions, args_cnt); + assert_true(cur_versions != NULL); + for (i = 0; i < args_cnt; 
i++) + cur_versions[i] = 0; + + while (!test_rebuild_started_after(args, args_cnt, cur_versions)) sleep(2); + + /* NB: when control reaches here, each pool's current rs_version has been updated + * (for subsequent calls that will rely on it as a baseline) + */ + D_FREE(cur_versions); } bool