Skip to content

Commit 0729f20

Browse files
authored
[DE-666] Cluster Rebalance (#287)
* Adding cluster-rebalance methods * Adding cluster-rebalance tests * Updating cluster docs with rebalance example * Stating ArangoDB version in the docs.
1 parent e5ff4d7 commit 0729f20

File tree

4 files changed

+237
-1
lines changed

4 files changed

+237
-1
lines changed

arango/cluster.py

+176-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
__all__ = ["Cluster"]
22

3-
from typing import List
3+
from typing import List, Optional
44

55
from arango.api import ApiGroup
66
from arango.exceptions import (
77
ClusterEndpointsError,
88
ClusterHealthError,
99
ClusterMaintenanceModeError,
10+
ClusterRebalanceError,
1011
ClusterServerCountError,
1112
ClusterServerEngineError,
1213
ClusterServerIDError,
@@ -195,3 +196,177 @@ def response_handler(resp: Response) -> List[str]:
195196
return [item["endpoint"] for item in resp.body["endpoints"]]
196197

197198
return self._execute(request, response_handler)
199+
200+
def calculate_imbalance(self) -> Result[Json]:
    """Fetch the current imbalance report of the cluster.

    Issues ``GET /_admin/cluster/rebalance`` and hands back the ``result``
    member of the response body. The report includes the number of
    ongoing and pending move shard operations.

    :return: Cluster imbalance information.
    :rtype: dict
    :raise arango.exceptions.ClusterRebalanceError: If retrieval fails.
    """
    request = Request(method="get", endpoint="/_admin/cluster/rebalance")

    def response_handler(resp: Response) -> Json:
        # Success path first; any unsuccessful response becomes an error.
        if resp.is_success:
            imbalance: Json = resp.body["result"]
            return imbalance
        raise ClusterRebalanceError(resp, request)

    return self._execute(request, response_handler)
217+
218+
def rebalance(
    self,
    version: int = 1,
    max_moves: Optional[int] = None,
    leader_changes: Optional[bool] = None,
    move_leaders: Optional[bool] = None,
    move_followers: Optional[bool] = None,
    pi_factor: Optional[float] = None,
    exclude_system_collections: Optional[bool] = None,
    databases_excluded: Optional[List[str]] = None,
) -> Result[Json]:
    """Compute a cluster rebalance plan and execute it in one request.

    Issues ``PUT /_admin/cluster/rebalance``. Only ``version`` is always
    sent; every other option is included in the request body solely when
    the caller supplies a value other than ``None``.

    :param version: Must be set to 1.
    :type version: int
    :param max_moves: Maximum number of moves to be computed.
    :type max_moves: int | None
    :param leader_changes: Allow leader changes without moving data.
    :type leader_changes: bool | None
    :param move_leaders: Allow moving shard leaders.
    :type move_leaders: bool | None
    :param move_followers: Allow moving shard followers.
    :type move_followers: bool | None
    :param pi_factor: A weighting factor that should remain untouched.
    :type pi_factor: float | None
    :param exclude_system_collections: Ignore system collections in the
        rebalance plan.
    :type exclude_system_collections: bool | None
    :param databases_excluded: List of database names to be excluded
        from the analysis.
    :type databases_excluded: [str] | None
    :return: Cluster rebalance plan that has been executed.
    :rtype: dict
    :raise arango.exceptions.ClusterRebalanceError: If the request fails.
    """
    # Server-side option name -> caller-supplied value, in payload order.
    optional_fields = {
        "maximumNumberOfMoves": max_moves,
        "leaderChanges": leader_changes,
        "moveLeaders": move_leaders,
        "moveFollowers": move_followers,
        "piFactor": pi_factor,
        "excludeSystemCollections": exclude_system_collections,
        "databasesExcluded": databases_excluded,
    }

    data: Json = {"version": version}
    for field_name, field_value in optional_fields.items():
        # Unset options are omitted entirely rather than sent as null.
        if field_value is not None:
            data[field_name] = field_value

    request = Request(
        method="put",
        endpoint="/_admin/cluster/rebalance",
        data=data,
    )

    def response_handler(resp: Response) -> Json:
        if resp.is_success:
            executed_plan: Json = resp.body["result"]
            return executed_plan
        raise ClusterRebalanceError(resp, request)

    return self._execute(request, response_handler)
278+
279+
def calculate_rebalance_plan(
    self,
    version: int = 1,
    max_moves: Optional[int] = None,
    leader_changes: Optional[bool] = None,
    move_leaders: Optional[bool] = None,
    move_followers: Optional[bool] = None,
    pi_factor: Optional[float] = None,
    exclude_system_collections: Optional[bool] = None,
    databases_excluded: Optional[List[str]] = None,
) -> Result[Json]:
    """Compute a cluster rebalance plan.

    Issues ``POST /_admin/cluster/rebalance``. Only ``version`` is always
    sent; every other option is included in the request body solely when
    the caller supplies a value other than ``None``.

    :param version: Must be set to 1.
    :type version: int
    :param max_moves: Maximum number of moves to be computed.
    :type max_moves: int | None
    :param leader_changes: Allow leader changes without moving data.
    :type leader_changes: bool | None
    :param move_leaders: Allow moving shard leaders.
    :type move_leaders: bool | None
    :param move_followers: Allow moving shard followers.
    :type move_followers: bool | None
    :param pi_factor: A weighting factor that should remain untouched.
    :type pi_factor: float | None
    :param exclude_system_collections: Ignore system collections in the
        rebalance plan.
    :type exclude_system_collections: bool | None
    :param databases_excluded: List of database names to be excluded
        from the analysis.
    :type databases_excluded: [str] | None
    :return: Cluster rebalance plan.
    :rtype: dict
    :raise arango.exceptions.ClusterRebalanceError: If the request fails.
    """
    # Server-side option name -> caller-supplied value, in payload order.
    optional_fields = {
        "maximumNumberOfMoves": max_moves,
        "leaderChanges": leader_changes,
        "moveLeaders": move_leaders,
        "moveFollowers": move_followers,
        "piFactor": pi_factor,
        "excludeSystemCollections": exclude_system_collections,
        "databasesExcluded": databases_excluded,
    }

    data: Json = {"version": version}
    for field_name, field_value in optional_fields.items():
        # Unset options are omitted entirely rather than sent as null.
        if field_value is not None:
            data[field_name] = field_value

    request = Request(
        method="post",
        endpoint="/_admin/cluster/rebalance",
        data=data,
    )

    def response_handler(resp: Response) -> Json:
        if resp.is_success:
            plan: Json = resp.body["result"]
            return plan
        raise ClusterRebalanceError(resp, request)

    return self._execute(request, response_handler)
341+
342+
def execute_rebalance_plan(
    self, moves: List[Json], version: int = 1
) -> Result[bool]:
    """Submit a set of move shard operations for execution.

    Issues ``POST /_admin/cluster/rebalance/execute``. The operations can
    be obtained from :meth:`Cluster.calculate_rebalance_plan`, which
    computes moves that improve the balance of shards, leader shards,
    and follower shards.

    :param moves: List of move shard operations.
    :type moves: [dict]
    :param version: Must be set to 1.
    :type version: int
    :return: True if the moves have been accepted and scheduled for
        execution, i.e. the ``code`` field of the response body is 202.
    :rtype: bool
    :raise arango.exceptions.ClusterRebalanceError: If request fails.
    """
    payload: Json = {"version": version, "moves": moves}

    request = Request(
        method="post",
        endpoint="/_admin/cluster/rebalance/execute",
        data=payload,
    )

    def response_handler(resp: Response) -> bool:
        if resp.is_success:
            # Only a 202 code in the body counts as "accepted".
            accepted: bool = resp.body["code"] == 202
            return accepted
        raise ClusterRebalanceError(resp, request)

    return self._execute(request, response_handler)

arango/exceptions.py

+4
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,10 @@ class ClusterServerCountError(ArangoServerError):
991991
"""Failed to retrieve cluster server count."""
992992

993993

994+
class ClusterRebalanceError(ArangoServerError):
    """Failed to inspect, compute or execute a cluster rebalancing operation."""
996+
997+
994998
##################
995999
# JWT Exceptions #
9961000
##################

docs/cluster.rst

+3
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,7 @@ Below is an example on how to manage clusters using python-arango.
9191
cluster.toggle_maintenance_mode('on')
9292
cluster.toggle_maintenance_mode('off')
9393
94+
# Rebalance the distribution of shards. Available with ArangoDB 3.10+.
95+
cluster.rebalance()
96+
9497
See :ref:`ArangoClient` and :ref:`Cluster` for API specification.

tests/test_cluster.py

+54
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import pytest
2+
from packaging import version
23

34
from arango.errno import DATABASE_NOT_FOUND, FORBIDDEN
45
from arango.exceptions import (
56
ClusterEndpointsError,
67
ClusterHealthError,
78
ClusterMaintenanceModeError,
9+
ClusterRebalanceError,
810
ClusterServerCountError,
911
ClusterServerEngineError,
1012
ClusterServerIDError,
@@ -134,3 +136,55 @@ def test_cluster_server_count(db, bad_db, cluster):
134136
with assert_raises(ClusterServerCountError) as err:
135137
bad_db.cluster.server_count()
136138
assert err.value.error_code in {FORBIDDEN, DATABASE_NOT_FOUND}
139+
140+
141+
def test_cluster_rebalance(sys_db, bad_db, cluster, db_version):
    """Exercise the cluster rebalance API end to end.

    Covers imbalance retrieval, plan computation, plan execution and the
    one-shot rebalance call. Each operation is also issued through
    ``bad_db`` and must fail with a FORBIDDEN error code.
    """
    if not cluster:
        pytest.skip("Only tested in a cluster setup")
    if db_version < version.parse("3.10.0"):
        pytest.skip("Only tested on ArangoDB 3.10+")

    # Keys every rebalance plan is expected to carry.
    plan_keys = ("imbalanceBefore", "imbalanceAfter", "moves")

    # Test imbalance retrieval
    imbalance = sys_db.cluster.calculate_imbalance()
    for section in ("leader", "shards"):
        assert section in imbalance
    for counter in ("pendingMoveShards", "todoMoveShards"):
        assert imbalance[counter] == 0

    with assert_raises(ClusterRebalanceError) as exc_info:
        bad_db.cluster.calculate_imbalance()
    assert exc_info.value.error_code == FORBIDDEN

    # Test rebalance computation
    plan = sys_db.cluster.calculate_rebalance_plan(
        max_moves=3,
        leader_changes=True,
        move_leaders=True,
        move_followers=True,
        pi_factor=1234.5,
        databases_excluded=["_system"],
    )
    for key in plan_keys:
        assert key in plan

    with assert_raises(ClusterRebalanceError) as exc_info:
        bad_db.cluster.calculate_rebalance_plan()
    assert exc_info.value.error_code == FORBIDDEN

    # Test rebalance execution
    assert sys_db.cluster.execute_rebalance_plan(plan["moves"]) is True
    with assert_raises(ClusterRebalanceError) as exc_info:
        bad_db.cluster.execute_rebalance_plan(plan["moves"])
    assert exc_info.value.error_code == FORBIDDEN

    # Rebalance cluster in one go
    plan = sys_db.cluster.rebalance()
    for key in plan_keys:
        assert key in plan

    with assert_raises(ClusterRebalanceError) as exc_info:
        bad_db.cluster.rebalance()
    assert exc_info.value.error_code == FORBIDDEN

0 commit comments

Comments
 (0)