暗能星系

zhanglu

ps -eo state,pid,user,cmd | awk '$1=="D"'

zhanglu

[root@node2 0]# ps -eo state,pid,user,cmd | awk '$1=="D"'
D 92467 root /usr/bin/mount -o bind,remount /proc/3202/fd/48 /var/lib/kubelet/pods/80dbf45b-924f-4fbf-9ac5-b903580798a0/volume-subpaths/data/ms-hippo-flow-new/0
D 92710 root /usr/bin/mount -o bind,remount /proc/3202/fd/49 /var/lib/kubelet/pods/2a328296-e0f5-4e7c-8def-d571becb3f9b/volume-subpaths/data/ms-hippo-flow/0
D 129596 root /usr/bin/mount -o bind,remount /proc/3202/fd/67 /var/lib/kubelet/pods/404b6cca-8dec-48ed-b0fe-6af0e10bfe1a/volume-subpaths/data/ms-hippo-flow-new/0
D 546675 root (fprintd)
D 720146 root (ostnamed)
D 978816 root (fprintd)
D 1508053 root /usr/bin/mount -o bind,remount /proc/3202/fd/62 /var/lib/kubelet/pods/c4b9e777-73b0-4f48-b73b-6f851e1a133a/volume-subpaths/data/ms-hippo-flow/0
D 1563743 root (fprintd)
D 1616286 root /usr/bin/mount -o bind,remount /proc/3771840/fd/53 /var/lib/kubelet/pods/9e5545a5-5a4f-4e1e-a2e0-fc17a16783e5/volume-subpaths/data/ms-hippo-flow-new/0
D 1637001 root (fprintd)
D 1644126 root /usr/bin/mount -o bind,remount /proc/3202/fd/58 /var/lib/kubelet/pods/599e5d21-3c5f-4898-a87a-38bf7e15e8f3/volume-subpaths/data/ms-hippo-flow-new/0
D 1778075 root /usr/bin/mount -o bind,remount /proc/3202/fd/55 /var/lib/kubelet/pods/cc6add18-2573-4a3f-9635-3e6a0cb18508/volume-subpaths/data/ms-hippo-flow-new/0
D 1779006 root /usr/bin/mount -o bind,remount /proc/3202/fd/43 /var/lib/kubelet/pods/379b60cd-4173-4db2-ae42-6df3f6ff4b5a/volume-subpaths/data/ms-hippo-flow-new/0
D 1897601 root sync
D 1932823 root sync
D 1936874 root sync
D 1994659 root /usr/bin/mount -o bind,remount /proc/3202/fd/79 /var/lib/kubelet/pods/d0b0b5c5-1d4a-4bd7-83cf-fc7342b99e98/volume-subpaths/data/ms-hippo-flow-new/0
D 1994720 root /usr/bin/mount -o bind,remount /proc/3202/fd/81 /var/lib/kubelet/pods/14656dfa-19fb-4ade-8472-7ded179ccec9/volume-subpaths/data/ms-hippo-flow-new/0
D 2050274 root sync
D 2259452 root sync
D 2299728 root sync
D 2334550 root (fprintd)
D 2530241 root sync
D 2595120 root (fprintd)
D 2845864 root (fprintd)
D 2977384 root mount -o bind,remount /proc/3202/fd/48 /var/lib/kubelet/pods/80dbf45b-924f-4fbf-9ac5-b903580798a0/volume-subpaths/data/ms-hippo-flow-new/0
D 3066597 root (fprintd)
D 3127842 root /usr/bin/mount -o bind,remount /proc/3771840/fd/64 /var/lib/kubelet/pods/067e33e7-1364-48d8-ab7e-1045d791170d/volume-subpaths/data/cromwell-frontend/0
D 3185249 root /usr/bin/mount -o bind,remount /proc/3771840/fd/45 /var/lib/kubelet/pods/2a328296-e0f5-4e7c-8def-d571becb3f9b/volume-subpaths/data/ms-hippo-flow/0
D 3302521 root /usr/bin/mount -o bind,remount /proc/3771840/fd/50 /var/lib/kubelet/pods/c4b9e777-73b0-4f48-b73b-6f851e1a133a/volume-subpaths/data/ms-hippo-flow/0
D 3416966 root /usr/bin/mount -o bind,remount /proc/3202/fd/95 /var/lib/kubelet/pods/9f5ea36b-8596-4ae8-a143-0466fa418705/volume-subpaths/data/ms-hippo-flow-new/0
D 3582838 root mount -o bind,remount /proc/3202/fd/48 /var/lib/kubelet/pods/80dbf45b-924f-4fbf-9ac5-b903580798a0/volume-subpaths/data/ms-hippo-flow-new/0
D 3802507 root /usr/bin/mount -o bind,remount /proc/3771840/fd/52 /var/lib/kubelet/pods/0e905f16-fb54-4960-a814-8788a7fa1a5e/volume-subpaths/data/ms-hippo-flow-new/0
D 3802572 root /usr/bin/mount -o bind,remount /proc/3771840/fd/59 /var/lib/kubelet/pods/aa0880e7-5b3c-4608-9769-3bc08473f00f/volume-subpaths/data/ms-hippo-flow-new/0
D 3908263 root sync
D 4109438 root (fprintd)
D 4185624 root /usr/bin/mount -o bind,remount /proc/3202/fd/72 /var/lib/kubelet/pods/97aff715-f0b3-4bd9-b6ef-7630cd9a7cdd/volume-subpaths/data/ms-hippo-flow-new/0
D 4185680 root /usr/bin/mount -o bind,remount /proc/3202/fd/74 /var/lib/kubelet/pods/017ecee2-c97b-4e13-9529-1a7d39e449e9/volume-subpaths/data/ms-hippo-flow-new/0

zhanglu

bash-4.4$ ceph osd status
ID HOST USED AVAIL WR OPS WR DATA RD OPS RD DATA STATE
0 node1 8568G 2380G 0 489k 0 0 exists,up
1 node1 9251G 1697G 4 4096k 11 5967k exists,up
2 node1 8584G 2364G 3 4096k 6 4226k exists,up
3 node1 8306G 2642G 0 641k 0 204k exists,up
4 node1 9422G 1526G 0 0 0 0 exists,up
5 node1 8320G 2628G 0 0 0 0 exists,up
6 node1 8431G 2517G 0 0 1 1798k exists,up
7 node2 9033G 1915G 0 0 0 819k exists,up
8 node2 9677G 1271G 1 854k 4 3347k exists,up
9 node2 9172G 1776G 0 819k 0 0 exists,up
10 node2 9.76T 948G 0 207k 0 204k exists,up
11 node2 9481G 1467G 0 96 0 819k exists,up
12 node2 9272G 1676G 0 79.2k 0 204k exists,up
13 node2 9335G 1613G 0 0 0 0 exists,up
14 node3 9.79T 917G 0 1638k 0 0 exists,up
15 node3 9771G 1177G 0 0 0 819k exists,up
16 node3 8875G 2073G 0 27.0k 0 0 exists,up
17 node3 9616G 1332G 0 819k 0 0 exists,up
18 node3 9877G 1071G 0 0 0 819k exists,up
19 node3 9221G 1727G 0 0 0 0 exists,up
20 node3 9789G 1159G 0 0 0 819k exists,up
21 node8 15.5T 2718G 3 5229k 5 6565k exists,up
22 node7 12.5T 2004G 3 2816k 6 3392k exists,up
23 node7 12.5T 2072G 0 0 0 0 exists,up
24 node7 13.3T 1269G 0 0 0 819k exists,up
25 node7 12.0T 2520G 0 819k 0 0 exists,up
26 node7 13.3T 1258G 0 14.9k 1 4096k exists,up
27 node5 12.3T 2270G 0 819k 0 819k exists,up
28 node5 12.1T 2410G 1 819k 10 3994k exists,up
29 node5 13.4T 1168G 0 1321k 0 819k exists,up
30 node5 11.9T 2686G 4 3276k 7 2517k exists,up
31 node5 12.9T 1658G 0 467k 0 0 exists,up
32 node5 11.6T 2974G 0 819k 0 0 exists,up
33 node5 12.0T 2525G 1 2457k 6 5752k exists,up
34 node6 12.4T 2113G 0 0 0 1638k exists,up
35 node6 12.3T 2272G 0 404k 0 819k exists,up
36 node6 12.8T 1721G 0 32 0 0 exists,up
37 node6 12.6T 1972G 0 288k 0 0 exists,up
38 node6 12.3T 2240G 1 1057k 1 4096k exists,up
39 node6 13.0T 1546G 0 1912k 0 0 exists,up
40 node6 12.5T 2079G 3 2191k 9 3748k exists,up
41 node8 15.6T 2571G 3 9133k 7 8802k exists,up
42 node8 15.0T 3201G 0 0 0 819k exists,up
43 node8 15.4T 2776G 0 819k 0 1478k exists,up
44 node8 15.6T 2585G 2 2459k 7 3307k exists,up
45 node8 15.0T 3167G 1 1754k 0 819k exists,up
46 node8 15.1T 3141G 1 1017k 3 25.6k exists,up
47 node8 15.1T 3089G 0 596k 0 0 exists,up
48 node8 16.6T 1541G 0 884k 0 819k exists,up
49 node8 14.6T 3645G 0 326k 0 819k exists,up
50 node8 14.9T 3363G 0 0 0 819k exists,up

zhanglu

Lowering weight for node2's fullest disk

ceph osd crush reweight osd.10 9.0

Lowering weight for node3's fullest disk

ceph osd crush reweight osd.14 9.0

Lowering weight for node8's fullest disk

ceph osd crush reweight osd.48 16.0

zhanglu

bash-4.4$ ceph -s
cluster:
id: 807d820b-5c5b-451c-9f52-41b93d5d905a
health: HEALTH_ERR
1 large omap objects
1 clients failing to advance oldest client/flush tid
mon bv is low on available space
full ratio(s) out of order
Low space hindering backfill (add storage if this doesn't resolve itself): 18 pgs backfill_toofull
501 pgs not deep-scrubbed in time
356 pgs not scrubbed in time
1 mgr modules have recently crashed

services:
mon: 3 daemons, quorum bt,bu,bv (age 28h)
mgr: a(active, since 28h), standbys: b
mds: 3/3 daemons up, 3 hot standby
osd: 51 osds: 51 up (since 27h), 51 in (since 27h); 243 remapped pgs

data:
volumes: 1/1 healthy
pools: 4 pools, 2097 pgs
objects: 244.93M objects, 290 TiB
usage: 598 TiB used, 103 TiB / 701 TiB avail
pgs: 15120820/520996304 objects misplaced (2.902%)
1854 active+clean
225 active+remapped+backfilling
18 active+remapped+backfill_toofull

io:
client: 325 MiB/s rd, 135 MiB/s wr, 171 op/s rd, 302 op/s wr
recovery: 215 MiB/s, 154 objects/s

zhanglu

full_ratio 0.98
backfillfull_ratio 0.95
nearfull_ratio 0.95

zhanglu

full_ratio 0.98
backfillfull_ratio 0.97
nearfull_ratio 0.95

zhanglu

18 node3 9771G 1177G 3 0 0 0 exists,full,up

zhanglu

Network v274_harbor Created 0.0s
⠹ Container harbor-log Starting 0.6s
Container harbor-db Created 0.2s
Container registry Created 0.2s
Container registryctl Created 0.2s
Container harbor-portal Created 0.2s
Container redis Created 0.2s
Container harbor-core Created 0.1s
Container nginx Created 0.1s
Container harbor-jobservice Created 0.1s
Error response from daemon: driver failed programming external connectivity on endpoint harbor-log (3fa8de62ae840ea23fe6dd59902fc1cd0fdf2ccd585a0c3a4ecf1cf515268535): Bind for 127.0.0.1:1514 failed: port is already allocated
[root@node1 v2.7.4]# vim docker-compose.yml

zhanglu

ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS
0 hdd 10.69240 0.95000 11 TiB 7.6 TiB 7.6 TiB 6.9 GiB 20 GiB 3.1 TiB 71.36 0.84 54 up
1 hdd 10.69240 0.95000 11 TiB 9.1 TiB 9.0 TiB 14 GiB 23 GiB 1.6 TiB 84.89 1.00 65 up
2 hdd 10.69240 0.95000 11 TiB 8.4 TiB 8.4 TiB 6.9 GiB 20 GiB 2.3 TiB 78.40 0.93 62 up
3 hdd 10.69240 0.95000 11 TiB 8.0 TiB 7.9 TiB 14 GiB 21 GiB 2.7 TiB 74.50 0.88 59 up
4 hdd 10.69240 0.95000 11 TiB 9.1 TiB 9.1 TiB 6.9 GiB 22 GiB 1.6 TiB 85.08 1.01 66 up
5 hdd 10.69240 0.95000 11 TiB 8.1 TiB 8.1 TiB 14 GiB 22 GiB 2.6 TiB 75.90 0.90 61 up
6 hdd 10.69240 0.95000 11 TiB 8.2 TiB 8.2 TiB 6 KiB 17 GiB 2.5 TiB 76.97 0.91 60 up
7 hdd 10.69240 0.95000 11 TiB 9.0 TiB 9.0 TiB 14 GiB 23 GiB 1.7 TiB 84.53 1.00 67 up
8 hdd 10.69240 0.95000 11 TiB 9.7 TiB 9.7 TiB 7.3 GiB 23 GiB 1011 GiB 90.77 1.07 70 up
9 hdd 10.69240 0.95000 11 TiB 9.0 TiB 8.9 TiB 6.8 GiB 21 GiB 1.7 TiB 83.72 0.99 65 up
10 hdd 9.00000 0.95000 11 TiB 9.3 TiB 9.3 TiB 8.9 MiB 21 GiB 1.4 TiB 87.07 1.03 66 up
11 hdd 10.69240 0.95000 11 TiB 9.3 TiB 9.3 TiB 1 KiB 21 GiB 1.4 TiB 87.25 1.03 67 up
12 hdd 10.69240 0.95000 11 TiB 9.2 TiB 9.2 TiB 116 KiB 20 GiB 1.5 TiB 85.76 1.01 64 up
13 hdd 10.69240 0.95000 11 TiB 9.1 TiB 9.0 TiB 7.2 GiB 22 GiB 1.6 TiB 84.66 1.00 65 up
14 hdd 9.00000 0.95000 11 TiB 9.4 TiB 9.4 TiB 327 KiB 22 GiB 1.3 TiB 88.18 1.04 64 up
15 hdd 10.69240 0.95000 11 TiB 9.5 TiB 9.5 TiB 14 GiB 25 GiB 1.2 TiB 88.77 1.05 69 up
16 hdd 10.69240 0.95000 11 TiB 8.5 TiB 8.5 TiB 1 KiB 19 GiB 2.1 TiB 79.95 0.95 60 up
17 hdd 10.69240 0.95000 11 TiB 9.1 TiB 9.1 TiB 4.1 MiB 21 GiB 1.6 TiB 85.42 1.01 66 up
18 hdd 10.69240 0.95000 11 TiB 9.6 TiB 9.6 TiB 14 GiB 26 GiB 1.1 TiB 90.17 1.07 69 up
19 hdd 10.69240 0.95000 11 TiB 8.5 TiB 8.5 TiB 21 GiB 26 GiB 2.1 TiB 79.96 0.95 64 up
20 hdd 10.69240 0.95000 11 TiB 9.1 TiB 9.1 TiB 1 KiB 21 GiB 1.6 TiB 85.46 1.01 64 up
27 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 735 KiB 27 GiB 2.8 TiB 80.92 0.96 83 up
28 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 21 GiB 31 GiB 2.4 TiB 83.18 0.98 87 up
29 hdd 14.55269 1.00000 15 TiB 13 TiB 13 TiB 7.1 GiB 31 GiB 1.2 TiB 91.87 1.09 95 up
30 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 14 GiB 30 GiB 2.7 TiB 81.47 0.96 87 up
31 hdd 14.55269 1.00000 15 TiB 13 TiB 13 TiB 1 KiB 28 GiB 1.8 TiB 87.89 1.04 95 up
32 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 2.6 MiB 26 GiB 2.8 TiB 80.64 0.95 87 up
33 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 14 GiB 30 GiB 2.4 TiB 83.45 0.99 91 up
34 hdd 14.55269 1.00000 15 TiB 13 TiB 12 TiB 6.9 GiB 29 GiB 2.0 TiB 86.13 1.02 91 up
35 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 14 GiB 31 GiB 2.2 TiB 84.90 1.00 90 up
36 hdd 14.55269 1.00000 15 TiB 13 TiB 13 TiB 1 KiB 28 GiB 1.6 TiB 88.94 1.05 95 up
37 hdd 14.55269 1.00000 15 TiB 13 TiB 13 TiB 767 KiB 28 GiB 1.9 TiB 86.81 1.03 91 up
38 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 21 MiB 27 GiB 2.1 TiB 85.36 1.01 88 up
39 hdd 14.55269 1.00000 15 TiB 13 TiB 13 TiB 467 KiB 28 GiB 1.6 TiB 89.31 1.06 91 up
40 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 14 GiB 30 GiB 2.1 TiB 85.53 1.01 92 up
22 hdd 14.55269 1.00000 15 TiB 13 TiB 13 TiB 7.2 GiB 29 GiB 1.9 TiB 86.71 1.03 90 up
23 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 1 KiB 27 GiB 2.3 TiB 84.04 0.99 87 up
24 hdd 14.55269 1.00000 15 TiB 13 TiB 13 TiB 6.9 GiB 30 GiB 1.2 TiB 92.03 1.09 95 up
25 hdd 14.55269 1.00000 15 TiB 12 TiB 12 TiB 1.5 MiB 27 GiB 2.3 TiB 84.45 1.00 86 up
26 hdd 14.55269 1.00000 15 TiB 13 TiB 13 TiB 6.9 GiB 30 GiB 1.6 TiB 88.82 1.05 90 up
21 hdd 18.19040 1.00000 18 TiB 16 TiB 16 TiB 14 GiB 37 GiB 2.6 TiB 85.92 1.02 114 up
41 hdd 18.19040 1.00000 18 TiB 16 TiB 16 TiB 14 GiB 38 GiB 2.3 TiB 87.56 1.04 113 up
42 hdd 18.19040 1.00000 18 TiB 15 TiB 15 TiB 7.2 GiB 34 GiB 3.1 TiB 83.17 0.98 110 up
43 hdd 18.19040 1.00000 18 TiB 16 TiB 15 TiB 1 KiB 33 GiB 2.7 TiB 85.23 1.01 113 up
44 hdd 18.19040 1.00000 18 TiB 15 TiB 15 TiB 14 GiB 37 GiB 3.2 TiB 82.21 0.97 99 up
45 hdd 18.19040 1.00000 18 TiB 15 TiB 15 TiB 1 KiB 33 GiB 3.2 TiB 82.50 0.98 105 up
46 hdd 18.19040 1.00000 18 TiB 15 TiB 15 TiB 14 GiB 37 GiB 2.7 TiB 84.92 1.00 107 up
47 hdd 18.19040 1.00000 18 TiB 15 TiB 15 TiB 6.8 GiB 35 GiB 3.5 TiB 80.89 0.96 103 up
48 hdd 16.00000 1.00000 18 TiB 17 TiB 16 TiB 2.2 MiB 35 GiB 1.7 TiB 90.79 1.07 113 up
49 hdd 18.19040 1.00000 18 TiB 15 TiB 15 TiB 7.1 GiB 33 GiB 3.5 TiB 80.81 0.96 105 up
50 hdd 18.19040 1.00000 18 TiB 15 TiB 15 TiB 7 KiB 33 GiB 3.4 TiB 81.03 0.96 101 up
TOTAL 701 TiB 593 TiB 591 TiB 336 GiB 1.4 TiB 108 TiB 84.55

zhanglu

bash-4.4$ ceph -s
cluster:
id: 807d820b-5c5b-451c-9f52-41b93d5d905a
health: HEALTH_WARN
1 large omap objects
mon bv is low on available space
Low space hindering backfill (add storage if this doesn't resolve itself): 3 pgs backfill_toofull
Degraded data redundancy: 1166183/520639685 objects degraded (0.224%), 2 pgs degraded, 2 pgs undersized
582 pgs not deep-scrubbed in time
427 pgs not scrubbed in time
1 mgr modules have recently crashed

services:
mon: 3 daemons, quorum bt,bu,bv (age 2h)
mgr: a(active, since 44h), standbys: b
mds: 3/3 daemons up, 3 hot standby
osd: 51 osds: 51 up (since 118m), 51 in (since 43h); 147 remapped pgs

data:
volumes: 1/1 healthy
pools: 4 pools, 2097 pgs
objects: 244.76M objects, 290 TiB
usage: 593 TiB used, 108 TiB / 701 TiB avail
pgs: 1166183/520639685 objects degraded (0.224%)
7755848/520639685 objects misplaced (1.490%)
1950 active+clean
142 active+remapped+backfilling
3 active+remapped+backfill_toofull
2 active+undersized+degraded+remapped+backfilling

io:
client: 5.7 MiB/s rd, 689 MiB/s wr, 290 op/s rd, 736 op/s wr
recovery: 151 MiB/s, 0 keys/s, 104 objects/s

zhanglu

稍微调低最满 OSD 的权重，让数据往外迁（例如从 0.95 调到 0.90）

ceph osd reweight 24 0.90
ceph osd reweight 29 0.90

zhanglu

kubectl -n rook-ceph rollout restart deployment rook-ceph-operator

zhanglu

ceph -s
cluster:
id: 807d820b-5c5b-451c-9f52-41b93d5d905a
health: HEALTH_ERR
1 large omap objects
mon bv is low on available space
full ratio(s) out of order
Degraded data redundancy: 1694907/494508308 objects degraded (0.343%), 1 pg degraded, 1 pg undersized
715 pgs not deep-scrubbed in time
621 pgs not scrubbed in time
1 mgr modules have recently crashed

services:
mon: 3 daemons, quorum bt,bu,bv (age 16h)
mgr: b(active, since 16h), standbys: a
mds: 3/3 daemons up, 3 hot standby
osd: 51 osds: 51 up (since 12h), 51 in (since 2d); 106 remapped pgs

data:
volumes: 1/1 healthy
pools: 4 pools, 2097 pgs
objects: 232.41M objects, 279 TiB
usage: 570 TiB used, 132 TiB / 701 TiB avail
pgs: 1694907/494508308 objects degraded (0.343%)
5201810/494508308 objects misplaced (1.052%)
1990 active+clean
105 active+remapped+backfilling
1 active+undersized+degraded+remapped+backfilling
1 active+clean+scrubbing+deep

io:
client: 73 MiB/s rd, 588 MiB/s wr, 159 op/s rd, 855 op/s wr
recovery: 97 MiB/s, 27 keys/s, 68 objects/s

zhanglu

bash-4.4$ ceph status
cluster:
id: 807d820b-5c5b-451c-9f52-41b93d5d905a
health: HEALTH_WARN
1 large omap objects
Degraded data redundancy: 1683124/491472480 objects degraded (0.342%), 1 pg degraded, 1 pg undersized
729 pgs not deep-scrubbed in time
671 pgs not scrubbed in time
1 mgr modules have recently crashed

services:
mon: 3 daemons, quorum bt,bu,bv (age 19h)
mgr: b(active, since 19h), standbys: a
mds: 3/3 daemons up, 3 hot standby
osd: 51 osds: 51 up (since 15h), 51 in (since 2d); 91 remapped pgs

data:
volumes: 1/1 healthy
pools: 4 pools, 2097 pgs
objects: 231.00M objects, 278 TiB
usage: 567 TiB used, 135 TiB / 701 TiB avail
pgs: 1683124/491472480 objects degraded (0.342%)
4532435/491472480 objects misplaced (0.922%)
2004 active+clean
90 active+remapped+backfilling
2 active+clean+scrubbing+deep
1 active+undersized+degraded+remapped+backfilling

io:
client: 22 MiB/s rd, 5.7 MiB/s wr, 137 op/s rd, 1.20k op/s wr
recovery: 84 MiB/s, 10 keys/s, 60 objects/s

zhanglu

bash-4.4$ ceph osd blacklist ls
10.233.92.40:6801/1911285772 2026-05-16T06:42:45.415305+0000
10.233.95.126:6801/423702819 2026-05-16T06:39:29.555227+0000
10.233.92.40:6800/1911285772 2026-05-16T06:42:45.415305+0000
10.233.95.126:6800/423702819 2026-05-16T06:39:29.555227+0000
10.233.96.102:6800/263402740 2026-05-16T06:37:42.398974+0000
10.233.95.112:6800/1005951981 2026-05-15T11:36:21.160738+0000
10.233.95.0:0/1064179451 2026-05-15T11:36:14.378873+0000
10.233.95.0:6801/3909978274 2026-05-15T11:36:14.378873+0000
10.233.95.0:6800/3909978274 2026-05-15T11:36:14.378873+0000
10.233.95.112:6801/1005951981 2026-05-15T11:36:21.160738+0000
10.233.95.0:0/4149376748 2026-05-15T11:36:14.378873+0000
10.233.95.0:0/888298246 2026-05-15T11:36:14.378873+0000
10.233.92.42:6801/4254687 2026-05-15T11:35:40.127336+0000
10.233.92.208:0/3664263079 2026-05-15T11:20:14.004549+0000
10.233.108.135:6801/1731972526 2026-05-15T11:34:04.983882+0000
10.233.92.208:0/2326052718 2026-05-15T11:20:14.004549+0000
10.233.92.208:0/3801167330 2026-05-15T11:20:14.004549+0000
10.233.90.67:6801/835486982 2026-05-16T06:41:10.430971+0000
10.233.96.102:6801/263402740 2026-05-16T06:37:42.398974+0000
10.233.92.208:0/3997222985 2026-05-15T11:20:14.004549+0000
10.233.92.208:6801/3458710516 2026-05-15T11:20:14.004549+0000
10.233.90.67:6800/835486982 2026-05-16T06:41:10.430971+0000
10.233.69.0:0/3888805416 2026-05-15T07:38:43.071704+0000
10.233.95.0:0/2053524312 2026-05-15T11:36:14.378873+0000
10.233.92.208:0/3913625702 2026-05-15T11:20:14.004549+0000
10.233.70.84:6800/2865481930 2026-05-15T11:34:04.953630+0000
10.233.95.253:6800/1685858956 2026-05-15T10:24:56.891807+0000
10.233.70.84:6801/2865481930 2026-05-15T11:34:04.953630+0000
10.233.95.253:6801/1685858956 2026-05-15T10:24:56.891807+0000
10.233.108.135:6800/1731972526 2026-05-15T11:34:04.983882+0000
10.233.69.0:0/779221205 2026-05-15T07:38:43.071521+0000
10.233.92.208:6800/3458710516 2026-05-15T11:20:14.004549+0000
10.233.92.42:6800/4254687 2026-05-15T11:35:40.127336+0000
listed 33 entries
bash-4.4$
bash-4.4$ ceph osd blacklist rm 10.233.92.40:6801/1911285772
un-blocklisting 10.233.92.40:6801/1911285772

zhanglu

ceph osd blacklist rm 10.233.92.40:6801/1911285772
ceph osd blacklist rm 10.233.95.126:6801/423702819
ceph osd blacklist rm 10.233.92.40:6800/1911285772
ceph osd blacklist rm 10.233.95.126:6800/423702819
ceph osd blacklist rm 10.233.96.102:6800/263402740
ceph osd blacklist rm 10.233.95.112:6800/1005951981
ceph osd blacklist rm 10.233.95.0:0/1064179451
ceph osd blacklist rm 10.233.95.0:6801/3909978274
ceph osd blacklist rm 10.233.95.0:6800/3909978274
ceph osd blacklist rm 10.233.95.112:6801/1005951981
ceph osd blacklist rm 10.233.95.0:0/4149376748
ceph osd blacklist rm 10.233.95.0:0/888298246
ceph osd blacklist rm 10.233.92.42:6801/4254687
ceph osd blacklist rm 10.233.92.208:0/3664263079
ceph osd blacklist rm 10.233.108.135:6801/1731972526
ceph osd blacklist rm 10.233.92.208:0/2326052718
ceph osd blacklist rm 10.233.92.208:0/3801167330
ceph osd blacklist rm 10.233.90.67:6801/835486982
ceph osd blacklist rm 10.233.96.102:6801/263402740
ceph osd blacklist rm 10.233.92.208:0/3997222985
ceph osd blacklist rm 10.233.92.208:6801/3458710516
ceph osd blacklist rm 10.233.90.67:6800/835486982
ceph osd blacklist rm 10.233.69.0:0/3888805416
ceph osd blacklist rm 10.233.95.0:0/2053524312
ceph osd blacklist rm 10.233.92.208:0/3913625702
ceph osd blacklist rm 10.233.70.84:6800/2865481930
ceph osd blacklist rm 10.233.95.253:6800/1685858956
ceph osd blacklist rm 10.233.70.84:6801/2865481930
ceph osd blacklist rm 10.233.95.253:6801/1685858956
ceph osd blacklist rm 10.233.108.135:6800/1731972526
ceph osd blacklist rm 10.233.69.0:0/779221205
ceph osd blacklist rm 10.233.92.208:6800/3458710516
ceph osd blacklist rm 10.233.92.42:6800/4254687

zhanglu

2026-05-15 07:27:09.479984 I | op-osd: waiting... 5 of 6 OSD prepare jobs have finished processing and 49 of 51 OSDs have been updated
2026-05-15 07:27:10.683717 I | op-osd: OSD 18 is not ok-to-stop. will try updating it again later
2026-05-15 07:27:11.167655 I | clusterdisruption-controller: all "host" failure domains: [node1 node2 node3 node5 node6 node7 node8]. osd is down in failure domain: "". active node drains: false. pg health: "cluster is not fully clean. PGs: [{StateName:active+clean Count:2007} {StateName:active+remapped+backfilling Count:87} {StateName:active+clean+scrubbing+deep Count:2} {StateName:active+undersized+degraded+remapped+backfilling Count:1}]"
2026-05-15 07:27:11.901951 I | op-osd: OSD 46 is not ok-to-stop. will try updating it again later
2026-05-15 07:27:12.683286 I | clusterdisruption-controller: all "host" failure domains: [node1 node2 node3 node5 node6 node7 node8]. osd is down in failure domain: "". active node drains: false. pg health: "cluster is not fully clean. PGs: [{StateName:active+clean Count:2007} {StateName:active+remapped+backfilling Count:87} {StateName:active+clean+scrubbing+deep Count:2} {StateName:active+undersized+degraded+remapped+backfilling Count:1}]"
2026-05-15 07:27:13.194962 I | op-osd: OSD 18 is not ok-to-stop. will try updating it again later
2026-05-15 07:27:14.432436 I | op-osd: OSD 46 is not ok-to-stop. will try updating it again later
2026-05-15 07:27:15.627441 I | op-osd: OSD 18 is not ok-to-stop. will try updating it again later
2026-05-15 07:27:16.947802 I | op-osd: OSD 46 is not ok-to-stop. will try updating it again later
2026-05-15 07:27:18.279735 I | op-osd: OSD 18 is not ok-to-stop. will try updating it again later
2026-05-15 07:27:19.444455 I | op-osd: OSD 46 is not ok-to-stop. will try updating it again later
2026-05-15 07:27:20.563726 I | op-osd: OSD 18 is not ok-to-stop. will try updating it again later

tmp

Lowering weight for node2's fullest disk

Lowering weight for node3's fullest disk

Lowering weight for node8's fullest disk

稍微调低最满 OSD 的权重，让数据往外迁（例如从 0.95 调到 0.90）