✎ P3855 etcd_recovery_generator.py
Page Menu
Phabricator
Paste
P3855
etcd_recovery_generator.py
Active
Public
Actions
Authored by
Joe
on Aug 19 2016, 4:11 PM.
Edit Paste
Archive Paste
View Raw File
Mute Notifications
Tags
SRE
Referenced Files
F23830013: etcd_recovery_generator.py
Jul 17 2018, 6:06 AM
2018-07-17 06:06:13 (UTC+0)
F23830005: etcd_recovery_generator.py
Jul 17 2018, 6:05 AM
2018-07-17 06:05:51 (UTC+0)
F4377073: etcd_recovery_generator.py
Aug 19 2016, 4:11 PM
2016-08-19 16:11:34 (UTC+0)
Subscribers
None
import json
import logging

try:
    from urlparse import urlparse  # Python 2
except ImportError:
    from urllib.parse import urlparse  # Python 3
log = logging.getLogger(__name__)

## Globals

# Peer URL of every member of each etcd cluster, keyed first by cluster name
# (the datacenter domain) and then by the member's short name.
# Generator.fqdn() derives the host to ssh to from these URLs, so the domain
# inside each URL must be the domain of the cluster the member belongs to.
etcd_clusters = {
    'eqiad.wmnet': {
        'conf1001': 'http://conf1001.eqiad.wmnet:2380',
        'conf1002': 'http://conf1002.eqiad.wmnet:2380',
        'conf1003': 'http://conf1003.eqiad.wmnet:2380',
    },
    'codfw.wmnet': {
        # These entries previously pointed at *.eqiad.wmnet, which made every
        # generated ssh command for this cluster address the wrong domain.
        'conf2001': 'http://conf2001.codfw.wmnet:2380',
        'conf2002': 'http://conf2002.codfw.wmnet:2380',
        'conf2003': 'http://conf2003.codfw.wmnet:2380',
    },
}

# Directory on the master that the backup is copied into and that the
# temporary etcd instance uses as its data dir.
recovery = "/tmp/etcd-recovery"
class Generator(object):
    """Generate the shell commands that rebuild an etcd cluster from a backup.

    Nothing is executed by this class: every step writes the command an
    operator has to run to standard output.  Cluster membership comes from the
    module-level ``etcd_clusters`` mapping, and the scratch directory used for
    the restored backup from the module-level ``recovery`` path.
    """

    def __init__(self, cluster_name, master):
        """Set up a generator for one cluster.

        :param cluster_name: key into ``etcd_clusters``.
        :param master: short name of the member the cluster is rebuilt from.
        :raises KeyError: if ``cluster_name`` is not in ``etcd_clusters``.
        """
        self.cluster_name = cluster_name
        self.master = master
        # Mapping of member short name -> peer URL for this cluster.
        self.cluster = etcd_clusters[cluster_name]
        # "name=peer_url,..." list of the members re-added so far; it is
        # passed as --initial-cluster to every member started afterwards.
        self.current_cluster = ""
        self.etcd_dir = '/var/lib/etcd/etcd-' + self.cluster_name

    def fqdn(self, host):
        """Return the host part (without the port) of ``host``'s peer URL."""
        return urlparse(self.cluster[host]).netloc.split(':')[0]

    def ssh(self, host, cmd):
        """Return the command line that runs ``cmd`` on ``host`` through ssh."""
        return "ssh {} {}".format(self.fqdn(host), cmd)

    def adv_client_url(self, host):
        """Return the client URL advertised for ``host`` (https, port 2379)."""
        return "https://{}:2379".format(self.fqdn(host))

    def listen_client_url(self):
        """Return the local client URL etcd is told to listen on."""
        return "http://127.0.0.1:2378"

    def stop_etcd_service(self):
        """Print the command that stops etcd.service on every cluster member."""
        for host in self.cluster:
            log.info('Stopping etcd on %s', host)
            print(self.ssh(host, 'sudo systemctl stop etcd.service'))

    def launch_temp_etcd(self):
        """
        Copies the latest backup to the recovery dir, starts etcd there

        Three commands are printed, all to be run on the master: the copy of
        the backup, a chown of the copy to etcd:etcd, and the etcd invocation
        itself with --force-new-cluster.
        """
        copy_cmd = 'sudo cp -ax /srv/backups/etcd/etcd-{0}-backup {1}'.format(
            self.cluster_name, recovery)
        print(self.ssh(self.master, copy_cmd))
        print(self.ssh(self.master, 'sudo chown -R etcd:etcd ' + recovery))
        data = {
            'name': self.master,
            'data_dir': recovery,
            'listen': self.listen_client_url(),
            'adv': self.adv_client_url(self.master),
            'peer': self.cluster[self.master],
            'args': "--force-new-cluster",
        }
        print(self.ssh(self.master, self._etcd_cmd(data)))

    def _etcd_cmd(self, data):
        """Return the etcd command line built from ``data``.

        ``data`` must provide the keys ``data_dir``, ``name``, ``peer``,
        ``listen``, ``adv`` and ``args``.  The command is a single line
        terminated by a newline.
        """
        template = (
            "sudo -u etcd etcd --data-dir {data_dir} --name {name} "
            "-initial-advertise-peer-urls {peer} "
            "-listen-peer-urls {peer} "
            "-listen-client-urls {listen} "
            "-advertise-client-urls {adv} "
            "{args}\n"
        )
        return template.format(**data)

    def _curl(self, url, method='GET', req=None):
        """Return a curl command against the local etcd v2 API.

        :param url: path below ``/v2`` (including the leading slash).
        :param method: HTTP method passed to ``curl -X``.
        :param req: optional object sent as a JSON body; when given, a JSON
            Content-Type header and a single-quoted ``-d`` payload are added.
        """
        client_url = self.listen_client_url() + '/v2' + url
        cmd = "curl {} -L -X {}".format(client_url, method)
        if req is not None:
            cmd += ' -H "Content-Type: application/json" -d \'{}\''.format(
                json.dumps(req))
        return cmd

    def _etcdctl(self, cmd, username=None):
        """Return an etcdctl invocation of ``cmd`` against the local endpoint.

        When ``username`` is truthy a ``--username`` option is added.
        """
        usr = "--username " + username if username else ""
        return "etcdctl {} --endpoint {} {}".format(
            usr, self.listen_client_url(), cmd)

    def change_etcd_peer_url(self):
        """Print the commands that reset the master's peer URL.

        The first command stores the master's member id in the shell variable
        ``member``; the second PUTs the peer URL for that member.  Neither is
        wrapped in ssh.  The master is then recorded as the first entry of
        ``current_cluster``.
        """
        req = {'peerURLs': [self.cluster[self.master]]}
        # "\\:" keeps the backslash-colon in the emitted shell text without
        # relying on an invalid Python escape sequence.
        print("member=$({} | grep {} | cut -d\\: -f1)".format(
            self._etcdctl("member list"), self.master))
        print(self._curl('/members/$member', method='PUT', req=req))
        self.current_cluster += "{}={}".format(
            self.master, self.cluster[self.master])

    def move_temp_dir(self):
        """Print the commands that move the recovered data into place.

        In order: terminate etcd on the master, remove the master's etcd data
        dir, move the recovery dir to the etcd data dir.
        """
        # kill any etcd running on the server
        print(self.ssh(self.master, 'sudo killall -15 etcd'))
        self.wipe_etcd_dir(self.master)
        print(self.ssh(
            self.master, 'sudo mv {} {}'.format(recovery, self.etcd_dir)))

    def wipe_etcd_dir(self, host):
        """Print the command that removes the etcd data dir on ``host``."""
        cmd = 'sudo rm -rf {}'.format(self.etcd_dir)
        print(self.ssh(host, cmd))

    def add_to_cluster(self, host):
        """Print the ``member add`` command for ``host``, run on the master.

        ``host`` is also appended to ``current_cluster``.  The entry is
        prefixed with a comma, so the master must already have been recorded
        by ``change_etcd_peer_url``.
        """
        member_add = "member add {} {}".format(host, self.cluster[host])
        print(self.ssh(self.master, self._etcdctl(member_add)))
        self.current_cluster += ',{}={}'.format(host, self.cluster[host])

    def start_etcd(self, host):
        """Print the command that starts etcd on ``host`` as an existing member.

        ``--initial-cluster`` is set to the members recorded so far in
        ``current_cluster``.
        """
        data = {
            'name': host,
            'data_dir': self.etcd_dir,
            'listen': self.listen_client_url(),
            'adv': self.adv_client_url(host),
            'peer': self.cluster[host],
            'args': "--initial-cluster-state existing "
                    "--initial-cluster {}".format(self.current_cluster),
        }
        print(self.ssh(host, self._etcd_cmd(data)))

    def enable_auth(self):
        """Print the command that enables etcd auth, run on the master."""
        print(self.ssh(self.master, self._etcdctl("auth enable")))
def main():
    """Print the full recovery procedure for one etcd cluster.

    Expects two command-line arguments: the cluster name (a key of
    ``etcd_clusters``) and the short name of the member to rebuild the
    cluster from.  Both are validated before anything is printed, so a typo
    cannot produce a partial set of instructions.

    :raises SystemExit: if arguments are missing, the cluster is unknown, or
        the master is not a member of the cluster.
    """
    import sys

    if len(sys.argv) < 3:
        sys.exit("Usage: {} <cluster_name> <master>".format(sys.argv[0]))
    cluster_name = sys.argv[1]
    master = sys.argv[2]
    if cluster_name not in etcd_clusters:
        sys.exit("Unknown cluster '{}', known clusters: {}".format(
            cluster_name, ', '.join(sorted(etcd_clusters))))

    gen = Generator(cluster_name, master)
    if master not in gen.cluster:
        sys.exit("'{}' is not a member of cluster '{}', members: {}".format(
            master, cluster_name, ', '.join(sorted(gen.cluster))))

    print("""#############################
# ETCD Recovery instructions (generated via etcd_recovery)
# Cluster : {cluster_name}
# Master : {master}
# Nodes: {nodes}
#############################
""".format(cluster_name=cluster_name, master=master, nodes=gen.cluster))

    print("\n### STEP 1: stop etcd across the cluster.\n")
    gen.stop_etcd_service()

    print("\n### STEP 2: set up the new master from its backup\n")
    print("# Now launch the temporary etcd master from backup")
    gen.launch_temp_etcd()
    print("# When it works, SSH TO THE MASTER AND launch the following")
    gen.change_etcd_peer_url()
    print("# Now kill the original etcd running in the original shell, and start it from the right position")
    gen.move_temp_dir()
    gen.start_etcd(gen.master)

    print("\n### STEP 3: Add back and start the other nodes\n")
    for host in gen.cluster:
        # The master was already restarted in step 2.
        if host == master:
            continue
        gen.add_to_cluster(host)
        gen.wipe_etcd_dir(host)
        gen.start_etcd(host)
    # NOTE(review): the auth step is disabled in the generated procedure;
    # confirm whether auth has to be re-enabled by hand after recovery.
    #gen.enable_auth()


if __name__ == '__main__':
    main()
Event Timeline
Joe
created this paste.
Aug 19 2016, 4:11 PM
2016-08-19 16:11:34 (UTC+0)
Joe
mentioned this in
T135129: Create backup/restore scripts for etcd
Joe
edited the content of this paste.
(Show Details)
Jul 17 2018, 6:05 AM
2018-07-17 06:05:51 (UTC+0)
Joe
edited the content of this paste.
(Show Details)
joanna_borun
mentioned this in
T203944: Create a spicerack cookbook for restoring an etcd cluster from backups
Jun 15 2022, 10:42 AM
2022-06-15 10:42:20 (UTC+0)
Log In to Comment
Content licensed under Creative Commons Attribution-ShareAlike (CC BY-SA) 4.0 unless otherwise noted; code licensed under GNU General Public License (GPL) 2.0 or later and other open source licenses. By using this site, you agree to the Terms of Use, Privacy Policy, and Code of Conduct.
Wikimedia Foundation
Code of Conduct
Disclaimer
CC-BY-SA
GPL
Credits
US