ANF CEPH 2022 du 03 au 07/10/2022
Sébastien Geiger

## Création d'un volume RBD

# création du pool prbd en mode réplication
[ceph: root@ceph1 /]# ceph osd pool create prbd 16  
pool 'prbd' created
[ceph: root@ceph1 /]# ceph osd pool application enable prbd rbd
enabled application 'rbd' on pool 'prbd'
# création d'un volume rbd de 5Go
[ceph: root@ceph1 /]# rbd create --size 5G --image-feature layering,exclusive-lock prbd/foo
[ceph: root@ceph1 /]#  rbd info prbd/foo
rbd image 'foo':
	size 5 GiB in 1280 objects
	order 22 (4 MiB objects)
	snapshot_count: 0
	id: 5e9f254199fe
	block_name_prefix: rbd_data.5e9f254199fe
	format: 2
	features: layering, exclusive-lock
	op_features: 
	flags: 
	create_timestamp: Thu Aug 11 13:24:42 2022
	access_timestamp: Thu Aug 11 13:24:42 2022
	modify_timestamp: Thu Aug 11 13:24:42 2022

# espace disque du cluster	
[ceph: root@ceph1 /]#  ceph df
--- RAW STORAGE ---
CLASS  SIZE     AVAIL    USED    RAW USED  %RAW USED
hdd    280 GiB  274 GiB  20 MiB   6.0 GiB       2.15
TOTAL  280 GiB  274 GiB  20 MiB   6.0 GiB       2.15
 
--- POOLS ---
POOL                   ID  PGS  STORED  OBJECTS  USED     %USED  MAX AVAIL
device_health_metrics   1    1     0 B        0      0 B      0     86 GiB
prbd                    2   16    35 B        4  384 KiB      0     86 GiB

# remarque : avec seulement 3 nodes, il n'est pas possible de créer des pools en erasure code fonctionnels avec le profil par défaut (k=2, m=2), qui demande au moins k+m=4 hôtes.
# on fera cela après l'agrandissement du cluster avec l'ajout d'un node supplémentaire.
[ceph: root@ceph1 /]# exit
exit
[almalinux@ceph1 ~]$ 


# préparation du client
# crée la clé du client pour l'accès aux pools prbd et prbdec (ce dernier sera créé plus tard)
[almalinux@ceph1 ~]$ sudo cephadm shell ceph auth get-or-create client.prbd mon 'profile rbd' osd 'profile rbd pool=prbd, profile rbd pool=prbdec' > ceph.client.prbd.keyring
Inferring fsid 92459a10-1975-11ed-9374-fa163e5fdb7c
Inferring config /var/lib/ceph/92459a10-1975-11ed-9374-fa163e5fdb7c/mon.ceph1/config
Using recent ceph image docker.io/ceph/ceph@sha256:056637972a107df4096f10951e4216b21fcd8ae0b9fb4552e628d35df3f61139
# vérification de la structure du fichier
[almalinux@ceph1 ~]$ cat ./ceph.client.prbd.keyring 
[client.prbd]
	key = AQAEBPViMLYnDBAA7IUOkLh8Kp5/vXUQO4lzUg==

# remarque: la clé ne doit être accessible qu'à ceph. Dans ce TP on bypass quelques règles de sécurité et d'isolation ;)
# install repo ceph-octopus
[almalinux@ceph1 ~]$ ssh root@cephclt dnf install -y centos-release-ceph-octopus.noarch
...
Downloading Packages:
(1/2): centos-release-storage-common-2-2.el8.no 166 kB/s | 9.2 kB     00:00    
(2/2): centos-release-ceph-octopus-1.0-1.el8.no 150 kB/s | 8.6 kB     00:00    
...
Complete!
[almalinux@ceph1 ~]$ 

# install du binaire sur le client
[almalinux@ceph1 ~]$  ssh root@cephclt dnf install -y ceph-common
# vérifie ceph.conf
[almalinux@ceph1 ~]$  ls /etc/ceph
ceph.client.admin.keyring  ceph.conf  ceph.pub
# copie ceph.conf sur le client
[almalinux@ceph1 ~]$ scp /etc/ceph/ceph.conf root@cephclt:/etc/ceph
ceph.conf                                100%  175   144.7KB/s   00:00    
# copie de la clé pour l'accès au pool prbd sur le client
[almalinux@ceph1 ~]$ scp ./ceph.client.prbd.keyring root@cephclt:/etc/ceph/
ceph.client.prbd.keyring                 100%   62    54.9KB/s   00:00    

## montage du volume rbd
[almalinux@ceph1 ~]$  ssh root@cephclt
# montage image rbd (réplication)
[root@cephclt ~]# rbd -n client.prbd device map prbd/foo
/dev/rbd0
[root@cephclt ~]# rbd -n client.prbd device ls
id  pool  namespace  image  snap  device   
0   prbd             foo    -     /dev/rbd0
#formater le device bloc avec xfs
[root@cephclt ~]#  mkfs.xfs /dev/rbd/prbd/foo
meta-data=/dev/rbd/prbd/foo      isize=512    agcount=8, agsize=163840 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=0
         =                       reflink=1    bigtime=0 inobtcount=0
data     =                       bsize=4096   blocks=1310720, imaxpct=25
         =                       sunit=16     swidth=16 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=16 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
Discarding blocks...Done.
# montage
[root@cephclt ~]# mkdir /mnt/rbd
[root@cephclt ~]# mount /dev/rbd0 /mnt/rbd
[root@cephclt ~]# df -h /mnt/rbd
Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd0       5.0G   69M  5.0G   2% /mnt/rbd

[root@cephclt ~]# echo "init 5go" >/mnt/rbd/log.txt
[root@cephclt ~]# rbd -n client.prbd resize --size 10G prbd/foo
Resizing image: 100% complete...done.
[root@cephclt ~]# xfs_growfs /mnt/rbd
meta-data=/dev/rbd0              isize=512    agcount=16, agsize=163840 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=0
         =                       reflink=1    bigtime=0 inobtcount=0
data     =                       bsize=4096   blocks=2621440, imaxpct=25
         =                       sunit=16     swidth=16 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=16 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
[root@cephclt ~]# df -h /mnt/rbd
Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd0        10G  105M  9.9G   2% /mnt/rbd
[root@cephclt ~]# echo "resize 10go" >>/mnt/rbd/log.txt

[root@cephclt ~]# df -h /mnt/rbd
Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd0        10G  105M  9.9G   2% /mnt/rbd
[root@cephclt ~]#  rbd -n client.prbd ls -l prbd
NAME  SIZE    PARENT  FMT  PROT  LOCK
foo   10 GiB            2        excl

# Remarque : il y a un verrou sur l’image pour éviter qu’un autre client ne monte une image déjà utilisée, ceci afin d’éviter de corrompre les données.
# Remarque: l'utilisateur prbd peut augmenter ou diminuer la taille d'un volume rbd du pool prbd.
# Pour éviter les débordements, il faut que l'admin définisse un quota sur le pool prbd.

# démontage du volume rbd

[root@cephclt ~]# umount /mnt/rbd
[root@cephclt ~]# rbd -n client.prbd device unmap prbd/foo
[root@cephclt ~]# rbd -n client.prbd device ls


## Montage des images RBD au démarrage du système

# Éditer le fichier /etc/ceph/rbdmap pour rajouter une ligne de configuration contenant le pool/image et la clé d'accès
[root@cephclt ~]#  cat /etc/ceph/rbdmap
# RbdDevice		Parameters
#poolname/imagename	id=client,keyring=/etc/ceph/ceph.client.keyring
prbd/foo id=prbd,keyring=/etc/ceph/ceph.client.prbd.keyring

[root@cephclt ~]# rbdmap map
[root@cephclt ~]# rbd device ls
id  pool  namespace  image  snap  device   
0   prbd             foo    -     /dev/rbd0
[root@cephclt ~]# rbdmap unmap
[root@cephclt ~]# rbd device ls
[root@cephclt ~]# systemctl enable rbdmap
Created symlink /etc/systemd/system/multi-user.target.wants/rbdmap.service → /usr/lib/systemd/system/rbdmap.service.
[root@cephclt ~]# systemctl start rbdmap
[root@cephclt ~]# rbd device ls
id  pool  namespace  image  snap  device   
0   prbd             foo    -     /dev/rbd0
[root@cephclt ~]# echo /dev/rbd/prbd/foo /mnt/rbd xfs noauto 0 0 >>/etc/fstab
[root@cephclt ~]# rbdmap unmap
[root@cephclt ~]# df -h |grep mnt
[root@cephclt ~]# rbdmap map
[root@cephclt ~]# df -h |grep mnt
/dev/rbd0        10G  105M  9.9G   2% /mnt/rbd

# yes, ça marche ;)


## Gestion des snapshots

[root@cephclt ~]# echo version1 >>/mnt/rbd/fichier.txt
[root@cephclt ~]# xfs_freeze -f /mnt/rbd
[root@cephclt ~]# rbd -n client.prbd snap create prbd/foo@snapv1
[root@cephclt ~]# xfs_freeze -u /mnt/rbd
[root@cephclt ~]# rbd -n client.prbd snap ls prbd/foo
SNAPID  NAME    SIZE    PROTECTED  TIMESTAMP               
     4  snapv1  10 GiB             Thu Aug 11 14:53:36 2022
[root@cephclt ~]# echo version2 >>/mnt/rbd/fichier.txt
[root@cephclt ~]# cat /mnt/rbd/fichier.txt
version1
version2

# retour à l'état initial
[root@cephclt ~]# rbd -n client.prbd status prbd/foo
Watchers:
	watcher=172.16.7.215:0/1393234639 client.14400 cookie=18446462598732840961
# l'image est utilisée, il faut la démonter
[root@cephclt ~]# rbdmap unmap
[root@cephclt ~]# rbd -n client.prbd snap rollback prbd/foo@snapv1
Rolling back to snapshot: 100% complete...done.
[root@cephclt ~]# rbdmap map
[root@cephclt ~]# cat /mnt/rbd/fichier.txt
version1
[root@cephclt ~]# 
# Remarque: c'est ok, on revient à la version initiale, mais l'ensemble des modifications postérieures au snapshot est perdu.

## Gestion des Clones

[root@cephclt ~]# rbd -n client.prbd snap protect prbd/foo@snapv1
# Vous ne pouvez plus supprimer un instantané protégé.

[root@cephclt ~]# cat /mnt/rbd/fichier.txt
version1
# modification du fichier
[root@cephclt ~]# echo version2 >>/mnt/rbd/fichier.txt 
[root@cephclt ~]# rbd -n client.prbd clone prbd/foo@snapv1 prbd/fooclone
[root@cephclt ~]# mkdir /opt/fooclone
[root@cephclt ~]# rbd -n client.prbd map prbd/fooclone
/dev/rbd1
[root@cephclt ~]# mount -t xfs -o rw,nouuid /dev/rbd/prbd/fooclone /opt/fooclone
[root@cephclt ~]# cat /opt/fooclone/fichier.txt
version1
# On retrouve la version initiale du fichier
# il est possible de restaurer uniquement les fichiers nécessaires ;)

[root@cephclt ~]# echo version3>> /opt/fooclone/fichier.txt
[root@cephclt ~]#  cat /opt/fooclone/fichier.txt
version1
version3
# un clone est modifiable
#liste des clones d'un snapshot
[root@cephclt ~]# rbd -n client.prbd children prbd/foo@snapv1
prbd/fooclone
#suppression d’un snap
[root@cephclt ~]# rbd -n client.prbd snap rm prbd/foo@snapv1
Removing snap: 0% complete...failed.
2022-08-11T15:10:13.590+0000 7fb5fe325500 -1 librbd::Operations: snapshot is protected
rbd: snapshot 'snapv1' is protected from removal.
# pour faire un clone, il faut protéger le snapshot contre sa suppression ;)
[root@cephclt ~]# umount /opt/fooclone/
[root@cephclt ~]# rbd -n client.prbd unmap prbd/fooclone
[root@cephclt ~]# rbd -n client.prbd snap unprotect prbd/foo@snapv1
2022-08-11T15:10:31.367+0000 7f6e7e7fc700 -1 librbd::SnapshotUnprotectRequest: cannot unprotect: at least 1 child(ren) [3891dfcdae86] in pool 'prbd'
...
[root@cephclt ~]#  rbd -n client.prbd rm prbd/fooclone
Removing image: 100% complete...done.
[root@cephclt ~]# rbd -n client.prbd snap unprotect prbd/foo@snapv1
[root@cephclt ~]# rbd -n client.prbd snap rm prbd/foo@snapv1
Removing snap: 100% complete...done.
# Et voila ;)

#purger tous les snap d'une image
[root@cephclt ~]# rbd -n client.prbd snap purge prbd/foo
[root@cephclt ~]# 

## agrandissement cluster
Il est possible d'agrandir un cluster en ajoutant de nouveaux nodes.
Les opérations peuvent être réalisées en production sans devoir démonter les ressources déjà fournies.

[almalinux@ceph1 ~]$ sudo cephadm shell
Inferring fsid 92459a10-1975-11ed-9374-fa163e5fdb7c
Inferring config /var/lib/ceph/92459a10-1975-11ed-9374-fa163e5fdb7c/mon.ceph1/config
Using recent ceph image docker.io/ceph/ceph@sha256:056637972a107df4096f10951e4216b21fcd8ae0b9fb4552e628d35df3f61139
[ceph: root@ceph1 /]# ceph df
--- RAW STORAGE ---
CLASS  SIZE     AVAIL    USED    RAW USED  %RAW USED
hdd    280 GiB  274 GiB  36 MiB   6.0 GiB       2.16
TOTAL  280 GiB  274 GiB  36 MiB   6.0 GiB       2.16
 
--- POOLS ---
POOL                   ID  PGS  STORED   OBJECTS  USED    %USED  MAX AVAIL
device_health_metrics   1    1      0 B        0     0 B      0     86 GiB
prbd                    2   16  3.8 MiB       23  14 MiB      0     86 GiB

#liste des nodes
[ceph: root@ceph1 /]# ceph orch host ls
HOST   ADDR   LABELS  STATUS  
ceph1  ceph1                  
ceph2  ceph2                  
ceph3  ceph3                  
[ceph: root@ceph1 /]# ceph orch host add ceph4
Added host 'ceph4'

#remarque: les disques libres seront ajoutés au cluster automatiquement

[ceph: root@ceph1 /]# ceph -W cephadm
  cluster:
    id:     92459a10-1975-11ed-9374-fa163e5fdb7c
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum ceph1,ceph2,ceph3 (age 2h)
    mgr: ceph1.inxizw(active, since 2h), standbys: ceph2.nwehoh
    osd: 6 osds: 6 up (since 2h), 6 in (since 2h)
 
  data:
    pools:   2 pools, 17 pgs
    objects: 23 objects, 7.7 MiB
    usage:   6.0 GiB used, 274 GiB / 280 GiB avail
    pgs:     17 active+clean
 

2022-08-11T15:39:56.663752+0000 mgr.ceph1.inxizw [INF] refreshing ceph4 facts
2022-08-11T15:39:57.215462+0000 mgr.ceph1.inxizw [INF] Deploying daemon crash.ceph4 on ceph4
2022-08-11T15:39:59.324703+0000 mgr.ceph1.inxizw [INF] deploying 3 monitor(s) instead of 4 so monitors may achieve consensus
2022-08-11T15:39:59.325815+0000 mgr.ceph1.inxizw [INF] Deploying daemon node-exporter.ceph4 on ceph4
2022-08-11T15:40:04.533120+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph1...
2022-08-11T15:40:04.533950+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph2...
2022-08-11T15:40:04.534697+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph3...
2022-08-11T15:40:04.535464+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph4...
2022-08-11T15:40:15.656440+0000 mgr.ceph1.inxizw [INF] Deploying daemon osd.6 on ceph4
2022-08-11T15:40:18.631700+0000 mgr.ceph1.inxizw [INF] Deploying daemon osd.7 on ceph4
2022-08-11T15:40:27.455440+0000 mgr.ceph1.inxizw [INF] deploying 3 monitor(s) instead of 4 so monitors may achieve consensus
2022-08-11T15:40:27.461201+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph1...
2022-08-11T15:40:27.461580+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph2...
2022-08-11T15:40:27.462021+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph3...
2022-08-11T15:40:27.462818+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph4...
2022-08-11T15:40:31.193167+0000 mgr.ceph1.inxizw [INF] Reconfiguring prometheus.ceph1 (dependencies changed)...
2022-08-11T15:40:31.200429+0000 mgr.ceph1.inxizw [INF] Reconfiguring daemon prometheus.ceph1 on ceph1
2022-08-11T15:40:37.651610+0000 mgr.ceph1.inxizw [INF] refreshing ceph1 facts
2022-08-11T15:40:38.115940+0000 mgr.ceph1.inxizw [INF] deploying 3 monitor(s) instead of 4 so monitors may achieve consensus
2022-08-11T15:40:38.119269+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph1...
2022-08-11T15:40:38.120099+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph2...
2022-08-11T15:40:38.120586+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph3...
2022-08-11T15:40:38.120765+0000 mgr.ceph1.inxizw [INF] Applying drive group all-available-devices on host ceph4...
^C
# liste des osd, ceph4 a été intégré au cluster
[ceph: root@ceph1 /]# ceph osd tree
ID  CLASS  WEIGHT   TYPE NAME       STATUS  REWEIGHT  PRI-AFF
-1         0.37115  root default                             
-3         0.08789      host ceph1                           
 0    hdd  0.03909          osd.0       up   1.00000  1.00000
 3    hdd  0.04880          osd.3       up   1.00000  1.00000
-7         0.08789      host ceph2                           
 2    hdd  0.03909          osd.2       up   1.00000  1.00000
 4    hdd  0.04880          osd.4       up   1.00000  1.00000
-5         0.09769      host ceph3                           
 1    hdd  0.03909          osd.1       up   1.00000  1.00000
 5    hdd  0.05859          osd.5       up   1.00000  1.00000
-9         0.09769      host ceph4                           
 6    hdd  0.03909          osd.6       up   1.00000  1.00000
 7    hdd  0.05859          osd.7       up   1.00000  1.00000
[ceph: root@ceph1 /]# ceph -s
  cluster:
    id:     92459a10-1975-11ed-9374-fa163e5fdb7c
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum ceph1,ceph2,ceph3 (age 2h)
    mgr: ceph1.inxizw(active, since 2h), standbys: ceph2.nwehoh
    osd: 8 osds: 8 up (since 2m), 8 in (since 2m)
 
  data:
    pools:   2 pools, 17 pgs
    objects: 23 objects, 7.7 MiB
    usage:   8.1 GiB used, 372 GiB / 380 GiB avail
    pgs:     17 active+clean

# remarque : 4 nodes avec 8 osds
[ceph: root@ceph1 /]# ceph df
--- RAW STORAGE ---
CLASS  SIZE     AVAIL    USED    RAW USED  %RAW USED
hdd    380 GiB  372 GiB  56 MiB   8.1 GiB       2.12
TOTAL  380 GiB  372 GiB  56 MiB   8.1 GiB       2.12
 
--- POOLS ---
POOL                   ID  PGS  STORED   OBJECTS  USED    %USED  MAX AVAIL
device_health_metrics   1    1      0 B        0     0 B      0    117 GiB
prbd                    2   16  3.8 MiB       23  14 MiB      0    117 GiB


# gain de 280Go à 380Go en ajoutant 40+60Go


# création du pool prbdec en mode erasure code avec k=2 et m=2
[ceph: root@ceph1 /]# ceph osd pool create prbdec erasure
pool 'prbdec' created

[ceph: root@ceph1 /]# ceph osd pool application enable prbdec rbd
enabled application 'rbd' on pool 'prbdec'

[ceph: root@ceph1 /]# ceph osd pool set prbdec allow_ec_overwrites true
set pool 3 allow_ec_overwrites to true

# création de l'image en mode erasure code.
# remarque : l'image est dans le pool prbd et les data sont dans le pool prbdec
[ceph: root@ceph1 /]# rbd create --size 5G --image-feature layering,exclusive-lock prbd/foo-ec --data-pool prbdec
[ceph: root@ceph1 /]# rbd info prbd/foo-ec
rbd image 'foo-ec':
	size 5 GiB in 1280 objects
	order 22 (4 MiB objects)
	snapshot_count: 0
	id: 5f277e5f2656
	data_pool: prbdec
	block_name_prefix: rbd_data.2.5f277e5f2656
	format: 2
	features: layering, exclusive-lock, data-pool
	op_features: 
	flags: 
	create_timestamp: Thu Aug 18 18:07:57 2022
	access_timestamp: Thu Aug 18 18:07:57 2022
	modify_timestamp: Thu Aug 18 18:07:57 2022

# remarque : on retrouve bien l'info data_pool: prbdec
	
[ceph: root@ceph1 /]#  ceph df
--- RAW STORAGE ---
CLASS  SIZE     AVAIL    USED     RAW USED  %RAW USED
hdd    380 GiB  372 GiB  112 MiB   8.1 GiB       2.13
TOTAL  380 GiB  372 GiB  112 MiB   8.1 GiB       2.13
 
--- POOLS ---
POOL                   ID  PGS  STORED   OBJECTS  USED     %USED  MAX AVAIL
device_health_metrics   1    1      0 B        0      0 B      0    117 GiB
prbd                    2   16  3.8 MiB       25   14 MiB      0    117 GiB
prbdec                  3   32    8 KiB        1  256 KiB      0    176 GiB

# Remarque : l'espace n'est consommé qu'à l'utilisation.
# Remarque : on voit la différence d'espace disponible entre la replication x3 et l'erasure code k=2,m=2

# exit du container
[ceph: root@ceph1 /]# exit
exit
# connexion au client
[almalinux@ceph1 ~]$ ssh root@cephclt


# montage image rbdec (erasure code)
[root@cephclt ~]# mkdir /mnt/rbdec
[root@cephclt ~]# rbd -n client.prbd device map prbd/foo-ec
/dev/rbd1
# remarque: on indique seulement le nom de l'image. Pas besoin d'indiquer le nom du pool ec pour les datas
[root@cephclt ~]# rbd -n client.prbd device ls
id  pool  namespace  image   snap  device   
0   prbd             foo     -     /dev/rbd0
1   prbd             foo-ec  -     /dev/rbd1
[root@cephclt ~]# mkfs.xfs /dev/rbd/prbd/foo-ec 
meta-data=/dev/rbd/prbd/foo-ec   isize=512    agcount=8, agsize=163840 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=0
         =                       reflink=1
data     =                       bsize=4096   blocks=1310720, imaxpct=25
         =                       sunit=16     swidth=16 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=16 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
Discarding blocks...Done.
[root@cephclt ~]# mount /dev/rbd1 /mnt/rbdec/
[root@cephclt ~]#  df -h /mnt/rbdec/
Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd1       5.0G   69M  5.0G   2% /mnt/rbdec
[root@cephclt ~]# rbd -n client.prbd resize --size 10G prbd/foo-ec
Resizing image: 100% complete...done.
[root@cephclt ~]# xfs_growfs /mnt/rbdec
meta-data=/dev/rbd1              isize=512    agcount=8, agsize=163840 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=0
         =                       reflink=1
data     =                       bsize=4096   blocks=1310720, imaxpct=25
         =                       sunit=16     swidth=16 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=16 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
data blocks changed from 1310720 to 2621440
[root@cephclt ~]# df -h /mnt/rbdec
Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd1        10G  105M  9.9G   2% /mnt/rbdec
[root@cephclt ~]# rbd -n client.prbd ls -l prbd
NAME    SIZE    PARENT  FMT  PROT  LOCK
foo     10 GiB            2        excl
foo-ec  10 GiB            2        excl

# Démontage des  images
[root@cephclt ~]# umount /mnt/rbd
[root@cephclt ~]# rbd -n client.prbd device unmap prbd/foo
[root@cephclt ~]# umount /mnt/rbdec
[root@cephclt ~]# rbd -n client.prbd device unmap prbd/foo-ec
[root@cephclt ~]# rbd -n client.prbd device ls

# remarque: le fonctionnement des snapshots et des clones est identique en mode réplication ou en mode erasure code.
# vous pouvez tester le fonctionnement des clones comme vu précédemment en mode réplication

## rbd live migration

#monter prbd/foo sous /mnt/rbd
[root@cephclt ~]# rbd -n client.prbd device map prbd/foo
/dev/rbd0  
[root@cephclt ~]# mount /dev/rbd/prbd/foo /mnt/rbd
[root@cephclt ~]# df -h |grep mnt
/dev/rbd0        10G  105M  9.9G   2% /mnt/rbd

[root@cephclt ~]# rbd -n client.prbd -p prbd ls -l
NAME    SIZE    PARENT  FMT  PROT  LOCK
foo     10 GiB            2        excl
foo-ec  10 GiB            2        

# les volumes sont montés via krbd
[root@cephclt ~]#  rbd -n client.prbd device ls
id  pool  namespace  image  snap  device   
0   prbd             foo    -     /dev/rbd0
 
[root@cephclt ~]# echo "avant migration en ec" >> /mnt/rbd/fichier.txt

# pour rappel l'image foo est en réplication et l'image foo-ec en erasure code

# démarrer la migration de l'image foo
[root@cephclt ~]# rbd -n client.prbd  migration prepare prbd/foo prbd/foo2ec --data-pool prbdec
2022-08-18T18:41:05.008+0000 7f95a7677500 -1 librbd::Migration: prepare: image has watchers - not migrating
rbd: preparing migration failed: (16) Device or resource busy

# l'erreur est normale étant donné qu'il faut démonter l'image avant de commencer ;)

[root@cephclt ~]# umount /mnt/rbd
[root@cephclt ~]# rbd device unmap /dev/rbd/prbd/foo

#recommençons maintenant que l'image est démontée
[root@cephclt ~]# rbd -n client.prbd  migration prepare prbd/foo prbd/foo2ec --data-pool prbdec
[root@cephclt ~]# rbd -n client.prbd info prbd/foo2ec
rbd image 'foo2ec':
	size 10 GiB in 2560 objects
	order 22 (4 MiB objects)
	snapshot_count: 0
	id: 399c7805f093
	data_pool: prbdec
	block_name_prefix: rbd_data.2.399c7805f093
	format: 2
	features: layering, exclusive-lock, data-pool, migrating
	op_features: 
	flags: 
	create_timestamp: Thu Aug 18 18:42:53 2022
	access_timestamp: Thu Aug 18 18:42:53 2022
	modify_timestamp: Thu Aug 18 18:42:53 2022

# on remarque que l'image a le flag migrating et qu'il y a bien data_pool
[root@cephclt ~]# rbd -n client.prbd  status prbd/foo2ec
Watchers: none
Migration:
	source: prbd/foo (5e9f254199fe)
	destination: prbd/foo2ec (399c7805f093)
	state: prepared

# l'image prbd/foo2ec a bien une image source et est en mode prepared.
# c'est tout bon, mais comment accéder maintenant à mon volume rbd depuis cephclt ?

[root@cephclt ~]# rbd -n client.prbd device map prbd/foo2ec
rbd: sysfs write failed
RBD image feature set mismatch. This image cannot be mapped because the following immutable features are unsupported by the kernel: migrating.
In some cases useful info is found in syslog - try "dmesg | tail".
rbd: map failed: (6) No such device or address

# effectivement krbd ne supporte pas la fonctionnalité migrating.

# installer le client rbd-nbd avec un accès via librbd en userspace
[root@cephclt ~]# yum install -y rbd-nbd

[root@cephclt ~]# rbd-nbd -n client.prbd  map prbd/foo2ec
/dev/nbd0
[root@cephclt ~]# rbd-nbd list-mapped
id    pool  namespace  image   snap  device   
4079  prbd             foo2ec  -     /dev/nbd0
[root@cephclt ~]# mount /dev/nbd0 /mnt/rbd
[root@cephclt ~]# cat /mnt/rbd/fichier.txt
version1
version2
avant migration en ec
[root@cephclt ~]# echo "avant migration execute en ec" >> /mnt/rbd/fichier.txt

# on retrouve bien nos données depuis le client
# et depuis ceph le montage est confirmé

[root@cephclt ~]# rbd  -n client.prbd status prbd/foo2ec
Watchers:
	watcher=172.16.7.215:0/3365632429 client.24478 cookie=140023651379392
Migration:
	source: prbd/foo (5e9f254199fe)
	destination: prbd/foo2ec (399c7805f093)
	state: prepared
	
# migrons les données dans le pool prbdec
[root@cephclt ~]# rbd -n client.prbd migration execute prbd/foo2ec
Image migration: 100% complete...done.

[root@cephclt ~]# rbd  -n client.prbd status prbd/foo2ec
Watchers:
	watcher=172.16.7.215:0/3365632429 client.24478 cookie=140023651379392
Migration:
	source: prbd/foo (5e9f254199fe)
	destination: prbd/foo2ec (399c7805f093)
	state: executed

# si c'est ok, validons la migration (commit), ce qui supprime l'image source

[root@cephclt ~]# rbd -n client.prbd migration commit prbd/foo2ec
Commit image migration: 100% complete...done.

[root@cephclt ~]# rbd  -n client.prbd status prbd/foo2ec
Watchers:
	watcher=172.16.7.215:0/3365632429 client.24478 cookie=140023651379392

# on peut garder le montage sur le client ou remonter avec krbd
[root@cephclt ~]# cat /mnt/rbd/fichier.txt
version1
version2
avant migration en ec
avant migration execute en ec
[root@cephclt ~]# echo "apres migration commit en ec" >> /mnt/rbd/fichier.txt
[root@cephclt ~]# umount /mnt/rbd
[root@cephclt ~]# rbd-nbd unmap /dev/nbd0
[root@cephclt ~]# rbd-nbd list-mapped
[root@cephclt ~]# rbd -n client.prbd device map prbd/foo2ec
/dev/rbd0
[root@cephclt ~]# mount /dev/rbd/prbd/foo2ec /mnt/rbd
[root@cephclt ~]# cat /mnt/rbd/fichier.txt
version1
version2
avant migration en ec
avant migration execute en ec
apres migration commit en ec

# Yes, on retrouve nos données
# remarque : la migration d'une image d'un pool ec vers un pool en réplication est également supportée.
# remarque : les opérations de rbd live migration peuvent être réalisées depuis le client avec un compte root et l'utilisation de la clé ceph. Il est nécessaire de bien protéger l'accès au compte root.


# Documentation
https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/5/html/operations_guide/index
https://docs.ceph.com/en/latest/cephadm/upgrade/