ANF CEPH 2022, October 3 to 7, 2022, Sébastien Geiger

# start a ceph shell
[almalinux@ceph1 ~]$ sudo cephadm shell
Inferring fsid 4a29ceba-38c7-11ed-99a4-fa163e4d3be6
Inferring config /var/lib/ceph/4a29ceba-38c7-11ed-99a4-fa163e4d3be6/mon.ceph1/config
Using recent ceph image docker.io/ceph/ceph@sha256:056637972a107df4096f10951e4216b21fcd8ae0b9fb4552e628d35df3f61139
[ceph: root@ceph1 /]# ceph fs volume create moncfs
[ceph: root@ceph1 /]# ceph -s
  cluster:
    id:     92459a10-1975-11ed-9374-fa163e5fdb7c
    health: HEALTH_OK

  services:
    mon: 4 daemons, quorum ceph3,ceph4,ceph1,ceph2 (age 99s)
    mgr: ceph1.inxizw(active, since 22m), standbys: ceph2.nwehoh
    mds: 1/1 daemons up, 1 standby
    osd: 8 osds: 8 up (since 17m), 8 in (since 7d)

  data:
    volumes: 1/1 healthy
    pools:   5 pools, 113 pgs
    objects: 66 objects, 15 MiB
    usage:   330 MiB used, 380 GiB / 380 GiB avail
    pgs:     113 active+clean

  io:
    client:   596 B/s wr, 0 op/s rd, 1 op/s wr

# note: a new mds service is now deployed on 2 nodes, see the line "mds: 1/1 daemons up, 1 standby"
[ceph: root@ceph1 /]# ceph df
--- RAW STORAGE ---
CLASS     SIZE    AVAIL     USED  RAW USED  %RAW USED
hdd    380 GiB  380 GiB  330 MiB   330 MiB       0.08
TOTAL  380 GiB  380 GiB  330 MiB   330 MiB       0.08

--- POOLS ---
POOL                   ID  PGS   STORED  OBJECTS     USED  %USED  MAX AVAIL
device_health_metrics   1    1      0 B        0      0 B      0    120 GiB
prbd                    2   16  102 KiB        8  881 KiB      0    120 GiB
prbdec                  3   32   15 MiB       36   44 MiB   0.01    180 GiB
cephfs.moncfs.meta      4   32  2.3 KiB       22  1.5 MiB      0    120 GiB
cephfs.moncfs.data      5   32      0 B        0      0 B      0    120 GiB

# note: cephfs.moncfs.meta and cephfs.moncfs.data are the 2 pools used by cephfs.moncfs
# note: by default these 2 pools are replicated, but erasure-coded pools can be added later
[ceph: root@ceph1 /]# exit

# create the access key for the moncfs pools
[almalinux@ceph1 ~]$ sudo cephadm shell ceph fs authorize moncfs client.cephclt / rw >ceph.client.cephclt.keyring
Inferring fsid 92459a10-1975-11ed-9374-fa163e5fdb7c
Inferring config /var/lib/ceph/92459a10-1975-11ed-9374-fa163e5fdb7c/mon.ceph1/config
Using recent ceph image quay.io/ceph/ceph@sha256:c5fd9d806c54e5cc9db8efd50363e1edf7af62f101b264dccacb9d6091dcf7aa

# check the key
[almalinux@ceph1 ~]$ cat ceph.client.cephclt.keyring
[client.cephclt]
        key = AQCdl/5iuKcSHxAAg17q8Gn+SqM81IjyvmxwwQ==

# copy the key to the client; the ceph.admin key does not need to be present on the client
[almalinux@ceph1 ~]$ scp ./ceph.client.cephclt.keyring root@cephclt:/etc/ceph/ceph.client.cephclt.keyring
ceph.client.cephclt.keyring                    100%   65    40.2KB/s   00:00
[almalinux@ceph1 ~]$ ssh root@cephclt chmod -R 640 /etc/ceph
[almalinux@ceph1 ~]$ ssh root@cephclt ls -l /etc/ceph
total 16
-rw-r-----. 1 root root  65 Aug 18 19:48 ceph.client.cephclt.keyring
-rw-r-----. 1 root root  62 Aug 11 13:38 ceph.client.prbd.keyring
-rw-r-----. 1 root root 175 Aug 11 13:35 ceph.conf
-rw-r-----. 1 root root 152 Aug 11 14:47 rbdmap
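# Optional check (not part of the original lab steps): the caps granted by "ceph fs authorize" can be
# inspected from the cluster side. The output should show the key plus caps restricted to moncfs,
# similar to: mds "allow rw fsname=moncfs", mon "allow r fsname=moncfs", osd "allow rw tag cephfs data=moncfs".
[ceph: root@ceph1 /]# ceph auth get client.cephclt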
# Manual CephFS mount and unmount
[root@cephclt ~]# mkdir /mnt/moncfs
[root@cephclt ~]# mount -t ceph ceph1,ceph2,ceph3,ceph4:/ /mnt/moncfs -o name=cephclt
[root@cephclt ~]# df -h /mnt/moncfs
Filesystem                                            Size  Used Avail Use% Mounted on
172.16.7.125,172.16.7.245,172.16.7.180,172.16.7.67:/  121G     0  121G   0% /mnt/moncfs
[root@cephclt ~]# umount /mnt/moncfs

# CephFS in /etc/fstab
[root@cephclt ~]# vi /etc/fstab
# add the following line at the end of /etc/fstab:
ceph1,ceph2,ceph3,ceph4:/ /mnt/moncfs ceph name=cephclt,noatime,_netdev 0 2
[root@cephclt ~]# mount -a
[root@cephclt ~]# df -h /mnt/moncfs
Filesystem      Size  Used Avail Use% Mounted on
/dev/vda2        20G  2.7G   18G  14% /
[root@cephclt ~]# umount /mnt/moncfs

# Setting quotas
[almalinux@ceph1 ~]$ sudo cephadm shell
# use ceph-fuse inside the container to access the filesystem as ceph.admin and set the quotas
[ceph: root@ceph1 /]# ceph-fuse /mnt
ceph-fuse[25]: starting ceph client
2022-08-18T20:10:48.594+0000 7f06ad61c380 -1 init, newargv = 0x55bc8fe9f6f0 newargc=15
ceph-fuse[25]: starting fuse
[ceph: root@ceph1 /]# mkdir /mnt/rep1/
[ceph: root@ceph1 /]# setfattr -n ceph.quota.max_bytes -v 2000000000 /mnt/rep1/
[ceph: root@ceph1 /]# getfattr -n ceph.quota.max_bytes /mnt/rep1/
getfattr: Removing leading '/' from absolute path names
# file: mnt/rep1/
ceph.quota.max_bytes="2000000000"

[ceph: root@ceph1 /]# mkdir /mnt/rep2/
[ceph: root@ceph1 /]# setfattr -n ceph.quota.max_bytes -v 4000000000 /mnt/rep2/
[ceph: root@ceph1 /]# getfattr -n ceph.quota.max_bytes /mnt/rep2/
getfattr: Removing leading '/' from absolute path names
# file: mnt/rep2/
ceph.quota.max_bytes="4000000000"

[ceph: root@ceph1 /]# exit
exit
[almalinux@ceph1 ~]$ ssh root@cephclt
[root@cephclt ~]# mkdir /mnt/rep1
[root@cephclt ~]# mount -t ceph ceph1,ceph2,ceph3,ceph4:/rep1 /mnt/rep1 -o name=cephclt
[root@cephclt ~]# df -h /mnt/rep1
Filesystem                                                Size  Used Avail Use% Mounted on
172.16.7.125,172.16.7.245,172.16.7.180,172.16.7.67:/rep1  1.9G     0  1.9G   0% /mnt/rep1
# note: the quota is active, the reported size for /rep1 matches the 2 GB quota
[root@cephclt ~]# dd if=/dev/zero of=/mnt/rep1/ddfile bs=1G count=1 oflag=direct
1+0 records in
1+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 35.8687 s, 29.9 MB/s
[root@cephclt ~]# df -h /mnt/rep1
Filesystem                                                Size  Used Avail Use% Mounted on
172.16.7.125,172.16.7.245,172.16.7.180,172.16.7.67:/rep1  1.9G  996M  908M  53% /mnt/rep1
# note: this is consistent, about 1 GiB used out of the quota
[root@cephclt ~]# dd if=/dev/zero of=/mnt/rep1/ddfile bs=2G count=1 oflag=direct
dd: error writing '/mnt/rep1/ddfile': Disk quota exceeded
0+1 records in
0+0 records out
0 bytes copied, 0.776468 s, 0.0 kB/s
# note: the quota has been exceeded
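# Extra example (not in the original lab): a file-count quota can be set the same way through the
# ceph.quota.max_files attribute, again from the cephadm shell with the ceph-fuse mount on /mnt.
# Setting either quota attribute back to 0 removes the corresponding quota.
[ceph: root@ceph1 /]# setfattr -n ceph.quota.max_files -v 10000 /mnt/rep1/
[ceph: root@ceph1 /]# getfattr -n ceph.quota.max_files /mnt/rep1/
[ceph: root@ceph1 /]# setfattr -n ceph.quota.max_files -v 0 /mnt/rep1/   # remove the file-count quota again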
## CephFS layouts
[ceph: root@ceph1 /]# ceph osd pool create cephfs.moncfs.dataec erasure
pool 'cephfs.moncfs.dataec' created
[ceph: root@ceph1 /]# ceph osd pool set cephfs.moncfs.dataec allow_ec_overwrites true
set pool 6 allow_ec_overwrites to true
[ceph: root@ceph1 /]# ceph fs add_data_pool moncfs cephfs.moncfs.dataec
added data pool 6 to fsmap
[ceph: root@ceph1 /]# ceph fs ls
name: moncfs, metadata pool: cephfs.moncfs.meta, data pools: [cephfs.moncfs.data cephfs.moncfs.dataec ]
# note: make sure /mnt is still mounted with ceph-fuse for the operations that follow
[ceph: root@ceph1 /]# df /mnt/
Filesystem     1K-blocks    Used Available Use% Mounted on
ceph-fuse      398426112 8503296 389922816   3% /mnt
[ceph: root@ceph1 /]# mkdir /mnt/repec
[ceph: root@ceph1 /]# echo file1 >/mnt/repec/file1
[ceph: root@ceph1 /]# getfattr -n ceph.file.layout /mnt/repec/file1
getfattr: Removing leading '/' from absolute path names
# file: mnt/repec/file1
ceph.file.layout="stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=cephfs.moncfs.data"

[ceph: root@ceph1 /]# getfattr -n ceph.dir.layout /mnt/repec
/mnt/repec: ceph.dir.layout: No such attribute
# note: if a directory has no layout of its own, files created in it use the pool inherited from the parent directory
[ceph: root@ceph1 /]# setfattr -n ceph.dir.layout.pool -v cephfs.moncfs.dataec /mnt/repec
[ceph: root@ceph1 /]# echo file2 >/mnt/repec/file2
[ceph: root@ceph1 /]# getfattr -n ceph.file.layout /mnt/repec/file2
getfattr: Removing leading '/' from absolute path names
# file: mnt/repec/file2
ceph.file.layout="stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=cephfs.moncfs.dataec"
# the pool is indeed cephfs.moncfs.dataec

[ceph: root@ceph1 /]# getfattr -n ceph.dir.layout /mnt/repec
getfattr: Removing leading '/' from absolute path names
# file: mnt/repec
ceph.dir.layout="stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=cephfs.moncfs.dataec"
# note: all new files and subdirectories under /repec in CephFS will now go to the cephfs.moncfs.dataec pool by default
# note: CephFS has no built-in function to move existing files or directories to another pool; a file keeps the layout it was given at creation time and only a re-created file lands in the new pool
[ceph: root@ceph1 /]# getfattr -n ceph.file.layout /mnt/repec/file1
getfattr: Removing leading '/' from absolute path names
# file: mnt/repec/file1
ceph.file.layout="stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=cephfs.moncfs.data"
[ceph: root@ceph1 /]# echo verionec >> /mnt/repec/file1
[ceph: root@ceph1 /]# getfattr -n ceph.file.layout /mnt/repec/file1
getfattr: Removing leading '/' from absolute path names
# file: mnt/repec/file1
ceph.file.layout="stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=cephfs.moncfs.data"
# note: repec/file1 did not change pool even after the file was modified; appending to an existing file does not move its data
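# One possible way (not shown in the lab) to move the data of an existing file into the new pool is to
# re-create the file: a copy made inside /mnt/repec inherits the directory layout (cephfs.moncfs.dataec),
# and renaming it over the original replaces the old inode. The name file1.ec is arbitrary.
[ceph: root@ceph1 /]# cp /mnt/repec/file1 /mnt/repec/file1.ec
[ceph: root@ceph1 /]# mv /mnt/repec/file1.ec /mnt/repec/file1
[ceph: root@ceph1 /]# getfattr -n ceph.file.layout /mnt/repec/file1   # should now report pool=cephfs.moncfs.dataec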
# snapshots
[ceph: root@ceph1 /]# echo $(date) >>/mnt/rep2/cron.txt
[ceph: root@ceph1 /]# ls /mnt/rep2/
cron.txt
[ceph: root@ceph1 /]# ls /mnt/rep2/.snap/
[ceph: root@ceph1 /]# mkdir /mnt/rep2/.snap/snap1
[ceph: root@ceph1 /]# ls /mnt/rep2/.snap/
snap1
[ceph: root@ceph1 /]# ls /mnt/rep2/.snap/snap1/
cron.txt

# snapshot schedule
[ceph: root@ceph1 /]# ceph mgr module enable snap_schedule
[ceph: root@ceph1 /]# ceph fs snap-schedule add / 1h
Schedule set for path /
[ceph: root@ceph1 /]# ceph fs snap-schedule retention add / h 24
Retention added to path /
[ceph: root@ceph1 /]# ceph fs snap-schedule add /rep2 1h
Schedule set for path /rep2
[ceph: root@ceph1 /]# ceph fs snap-schedule retention add /rep2 24h5d
Retention added to path /rep2
[ceph: root@ceph1 /]# ceph fs snap-schedule list / --recursive
/ 1h 24h
/rep2 1h 24h5d

[root@cephclt ~]# mount -t ceph ceph1,ceph2,ceph3,ceph4:/ /mnt/moncfs -o name=cephclt
# set up a cron job that appends to cron.txt every hour
[root@cephclt ~]# crontab -l
55 * * * * echo $(date) >>/mnt/moncfs/rep2/cron.txt
[root@cephclt ~]# cat /mnt/moncfs/rep2/cron.txt
Thu Aug 18 20:35:29 UTC 2022
# one hour later
[root@cephclt ~]# cat /mnt/moncfs/rep2/cron.txt
Thu Aug 18 20:35:29 UTC 2022
Thu Aug 18 20:55:01 UTC 2022
[root@cephclt ~]# ls /mnt/moncfs/rep2/.snap
_scheduled-2022-08-18-21_00_00_1  scheduled-2022-08-18-21_00_00  snap1
[root@cephclt ~]# cat /mnt/moncfs/rep2/.snap/scheduled-2022-08-18-21_00_00/cron.txt
Thu Aug 18 20:35:29 UTC 2022
Thu Aug 18 20:55:01 UTC 2022
[root@cephclt ~]# cat /mnt/moncfs/rep2/.snap/snap1/cron.txt
Thu Aug 18 20:35:29 UTC 2022
# after several hours of operation
[root@cephclt ~]# tail -n 3 /mnt/moncfs/rep2/cron.txt
Fri Aug 19 04:55:01 UTC 2022
Fri Aug 19 05:55:01 UTC 2022
Fri Aug 19 06:55:01 UTC 2022
[root@cephclt ~]# tail -n 3 /mnt/moncfs/rep2/.snap/scheduled-2022-08-19-05_00_00/cron.txt
Fri Aug 19 02:55:01 UTC 2022
Fri Aug 19 03:55:01 UTC 2022
Fri Aug 19 04:55:01 UTC 2022
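# Additional check (not in the original lab): the snap_schedule module can also report the state of a
# schedule for a given path; "ceph fs snap-schedule status" returns a JSON summary of the schedule,
# its retention policy and the timestamps of the scheduled snapshots.
[ceph: root@ceph1 /]# ceph fs snap-schedule status /rep2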
# NFS
[ceph: root@ceph1 /]# ceph mgr module enable nfs
[ceph: root@ceph1 /]# ceph nfs cluster create monnfs "ceph3 ceph4"
NFS Cluster Created Successfully
[ceph: root@ceph1 /]# ceph nfs export create cephfs monnfs /rep1 moncfs /rep1 --squash no_root_squash
{
    "bind": "/rep1",
    "fs": "moncfs",
    "path": "/rep1",
    "cluster": "monnfs",
    "mode": "RW"
}
[ceph: root@ceph1 /]# ceph nfs export create cephfs monnfs /rep2 moncfs /rep2 --squash no_root_squash
{
    "bind": "/rep2",
    "fs": "moncfs",
    "path": "/rep2",
    "cluster": "monnfs",
    "mode": "RW"
}
[ceph: root@ceph1 /]# ceph nfs cluster ls
monnfs
[ceph: root@ceph1 /]# ceph nfs cluster info monnfs
{
    "monnfs": {
        "virtual_ip": null,
        "backend": [
            {
                "hostname": "ceph3",
                "ip": "172.16.7.180",
                "port": 2049
            },
            {
                "hostname": "ceph4",
                "ip": "172.16.7.67",
                "port": 2049
            }
        ]
    }
}
[ceph: root@ceph1 /]# ceph nfs export ls monnfs
[
    "/rep1",
    "/rep2"
]
[ceph: root@ceph1 /]# ceph nfs export info monnfs /rep2
{
    "export_id": 2,
    "path": "/rep2",
    "cluster_id": "monnfs",
    "pseudo": "/rep2",
    "access_type": "RW",
    "squash": "no_root_squash",
    "security_label": true,
    "protocols": [
        4
    ],
    "transports": [
        "TCP"
    ],
    "fsal": {
        "name": "CEPH",
        "user_id": "nfs.monnfs.2",
        "fs_name": "moncfs"
    },
    "clients": []
}

# check the mds and nfs services
[ceph: root@ceph1 /]# ceph orch ls |grep mon
mds.moncfs              2/2  6m ago   12h  count:2
mon                     4/5  6m ago   7d   count:5
nfs.monnfs   ?:2049     2/2  5m ago   6m   ceph3;ceph4

# from the client
[root@cephclt ~]# yum install -y nfs-utils
[root@cephclt ~]# mkdir /mnt/nfsrep1
[root@cephclt ~]# mkdir /mnt/nfsrep2
[root@cephclt ~]# mount -t nfs ceph3:/rep1 /mnt/nfsrep1
[root@cephclt ~]# mount -t nfs ceph3:/rep2 /mnt/nfsrep2
[root@cephclt ~]# df -h |grep nfs
ceph3:/rep1  1.9G  1.0G  880M  54% /mnt/nfsrep1
ceph3:/rep2  3.8G     0  3.8G   0% /mnt/nfsrep2
# snapshots are accessible through NFS
[root@cephclt ~]# ls /mnt/nfsrep2/.snap/snap1/
cron.txt
# check that scheduled snapshots keep working over NFS
[root@cephclt ~]# mkdir /mnt/nfsrep2/nfs2
[root@cephclt ~]# echo nfswrite >>/mnt/nfsrep2/nfs2/nfs2.txt
# within the next hour, the scheduled snapshot should pick up the new file
[root@cephclt ~]# cat /mnt/nfsrep2/.snap/scheduled-2022-08-19-11_00_00/nfs2/nfs2.txt
nfswrite
[root@cephclt ~]# cat /mnt/nfsrep2/nfs2/.snap/_scheduled-2022-08-19-11_00_00_1/nfs2.txt
nfswrite
# note: a snapshot can be reached from the root of the snapshotted directory via /.../.snap/scheduled-xx, or from a subdirectory via ./.snap/_scheduled-xx
# cleanup before the next step
[root@cephclt ~]# umount /mnt/nfsrep1
[root@cephclt ~]# umount /mnt/nfsrep2
[ceph: root@ceph1 /]# ceph nfs cluster rm monnfs
NFS Cluster Deleted Successfully
[ceph: root@ceph1 /]# ceph orch ls |grep mon
mds.moncfs              2/2  3m ago   12h  count:2
mon                     4/5  3m ago   7d   count:5
# note: no nfs service is left in the cluster; the cleanup is done automatically after the removal

## NFS HA
[ceph: root@ceph1 /]# vip=$(getent ahostsv4 rgwha |head -n 1| awk '{ print $1 }')
[ceph: root@ceph1 /]# ceph nfs cluster create monnfs "ceph3 ceph4" --ingress --virtual_ip $vip
NFS Cluster Created Successfully
[ceph: root@ceph1 /]# ceph nfs export create cephfs monnfs /rep1 moncfs /rep1 --squash no_root_squash
{
    "bind": "/rep1",
    "fs": "moncfs",
    "path": "/rep1",
    "cluster": "monnfs",
    "mode": "RW"
}
[ceph: root@ceph1 /]# ceph nfs export create cephfs monnfs /rep2 moncfs /rep2 --squash no_root_squash
{
    "bind": "/rep2",
    "fs": "moncfs",
    "path": "/rep2",
    "cluster": "monnfs",
    "mode": "RW"
}
[ceph: root@ceph1 /]# ceph nfs cluster ls
monnfs
[ceph: root@ceph1 /]# ceph nfs cluster info monnfs
{
    "monnfs": {
        "virtual_ip": "172.16.7.248",
        "backend": [
            {
                "hostname": "ceph3",
                "ip": "172.16.7.180",
                "port": 12049
            },
            {
                "hostname": "ceph4",
                "ip": "172.16.7.67",
                "port": 12049
            }
        ],
        "port": 2049,
        "monitor_port": 9049
    }
}
# note: a virtual_ip is used so that the ceph cluster manages the service endpoint
# check the services
[ceph: root@ceph1 /]# ceph orch ls |grep mon
ingress.nfs.monnfs  172.16.7.248:2049,9049  4/4  2m ago  3m   count:2
mds.moncfs                                  2/2  2m ago  13h  count:2
mon                                         4/5  3m ago  7d   count:5
nfs.monnfs          ?:12049                 2/2  3m ago  3m   ceph3;ceph4
# note: there are 2 nfs.monnfs daemons and 2*2 ingress.nfs.monnfs daemons behind the VIP, which provide the HA
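# Side note (not in the original lab): the 2*2 ingress daemons are haproxy instances (serving the NFS
# port on the VIP) plus keepalived instances that hold the virtual IP; they can be listed with:
[ceph: root@ceph1 /]# ceph orch ps | grep -E 'haproxy|keepalived'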
# client side
[root@cephclt ~]# vip=$(getent ahostsv4 rgwha |head -n 1| awk '{ print $1 }')
[root@cephclt ~]# mount -t nfs $vip:/rep1 /mnt/nfsrep1
[root@cephclt ~]# mount -t nfs $vip:/rep2 /mnt/nfsrep2
[root@cephclt ~]# tail -n 3 /mnt/nfsrep2/cron.txt
Fri Aug 19 06:55:01 UTC 2022
Fri Aug 19 07:55:01 UTC 2022
Fri Aug 19 08:55:01 UTC 2022

# HA failover test
[ceph: root@ceph1 /]# ceph orch ps |grep haproxy
haproxy.nfs.monnfs.ceph1.nieqbn  ceph1  *:2049,9049  running (15m)  3m ago  16m  3800k  -  2.3.21-3ce4ee0  7ecd3fda00f4  cef12c30f3c5
haproxy.nfs.monnfs.ceph2.jehgpw  ceph2  *:2049,9049  running (15m)  5m ago  15m  8032k  -  2.3.21-3ce4ee0  7ecd3fda00f4  2d6bd0044a85
# the VIP is held by one of the 2 haproxy nodes
[almalinux@ceph2 ~]$ ip ad |grep 172.16
    inet 172.16.7.245/24 brd 172.16.7.255 scope global dynamic noprefixroute eth0
    inet 172.16.7.248/32 scope global eth0
# note: ceph2 indeed carries the additional IP
# stop the daemon to test the failover
# note: use the daemon name from your own environment, as shown by the ceph orch ps command above
[ceph: root@ceph1 /]# ceph orch daemon stop haproxy.nfs.monnfs.ceph2.jehgpw
Scheduled to stop haproxy.nfs.monnfs.ceph2.jehgpw on host 'ceph2'
# test access from the client
[root@cephclt ~]# tail -n 2 /mnt/nfsrep2/cron.txt
Fri Aug 19 07:55:01 UTC 2022
Fri Aug 19 08:55:01 UTC 2022
# note: there is a short delay while the failover happens, but the connection is re-established
# check: the VIP has indeed moved to the other node, here ceph1
[almalinux@ceph1 ~]$ ip ad |grep 172.16
    inet 172.16.7.125/24 brd 172.16.7.255 scope global dynamic noprefixroute eth0
    inet 172.16.7.248/32 scope global eth0
[almalinux@ceph2 ~]$ ip ad |grep 172.16
    inet 172.16.7.245/24 brd 172.16.7.255 scope global dynamic noprefixroute eth0
[ceph: root@ceph2 /]# ceph orch daemon start haproxy.nfs.monnfs.ceph2.jehgpw
Scheduled to start haproxy.nfs.monnfs.ceph2.jehgpw on host 'ceph2'
# note: restarting the haproxy daemon does not move the VIP back

# release the resources and the VIP, as they will be used in the next lab
[root@cephclt ~]# umount /mnt/nfsrep1
[root@cephclt ~]# umount /mnt/nfsrep2
[ceph: root@ceph1 /]# ceph nfs cluster rm monnfs
NFS Cluster Deleted Successfully
[ceph: root@ceph1 /]# ceph orch ps |grep haproxy
[ceph: root@ceph1 /]# ceph orch ps |grep mon
mds.moncfs.ceph1.ptxvyj  ceph1  running (16h)  5s ago  16h  37.6M  -  16.2.10  0d668911f040  cc700f4cc03c
mds.moncfs.ceph2.irfqzi  ceph2  running (16h)  7s ago  16h  25.3M  -  16.2.10  0d668911f040  4f696a85cd73
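# Hypothetical extra cleanup (not in the original lab): the hourly cron job created on the client still
# appends to /mnt/moncfs/rep2/cron.txt. If it is not needed for the next lab it can be removed;
# crontab -r clears the whole crontab, so prefer crontab -e if other jobs exist.
[root@cephclt ~]# crontab -l
55 * * * * echo $(date) >>/mnt/moncfs/rep2/cron.txt
[root@cephclt ~]# crontab -r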