@@ -36,6 +36,8 @@ timelineInfoNew(TimeLineID tli)
3636 tlinfo -> switchpoint = InvalidXLogRecPtr ;
3737 tlinfo -> parent_link = NULL ;
3838 tlinfo -> xlog_filelist = parray_new ();
39+ tlinfo -> anchor_lsn = InvalidXLogRecPtr ;
40+ tlinfo -> anchor_tli = 0 ;
3941 return tlinfo ;
4042}
4143
@@ -746,6 +748,7 @@ catalog_get_timelines(InstanceConfig *instance)
746748 wal_file -> file = * file ;
747749 wal_file -> segno = segno ;
748750 wal_file -> type = BACKUP_HISTORY_FILE ;
751+ wal_file -> keep = false;
749752 parray_append (tlinfo -> xlog_filelist , wal_file );
750753 continue ;
751754 }
@@ -765,6 +768,7 @@ catalog_get_timelines(InstanceConfig *instance)
765768 wal_file -> file = * file ;
766769 wal_file -> segno = segno ;
767770 wal_file -> type = PARTIAL_SEGMENT ;
771+ wal_file -> keep = false;
768772 parray_append (tlinfo -> xlog_filelist , wal_file );
769773 continue ;
770774 }
@@ -826,6 +830,7 @@ catalog_get_timelines(InstanceConfig *instance)
826830 wal_file -> file = * file ;
827831 wal_file -> segno = segno ;
828832 wal_file -> type = SEGMENT ;
833+ wal_file -> keep = false;
829834 parray_append (tlinfo -> xlog_filelist , wal_file );
830835 }
831836 /* timeline history file */
@@ -895,6 +900,344 @@ catalog_get_timelines(InstanceConfig *instance)
895900 tlinfo -> closest_backup = get_closest_backup (tlinfo );
896901 }
897902
903+ /* determine which WAL segments must be kept because of wal retention */
904+ if (instance -> wal_depth <= 0 )
905+ return timelineinfos ;
906+
907+ /*
908+ * WAL retention for now is fairly simple.
909+ * User can set only one parameter - 'wal-depth'.
910+ * It determines how many latest valid(!) backups on timeline
911+ * must have an ability to perform PITR:
912+ * Consider the example:
913+ *
914+ * ---B1-------B2-------B3-------B4--------> WAL timeline1
915+ *
916+ * If 'wal-depth' is set to 2, then WAL purge should produce the following result:
917+ *
918+ * B1 B2 B3-------B4--------> WAL timeline1
919+ *
920+ * Only valid backup can satisfy 'wal-depth' condition, so if B3 is not OK or DONE,
921+ * then WAL purge should produce the following result:
922+ * B1 B2-------B3-------B4--------> WAL timeline1
923+ *
924+ * Complicated cases, such as branched timelines are taken into account.
925+ * wal-depth is applied to each timeline independently:
926+ *
927+ * |---------> WAL timeline2
928+ * ---B1---|---B2-------B3-------B4--------> WAL timeline1
929+ *
930+ * after WAL purge with wal-depth=2:
931+ *
932+ * |---------> WAL timeline2
933+ * B1---| B2 B3-------B4--------> WAL timeline1
934+ *
935+ * In this example WAL retention prevents purge of WAL required by tli2
936+ * to stay reachable from backup B1 on tli1.
937+ *
938+ * To protect WAL from purge we try to set 'anchor_lsn' and 'anchor_tli' in every timeline.
939+ * They usually come from the 'start-lsn' and 'tli' attributes of the backup
940+ * selected by the 'wal-depth' parameter.
941+ * With 'wal-depth=2' anchor_backup in tli1 is B3.
942+
943+ * If a timeline does not have enough valid backups to satisfy the 'wal-depth' condition,
944+ * then 'anchor_lsn' and 'anchor_tli' are taken from the 'start-lsn' and 'tli'
945+ * attributes of closest_backup.
946+ * The interval of WAL starting from closest_backup to switchpoint is
947+ * saved into 'keep_segments' attribute.
948+ * If there are several intermediate timelines between the timeline and its closest_backup,
949+ * then on every intermediate timeline the WAL interval between the switchpoint
950+ * and the starting segment is placed in its 'keep_segments' attribute:
951+ *
952+ * |---------> WAL timeline3
953+ * |------| B5-----B6--> WAL timeline2
954+ * B1---| B2 B3-------B4------------> WAL timeline1
955+ *
956+ * On timeline where closest_backup is located the WAL interval between
957+ * closest_backup and switchpoint is placed into 'keep_segments'.
958+ * If timeline has no 'closest_backup', then 'wal-depth' rules cannot be applied
959+ * to this timeline and its WAL must be purged by following the basic rules of WAL purging.
960+ *
961+ * Third part is handling of ARCHIVE backups.
962+ * If B1 and B2 have ARCHIVE wal-mode, then we must preserve WAL intervals
963+ * between start_lsn and stop_lsn for each of them in 'keep_segments'.
964+ */
965+
966+ /* determine anchor_lsn and keep_segments for every timeline */
967+ for (int i = 0 ; i < parray_num (timelineinfos ); i ++ )
968+ {
969+ int count = 0 ;
970+ timelineInfo * tlinfo = parray_get (timelineinfos , i );
971+
972+ /*
973+ * Iterate backward on backups belonging to this timeline to find
974+ * anchor_backup. NOTE Here we rely on the fact that backups list
975+ * is ordered by start_lsn DESC.
976+ */
977+ if (tlinfo -> backups )
978+ {
979+ for (int j = 0 ; j < parray_num (tlinfo -> backups ); j ++ )
980+ {
981+ pgBackup * backup = parray_get (tlinfo -> backups , j );
982+
983+ /* skip invalid backups */
984+ if (backup -> status != BACKUP_STATUS_OK &&
985+ backup -> status != BACKUP_STATUS_DONE )
986+ continue ;
987+
988+ /* sanity */
989+ if (XLogRecPtrIsInvalid (backup -> start_lsn ) ||
990+ backup -> tli <= 0 )
991+ continue ;
992+
993+ count ++ ;
994+
995+ if (count == instance -> wal_depth )
996+ {
997+ elog (LOG , "On timeline %i WAL is protected from purge at %X/%X" ,
998+ tlinfo -> tli ,
999+ (uint32 ) (backup -> start_lsn >> 32 ),
1000+ (uint32 ) (backup -> start_lsn ));
1001+
1002+ tlinfo -> anchor_lsn = backup -> start_lsn ;
1003+ tlinfo -> anchor_tli = backup -> tli ;
1004+ break ;
1005+ }
1006+ }
1007+ }
1008+
1009+ /*
1010+ * Failed to find anchor backup for this timeline.
1011+ * We cannot just throw it to the wolves, because by
1012+ * doing that we will violate our own guarantees.
1013+ * So check the existence of closest_backup for
1014+ * this timeline. If there is one, then
1015+ * set the 'anchor_lsn' and 'anchor_tli' to closest_backup
1016+ * 'start-lsn' and 'tli' respectively.
1017+ * |-------------B5----------> WAL timeline3
1018+ * |-----|-------------------------> WAL timeline2
1019+ * B1 B2---| B3 B4-------B6-----> WAL timeline1
1020+ *
1021+ * wal-depth=2
1022+ *
1023+ * If number of valid backups on timelines is less than 'wal-depth'
1024+ * then timeline must(!) stay reachable via parent timelines if any.
1025+ * If closest_backup is not available, then general WAL purge rules
1026+ * are applied.
1027+ */
1028+ if (XLogRecPtrIsInvalid (tlinfo -> anchor_lsn ))
1029+ {
1030+ /*
1031+ * Failed to find anchor_lsn in our own timeline.
1032+ * Consider the case:
1033+ * -------------------------------------> tli5
1034+ * ----------------------------B4-------> tli4
1035+ * S2`--------------> tli3
1036+ * S1`------------S2---B3-------B6-> tli2
1037+ * B1---S1-------------B2--------B5-----> tli1
1038+ *
1039+ * B* - backups
1040+ * S* - switchpoints
1041+ * wal-depth=2
1042+ *
1043+ * Expected result:
1044+ * TLI5 will be purged entirely
1045+ * B4-------> tli4
1046+ * S2`--------------> tli3
1047+ * S1`------------S2 B3-------B6-> tli2
1048+ * B1---S1 B2--------B5-----> tli1
1049+ */
1050+ pgBackup * closest_backup = NULL ;
1051+ xlogInterval * interval = NULL ;
1052+ TimeLineID tli = 0 ;
1053+ /* check if tli has closest_backup */
1054+ if (!tlinfo -> closest_backup )
1055+ /* timeline has no closest_backup, wal retention cannot be
1056+ * applied to this timeline.
1057+ * Timeline will be purged up to oldest_backup if any or
1058+ * purged entirely if there is none.
1059+ * In example above: tli5 and tli4.
1060+ */
1061+ continue ;
1062+
1063+ /* sanity for closest_backup */
1064+ if (XLogRecPtrIsInvalid (tlinfo -> closest_backup -> start_lsn ) ||
1065+ tlinfo -> closest_backup -> tli <= 0 )
1066+ continue ;
1067+
1068+ /*
1069+ * Set anchor_lsn and anchor_tli to protect whole timeline from purge
1070+ * In the example above: tli3.
1071+ */
1072+ tlinfo -> anchor_lsn = tlinfo -> closest_backup -> start_lsn ;
1073+ tlinfo -> anchor_tli = tlinfo -> closest_backup -> tli ;
1074+
1075+ /* closest backup may be located not in parent timeline */
1076+ closest_backup = tlinfo -> closest_backup ;
1077+
1078+ tli = tlinfo -> tli ;
1079+
1080+ /*
1081+ * Iterate over parent timeline chain and
1082+ * look for timeline where closest_backup belong
1083+ */
1084+ while (tlinfo -> parent_link )
1085+ {
1086+ /* In case of intermediate timeline save to keep_segments
1087+ * begin_segno and switchpoint segment.
1088+ * In case of final timelines save to keep_segments
1089+ * closest_backup start_lsn segment and switchpoint segment.
1090+ */
1091+ XLogRecPtr switchpoint = tlinfo -> switchpoint ;
1092+
1093+ tlinfo = tlinfo -> parent_link ;
1094+
1095+ if (tlinfo -> keep_segments == NULL )
1096+ tlinfo -> keep_segments = parray_new ();
1097+
1098+ /* in any case, switchpoint segment must be added to interval */
1099+ interval = palloc (sizeof (xlogInterval ));
1100+ GetXLogSegNo (switchpoint , interval -> end_segno , instance -> xlog_seg_size );
1101+
1102+ /* Save [S1`, S2] to keep_segments */
1103+ if (tlinfo -> tli != closest_backup -> tli )
1104+ interval -> begin_segno = tlinfo -> begin_segno ;
1105+ /* Save [B1, S1] to keep_segments */
1106+ else
1107+ GetXLogSegNo (closest_backup -> start_lsn , interval -> begin_segno , instance -> xlog_seg_size );
1108+
1109+ /*
1110+ * TODO: check, maybe this interval is already here or
1111+ * covered by other larger interval.
1112+ */
1113+
1114+ elog (LOG , "Timeline %i to stay reachable from timeline %i "
1115+ "protect from purge WAL interval between "
1116+ "%08X%08X and %08X%08X on timeline %i" ,
1117+ tli , closest_backup -> tli ,
1118+ (uint32 ) interval -> begin_segno / instance -> xlog_seg_size ,
1119+ (uint32 ) interval -> begin_segno % instance -> xlog_seg_size ,
1120+ (uint32 ) interval -> end_segno / instance -> xlog_seg_size ,
1121+ (uint32 ) interval -> end_segno % instance -> xlog_seg_size ,
1122+ tlinfo -> tli );
1123+ parray_append (tlinfo -> keep_segments , interval );
1124+ continue ;
1125+ }
1126+ continue ;
1127+ }
1128+
1129+ /* Iterate over backups left */
1130+ for (int j = count ; j < parray_num (tlinfo -> backups ); j ++ )
1131+ {
1132+ XLogSegNo segno = 0 ;
1133+ xlogInterval * interval = NULL ;
1134+ pgBackup * backup = parray_get (tlinfo -> backups , j );
1135+
1136+ /*
1137+ * We must calculate keep_segments intervals for ARCHIVE backups
1138+ * with start_lsn less than anchor_lsn.
1139+ */
1140+
1141+ /* STREAM backups cannot contribute to keep_segments */
1142+ if (backup -> stream )
1143+ continue ;
1144+
1145+ /* sanity */
1146+ if (XLogRecPtrIsInvalid (backup -> start_lsn ) ||
1147+ backup -> tli <= 0 )
1148+ continue ;
1149+
1150+ /* no point in clogging keep_segments by backups protected by anchor_lsn */
1151+ if (backup -> start_lsn >= tlinfo -> anchor_lsn )
1152+ continue ;
1153+
1154+ /* append interval to keep_segments */
1155+ interval = palloc (sizeof (xlogInterval ));
1156+ GetXLogSegNo (backup -> start_lsn , segno , instance -> xlog_seg_size );
1157+ interval -> begin_segno = segno ;
1158+ GetXLogSegNo (backup -> stop_lsn , segno , instance -> xlog_seg_size );
1159+
1160+ /*
1161+ * On replica it is possible to get STOP_LSN pointing to contrecord,
1162+ * so set end_segno to the next segment after STOP_LSN just to be safe.
1163+ */
1164+ if (backup -> from_replica )
1165+ interval -> end_segno = segno + 1 ;
1166+ else
1167+ interval -> end_segno = segno ;
1168+
1169+ elog (LOG , "Archive backup %s to stay consistent "
1170+ "protect from purge WAL interval "
1171+ "between %08X%08X and %08X%08X on timeline %i" ,
1172+ base36enc (backup -> start_time ),
1173+ (uint32 ) interval -> begin_segno / instance -> xlog_seg_size ,
1174+ (uint32 ) interval -> begin_segno % instance -> xlog_seg_size ,
1175+ (uint32 ) interval -> end_segno / instance -> xlog_seg_size ,
1176+ (uint32 ) interval -> end_segno % instance -> xlog_seg_size ,
1177+ backup -> tli );
1178+
1179+ if (tlinfo -> keep_segments == NULL )
1180+ tlinfo -> keep_segments = parray_new ();
1181+
1182+ parray_append (tlinfo -> keep_segments , interval );
1183+ }
1184+ }
1185+
1186+ /*
1187+ * Protect WAL segments from deletion by setting 'keep' flag.
1188+ * We must keep all WAL segments after anchor_lsn (including), and also segments
1189+ * required by ARCHIVE backups for consistency - WAL between [start_lsn, stop_lsn].
1190+ */
1191+ for (int i = 0 ; i < parray_num (timelineinfos ); i ++ )
1192+ {
1193+ XLogSegNo anchor_segno = 0 ;
1194+ timelineInfo * tlinfo = parray_get (timelineinfos , i );
1195+
1196+ /*
1197+ * At this point invalid anchor_lsn can be only in one case:
1198+ * timeline is going to be purged by regular WAL purge rules.
1199+ */
1200+ if (XLogRecPtrIsInvalid (tlinfo -> anchor_lsn ))
1201+ continue ;
1202+
1203+ /*
1204+ * anchor_lsn is located in another timeline, it means that the timeline
1205+ * will be protected from purge entirely.
1206+ */
1207+ if (tlinfo -> anchor_tli > 0 && tlinfo -> anchor_tli != tlinfo -> tli )
1208+ continue ;
1209+
1210+ GetXLogSegNo (tlinfo -> anchor_lsn , anchor_segno , instance -> xlog_seg_size );
1211+
1212+ for (int i = 0 ; i < parray_num (tlinfo -> xlog_filelist ); i ++ )
1213+ {
1214+ xlogFile * wal_file = (xlogFile * ) parray_get (tlinfo -> xlog_filelist , i );
1215+
1216+ if (wal_file -> segno >= anchor_segno )
1217+ {
1218+ wal_file -> keep = true;
1219+ continue ;
1220+ }
1221+
1222+ /* no keep segments */
1223+ if (!tlinfo -> keep_segments )
1224+ continue ;
1225+
1226+ /* Protect segments belonging to one of the keep intervals */
1227+ for (int j = 0 ; j < parray_num (tlinfo -> keep_segments ); j ++ )
1228+ {
1229+ xlogInterval * keep_segments = (xlogInterval * ) parray_get (tlinfo -> keep_segments , j );
1230+
1231+ if ((wal_file -> segno >= keep_segments -> begin_segno ) &&
1232+ wal_file -> segno <= keep_segments -> end_segno )
1233+ {
1234+ wal_file -> keep = true;
1235+ break ;
1236+ }
1237+ }
1238+ }
1239+ }
1240+
8981241 return timelineinfos ;
8991242}
9001243
0 commit comments