1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23   
 24   
 25   
 26   
 27   
 28   
 29   
 30   
 31   
 32   
 33   
 34   
 35   
 36   
 37   
 38  """ 
 39  Implements the standard 'collect' action. 
 40  @sort: executeCollect 
 41  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 42  """ 
 43   
 44   
 45   
 46   
 47   
 48   
 49   
 50  import os 
 51  import logging 
 52  import pickle 
 53   
 54   
 55  from CedarBackup2.filesystem import BackupFileList, FilesystemList 
 56  from CedarBackup2.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath 
 57  from CedarBackup2.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR 
 58  from CedarBackup2.actions.util import writeIndicatorFile 
 59   
 60   
 61   
 62   
 63   
 64   
# Module-level logger shared by every function in this file.
logger = logging.getLogger("CedarBackup2.log.actions.collect")
 66   
 67   
 68   
 69   
 70   
 71   
 72   
 73   
 74   
 75   
 76   
 78     """ 
 79     Executes the collect backup action. 
 80   
 81     @note: When the collect action is complete, we will write a collect 
 82     indicator to the collect directory, so it's obvious that the collect action 
 83     has completed.  The stage process uses this indicator to decide whether a 
 84     peer is ready to be staged. 
 85   
 86     @param configPath: Path to configuration file on disk. 
 87     @type configPath: String representing a path on disk. 
 88   
 89     @param options: Program command-line options. 
 90     @type options: Options object. 
 91   
 92     @param config: Program configuration. 
 93     @type config: Config object. 
 94   
 95     @raise ValueError: Under many generic error conditions 
 96     @raise TarError: If there is a problem creating a tar file 
 97     """ 
 98     logger.debug("Executing the 'collect' action.") 
 99     if config.options is None or config.collect is None: 
100        raise ValueError("Collect configuration is not properly filled in.") 
101     if ((config.collect.collectFiles is None or len(config.collect.collectFiles) < 1) and 
102         (config.collect.collectDirs is None or len(config.collect.collectDirs) < 1)): 
103        raise ValueError("There must be at least one collect file or collect directory.") 
104     fullBackup = options.full 
105     logger.debug("Full backup flag is [%s]", fullBackup) 
106     todayIsStart = isStartOfWeek(config.options.startingDay) 
107     resetDigest = fullBackup or todayIsStart 
108     logger.debug("Reset digest flag is [%s]", resetDigest) 
109     if config.collect.collectFiles is not None: 
110        for collectFile in config.collect.collectFiles: 
111           logger.debug("Working with collect file [%s]", collectFile.absolutePath) 
112           collectMode = _getCollectMode(config, collectFile) 
113           archiveMode = _getArchiveMode(config, collectFile) 
114           digestPath = _getDigestPath(config, collectFile.absolutePath) 
115           tarfilePath = _getTarfilePath(config, collectFile.absolutePath, archiveMode) 
116           if fullBackup or (collectMode in ['daily', 'incr', ]) or (collectMode == 'weekly' and todayIsStart): 
117              logger.debug("File meets criteria to be backed up today.") 
118              _collectFile(config, collectFile.absolutePath, tarfilePath, 
119                           collectMode, archiveMode, resetDigest, digestPath) 
120           else: 
121              logger.debug("File will not be backed up, per collect mode.") 
122           logger.info("Completed collecting file [%s]", collectFile.absolutePath) 
123     if config.collect.collectDirs is not None: 
124        for collectDir in config.collect.collectDirs: 
125           logger.debug("Working with collect directory [%s]", collectDir.absolutePath) 
126           collectMode = _getCollectMode(config, collectDir) 
127           archiveMode = _getArchiveMode(config, collectDir) 
128           ignoreFile = _getIgnoreFile(config, collectDir) 
129           linkDepth = _getLinkDepth(collectDir) 
130           dereference = _getDereference(collectDir) 
131           recursionLevel = _getRecursionLevel(collectDir) 
132           (excludePaths, excludePatterns) = _getExclusions(config, collectDir) 
133           if fullBackup or (collectMode in ['daily', 'incr', ]) or (collectMode == 'weekly' and todayIsStart): 
134              logger.debug("Directory meets criteria to be backed up today.") 
135              _collectDirectory(config, collectDir.absolutePath, 
136                                collectMode, archiveMode, ignoreFile, linkDepth, dereference, 
137                                resetDigest, excludePaths, excludePatterns, recursionLevel) 
138           else: 
139              logger.debug("Directory will not be backed up, per collect mode.") 
140           logger.info("Completed collecting directory [%s]", collectDir.absolutePath) 
141     writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR, 
142                        config.options.backupUser, config.options.backupGroup) 
143     logger.info("Executed the 'collect' action successfully.") 
 144   
145   
146   
147   
148   
149   
150   
151   
152   
153   
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Collects a single configured collect file.

   A backup list holding just the one file is built and handed to
   L{_executeBackup}, which creates the tarfile.  When the file is collected
   incrementally, the digest path and the reset digest flag come into play:
   the reset flag causes any existing digest to be ignored, but a new digest
   is always written.

   The caller is responsible for deciding the collect and archive modes,
   since either can be set on the collect configuration or on the collect
   file itself.

   @param config: Config object.
   @param absolutePath: Absolute path of file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   singleFileList = BackupFileList()
   singleFileList.addFile(absolutePath)
   _executeBackup(config=config,
                  backupList=singleFileList,
                  absolutePath=absolutePath,
                  tarfilePath=tarfilePath,
                  collectMode=collectMode,
                  archiveMode=archiveMode,
                  resetDigest=resetDigest,
                  digestPath=digestPath)
 178   
179   
180   
181   
182   
183   
def _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      excludePaths, excludePatterns, recursionLevel):
   """
   Collects a configured collect directory.

   At recursion level zero, the directory is collected into one tarfile.
   The tarfile path and the digest path are both derived from
   C{absolutePath}.  For directories that are collected incrementally, the
   reset digest flag causes any existing digest to be ignored, but a new
   digest is always written.

   At a higher recursion level, each immediate subdirectory is first
   collected on its own with the recursion level reduced by one.  The
   directory itself is then collected at level zero with those
   subdirectories added to the exclusions, so that nothing is collected
   twice.

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect directory itself.

   @note: The C{excludePaths} list passed in by the caller is never modified.
   Subdirectories excluded along the way are tracked in a private copy.

   @param config: Config object.
   @param absolutePath: Absolute path of directory to collect.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param ignoreFile: Ignore file to use.
   @param linkDepth: Link depth value to use.
   @param dereference: Dereference flag to use.
   @param resetDigest: Reset digest flag.
   @param excludePaths: List of absolute paths to exclude.
   @param excludePatterns: List of patterns to exclude.
   @param recursionLevel: Recursion level (zero for no recursion)
   """
   if recursionLevel == 0:
      # Collect the actual directory because we're at recursion level 0
      logger.info("Collecting directory [%s]", absolutePath)
      tarfilePath = _getTarfilePath(config, absolutePath, archiveMode)
      digestPath = _getDigestPath(config, absolutePath)

      backupList = BackupFileList()
      backupList.ignoreFile = ignoreFile
      backupList.excludePaths = excludePaths
      backupList.excludePatterns = excludePatterns
      backupList.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)

      _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)
   else:
      # Accumulate exclusions in a private copy, so the list owned by the
      # caller is left exactly as it was handed to us.
      if excludePaths is None:
         pendingExcludes = []
      else:
         pendingExcludes = list(excludePaths)

      # Find all of the immediate subdirectories (files and links are left out)
      subdirs = FilesystemList()
      subdirs.excludeFiles = True
      subdirs.excludeLinks = True
      subdirs.excludePaths = excludePaths
      subdirs.excludePatterns = excludePatterns
      subdirs.addDirContents(path=absolutePath, recursive=False, addSelf=False)

      # Back up the subdirectories separately
      for subdir in subdirs:
         _collectDirectory(config, subdir, collectMode, archiveMode,
                           ignoreFile, linkDepth, dereference, resetDigest,
                           pendingExcludes, excludePatterns, recursionLevel-1)
         pendingExcludes.append(subdir)  # this directory is already backed up, so exclude it

      # Back up everything that hasn't previously been backed up
      _collectDirectory(config, absolutePath, collectMode, archiveMode,
                        ignoreFile, linkDepth, dereference, resetDigest,
                        pendingExcludes, excludePatterns, 0)
 244   
245   
246   
247   
248   
249   
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Execute the backup process for the indicated backup list.

   This function consolidates the work shared by L{_collectFile} and
   L{_collectDirectory}.  Those functions build the backup list; this one
   writes the tarfile, sets its ownership, and manages the digest file on
   disk.

   A digest is only used when the collect mode is 'incr'.  In that case,
   files that are unchanged according to the old digest are removed from the
   backup list before the tarfile is written, and a new digest is written
   afterwards.  If the reset digest flag is set, the old digest is treated as
   empty rather than being loaded from disk.

   No tarfile is written when the backup list is empty.

   For collect files, the digest file will always just contain the single
   file that is being backed up.  This might be a little wasteful in terms of
   the number of files that we keep around, but it's consistent and easy to
   understand.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   incremental = (collectMode == 'incr')
   newDigest = None

   # Incremental collection prunes the list first; other modes skip the digest
   if not incremental:
      logger.debug("Collect mode is [%s]; no digest will be used.", collectMode)
   else:
      if resetDigest:
         logger.debug("Based on resetDigest flag, digest will be cleared.")
         oldDigest = {}
      else:
         logger.debug("Based on resetDigest flag, digest will loaded from disk.")
         oldDigest = _loadDigest(digestPath)
      (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
      logger.debug("Removed %d unchanged files based on digest values.", removed)

   # Report what is about to be backed up
   fileCount = len(backupList)
   isSingleFile = fileCount == 1 and backupList[0] == absolutePath  # special case for individual file
   sizeText = displayBytes(backupList.totalSize())
   if isSingleFile:
      logger.info("Backing up file [%s] (%s).", absolutePath, sizeText)
   else:
      logger.info("Backing up %d files in [%s] (%s).", fileCount, absolutePath, sizeText)

   # Only write a tarfile when there is something to put into it
   if fileCount > 0:
      backupList.generateTarfile(tarfilePath, archiveMode, True)
      changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)

   # The new digest is written even when nothing was backed up
   if incremental:
      _writeDigest(config, newDigest, digestPath)
 299   
300   
301   
302   
303   
304   
def _loadDigest(digestPath):
   """
   Loads the indicated digest path from disk into a dictionary.

   If we can't load the digest successfully (either because it doesn't exist
   or for some other reason), then an empty dictionary will be returned - but
   the condition will be logged.

   @note: The digest is read with C{pickle}, which must only be used on
   trusted data.  NOTE(review): this presumably relies on the digest file
   being writable only by the backup user - confirm.

   @param digestPath: Path to the digest file on disk.

   @return: Dictionary representing contents of digest path.
   """
   if not os.path.isfile(digestPath):
      logger.debug("Digest [%s] does not exist on disk.", digestPath)
      return {}
   try:
      # Close the file explicitly rather than leaving it to garbage
      # collection; try/finally avoids assuming 'with' is available.
      handle = open(digestPath, "r")
      try:
         digest = pickle.load(handle)
      finally:
         handle.close()
      logger.debug("Loaded digest [%s] from disk: %d entries.", digestPath, len(digest))
   except Exception:
      # Any ordinary failure falls back to an empty digest, but interpreter
      # exit and keyboard interrupts are allowed to propagate.
      logger.error("Failed loading digest [%s] from disk.", digestPath)
      return {}
   return digest
 328   
329   
330   
331   
332   
333   
def _writeDigest(config, digest, digestPath):
   """
   Writes the digest dictionary to the indicated digest path on disk.

   The file is closed before its ownership is changed, so that the complete
   digest is on disk by the time ownership is changed.

   If we can't write the digest successfully for any reason, we'll log the
   condition but won't throw an exception.

   @param config: Config object.
   @param digest: Digest dictionary to write to disk.
   @param digestPath: Path to the digest file on disk.
   """
   try:
      # Close the file explicitly rather than leaving it to garbage
      # collection; try/finally avoids assuming 'with' is available.
      handle = open(digestPath, "w")
      try:
         pickle.dump(digest, handle)
      finally:
         handle.close()
      changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
      logger.debug("Wrote new digest [%s] to disk: %d entries.", digestPath, len(digest))
   except Exception:
      # Any ordinary failure is logged and swallowed, but interpreter exit
      # and keyboard interrupts are allowed to propagate.
      logger.error("Failed to write digest [%s] to disk.", digestPath)
 351   
352   
353   
354   
355   
356   
357   
358   
359   
360   
def _getCollectMode(config, item):
   """
   Gets the collect mode that should be used for a collect directory or file.
   A mode set on the item itself wins; otherwise the mode from the collect
   section of configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Collect mode to use.
   """
   mode = item.collectMode
   if mode is None:
      mode = config.collect.collectMode
   logger.debug("Collect mode is [%s]", mode)
   return mode
 375   
376   
377   
378   
379   
380   
def _getArchiveMode(config, item):
   """
   Gets the archive mode that should be used for a collect directory or file.
   A mode set on the item itself wins; otherwise the mode from the collect
   section of configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Archive mode to use.
   """
   mode = item.archiveMode
   if mode is None:
      mode = config.collect.archiveMode
   logger.debug("Archive mode is [%s]", mode)
   return mode
 395   
396   
397   
398   
399   
400   
def _getIgnoreFile(config, item):
   """
   Gets the ignore file that should be used for a collect directory or file.
   A value set on the item itself wins; otherwise the value from the collect
   section of configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Ignore file to use.
   """
   chosen = item.ignoreFile
   if chosen is None:
      chosen = config.collect.ignoreFile
   logger.debug("Ignore file is [%s]", chosen)
   return chosen
 415   
416   
417   
418   
419   
420   
def _getLinkDepth(item):
   """
   Gets the link depth that should be used for a collect directory.
   The value on the directory is used when it is set; otherwise the link
   depth is 0 (zero).
   @param item: C{CollectDir} object
   @return: Link depth to use.
   """
   depth = item.linkDepth
   if depth is None:
      depth = 0
   logger.debug("Link depth is [%d]", depth)
   return depth
 434   
435   
436   
437   
438   
439   
def _getDereference(item):
   """
   Gets the dereference flag that should be used for a collect directory.
   The value on the directory is used when it is set; otherwise the flag is
   False.
   @param item: C{CollectDir} object
   @return: Dereference flag to use.
   """
   flag = item.dereference
   if flag is None:
      flag = False
   logger.debug("Dereference flag is [%s]", flag)
   return flag
 453   
454   
455   
456   
457   
458   
472   
473   
474   
475   
476   
477   
def _getDigestPath(config, absolutePath):
   """
   Gets the digest path associated with a collect directory or file.
   The digest lives in the configured working directory, and is named using
   the normalized form of the collected path plus the digest extension.
   @param config: Config object.
   @param absolutePath: Absolute path to generate digest for
   @return: Absolute path to the digest associated with the collect directory or file.
   """
   digestName = "%s.%s" % (buildNormalizedPath(absolutePath), DIGEST_EXTENSION)
   result = os.path.join(config.options.workingDir, digestName)
   logger.debug("Digest path is [%s]", result)
   return result
 490   
491   
492   
493   
494   
495   
def _getTarfilePath(config, absolutePath, archiveMode):
   """
   Gets the tarfile path (including correct extension) associated with a
   collect directory or file.
   The tarfile lives in the configured collect target directory, and is named
   using the normalized form of the collected path plus an extension chosen
   by archive mode.
   @param config: Config object.
   @param absolutePath: Absolute path to generate tarfile for
   @param archiveMode: Archive mode to use for this tarfile ('tar', 'targz' or 'tarbz2').
   @return: Absolute path to the tarfile associated with the collect directory or file.
   @raise ValueError: If the archive mode is not one of the recognized values.
   """
   extensions = { 'tar': "tar", 'targz': "tar.gz", 'tarbz2': "tar.bz2", }
   if archiveMode not in extensions:
      # Fail with a meaningful error instead of tripping over an unbound
      # local variable further down.
      raise ValueError("Unknown archive mode [%s]." % archiveMode)
   extension = extensions[archiveMode]
   normalized = buildNormalizedPath(absolutePath)
   filename = "%s.%s" % (normalized, extension)
   tarfilePath = os.path.join(config.collect.targetDir, filename)
   logger.debug("Tarfile path is [%s]", tarfilePath)
   return tarfilePath
 515   
516   
517   
518   
519   
520   
555