1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16   
  17   
  18   
  19   
  20   
  21   
  22   
  23   
  24   
  25   
  26   
  27   
  28   
  29   
  30   
  31   
  32   
  33   
  34   
  35   
  36   
  37   
  38  """ 
  39  Provides filesystem-related objects. 
  40  @sort: FilesystemList, BackupFileList, PurgeItemList 
  41  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  42  """ 
  43   
  44   
  45   
  46   
  47   
  48   
  49   
  50  import os 
  51  import re 
  52  import math 
  53  import logging 
  54  import tarfile 
  55   
  56   
  57  from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit 
  58  from CedarBackup2.util import AbsolutePathList, UnorderedList, RegexList 
  59  from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink 
  60   
  61   
  62   
  63   
  64   
  65   
  66  logger = logging.getLogger("CedarBackup2.log.filesystem") 
  74   
  75      
  76      
  77      
  78   
  79     """ 
  80     Represents a list of filesystem items. 
  81   
  82     This is a generic class that represents a list of filesystem items.  Callers 
  83     can add individual files or directories to the list, or can recursively add 
  84     the contents of a directory.  The class also allows for up-front exclusions 
  85     in several forms (all files, all directories, all items matching a pattern, 
  86     all items whose basename matches a pattern, or all directories containing a 
  87     specific "ignore file").  Symbolic links are typically backed up 
  88     non-recursively, i.e. the link to a directory is backed up, but not the 
  89     contents of that link (we don't want to deal with recursive loops, etc.). 
  90   
  91     The custom methods such as L{addFile} will only add items if they exist on 
  92     the filesystem and do not match any exclusions that are already in place. 
  93     However, since a FilesystemList is a subclass of Python's standard list 
  94     class, callers can also add items to the list in the usual way, using 
  95     methods like C{append()} or C{insert()}.  No validations apply to items 
  96     added to the list in this way; however, many list-manipulation methods deal 
  97     "gracefully" with items that don't exist in the filesystem, often by 
  98     ignoring them. 
  99   
 100     Once a list has been created, callers can remove individual items from the 
 101     list using standard methods like C{pop()} or C{remove()} or they can use 
 102     custom methods to remove specific types of entries or entries which match a 
 103     particular pattern. 
 104   
 105     @note: Regular expression patterns that apply to paths are assumed to be 
 106     bounded at front and back by the beginning and end of the string, i.e. they 
 107     are treated as if they begin with C{^} and end with C{$}.  This is true 
 108     whether we are matching a complete path or a basename. 
 109   
 110     @note: Some platforms, like Windows, do not support soft links.  On those 
 111     platforms, the ignore-soft-links flag can be set, but it won't do any good 
 112     because the operating system never reports a file as a soft link. 
 113   
 114     @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 
 115            removeLinks, removeMatch, removeInvalid, normalize, 
 116            excludeFiles, excludeDirs, excludeLinks, excludePaths, 
 117            excludePatterns, excludeBasenamePatterns, ignoreFile 
 118     """ 
 119   
 120   
 121      
 122      
 123      
 124   
 142   
 143   
 144      
 145      
 146      
 147   
 149        """ 
 150        Property target used to set the exclude files flag. 
 151        No validations, but we normalize the value to C{True} or C{False}. 
 152        """ 
 153        if value: 
 154           self._excludeFiles = True 
 155        else: 
 156           self._excludeFiles = False 
  157   
 159        """ 
 160        Property target used to get the exclude files flag. 
 161        """ 
 162        return self._excludeFiles 
  163   
 165        """ 
 166        Property target used to set the exclude directories flag. 
 167        No validations, but we normalize the value to C{True} or C{False}. 
 168        """ 
 169        if value: 
 170           self._excludeDirs = True 
 171        else: 
 172           self._excludeDirs = False 
  173   
 175        """ 
 176        Property target used to get the exclude directories flag. 
 177        """ 
 178        return self._excludeDirs 
  179   
 181        """ 
 182        Property target used to set the exclude soft links flag. 
 183        No validations, but we normalize the value to C{True} or C{False}. 
 184        """ 
 185        if value: 
 186           self._excludeLinks = True 
 187        else: 
 188           self._excludeLinks = False 
  189   
 191        """ 
 192        Property target used to get the exclude soft links flag. 
 193        """ 
 194        return self._excludeLinks 
  195   
 197        """ 
 198        Property target used to set the exclude paths list. 
 199        A C{None} value is converted to an empty list. 
 200        Elements do not have to exist on disk at the time of assignment. 
 201        @raise ValueError: If any list element is not an absolute path. 
 202        """ 
 203        self._excludePaths = AbsolutePathList() 
 204        if value is not None: 
 205           self._excludePaths.extend(value) 
  206   
 208        """ 
 209        Property target used to get the absolute exclude paths list. 
 210        """ 
 211        return self._excludePaths 
  212   
 214        """ 
 215        Property target used to set the exclude patterns list. 
 216        A C{None} value is converted to an empty list. 
 217        """ 
 218        self._excludePatterns = RegexList() 
 219        if value is not None: 
 220           self._excludePatterns.extend(value) 
  221   
 223        """ 
 224        Property target used to get the exclude patterns list. 
 225        """ 
 226        return self._excludePatterns 
  227   
 229        """ 
 230        Property target used to set the exclude basename patterns list. 
 231        A C{None} value is converted to an empty list. 
 232        """ 
 233        self._excludeBasenamePatterns = RegexList() 
 234        if value is not None: 
 235           self._excludeBasenamePatterns.extend(value) 
  236   
 238        """ 
 239        Property target used to get the exclude basename patterns list. 
 240        """ 
 241        return self._excludeBasenamePatterns 
  242   
 244        """ 
 245        Property target used to set the ignore file. 
 246        The value must be a non-empty string if it is not C{None}. 
 247        @raise ValueError: If the value is an empty string. 
 248        """ 
 249        if value is not None: 
 250           if len(value) < 1: 
 251              raise ValueError("The ignore file must be a non-empty string.") 
 252        self._ignoreFile = value 
  253   
 255        """ 
 256        Property target used to get the ignore file. 
 257        """ 
 258        return self._ignoreFile 
  259   
 260     excludeFiles = property(_getExcludeFiles, _setExcludeFiles, None, "Boolean indicating whether files should be excluded.") 
 261     excludeDirs = property(_getExcludeDirs, _setExcludeDirs, None, "Boolean indicating whether directories should be excluded.") 
 262     excludeLinks = property(_getExcludeLinks, _setExcludeLinks, None, "Boolean indicating whether soft links should be excluded.") 
 263     excludePaths = property(_getExcludePaths, _setExcludePaths, None, "List of absolute paths to be excluded.") 
 264     excludePatterns = property(_getExcludePatterns, _setExcludePatterns, None, 
 265                                "List of regular expression patterns (matching complete path) to be excluded.") 
 266     excludeBasenamePatterns = property(_getExcludeBasenamePatterns, _setExcludeBasenamePatterns, 
 267                                        None, "List of regular expression patterns (matching basename) to be excluded.") 
 268     ignoreFile = property(_getIgnoreFile, _setIgnoreFile, None, "Name of file which will cause directory contents to be ignored.") 
 269   
 270   
 271      
 272      
 273      
 274   
 276        """ 
 277        Adds a file to the list. 
 278   
 279        The path must exist and must be a file or a link to an existing file.  It 
 280        will be added to the list subject to any exclusions that are in place. 
 281   
 282        @param path: File path to be added to the list 
 283        @type path: String representing a path on disk 
 284   
 285        @return: Number of items added to the list. 
 286   
 287        @raise ValueError: If path is not a file or does not exist. 
 288        @raise ValueError: If the path could not be encoded properly. 
 289        """ 
 290        path = encodePath(path) 
 291        if not os.path.exists(path) or not os.path.isfile(path): 
 292           logger.debug("Path [%s] is not a file or does not exist on disk.", path) 
 293           raise ValueError("Path is not a file or does not exist on disk.") 
 294        if self.excludeLinks and os.path.islink(path): 
 295           logger.debug("Path [%s] is excluded based on excludeLinks.", path) 
 296           return 0 
 297        if self.excludeFiles: 
 298           logger.debug("Path [%s] is excluded based on excludeFiles.", path) 
 299           return 0 
 300        if path in self.excludePaths: 
 301           logger.debug("Path [%s] is excluded based on excludePaths.", path) 
 302           return 0 
 303        for pattern in self.excludePatterns: 
 304           pattern = encodePath(pattern)   
 305           if re.compile(r"^%s$" % pattern).match(path):  
 306              logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 
 307              return 0 
 308        for pattern in self.excludeBasenamePatterns:  
 309           pattern = encodePath(pattern)   
 310           if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 
 311              logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 
 312              return 0 
 313        self.append(path) 
 314        logger.debug("Added file to list: [%s]", path) 
 315        return 1 
  316   
 318        """ 
 319        Adds a directory to the list. 
 320   
 321        The path must exist and must be a directory or a link to an existing 
 322        directory.  It will be added to the list subject to any exclusions that 
 323        are in place.  The L{ignoreFile} does not apply to this method, only to 
 324        L{addDirContents}. 
 325   
 326        @param path: Directory path to be added to the list 
 327        @type path: String representing a path on disk 
 328   
 329        @return: Number of items added to the list. 
 330   
 331        @raise ValueError: If path is not a directory or does not exist. 
 332        @raise ValueError: If the path could not be encoded properly. 
 333        """ 
 334        path = encodePath(path) 
 335        path = normalizeDir(path) 
 336        if not os.path.exists(path) or not os.path.isdir(path): 
 337           logger.debug("Path [%s] is not a directory or does not exist on disk.", path) 
 338           raise ValueError("Path is not a directory or does not exist on disk.") 
 339        if self.excludeLinks and os.path.islink(path): 
 340           logger.debug("Path [%s] is excluded based on excludeLinks.", path) 
 341           return 0 
 342        if self.excludeDirs: 
 343           logger.debug("Path [%s] is excluded based on excludeDirs.", path) 
 344           return 0 
 345        if path in self.excludePaths: 
 346           logger.debug("Path [%s] is excluded based on excludePaths.", path) 
 347           return 0 
 348        for pattern in self.excludePatterns:  
 349           pattern = encodePath(pattern)   
 350           if re.compile(r"^%s$" % pattern).match(path): 
 351              logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 
 352              return 0 
 353        for pattern in self.excludeBasenamePatterns:  
 354           pattern = encodePath(pattern)   
 355           if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 
 356              logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 
 357              return 0 
 358        self.append(path) 
 359        logger.debug("Added directory to list: [%s]", path) 
 360        return 1 
  361   
 362 -   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False): 
  363        """ 
 364        Adds the contents of a directory to the list. 
 365   
 366        The path must exist and must be a directory or a link to a directory. 
 367        The contents of the directory (as well as the directory path itself) will 
 368        be recursively added to the list, subject to any exclusions that are in 
 369        place.  If you only want the directory and its immediate contents to be 
 370        added, then pass in C{recursive=False}. 
 371   
 372        @note: If a directory's absolute path matches an exclude pattern or path, 
 373        or if the directory contains the configured ignore file, then the 
 374        directory and all of its contents will be recursively excluded from the 
 375        list. 
 376   
 377        @note: If the passed-in directory happens to be a soft link, it will be 
 378        recursed.  However, the linkDepth parameter controls whether any soft 
 379        links I{within} the directory will be recursed.  The link depth is 
 380        maximum depth of the tree at which soft links should be followed.  So, a 
 381        depth of 0 does not follow any soft links, a depth of 1 follows only 
 382        links within the passed-in directory, a depth of 2 follows the links at 
 383        the next level down, etc. 
 384   
 385        @note: Any invalid soft links (i.e.  soft links that point to 
 386        non-existent items) will be silently ignored. 
 387   
 388        @note: The L{excludeDirs} flag only controls whether any given directory 
 389        path itself is added to the list once it has been discovered.  It does 
 390        I{not} modify any behavior related to directory recursion. 
 391   
 392        @note: If you call this method I{on a link to a directory} that link will 
 393        never be dereferenced (it may, however, be followed). 
 394   
 395        @param path: Directory path whose contents should be added to the list 
 396        @type path: String representing a path on disk 
 397   
 398        @param recursive: Indicates whether directory contents should be added recursively. 
 399        @type recursive: Boolean value 
 400   
 401        @param addSelf: Indicates whether the directory itself should be added to the list. 
 402        @type addSelf: Boolean value 
 403   
 404        @param linkDepth: Maximum depth of the tree at which soft links should be followed 
 405        @type linkDepth: Integer value, where zero means not to follow any soft links 
 406   
 407        @param dereference: Indicates whether soft links, if followed, should be dereferenced 
 408        @type dereference: Boolean value 
 409   
 410        @return: Number of items recursively added to the list 
 411   
 412        @raise ValueError: If path is not a directory or does not exist. 
 413        @raise ValueError: If the path could not be encoded properly. 
 414        """ 
 415        path = encodePath(path) 
 416        path = normalizeDir(path) 
 417        return self._addDirContentsInternal(path, addSelf, recursive, linkDepth, dereference) 
  418   
 419 -   def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False): 
  420        """ 
 421        Internal implementation of C{addDirContents}. 
 422   
 423        This internal implementation exists due to some refactoring.  Basically, 
 424        some subclasses have a need to add the contents of a directory, but not 
 425        the directory itself.  This is different than the standard C{FilesystemList} 
 426        behavior and actually ends up making a special case out of the first 
 427        call in the recursive chain.  Since I don't want to expose the modified 
 428        interface, C{addDirContents} ends up being wholly implemented in terms 
 429        of this method. 
 430   
 431        The linkDepth parameter controls whether soft links are followed when we 
 432        are adding the contents recursively.  Any recursive calls reduce the 
 433        value by one.  If the value zero or less, then soft links will just be 
 434        added as directories, but will not be followed.  This means that links 
 435        are followed to a I{constant depth} starting from the top-most directory. 
 436   
 437        There is one difference between soft links and directories: soft links 
 438        that are added recursively are not placed into the list explicitly.  This 
 439        is because if we do add the links recursively, the resulting tar file 
 440        gets a little confused (it has a link and a directory with the same 
 441        name). 
 442   
 443        @note: If you call this method I{on a link to a directory} that link will 
 444        never be dereferenced (it may, however, be followed). 
 445   
 446        @param path: Directory path whose contents should be added to the list. 
 447        @param includePath: Indicates whether to include the path as well as contents. 
 448        @param recursive: Indicates whether directory contents should be added recursively. 
 449        @param linkDepth: Depth of soft links that should be followed 
 450        @param dereference: Indicates whether soft links, if followed, should be dereferenced 
 451   
 452        @return: Number of items recursively added to the list 
 453   
 454        @raise ValueError: If path is not a directory or does not exist. 
 455        """ 
 456        added = 0 
 457        if not os.path.exists(path) or not os.path.isdir(path): 
 458           logger.debug("Path [%s] is not a directory or does not exist on disk.", path) 
 459           raise ValueError("Path is not a directory or does not exist on disk.") 
 460        if path in self.excludePaths: 
 461           logger.debug("Path [%s] is excluded based on excludePaths.", path) 
 462           return added 
 463        for pattern in self.excludePatterns:  
 464           pattern = encodePath(pattern)   
 465           if re.compile(r"^%s$" % pattern).match(path): 
 466              logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 
 467              return added 
 468        for pattern in self.excludeBasenamePatterns:  
 469           pattern = encodePath(pattern)   
 470           if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 
 471              logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 
 472              return added 
 473        if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)): 
 474           logger.debug("Path [%s] is excluded based on ignore file.", path) 
 475           return added 
 476        if includePath: 
 477           added += self.addDir(path)     
 478        for entry in os.listdir(path): 
 479           entrypath = os.path.join(path, entry) 
 480           if os.path.isfile(entrypath): 
 481              if linkDepth > 0 and dereference: 
 482                 derefpath = dereferenceLink(entrypath) 
 483                 if derefpath != entrypath: 
 484                    added += self.addFile(derefpath) 
 485              added += self.addFile(entrypath) 
 486           elif os.path.isdir(entrypath): 
 487              if os.path.islink(entrypath): 
 488                 if recursive: 
 489                    if linkDepth > 0: 
 490                       newDepth = linkDepth - 1 
 491                       if dereference: 
 492                          derefpath = dereferenceLink(entrypath) 
 493                          if derefpath != entrypath: 
 494                             added += self._addDirContentsInternal(derefpath, True, recursive, newDepth, dereference) 
 495                          added += self.addDir(entrypath) 
 496                       else: 
 497                          added += self._addDirContentsInternal(entrypath, False, recursive, newDepth, dereference) 
 498                    else: 
 499                       added += self.addDir(entrypath) 
 500                 else: 
 501                    added += self.addDir(entrypath) 
 502              else: 
 503                 if recursive: 
 504                    newDepth = linkDepth - 1 
 505                    added += self._addDirContentsInternal(entrypath, True, recursive, newDepth, dereference) 
 506                 else: 
 507                    added += self.addDir(entrypath) 
 508        return added 
  509   
 510   
 511      
 512      
 513      
 514   
 516        """ 
 517        Removes file entries from the list. 
 518   
 519        If C{pattern} is not passed in or is C{None}, then all file entries will 
 520        be removed from the list.  Otherwise, only those file entries matching 
 521        the pattern will be removed.  Any entry which does not exist on disk 
 522        will be ignored (use L{removeInvalid} to purge those entries). 
 523   
 524        This method might be fairly slow for large lists, since it must check the 
 525        type of each item in the list.  If you know ahead of time that you want 
 526        to exclude all files, then you will be better off setting L{excludeFiles} 
 527        to C{True} before adding items to the list. 
 528   
 529        @param pattern: Regular expression pattern representing entries to remove 
 530   
 531        @return: Number of entries removed 
 532        @raise ValueError: If the passed-in pattern is not a valid regular expression. 
 533        """ 
 534        removed = 0 
 535        if pattern is None: 
 536           for entry in self[:]: 
 537              if os.path.exists(entry) and os.path.isfile(entry): 
 538                 self.remove(entry) 
 539                 logger.debug("Removed path [%s] from list.", entry) 
 540                 removed += 1 
 541        else: 
 542           try: 
 543              pattern = encodePath(pattern)   
 544              compiled = re.compile(pattern) 
 545           except re.error: 
 546              raise ValueError("Pattern is not a valid regular expression.") 
 547           for entry in self[:]: 
 548              if os.path.exists(entry) and os.path.isfile(entry): 
 549                 if compiled.match(entry): 
 550                    self.remove(entry) 
 551                    logger.debug("Removed path [%s] from list.", entry) 
 552                    removed += 1 
 553        logger.debug("Removed a total of %d entries.", removed) 
 554        return removed 
  555   
 557        """ 
 558        Removes directory entries from the list. 
 559   
 560        If C{pattern} is not passed in or is C{None}, then all directory entries 
 561        will be removed from the list.  Otherwise, only those directory entries 
 562        matching the pattern will be removed.  Any entry which does not exist on 
 563        disk will be ignored (use L{removeInvalid} to purge those entries). 
 564   
 565        This method might be fairly slow for large lists, since it must check the 
 566        type of each item in the list.  If you know ahead of time that you want 
 567        to exclude all directories, then you will be better off setting 
 568        L{excludeDirs} to C{True} before adding items to the list (note that this 
 569        will not prevent you from recursively adding the I{contents} of 
 570        directories). 
 571   
 572        @param pattern: Regular expression pattern representing entries to remove 
 573   
 574        @return: Number of entries removed 
 575        @raise ValueError: If the passed-in pattern is not a valid regular expression. 
 576        """ 
 577        removed = 0 
 578        if pattern is None: 
 579           for entry in self[:]: 
 580              if os.path.exists(entry) and os.path.isdir(entry): 
 581                 self.remove(entry) 
 582                 logger.debug("Removed path [%s] from list.", entry) 
 583                 removed += 1 
 584        else: 
 585           try: 
 586              pattern = encodePath(pattern)   
 587              compiled = re.compile(pattern) 
 588           except re.error: 
 589              raise ValueError("Pattern is not a valid regular expression.") 
 590           for entry in self[:]: 
 591              if os.path.exists(entry) and os.path.isdir(entry): 
 592                 if compiled.match(entry): 
 593                    self.remove(entry) 
 594                    logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 
 595                    removed += 1 
 596        logger.debug("Removed a total of %d entries.", removed) 
 597        return removed 
  598   
 600        """ 
 601        Removes soft link entries from the list. 
 602   
 603        If C{pattern} is not passed in or is C{None}, then all soft link entries 
 604        will be removed from the list.  Otherwise, only those soft link entries 
 605        matching the pattern will be removed.  Any entry which does not exist on 
 606        disk will be ignored (use L{removeInvalid} to purge those entries). 
 607   
 608        This method might be fairly slow for large lists, since it must check the 
 609        type of each item in the list.  If you know ahead of time that you want 
 610        to exclude all soft links, then you will be better off setting 
 611        L{excludeLinks} to C{True} before adding items to the list. 
 612   
 613        @param pattern: Regular expression pattern representing entries to remove 
 614   
 615        @return: Number of entries removed 
 616        @raise ValueError: If the passed-in pattern is not a valid regular expression. 
 617        """ 
 618        removed = 0 
 619        if pattern is None: 
 620           for entry in self[:]: 
 621              if os.path.exists(entry) and os.path.islink(entry): 
 622                 self.remove(entry) 
 623                 logger.debug("Removed path [%s] from list.", entry) 
 624                 removed += 1 
 625        else: 
 626           try: 
 627              pattern = encodePath(pattern)   
 628              compiled = re.compile(pattern) 
 629           except re.error: 
 630              raise ValueError("Pattern is not a valid regular expression.") 
 631           for entry in self[:]: 
 632              if os.path.exists(entry) and os.path.islink(entry): 
 633                 if compiled.match(entry): 
 634                    self.remove(entry) 
 635                    logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 
 636                    removed += 1 
 637        logger.debug("Removed a total of %d entries.", removed) 
 638        return removed 
  639   
 641        """ 
 642        Removes from the list all entries matching a pattern. 
 643   
 644        This method removes from the list all entries which match the passed in 
 645        C{pattern}.  Since there is no need to check the type of each entry, it 
 646        is faster to call this method than to call the L{removeFiles}, 
 647        L{removeDirs} or L{removeLinks} methods individually.  If you know which 
 648        patterns you will want to remove ahead of time, you may be better off 
 649        setting L{excludePatterns} or L{excludeBasenamePatterns} before adding 
 650        items to the list. 
 651   
 652        @note: Unlike when using the exclude lists, the pattern here is I{not} 
 653        bounded at the front and the back of the string.  You can use any pattern 
 654        you want. 
 655   
 656        @param pattern: Regular expression pattern representing entries to remove 
 657   
 658        @return: Number of entries removed. 
 659        @raise ValueError: If the passed-in pattern is not a valid regular expression. 
 660        """ 
 661        try: 
 662           pattern = encodePath(pattern)   
 663           compiled = re.compile(pattern) 
 664        except re.error: 
 665           raise ValueError("Pattern is not a valid regular expression.") 
 666        removed = 0 
 667        for entry in self[:]: 
 668           if compiled.match(entry): 
 669              self.remove(entry) 
 670              logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 
 671              removed += 1 
 672        logger.debug("Removed a total of %d entries.", removed) 
 673        return removed 
  674   
 676        """ 
 677        Removes from the list all entries that do not exist on disk. 
 678   
 679        This method removes from the list all entries which do not currently 
 680        exist on disk in some form.  No attention is paid to whether the entries 
 681        are files or directories. 
 682   
 683        @return: Number of entries removed. 
 684        """ 
 685        removed = 0 
 686        for entry in self[:]: 
 687           if not os.path.exists(entry): 
 688              self.remove(entry) 
 689              logger.debug("Removed path [%s] from list.", entry) 
 690              removed += 1 
 691        logger.debug("Removed a total of %d entries.", removed) 
 692        return removed 
  693   
 694   
 695      
 696      
 697      
 698   
 700        """Normalizes the list, ensuring that each entry is unique.""" 
 701        orig = len(self) 
 702        self.sort() 
 703        dups = filter(lambda x, self=self: self[x] == self[x+1], range(0, len(self) - 1))  
 704        items = map(lambda x, self=self: self[x], dups)                                    
 705        map(self.remove, items) 
 706        new = len(self) 
 707        logger.debug("Completed normalizing list; removed %d items (%d originally, %d now).", new-orig, orig, new) 
  708   
 710        """ 
 711        Verifies that all entries in the list exist on disk. 
 712        @return: C{True} if all entries exist, C{False} otherwise. 
 713        """ 
 714        for entry in self: 
 715           if not os.path.exists(entry): 
 716              logger.debug("Path [%s] is invalid; list is not valid.", entry) 
 717              return False 
 718        logger.debug("All entries in list are valid.") 
 719        return True 
   720   
 721   
 722   
 723   
 724   
 725   
 726 -class SpanItem(object):  
  727     """ 
 728     Item returned by L{BackupFileList.generateSpan}. 
 729     """ 
 730 -   def __init__(self, fileList, size, capacity, utilization): 
  731        """ 
 732        Create object. 
 733        @param fileList: List of files 
 734        @param size: Size (in bytes) of files 
 735        @param utilization: Utilization, as a percentage (0-100) 
 736        """ 
 737        self.fileList = fileList 
 738        self.size = size 
 739        self.capacity = capacity 
 740        self.utilization = utilization 
  741   
 748   
 749      
 750      
 751      
 752   
 753     """ 
 754     List of files to be backed up. 
 755   
 756     A BackupFileList is a L{FilesystemList} containing a list of files to be 
 757     backed up.  It only contains files, not directories (soft links are treated 
 758     like files).  On top of the generic functionality provided by 
 759     L{FilesystemList}, this class adds functionality to keep a hash (checksum) 
 760     for each file in the list, and it also provides a method to calculate the 
 761     total size of the files in the list and a way to export the list into tar 
 762     form. 
 763   
 764     @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 
 765            generateFitted, generateTarfile, removeUnchanged 
 766     """ 
 767   
 768      
 769      
 770      
 771   
 775   
 776   
 777      
 778      
 779      
 780   
 782        """ 
 783        Adds a directory to the list. 
 784   
 785        Note that this class does not allow directories to be added by themselves 
 786        (a backup list contains only files).  However, since links to directories 
 787        are technically files, we allow them to be added. 
 788   
 789        This method is implemented in terms of the superclass method, with one 
 790        additional validation: the superclass method is only called if the 
 791        passed-in path is both a directory and a link.  All of the superclass's 
 792        existing validations and restrictions apply. 
 793   
 794        @param path: Directory path to be added to the list 
 795        @type path: String representing a path on disk 
 796   
 797        @return: Number of items added to the list. 
 798   
 799        @raise ValueError: If path is not a directory or does not exist. 
 800        @raise ValueError: If the path could not be encoded properly. 
 801        """ 
 802        path = encodePath(path) 
 803        path = normalizeDir(path) 
 804        if os.path.isdir(path) and not os.path.islink(path): 
 805           return 0 
 806        else: 
 807           return FilesystemList.addDir(self, path) 
  808   
 809   
 810      
 811      
 812      
 813   
 815        """ 
 816        Returns the total size among all files in the list. 
 817        Only files are counted. 
 818        Soft links that point at files are ignored. 
 819        Entries which do not exist on disk are ignored. 
 820        @return: Total size, in bytes 
 821        """ 
 822        total = 0.0 
 823        for entry in self: 
 824           if os.path.isfile(entry) and not os.path.islink(entry): 
 825              total += float(os.stat(entry).st_size) 
 826        return total 
  827   
 829        """ 
 830        Generates a mapping from file to file size in bytes. 
 831        The mapping does include soft links, which are listed with size zero. 
 832        Entries which do not exist on disk are ignored. 
 833        @return: Dictionary mapping file to file size 
 834        """ 
 835        table = { } 
 836        for entry in self: 
 837           if os.path.islink(entry): 
 838              table[entry] = 0.0 
 839           elif os.path.isfile(entry): 
 840              table[entry] = float(os.stat(entry).st_size) 
 841        return table 
  842   
 844        """ 
 845        Generates a mapping from file to file digest. 
 846   
 847        Currently, the digest is an SHA hash, which should be pretty secure.  In 
 848        the future, this might be a different kind of hash, but we guarantee that 
 849        the type of the hash will not change unless the library major version 
 850        number is bumped. 
 851   
 852        Entries which do not exist on disk are ignored. 
 853   
 854        Soft links are ignored.  We would end up generating a digest for the file 
 855        that the soft link points at, which doesn't make any sense. 
 856   
 857        If C{stripPrefix} is passed in, then that prefix will be stripped from 
 858        each key when the map is generated.  This can be useful in generating two 
 859        "relative" digest maps to be compared to one another. 
 860   
 861        @param stripPrefix: Common prefix to be stripped from paths 
 862        @type stripPrefix: String with any contents 
 863   
 864        @return: Dictionary mapping file to digest value 
 865        @see: L{removeUnchanged} 
 866        """ 
 867        table = { } 
 868        if stripPrefix is not None: 
 869           for entry in self: 
 870              if os.path.isfile(entry) and not os.path.islink(entry): 
 871                 table[entry.replace(stripPrefix, "", 1)] = BackupFileList._generateDigest(entry) 
 872        else: 
 873           for entry in self: 
 874              if os.path.isfile(entry) and not os.path.islink(entry): 
 875                 table[entry] = BackupFileList._generateDigest(entry) 
 876        return table 
  877   
 878     @staticmethod 
 880        """ 
 881        Generates an SHA digest for a given file on disk. 
 882   
 883        The original code for this function used this simplistic implementation, 
 884        which requires reading the entire file into memory at once in order to 
 885        generate a digest value:: 
 886   
 887           sha.new(open(path).read()).hexdigest() 
 888   
 889        Not surprisingly, this isn't an optimal solution.  The U{Simple file 
 890        hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>} 
 891        Python Cookbook recipe describes how to incrementally generate a hash 
 892        value by reading in chunks of data rather than reading the file all at 
 893        once.  The recipe relies on the C{update()} method of the various
 894        Python hashing algorithms. 
 895   
 896        In my tests using a 110 MB file on CD, the original implementation 
 897        requires 111 seconds.  This implementation requires only 40-45 seconds, 
 898        which is a pretty substantial speed-up. 
 899   
 900        Experience shows that reading in around 4kB (4096 bytes) at a time yields 
 901        the best performance.  Smaller reads are quite a bit slower, and larger 
 902        reads don't make much of a difference.  The 4kB number makes me a little 
 903        suspicious, and I think it might be related to the size of a filesystem 
 904        read at the hardware level.  However, I've decided to just hardcode 4096 
 905        until I have evidence that shows it's worthwhile making the read size 
 906        configurable. 
 907   
 908        @param path: Path to generate digest for. 
 909   
 910        @return: ASCII-safe SHA digest for the file. 
 911        @raise OSError: If the file cannot be opened. 
 912        """ 
 913         
 914        try: 
 915           import hashlib 
 916           s = hashlib.sha1() 
 917        except ImportError: 
 918           import sha 
 919           s = sha.new() 
 920        f = open(path, mode="rb")   
 921        readBytes = 4096   
 922        while readBytes > 0: 
 923           readString = f.read(readBytes) 
 924           s.update(readString) 
 925           readBytes = len(readString) 
 926        f.close() 
 927        digest = s.hexdigest() 
 928        logger.debug("Generated digest [%s] for file [%s].", digest, path) 
 929        return digest 
  930   
 932        """ 
 933        Generates a list of items that fit in the indicated capacity. 
 934   
 935        Sometimes, callers would like to include every item in a list, but are 
 936        unable to because not all of the items fit in the space available.  This 
 937        method returns a copy of the list, containing only the items that fit in 
 938        a given capacity.  A copy is returned so that we don't lose any 
 939        information if for some reason the fitted list is unsatisfactory. 
 940   
 941        The fitting is done using the functions in the knapsack module.  By 
 942        default, the first fit algorithm is used, but you can also choose 
 943        from best fit, worst fit and alternate fit. 
 944   
 945        @param capacity: Maximum capacity among the files in the new list 
 946        @type capacity: Integer, in bytes 
 947   
 948        @param algorithm: Knapsack (fit) algorithm to use 
 949        @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit" 
 950   
 951        @return: Copy of list with total size no larger than indicated capacity 
 952        @raise ValueError: If the algorithm is invalid. 
 953        """ 
 954        table = self._getKnapsackTable() 
 955        function = BackupFileList._getKnapsackFunction(algorithm) 
 956        return function(table, capacity)[0] 
  957   
 959        """ 
 960        Splits the list of items into sub-lists that fit in a given capacity. 
 961   
 962        Sometimes, callers need to split a backup file list into a set of smaller
 963        lists.  For instance, you could use this to "span" the files across a set 
 964        of discs. 
 965   
 966        The fitting is done using the functions in the knapsack module.  By 
 967        default, the first fit algorithm is used, but you can also choose 
 968        from best fit, worst fit and alternate fit. 
 969   
 970        @note: If any of your items are larger than the capacity, then it won't 
 971        be possible to find a solution.  In this case, a value error will be 
 972        raised. 
 973   
 974        @param capacity: Maximum capacity among the files in the new list 
 975        @type capacity: Integer, in bytes 
 976   
 977        @param algorithm: Knapsack (fit) algorithm to use 
 978        @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit" 
 979   
 980        @return: List of L{SpanItem} objects. 
 981   
 982        @raise ValueError: If the algorithm is invalid. 
 983        @raise ValueError: If it's not possible to fit some items 
 984        """ 
 985        spanItems = [] 
 986        function = BackupFileList._getKnapsackFunction(algorithm) 
 987        table = self._getKnapsackTable(capacity) 
 988        iteration = 0 
 989        while len(table) > 0: 
 990           iteration += 1 
 991           fit = function(table, capacity) 
 992           if len(fit[0]) == 0: 
 993               
 994              raise ValueError("After iteration %d, unable to add any new items." % iteration) 
 995           removeKeys(table, fit[0]) 
 996           utilization = (float(fit[1])/float(capacity))*100.0 
 997           item = SpanItem(fit[0], fit[1], capacity, utilization) 
 998           spanItems.append(item) 
 999        return spanItems 
 1000   
1002        """ 
1003        Converts the list into the form needed by the knapsack algorithms. 
1004        @return: Dictionary mapping file name to tuple of (file path, file size). 
1005        """ 
1006        table = { } 
1007        for entry in self: 
1008           if os.path.islink(entry): 
1009              table[entry] = (entry, 0.0) 
1010           elif os.path.isfile(entry): 
1011              size = float(os.stat(entry).st_size) 
1012              if capacity is not None: 
1013                 if size > capacity: 
1014                    raise ValueError("File [%s] cannot fit in capacity %s." % (entry, displayBytes(capacity))) 
1015              table[entry] = (entry, size) 
1016        return table 
 1017   
1018     @staticmethod 
1020        """ 
1021        Returns a reference to the function associated with an algorithm name. 
1022        Algorithm name must be one of "first_fit", "best_fit", "worst_fit", "alternate_fit" 
1023        @param algorithm: Name of the algorithm 
1024        @return: Reference to knapsack function 
1025        @raise ValueError: If the algorithm name is unknown. 
1026        """ 
1027        if algorithm == "first_fit": 
1028           return firstFit 
1029        elif algorithm == "best_fit": 
1030           return bestFit 
1031        elif algorithm == "worst_fit": 
1032           return worstFit 
1033        elif algorithm == "alternate_fit": 
1034           return alternateFit 
1035        else: 
1036           raise ValueError("Algorithm [%s] is invalid." % algorithm) 
 1037   
1039        """ 
1040        Creates a tar file containing the files in the list. 
1041   
1042        By default, this method will create uncompressed tar files.  If you pass 
1043        in mode C{'targz'}, then it will create gzipped tar files, and if you 
1044        pass in mode C{'tarbz2'}, then it will create bzipped tar files. 
1045   
1046        The tar file will be created as a GNU tar archive, which enables extended 
1047        file name lengths, etc.  Since GNU tar is so prevalent, I've decided that 
1048        the extra functionality out-weighs the disadvantage of not being 
1049        "standard". 
1050   
1051        If you pass in C{flat=True}, then a "flat" archive will be created, and 
1052        all of the files will be added to the root of the archive.  So, the file 
1053        C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}. 
1054   
1055        By default, the whole method call fails if there are problems adding any 
1056        of the files to the archive, resulting in an exception.  Under these 
1057        circumstances, callers are advised that they might want to call 
1058        L{removeInvalid()} and then attempt to generate the tar file a second
1059        time, since the most common cause of failures is a missing file (a file 
1060        that existed when the list was built, but is gone again by the time the 
1061        tar file is built). 
1062   
1063        If you want to, you can pass in C{ignore=True}, and the method will 
1064        ignore errors encountered when adding individual files to the archive 
1065        (but not errors opening and closing the archive itself). 
1066   
1067        We'll always attempt to remove the tarfile from disk if an exception will 
1068        be thrown. 
1069   
1070        @note: No validation is done as to whether the entries in the list are 
1071        files, since only files or soft links should be in an object like this. 
1072        However, to be safe, everything is explicitly added to the tar archive 
1073        non-recursively so it's safe to include soft links to directories. 
1074   
1075        @note: The Python C{tarfile} module, which is used internally here, is 
1076        supposed to deal properly with long filenames and links.  In my testing, 
1077        I have found that it appears to be able to add really long filenames
1078        to archives, but doesn't do a good job reading them back out, even out of 
1079        an archive it created.  Fortunately, all Cedar Backup does is add files 
1080        to archives. 
1081   
1082        @param path: Path of tar file to create on disk 
1083        @type path: String representing a path on disk 
1084   
1085        @param mode: Tar creation mode 
1086        @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'} 
1087   
1088        @param ignore: Indicates whether to ignore certain errors. 
1089        @type ignore: Boolean 
1090   
1091        @param flat: Creates "flat" archive by putting all items in root 
1092        @type flat: Boolean 
1093   
1094        @raise ValueError: If mode is not valid 
1095        @raise ValueError: If list is empty 
1096        @raise ValueError: If the path could not be encoded properly. 
1097        @raise TarError: If there is a problem creating the tar file 
1098        """ 
1099         
1100        path = encodePath(path) 
1101        if len(self) == 0: raise ValueError("Empty list cannot be used to generate tarfile.") 
1102        if mode == 'tar': tarmode = "w:" 
1103        elif mode == 'targz': tarmode = "w:gz" 
1104        elif mode == 'tarbz2': tarmode = "w:bz2" 
1105        else: raise ValueError("Mode [%s] is not valid." % mode) 
1106        try: 
1107           tar = tarfile.open(path, tarmode) 
1108           try: 
1109              tar.format = tarfile.GNU_FORMAT 
1110           except AttributeError: 
1111              tar.posix = False 
1112           for entry in self: 
1113              try: 
1114                 if flat: 
1115                    tar.add(entry, arcname=os.path.basename(entry), recursive=False) 
1116                 else: 
1117                    tar.add(entry, recursive=False) 
1118              except tarfile.TarError, e: 
1119                 if not ignore: 
1120                    raise e 
1121                 logger.info("Unable to add file [%s]; going on anyway.", entry) 
1122              except OSError, e: 
1123                 if not ignore: 
1124                    raise tarfile.TarError(e) 
1125                 logger.info("Unable to add file [%s]; going on anyway.", entry) 
1126           tar.close() 
1127        except tarfile.ReadError, e: 
1128           try: tar.close() 
1129           except: pass 
1130           if os.path.exists(path): 
1131              try: os.remove(path) 
1132              except: pass 
1133           raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path) 
1134        except tarfile.TarError, e: 
1135           try: tar.close() 
1136           except: pass 
1137           if os.path.exists(path): 
1138              try: os.remove(path) 
1139              except: pass 
1140           raise e 
 1141   
1143        """ 
1144        Removes unchanged entries from the list. 
1145   
1146        This method relies on a digest map as returned from L{generateDigestMap}. 
1147        For each entry in C{digestMap}, if the entry also exists in the current 
1148        list I{and} the entry in the current list has the same digest value as in 
1149        the map, the entry in the current list will be removed. 
1150   
1151        This method offers a convenient way for callers to filter unneeded 
1152        entries from a list.  The idea is that a caller will capture a digest map 
1153        from C{generateDigestMap} at some point in time (perhaps the beginning of 
1154        the week), and will save off that map using C{pickle} or some other 
1155        method.  Then, the caller could use this method sometime in the future to 
1156        filter out any unchanged files based on the saved-off map. 
1157   
1158        If C{captureDigest} is passed-in as C{True}, then digest information will 
1159        be captured for the entire list before the removal step occurs using the 
1160        same rules as in L{generateDigestMap}.  The check will involve a lookup 
1161        into the complete digest map. 
1162   
1163        If C{captureDigest} is passed in as C{False}, we will only generate a 
1164        digest value for files we actually need to check, and we'll ignore any 
1165        entry in the list which isn't a file that currently exists on disk. 
1166   
1167        The return value varies depending on C{captureDigest}, as well.  To 
1168        preserve backwards compatibility, if C{captureDigest} is C{False}, then 
1169        we'll just return a single value representing the number of entries 
1170        removed.  Otherwise, we'll return a tuple of C{(entries removed, digest 
1171        map)}.  The returned digest map will be in exactly the form returned by 
1172        L{generateDigestMap}. 
1173   
1174        @note: For performance reasons, this method actually ends up rebuilding 
1175        the list from scratch.  First, we build a temporary dictionary containing 
1176        all of the items from the original list.  Then, we remove items as needed 
1177        from the dictionary (which is faster than the equivalent operation on a 
1178        list).  Finally, we replace the contents of the current list based on the 
1179        keys left in the dictionary.  This should be transparent to the caller. 
1180   
1181        @param digestMap: Dictionary mapping file name to digest value. 
1182        @type digestMap: Map as returned from L{generateDigestMap}. 
1183   
1184        @param captureDigest: Indicates that digest information should be captured. 
1185        @type captureDigest: Boolean 
1186   
1187        @return: Results as discussed above (format varies based on arguments) 
1188        """ 
1189        if captureDigest: 
1190           removed = 0 
1191           table = {} 
1192           captured = {} 
1193           for entry in self: 
1194              if os.path.isfile(entry) and not os.path.islink(entry): 
1195                 table[entry] = BackupFileList._generateDigest(entry) 
1196                 captured[entry] = table[entry] 
1197              else: 
1198                 table[entry] = None 
1199           for entry in digestMap.keys(): 
1200              if table.has_key(entry): 
1201                 if table[entry] is not None:   
1202                    digest = table[entry] 
1203                    if digest == digestMap[entry]: 
1204                       removed += 1 
1205                       del table[entry] 
1206                       logger.debug("Discarded unchanged file [%s].", entry) 
1207           self[:] = table.keys() 
1208           return (removed, captured) 
1209        else: 
1210           removed = 0 
1211           table = {} 
1212           for entry in self: 
1213              table[entry] = None 
1214           for entry in digestMap.keys(): 
1215              if table.has_key(entry): 
1216                 if os.path.isfile(entry) and not os.path.islink(entry): 
1217                    digest = BackupFileList._generateDigest(entry) 
1218                    if digest == digestMap[entry]: 
1219                       removed += 1 
1220                       del table[entry] 
1221                       logger.debug("Discarded unchanged file [%s].", entry) 
1222           self[:] = table.keys() 
1223           return removed 
 1224   
1231   
1232      
1233      
1234      
1235   
1236     """ 
1237     List of files and directories to be purged. 
1238   
1239     A PurgeItemList is a L{FilesystemList} containing a list of files and 
1240     directories to be purged.  On top of the generic functionality provided by 
1241     L{FilesystemList}, this class adds functionality to remove items that are 
1242     too young to be purged, and to actually remove each item in the list from 
1243     the filesystem. 
1244   
1245     The other main difference is that when you add a directory's contents to a 
1246     purge item list, the directory itself is not added to the list.  This way, 
1247     if someone asks to purge within C{/opt/backup/collect}, that directory
1248     doesn't get removed once all of the files within it are gone.
1249     """ 
1250   
1251      
1252      
1253      
1254   
1258   
1259   
1260      
1261      
1262      
1263   
1264 -   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False): 
 1265        """ 
1266        Adds the contents of a directory to the list. 
1267   
1268        The path must exist and must be a directory or a link to a directory. 
1269        The contents of the directory (but I{not} the directory path itself) will 
1270        be recursively added to the list, subject to any exclusions that are in 
1271        place.  If you only want the directory and its contents to be added, then 
1272        pass in C{recursive=False}. 
1273   
1274        @note: If a directory's absolute path matches an exclude pattern or path, 
1275        or if the directory contains the configured ignore file, then the 
1276        directory and all of its contents will be recursively excluded from the 
1277        list. 
1278   
1279        @note: If the passed-in directory happens to be a soft link, it will be 
1280        recursed.  However, the linkDepth parameter controls whether any soft 
1281        links I{within} the directory will be recursed.  The link depth is 
1282        maximum depth of the tree at which soft links should be followed.  So, a 
1283        depth of 0 does not follow any soft links, a depth of 1 follows only 
1284        links within the passed-in directory, a depth of 2 follows the links at 
1285        the next level down, etc. 
1286   
1287        @note: Any invalid soft links (i.e.  soft links that point to 
1288        non-existent items) will be silently ignored. 
1289   
1290        @note: The L{excludeDirs} flag only controls whether any given soft link 
1291        path itself is added to the list once it has been discovered.  It does 
1292        I{not} modify any behavior related to directory recursion. 
1293   
1294        @note: The L{excludeDirs} flag only controls whether any given directory 
1295        path itself is added to the list once it has been discovered.  It does 
1296        I{not} modify any behavior related to directory recursion. 
1297   
1298        @note: If you call this method I{on a link to a directory} that link will 
1299        never be dereferenced (it may, however, be followed). 
1300   
1301        @param path: Directory path whose contents should be added to the list 
1302        @type path: String representing a path on disk 
1303   
1304        @param recursive: Indicates whether directory contents should be added recursively. 
1305        @type recursive: Boolean value 
1306   
1307        @param addSelf: Ignored in this subclass. 
1308   
1309        @param linkDepth: Depth of soft links that should be followed 
1310        @type linkDepth: Integer value, where zero means not to follow any soft links 
1311   
1312        @param dereference: Indicates whether soft links, if followed, should be dereferenced 
1313        @type dereference: Boolean value 
1314   
1315        @return: Number of items recursively added to the list 
1316   
1317        @raise ValueError: If path is not a directory or does not exist. 
1318        @raise ValueError: If the path could not be encoded properly. 
1319        """ 
1320        path = encodePath(path) 
1321        path = normalizeDir(path) 
1322        return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth, dereference) 
 1323   
1324   
1325      
1326      
1327      
1328   
1330        """ 
1331        Removes from the list files younger than a certain age (in days). 
1332   
1333        Any file whose "age" in days is less than (C{<}) the value of the 
1334        C{daysOld} parameter will be removed from the list so that it will not be 
1335        purged later when L{purgeItems} is called.  Directories and soft links 
1336        will be ignored. 
1337   
1338        The "age" of a file is the amount of time since the file was last used, 
1339        per the most recent of the file's C{st_atime} and C{st_mtime} values. 
1340   
1341        @note: Some people find the "sense" of this method confusing or 
1342        "backwards".  Keep in mind that this method is used to remove items 
1343        I{from the list}, not from the filesystem!  It removes from the list 
1344        those items that you would I{not} want to purge because they are too 
1345        young.  As an example, passing in C{daysOld} of zero (0) would remove 
1346        from the list no files, which would result in purging all of the files 
1347        later.  I would be happy to make a synonym of this method with an 
1348        easier-to-understand "sense", if someone can suggest one. 
1349   
1350        @param daysOld: Minimum age of files that are to be kept in the list. 
1351        @type daysOld: Integer value >= 0. 
1352   
1353        @return: Number of entries removed 
1354        """ 
1355        removed = 0 
1356        daysOld = int(daysOld) 
1357        if daysOld < 0: 
1358           raise ValueError("Days old value must be an integer >= 0.") 
1359        for entry in self[:]: 
1360           if os.path.isfile(entry) and not os.path.islink(entry): 
1361              try: 
1362                 ageInDays = calculateFileAge(entry) 
1363                 ageInWholeDays = math.floor(ageInDays) 
1364                 if ageInWholeDays < 0: ageInWholeDays = 0 
1365                 if ageInWholeDays < daysOld: 
1366                    removed += 1 
1367                    self.remove(entry) 
1368              except OSError: 
1369                 pass 
1370        return removed 
 1371   
1373        """ 
1374        Purges all items in the list. 
1375   
1376        Every item in the list will be purged.  Directories in the list will 
1377        I{not} be purged recursively, and hence will only be removed if they are 
1378        empty.  Errors will be ignored. 
1379   
1380        To facilitate easy removal of directories that will end up being empty,
1381        the delete process happens in two passes: files first (including soft 
1382        links), then directories. 
1383   
1384        @return: Tuple containing count of (files, dirs) removed 
1385        """ 
1386        files = 0 
1387        dirs = 0 
1388        for entry in self: 
1389           if os.path.exists(entry) and (os.path.isfile(entry) or os.path.islink(entry)): 
1390              try: 
1391                 os.remove(entry) 
1392                 files += 1 
1393                 logger.debug("Purged file [%s].", entry) 
1394              except OSError: 
1395                 pass 
1396        for entry in self: 
1397           if os.path.exists(entry) and os.path.isdir(entry) and not os.path.islink(entry): 
1398              try: 
1399                 os.rmdir(entry) 
1400                 dirs += 1 
1401                 logger.debug("Purged empty directory [%s].", entry) 
1402              except OSError: 
1403                 pass 
1404        return (files, dirs) 
 1405   
1406   
1407   
1408   
1409   
1410   
1411   
1412   
1413   
1414   
def normalizeDir(path):
   """
   Strips one trailing path separator from a directory name.

   Directories should appear within lists in a consistent form, even though
   from the user's perspective C{/path/to/dir/} and C{/path/to/dir} are
   equivalent.  A path consisting solely of the separator (the root) is
   returned untouched, and at most one trailing separator is removed.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   if path == os.sep:
      return path    # the bare root must keep its separator
   if path[-1:] != os.sep:
      return path    # nothing to strip (this also covers the empty string)
   return path[:-1]
 1432   
1433   
1434   
1435   
1436   
1437   
def compareContents(path1, path2, verbose=False):
   """
   Compares the contents of two directories to see if they are equivalent.

   The two directories are recursively compared.  First, we check whether they
   contain exactly the same set of files.  Then, we check to see every given
   file has exactly the same contents in both directories.

   This is all relatively simple to implement through the magic of
   L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix
   off the front of each entry in the mapping it generates.  This makes our
   comparison as simple as creating a list for each path, then generating a
   digest map for each path and comparing the two.

   If no exception is thrown, the two directories are considered identical.

   If the C{verbose} flag is C{True}, then an alternate (but slower) method is
   used so that any thrown exception can indicate exactly which file caused the
   comparison to fail.  The thrown C{ValueError} exception distinguishes
   between the directories containing different files, and containing the same
   files with differing content.

   @note: Symlinks are I{not} followed for the purposes of this comparison.

   @param path1: First path to compare.
   @type path1: String representing a path on disk

   @param path2: Second path to compare.
   @type path2: String representing a path on disk

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If a directory doesn't exist or can't be read.
   @raise ValueError: If the two directories are not equivalent.
   @raise IOError: If there is an unusual problem reading the directories.
   """
   try:
      path1List = BackupFileList()
      path1List.addDirContents(path1)
      # Strip each root so both maps are keyed by paths relative to their own directory.
      path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1))
      path2List = BackupFileList()
      path2List.addDirContents(path2)
      path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2))
      compareDigestMaps(path1Digest, path2Digest, verbose)
   except IOError:
      logger.error("I/O error encountered during consistency check.")
      # A bare raise re-raises the active exception with its original
      # traceback; "raise e" would restart the traceback at this line.
      raise
 1486   
1488     """ 
1489     Compares two digest maps and throws an exception if they differ. 
1490   
1491     @param digest1: First digest to compare. 
1492     @type digest1: Digest as returned from BackupFileList.generateDigestMap() 
1493   
1494     @param digest2: Second digest to compare. 
1495     @type digest2: Digest as returned from BackupFileList.generateDigestMap() 
1496   
1497     @param verbose: Indicates whether a verbose response should be given. 
1498     @type verbose: Boolean 
1499   
1500     @raise ValueError: If the two directories are not equivalent. 
1501     """ 
1502     if not verbose: 
1503        if digest1 != digest2: 
1504           raise ValueError("Consistency check failed.") 
1505     else: 
1506        list1 = UnorderedList(digest1.keys()) 
1507        list2 = UnorderedList(digest2.keys()) 
1508        if list1 != list2: 
1509           raise ValueError("Directories contain a different set of files.") 
1510        for key in list1: 
1511           if digest1[key] != digest2[key]: 
1512              raise ValueError("File contents for [%s] vary between directories." % key) 
 1513