1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23   
 24   
 25   
 26   
 27   
 28   
 29   
 30   
 31   
 32   
 33   
 34   
 35   
 36   
 37   
 38  """ 
 39  Provides an extension to split up large files in staging directories. 
 40   
 41  When this extension is executed, it will look through the configured Cedar 
 42  Backup staging directory for files exceeding a specified size limit, and split 
 43  them down into smaller files using the 'split' utility.  Any directory which 
 44  has already been split (as indicated by the C{cback.split} file) will be 
 45  ignored. 
 46   
 47  This extension requires a new configuration section <split> and is intended 
 48  to be run immediately after the standard stage action or immediately before the 
 49  standard store action.  Aside from its own configuration, it requires the 
 50  options and staging configuration sections in the standard Cedar Backup 
 51  configuration file. 
 52   
 53  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 54  """ 
 55   
 56   
 57   
 58   
 59   
 60   
 61  import os 
 62  import re 
 63  import logging 
 64   
 65   
 66  from CedarBackup2.util import resolveCommand, executeCommand, changeOwnership 
 67  from CedarBackup2.xmlutil import createInputDom, addContainerNode 
 68  from CedarBackup2.xmlutil import readFirstChild 
 69  from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles 
 70  from CedarBackup2.config import ByteQuantity, readByteQuantity, addByteQuantityNode 
 71   
 72   
 73   
 74   
 75   
 76   
 77  logger = logging.getLogger("CedarBackup2.log.extend.split") 
 78   
 79  SPLIT_COMMAND = [ "split", ] 
 80  SPLIT_INDICATOR = "cback.split" 
 88   
 89     """ 
 90     Class representing split configuration. 
 91   
 92     Split configuration is used for splitting staging directories. 
 93   
 94     The following restrictions exist on data in this class: 
 95   
 96        - The size limit must be a ByteQuantity 
 97        - The split size must be a ByteQuantity 
 98   
 99     @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize 
100     """ 
101   
102 -   def __init__(self, sizeLimit=None, splitSize=None): 
 103        """ 
104        Constructor for the C{SplitCOnfig} class. 
105   
106        @param sizeLimit: Size limit of the files, in bytes 
107        @param splitSize: Size that files exceeding the limit will be split into, in bytes 
108   
109        @raise ValueError: If one of the values is invalid. 
110        """ 
111        self._sizeLimit = None 
112        self._splitSize = None 
113        self.sizeLimit = sizeLimit 
114        self.splitSize = splitSize 
 115   
117        """ 
118        Official string representation for class instance. 
119        """ 
120        return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize) 
 121   
123        """ 
124        Informal string representation for class instance. 
125        """ 
126        return self.__repr__() 
 127   
129        """ 
130        Definition of equals operator for this class. 
131        Lists within this class are "unordered" for equality comparisons. 
132        @param other: Other object to compare to. 
133        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other. 
134        """ 
135        if other is None: 
136           return 1 
137        if self.sizeLimit != other.sizeLimit: 
138           if self.sizeLimit < other.sizeLimit: 
139              return -1 
140           else: 
141              return 1 
142        if self.splitSize != other.splitSize: 
143           if self.splitSize < other.splitSize: 
144              return -1 
145           else: 
146              return 1 
147        return 0 
 148   
150        """ 
151        Property target used to set the size limit. 
152        If not C{None}, the value must be a C{ByteQuantity} object. 
153        @raise ValueError: If the value is not a C{ByteQuantity} 
154        """ 
155        if value is None: 
156           self._sizeLimit = None 
157        else: 
158           if not isinstance(value, ByteQuantity): 
159              raise ValueError("Value must be a C{ByteQuantity} object.") 
160           self._sizeLimit = value 
 161   
163        """ 
164        Property target used to get the size limit. 
165        """ 
166        return self._sizeLimit 
 167   
169        """ 
170        Property target used to set the split size. 
171        If not C{None}, the value must be a C{ByteQuantity} object. 
172        @raise ValueError: If the value is not a C{ByteQuantity} 
173        """ 
174        if value is None: 
175           self._splitSize = None 
176        else: 
177           if not isinstance(value, ByteQuantity): 
178              raise ValueError("Value must be a C{ByteQuantity} object.") 
179           self._splitSize = value 
 180   
182        """ 
183        Property target used to get the split size. 
184        """ 
185        return self._splitSize 
 186   
187     sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity") 
188     splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity") 
 189   
196   
197     """ 
198     Class representing this extension's configuration document. 
199   
200     This is not a general-purpose configuration object like the main Cedar 
201     Backup configuration object.  Instead, it just knows how to parse and emit 
202     split-specific configuration values.  Third parties who need to read and 
203     write configuration related to this extension should access it through the 
204     constructor, C{validate} and C{addConfig} methods. 
205   
206     @note: Lists within this class are "unordered" for equality comparisons. 
207   
208     @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig 
209     """ 
210   
211 -   def __init__(self, xmlData=None, xmlPath=None, validate=True): 
 212        """ 
213        Initializes a configuration object. 
214   
215        If you initialize the object without passing either C{xmlData} or 
216        C{xmlPath} then configuration will be empty and will be invalid until it 
217        is filled in properly. 
218   
219        No reference to the original XML data or original path is saved off by 
220        this class.  Once the data has been parsed (successfully or not) this 
221        original information is discarded. 
222   
223        Unless the C{validate} argument is C{False}, the L{LocalConfig.validate} 
224        method will be called (with its default arguments) against configuration 
225        after successfully parsing any passed-in XML.  Keep in mind that even if 
226        C{validate} is C{False}, it might not be possible to parse the passed-in 
227        XML document if lower-level validations fail. 
228   
229        @note: It is strongly suggested that the C{validate} option always be set 
230        to C{True} (the default) unless there is a specific need to read in 
231        invalid configuration from disk. 
232   
233        @param xmlData: XML data representing configuration. 
234        @type xmlData: String data. 
235   
236        @param xmlPath: Path to an XML file on disk. 
237        @type xmlPath: Absolute path to a file on disk. 
238   
239        @param validate: Validate the document after parsing it. 
240        @type validate: Boolean true/false. 
241   
242        @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in. 
243        @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed. 
244        @raise ValueError: If the parsed configuration document is not valid. 
245        """ 
246        self._split = None 
247        self.split = None 
248        if xmlData is not None and xmlPath is not None: 
249           raise ValueError("Use either xmlData or xmlPath, but not both.") 
250        if xmlData is not None: 
251           self._parseXmlData(xmlData) 
252           if validate: 
253              self.validate() 
254        elif xmlPath is not None: 
255           xmlData = open(xmlPath).read() 
256           self._parseXmlData(xmlData) 
257           if validate: 
258              self.validate() 
 259   
261        """ 
262        Official string representation for class instance. 
263        """ 
264        return "LocalConfig(%s)" % (self.split) 
 265   
267        """ 
268        Informal string representation for class instance. 
269        """ 
270        return self.__repr__() 
 271   
273        """ 
274        Definition of equals operator for this class. 
275        Lists within this class are "unordered" for equality comparisons. 
276        @param other: Other object to compare to. 
277        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other. 
278        """ 
279        if other is None: 
280           return 1 
281        if self.split != other.split: 
282           if self.split < other.split: 
283              return -1 
284           else: 
285              return 1 
286        return 0 
 287   
289        """ 
290        Property target used to set the split configuration value. 
291        If not C{None}, the value must be a C{SplitConfig} object. 
292        @raise ValueError: If the value is not a C{SplitConfig} 
293        """ 
294        if value is None: 
295           self._split = None 
296        else: 
297           if not isinstance(value, SplitConfig): 
298              raise ValueError("Value must be a C{SplitConfig} object.") 
299           self._split = value 
 300   
302        """ 
303        Property target used to get the split configuration value. 
304        """ 
305        return self._split 
 306   
307     split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.") 
308   
310        """ 
311        Validates configuration represented by the object. 
312   
313        Split configuration must be filled in.  Within that, both the size limit 
314        and split size must be filled in. 
315   
316        @raise ValueError: If one of the validations fails. 
317        """ 
318        if self.split is None: 
319           raise ValueError("Split section is required.") 
320        if self.split.sizeLimit is None: 
321           raise ValueError("Size limit must be set.") 
322        if self.split.splitSize is None: 
323           raise ValueError("Split size must be set.") 
 324   
326        """ 
327        Adds a <split> configuration section as the next child of a parent. 
328   
329        Third parties should use this function to write configuration related to 
330        this extension. 
331   
332        We add the following fields to the document:: 
333   
334           sizeLimit      //cb_config/split/size_limit 
335           splitSize      //cb_config/split/split_size 
336   
337        @param xmlDom: DOM tree as from C{impl.createDocument()}. 
338        @param parentNode: Parent that the section should be appended to. 
339        """ 
340        if self.split is not None: 
341           sectionNode = addContainerNode(xmlDom, parentNode, "split") 
342           addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit) 
343           addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize) 
 344   
346        """ 
347        Internal method to parse an XML string into the object. 
348   
349        This method parses the XML document into a DOM tree (C{xmlDom}) and then 
350        calls a static method to parse the split configuration section. 
351   
352        @param xmlData: XML data to be parsed 
353        @type xmlData: String data 
354   
355        @raise ValueError: If the XML cannot be successfully parsed. 
356        """ 
357        (xmlDom, parentNode) = createInputDom(xmlData) 
358        self._split = LocalConfig._parseSplit(parentNode) 
 359   
360     @staticmethod 
362        """ 
363        Parses a split configuration section. 
364   
365        We read the following individual fields:: 
366   
367           sizeLimit      //cb_config/split/size_limit 
368           splitSize      //cb_config/split/split_size 
369   
370        @param parent: Parent node to search beneath. 
371   
372        @return: C{SplitConfig} object or C{None} if the section does not exist. 
373        @raise ValueError: If some filled-in value is invalid. 
374        """ 
375        split = None 
376        section = readFirstChild(parent, "split") 
377        if section is not None: 
378           split = SplitConfig() 
379           split.sizeLimit = readByteQuantity(section, "size_limit") 
380           split.splitSize = readByteQuantity(section, "split_size") 
381        return split 
  382   
383   
384   
385   
386   
387   
388   
389   
390   
391   
392   
393 -def executeAction(configPath, options, config): 
 419   
420   
421   
422   
423   
424   
def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
   """
   Splits large files in a daily staging directory.

   The candidate files are whatever C{getBackupFiles} returns for the
   directory (presumably this leaves out indicator files such as
   C{"cback.store"} and C{"cback.stage"} -- confirm against that helper).
   Each candidate whose on-disk size compares greater than C{sizeLimit} is
   handed to C{_splitFile} with C{removeSource=True}; all other files are
   left untouched.

   @param dailyDir: Daily directory whose files should be split
   @param sizeLimit: Size limit that each file's size (a float number of bytes) is compared against
   @param splitSize: Split size, passed through unchanged to C{_splitFile}
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by

   @raise ValueError: If the daily staging directory does not exist (presumably raised by C{getBackupFiles} -- confirm).
   """
   logger.debug("Begin splitting contents of [%s].", dailyDir)
   for backupFile in getBackupFiles(dailyDir):
      fileSize = float(os.path.getsize(backupFile))
      if not (fileSize > sizeLimit):
         continue  # at or under the limit, nothing to do for this file
      _splitFile(backupFile, splitSize, backupUser, backupGroup, removeSource=True)
   logger.debug("Completed splitting contents of [%s].", dailyDir)
 449   
450   
451   
452   
453   
454   
def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
   """
   Splits the source file into chunks of the indicated size.

   The C{split} command is run from inside the source file's directory, so
   the chunks are created next to the source file and are named
   C{<filename>_NNNNN}, where C{NNNNN} is a five-digit numeric suffix.  The
   name of the last chunk is taken from the final line of the command's
   verbose output, and every chunk from index zero up to and including that
   last one is checked for existence and has its ownership changed.  The
   working directory in effect on entry is always restored before returning.

   The split files will be owned by the indicated backup user and group.  If
   C{removeSource} is C{True}, then the source file will be removed after it is
   successfully split.

   @param sourcePath: Absolute path of the source file to split
   @param splitSize: Size of each chunk; must expose a C{bytes} attribute (i.e. a C{ByteQuantity})
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by
   @param removeSource: Indicates whether to remove the source file

   @raise ValueError: If the source path does not exist.
   @raise IOError: If there is a problem accessing, splitting or removing the source file.
   """
   cwd = os.getcwd()
   try:
      if not os.path.exists(sourcePath):
         raise ValueError("Source path [%s] does not exist." % sourcePath)
      dirname = os.path.dirname(sourcePath)
      filename = os.path.basename(sourcePath)
      prefix = "%s_" % filename
      chunkBytes = int(splitSize.bytes)  # named to avoid shadowing the builtin "bytes"
      os.chdir(dirname)  # split is given bare file names, so it must run where the file lives
      command = resolveCommand(SPLIT_COMMAND)
      args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % chunkBytes, filename, prefix, ]
      (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
      if result != 0:
         raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
      if not output:
         # No output at all means there is no "creating file" line to inspect.
         raise IOError("Unable to parse output from split command.")
      # The prefix comes from a file name, so escape it before using it in a regex.
      pattern = re.compile(r"(creating file [`'])(%s)(.*)(')" % re.escape(prefix))
      match = pattern.search(output[-1])
      if match is None:
         raise IOError("Unable to parse output from split command.")
      lastIndex = int(match.group(3).strip())
      # Suffixes start at zero, so the last reported suffix is itself a chunk
      # that must be included in the verification/ownership pass.
      for index in range(0, lastIndex + 1):
         path = "%s%05d" % (prefix, index)
         if not os.path.exists(path):
            raise IOError("After call to split, expected file [%s] does not exist." % path)
         changeOwnership(path, backupUser, backupGroup)
      if removeSource and os.path.exists(sourcePath):
         try:
            os.remove(sourcePath)
         except OSError:
            raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
         logger.debug("Completed removing old file [%s].", sourcePath)
   finally:
      os.chdir(cwd)
 504