| Classes | Job Modules | Data Objects | Services | Algorithms | Tools | Packages | Directories | Tracs |

In This Package:

svndiff.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """
00003 SVN parsing machinery, intended to parse output from:
00004 
00005 #. svn log
00006 #. svnlook changed
00007 
00008 #. svn diff
00009 #. svnlook ... diff
00010 
00011 
00012 The full text is wrapped into an object hierarchy::
00013 
00014    Diff                (the full output)
00015       Delta            (section corresponding to a single file)
00016          Block         (a region of change within the file)
00017              Hunk      (either a context, addition or subtraction section of the changed region)
00018              Hunk
00019              ... 
00020          Block
00021          ...    
00022       Delta
00023       ...
00024 
00025 
00026 Initial development was done at 
00027 * http://dayabay.phys.ntu.edu.tw/tracs/env/browser/trunk/svn/svndiff.py
00028 """
00029 import os, re
00030 
class Cmd(dict):
    """
    Dict of parameters used to fill in a shell command template.

    Subclasses supply ``_cmd``, a %-format template with named fields that
    are looked up in this dict.

    cmd   the command line obtained by filling ``_cmd`` from the dict
    diff  output of running the command with ``cmd`` set to "diff"
    """
    cmd = property(lambda self: self._cmd % self)
    diff = property(lambda self: self(cmd="diff"))

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)

    def __call__(self, *args, **kwargs):
        """
        Update the parameters from the keyword arguments, run the resulting
        command line through the shell and return everything it wrote to
        stdout.  Note that the keyword arguments stay stored in the dict
        after the call.  Positional arguments are accepted but ignored.
        """
        self.update(kwargs)
        command = self.cmd
        # single argument in parentheses: prints the same under python 2 and 3
        print("performing command [%s] " % command)
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            # always release the pipe, even when reading fails
            pipe.close()
00040 
class SVN(Cmd):
    """
    Runs the ``svn`` client against a working copy directory or a repository
    URL given as the ``path`` parameter.

    The executable is taken from the SVN environment variable, defaulting
    to "svn".  The ``exepath`` and ``opt`` parameters are (re)set by the
    constructor, overriding any values passed in.

    cmd      command line, with "--revision" appended when a revision is set
    msg      the "msg" parameter, or "naughty-no-msg" when absent
    author   the "author" parameter, or "no-author" when absent
    changed  list of paths reported by "svn log --verbose"
    """
    _cmd = "%(exepath)s %(opt)s %(cmd)s %(path)s "
    _cmdr = _cmd + " --revision %(revision)s "

    def __cmd(self):
        """Command line for the current parameters."""
        if self.get('revision', None):
            return self._cmdr % self
        return self._cmd % self
    cmd = property(__cmd)
    msg = property(lambda self: self.get("msg", "naughty-no-msg"))
    author = property(lambda self: self.get("author", "no-author"))

    def _changed(self):
        """
        List of full repository paths changed in the specified revision

        Parses the lines between "Changed paths:" and the following blank
        line of the verbose log output.  Each such line is expected to be
        an action code followed by a path, optionally followed by a
        "(from path:rev)" copy source.  The leading "/" is removed from
        the path.  Any other non-blank line in the section is reported on
        stdout and skipped.
        """
        paths = []
        in_paths = False
        output = self(cmd="log", opt="--verbose --non-interactive")
        for i, line in enumerate(output.rstrip().split("\n")):
            # fields are only split out for lines inside the paths section,
            # the section flag is updated after that so the "Changed paths:"
            # heading itself is never treated as a path line
            ele = line.split() if in_paths else []
            if line == "Changed paths:":
                in_paths = True
            elif line == "":
                in_paths = False

            # copies are listed as:  A /new/path (from /old/path:123)
            is_copy = len(ele) == 4 and ele[2] == "(from"
            if len(ele) == 2 or is_copy:
                path = ele[1]
                if path[0] == '/':
                    path = path[1:]
                paths.append(path)
            elif ele:
                print("SVN._changed unexpected [%-2d][%s][%s][%r]" % (i, in_paths, line, ele))
        return paths
    changed = property(_changed)

    def __init__(self, *args, **kwargs):
        """
        Requires a ``path`` parameter.  Paths that start with "http" or
        that contain "://" are taken to be repository URLs and are used as
        given, anything else has "~" expanded and must be a directory
        containing a ".svn" sub-directory.
        """
        Cmd.__init__(self, *args, **kwargs)
        self.update(exepath=os.environ.get("SVN", "svn"), opt="")
        path = self.get('path', None)
        assert path, "SVN a path argument is required "
        if path.startswith('http') or '://' in path:
            return
        self['path'] = os.path.expanduser(path)
        assert os.path.isdir(os.path.join(self['path'], ".svn")), "Directory %r is not SVN working copy " % self['path']
00093 
class SVNLook(Cmd):
    """
    Runs ``svnlook`` against the repository directory given as the
    ``repo_path`` parameter, for either a transaction (``txn_name``) or a
    committed revision (``revision``).

    The executable is taken from the SVNLOOK environment variable,
    defaulting to "svnlook".  The ``sudo`` parameter is a prefix placed
    directly in front of the executable, so it needs its own trailing space.

    cmd      command line for the transaction if ``txn_name`` is set, else
             for the revision if ``revision`` is set, else None
    msg      output of "svnlook log"
    author   output of "svnlook author" with trailing whitespace removed
    changed  list of paths reported by "svnlook changed"
    """
    _cmd = "%(sudo)s%(exepath)s %(cmd)s %(repo_path)s "
    _cmdt = _cmd + " --transaction %(txn_name)s "
    _cmdr = _cmd + " --revision %(revision)s "

    def __cmd(self):
        """Command line for the current parameters, a transaction takes precedence over a revision."""
        if self.get('txn_name', None):
            return self._cmdt % self
        if self.get('revision', None):
            return self._cmdr % self
        return None
    cmd = property(__cmd)
    msg = property(lambda self: self(cmd="log"))
    author = property(lambda self: self(cmd="author").rstrip())

    def _changed(self):
        """
        List of full repository paths changed in the txn or commit

        Each non-blank output line is split into a status field and the
        rest of the line, which is taken as the path so that paths
        containing spaces are kept whole.  A leading "/" is removed.
        """
        paths = []
        for line in self(cmd="changed").rstrip().split("\n"):
            if not line.strip():
                # nothing changed, or a stray blank line: no path to record
                continue
            ele = line.split(None, 1)
            assert len(ele) == 2, "SVNLook._changed unexpected ele %r " % ele
            path = ele[1].rstrip()
            if path[0] == '/':
                path = path[1:]
            paths.append(path)
        return paths
    changed = property(_changed)

    def __init__(self, *args, **kwargs):
        """
        Requires a ``repo_path`` parameter naming a directory that contains
        a "hooks" sub-directory.
        """
        Cmd.__init__(self, *args, **kwargs)
        self.update(exepath=os.environ.get("SVNLOOK", "svnlook"), sudo=kwargs.get("sudo", ""), cmd="to_be_set")
        # the message must only use names that exist here, otherwise a bad
        # repo_path surfaces as a NameError rather than this assertion
        assert os.path.isdir(os.path.join(self['repo_path'], "hooks")), "Directory %s is not an SVN repository " % self['repo_path']
00129 
00130 
class Text(list):
    """
    Base class that holds lines of text as a list and provides the
    facility for splitting the text into child pieces and for verifying
    that the unsplit text can be recreated from the pieces

    meta      dict of the keyword arguments given to the constructor
    divs      line indices at which the last split_ call divided the text
    children  list of child pieces, assigned by subclasses
    """
    def __init__(self, *args, **kwargs):
        """The first positional argument is the iterable of lines, keyword arguments become ``meta``."""
        list.__init__(self, args[0])
        self.meta = {}
        self.meta.update(kwargs)
        self.divs = []
        self.children = []

    def split_(self, cls, predicate=lambda line: True, offset=0):
        """
        Divide the lines into consecutive pieces and return them as a list
        of ``cls`` instances, without assigning them to ``children``.

        cls        called as cls(lines, index=, begin=, end=) for each piece
        predicate  returns true for the lines that mark a division
        offset     added to the index of a marker line to get the index at
                   which the piece starts

        Each piece runs up to the start of the next one, the last runs to
        the end of the text.  Lines ahead of the first division are not
        part of any piece.
        """
        # clamp at zero: a marker on the first line combined with a negative
        # offset must not turn into an index counted from the end of the list
        divs = [max(0, n + offset) for n, line in enumerate(self) if predicate(line)]
        self.divs = divs
        ends = divs[1:] + [len(self)]
        ## 1-based begin/end numbering for direct vi "set nu" comparison
        return [cls(self[n:m], index=i, begin=n + 1, end=m) for i, (n, m) in enumerate(zip(divs, ends))]

    def _nchild(self):
        """Number of children that have a non-empty summary."""
        # counted with a list rather than filter(), which has no len() in python 3
        return len([c for c in self.children if len(c.smry) > 0])
    nchild = property(_nchild)

    def _smry(self):
        """Concatenation of the summaries of the children."""
        return "".join([c.smry for c in self.children])
    smry = property(_smry)

    def __str__(self):
        return "\n".join(self)

    def __repr__(self):
        try:
            label = "%(index)s[%(begin)s:%(end)s]" % self.meta
        except KeyError:
            # not created by a split, so show whatever metadata there is
            label = "%r" % self.meta
        return "%s %s (%d) %d [%s] " % (self.__class__.__name__, label, len(self), self.nchild, self.smry)

    rejoin = property(lambda self: "\n".join([str(c) for c in self.children]))

    def check(self, verbose=False, hdr=None):
        """
        Check can put together the split text

        hdr      text expected ahead of the rejoined children, if any
        verbose  print the original and recombined text even when they agree

        Raises AssertionError when the recombined text differs from the
        original, after printing both.
        """
        rejo = self.rejoin
        if hdr:
            rejo = "\n".join([hdr, rejo])

        agree = str(self) == rejo

        if verbose or not agree:
            rule = "." * 50
            print(rule + " original " + rule)
            print(str(self))
            print(rule + " recombined " + rule)
            print(rejo)
            print("." * 100)
        assert agree, ("mismatch for %s %r " % (self.__class__.__name__, self))
00194 
00195  
class Hunk(Text):
    """
    A hunk is a run of diff lines that all begin with the same character,
    that character is held in the metadata under the key 'c'
    """
    def __init__(self, *args, **kwargs):
        Text.__init__(self, *args, **kwargs)

    def _smry(self):
        """The first character when it marks an addition or subtraction, otherwise an empty string."""
        first = self.meta['c']
        return first if first in "+-" else ""
    smry = property(_smry)
00209 
00210 
class Block(Text):
    """
    A region of change within a file: a "@@ ... @@" header line followed
    by the lines of the region, which are grouped into Hunk children.

    The two ranges of the header are recorded in the metadata as 'ablk'
    and 'bblk'.
    """
    # raw string so the backslashes reach the regex engine untouched
    ptn = re.compile(r"^@@ (?P<ablk>[-\+,\d]*) (?P<bblk>[-\+,\d]*) @@$")
    hdr = property(lambda self: self[0])

    def __init__(self, *args, **kwargs):
        Text.__init__(self, *args, **kwargs)
        self.children = []
        self.parse_hdr()
        self.parse_body()
        self.check(verbose=False)

    def parse_hdr(self):
        """Match the first line against the header pattern and record the named groups in the metadata."""
        m = self.ptn.match(self[0])
        assert m, ("failed to match %s " % self[0])
        self.meta.update(m.groupdict())

    def parse_body(self, verbose=False):
        """
        Looks for contiguous Hunks of text with the same first character.
        records prior when transitions to new contiguous first char, avoids fake initial hunk

        Empty lines are treated as having the first character ".".  Only
        hunks with a non-empty summary advance the hunk index.
        """
        prev, start = "<", 0     # "<" stands for "nothing seen yet"
        index = 0
        # the trailing ">" sentinel forces the final hunk to be recorded
        for i, line in enumerate(self + [">"]):
            c = line[0] if len(line) > 0 else "."
            # the backslash admits the "\ No newline at end of file" marker
            assert c in " >@+-.\\", ("unexpected first character %r in line %r " % (c, line))
            if verbose:
                print("[%2d, %s,%s,%d] %s " % (i + 1, c, prev, start + 1, line))
            if c == prev:
                continue
            if prev != "<":
                hnk = Hunk(self[start:i], c=prev, begin=start + 1, end=i, index=index)
                self.children.append(hnk)
                if hnk.smry != "":   ## only hunks with non empty summaries qualify for an index
                    index += 1
            prev, start = c, i
00253 
class Delta(Text):
    """
    Hold raw text of a single difference ...
    split into sub-Blocks using the block divider

    The first four lines are the header, matched line by line against
    ``ptn``.  adate and bdate are present for svnlook diffs

    basename  last component of the path from the header
    name      basename without its extension
    ext       extension of the basename
    """
    req = 'label path div apath bpath abracket bbracket'.split()
    # raw strings so the backslashes reach the regex engine untouched
    ptn = (
           re.compile(r"^(?P<label>\S*): (?P<path>\S*)"),
           re.compile(r"^(?P<div>===================================================================)"),
           re.compile(r"^--- (?P<apath>\S*)\t(?P<adate>.*)\((?P<abracket>.*)\)"),
           re.compile(r"^\+\+\+ (?P<bpath>\S*)\t(?P<bdate>.*)\((?P<bbracket>.*)\)"),
         )
    hdr = property(lambda self: "\n".join(self[0:4]))

    def __init__(self, *args, **kwargs):
        Text.__init__(self, *args, **kwargs)
        self.parse_hdr()
        self.children = self.split_(Block, Block.ptn.match, offset=0)  ## offset controls where to divide ...
        self.check(hdr=self.hdr)

    def parse_hdr(self):
        """
        Line by line pattern matching of the header

        The named groups of all the patterns are gathered in the metadata,
        the paths of the three path bearing lines must agree, after which
        'apath', 'bpath' and 'div' are dropped from the metadata.
        """
        assert len(self) >= len(self.ptn), ("header needs %d lines but only have %d " % (len(self.ptn), len(self)))
        for ptn, line in zip(self.ptn, self):
            m = ptn.match(line)
            assert m, ("failed to match %s " % line)
            self.meta.update(m.groupdict())
        for req in self.req:
            assert req in self.meta, "required match parameter not found %s " % req
        assert self.meta['apath'] == self.meta['bpath'] == self.meta['path'], ("path mismatch", self.meta)
        for key in ('apath', 'bpath', 'div'):
            del self.meta[key]

    def __repr__(self):
        return Text.__repr__(self) + self.meta['path'] + " " + self.basename

    basename = property(lambda self: os.path.basename(self.meta['path']))
    name = property(lambda self: os.path.splitext(self.basename)[0])
    ext = property(lambda self: os.path.splitext(self.basename)[1])
00295     
00296 
class Diff(Text):
    """
    Hold the raw text of the full output of "svn diff" or "svnlook diff"
    and split into sub-Delta using the divider

    The divider is the second line of each Delta header, hence the split
    offset of -1 to start each Delta on the line before it.
    """
    def __init__(self, *args, **kwargs):
        Text.__init__(self, *args, **kwargs)
        self.children = self.split_(Delta, lambda l: Delta.ptn[1].match(l), offset=-1)
        self.check()

    def dump(self):
        """
        Print the repr of the diff and of each Delta.  When the 'verbose'
        metadata entry is true also print each Block and those of its
        Hunks that have a non-empty summary.
        """
        print(repr(self))
        verbose = self.meta.get('verbose', False)
        for dlt in self.children:
            print(repr(dlt))
            if not verbose:
                continue
            for blk in dlt.children:
                print(repr(blk))
                for hnk in blk.children:
                    if hnk.smry:
                        print(repr(hnk))
00318 
00319 
if __name__ == '__main__':
    # ad hoc checks of the changed path listings, the repository paths and
    # revisions are hard-coded
    look = SVNLook(sudo="sudo ", repo_path="/var/scm/repos/newtest", revision="7")
    print(look.changed)

    s1 = SVN(path="~/DybPython", revision="11175")
    print(s1.changed)

    s2 = SVN(path="http://dayabay.ihep.ac.cn/svn/dybsvn", revision="11175")
    print(s2.changed)
00329 
00330 
00331 
| Classes | Job Modules | Data Objects | Services | Algorithms | Tools | Packages | Directories | Tracs |

Generated on Mon Apr 11 20:13:00 2011 for DybPython by doxygen 1.4.7