00001
00002 """
00003 SVN parsing machinery, intended to parse output from:
00004
00005 #. svn log
00006 #. svnlook changed
00007
00008 #. svn diff
00009 #. svnlook ... diff
00010
00011
The full text is wrapped into an object hierarchy::

    Diff (the full output)
        Delta (section corresponding to a single file)
            Block (a region of change within the file)
                Hunk (either a context, addition or subtraction section of the changed region)
                Hunk
                ...
            Block
            ...
        Delta
        ...
00024
00025
00026 Initial development was done at
00027 * http://dayabay.phys.ntu.edu.tw/tracs/env/browser/trunk/svn/svndiff.py
00028 """
00029 import os, re
00030
class Cmd(dict):
    """
    Dictionary of parameters that doubles as a shell command runner.

    Subclasses provide a ``_cmd`` format string with ``%(key)s``
    placeholders; the command line is obtained by interpolating the
    dictionary content into that template.
    """
    # command line built from the ``_cmd`` template and the current parameters
    cmd = property(lambda self: self._cmd % self)
    # output of the "diff" subcommand -- note that reading it runs the command
    diff = property(lambda self: self(cmd="diff"))

    def __init__(self, *args, **kwargs):
        """Accepts the same arguments as ``dict``."""
        dict.__init__(self, *args, **kwargs)

    def __call__(self, *args, **kwargs):
        """
        Merge the keyword arguments into the parameters, run the resulting
        command line through the shell and return all of its standard output.

        The keyword arguments stay in the dictionary after the call.
        Positional arguments are accepted but not used.
        """
        self.update(kwargs)
        command = self.cmd
        # single-argument parenthesised form prints the same text whether
        # ``print`` is a statement or a function
        print("performing command [%s] " % command)
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            # close explicitly rather than relying on garbage collection
            pipe.close()
00040
class SVN(Cmd):
    """
    Runs the ``svn`` command line client against a working copy directory
    or a repository URL.

    Keys used: ``path`` (required), ``revision``, ``msg`` and ``author``
    (all optional).  ``exepath`` and ``opt`` are set by the constructor and
    ``cmd`` / ``opt`` are overwritten by the individual queries.
    """
    _cmd = "%(exepath)s %(opt)s %(cmd)s %(path)s "
    _cmdr = _cmd + " --revision %(revision)s "

    # One entry of the "Changed paths:" section of "svn log --verbose":
    # an action code, the path (which may contain spaces) and an optional
    # trailing " (from /source/path:REV)" note on copied entries.
    _chgptn = re.compile(r"^\s*(?P<action>\S+)\s+(?P<path>.+?)(?:\s+\(from\s+.+:\d+\))?\s*$")

    def __cmd(self):
        """Command line, with a ``--revision`` option when a revision is set."""
        if self.get('revision', None):
            return self._cmdr % self
        else:
            return self._cmd % self
    cmd = property(__cmd)
    msg = property(lambda self: self.get("msg", "naughty-no-msg"))
    author = property(lambda self: self.get("author", "no-author"))

    def _changed(self):
        """
        List of full repository paths changed in the specified revision

        Runs ``svn log --verbose --non-interactive`` and collects the entries
        listed between each "Changed paths:" line and the following blank
        line.  A single leading "/" is removed from every path.  Entries
        that cannot be parsed are reported on stdout and skipped.
        """
        paths = []
        in_paths = False
        output = self(cmd="log", opt="--verbose --non-interactive")
        for i, line in enumerate(output.rstrip().split("\n")):
            if line == "Changed paths:":
                in_paths = True
                continue
            if line == "":
                in_paths = False
                continue
            if not in_paths or not line.strip():
                continue
            m = self._chgptn.match(line)
            if m:
                path = m.group('path')
                if path.startswith('/'):
                    path = path[1:]
                paths.append(path)
            else:
                print("SVN._changed unexpected [%-2d][%s][%s][%r]" % (i, in_paths, line, line.split()))
        return paths
    changed = property(_changed)

    def __init__(self, *args, **kwargs):
        """
        Accepts the same arguments as ``dict``; a ``path`` entry is required.

        The executable is taken from the ``SVN`` environment variable,
        falling back to ``svn``.  A path that is not a URL has ``~`` expanded
        and must be a directory containing ``.svn``.

        Raises AssertionError when ``path`` is missing or is not such a
        directory.
        """
        Cmd.__init__(self, *args, **kwargs)
        self.update(exepath=os.environ.get("SVN", "svn"), opt="")
        path = self.get('path', None)
        assert path, "SVN a path argument is required "
        # anything with a URL scheme (http, https, svn, file, svn+ssh ...)
        # is a repository location and not a working copy directory
        if path.startswith('http') or '://' in path:
            pass
        else:
            self['path'] = os.path.expanduser(path)
            assert os.path.isdir(os.path.join(self['path'], ".svn")), "Directory %r is not SVN working copy " % self['path']
00093
class SVNLook(Cmd):
    """
    Runs ``svnlook`` against a repository directory, addressing either a
    transaction (``txn_name``) or a committed revision (``revision``).

    Keys used: ``repo_path`` (required), ``txn_name`` or ``revision``, and
    an optional ``sudo`` prefix that is placed directly in front of the
    executable (so it needs its own trailing space).
    """
    _cmd = "%(sudo)s%(exepath)s %(cmd)s %(repo_path)s "
    _cmdt = _cmd + " --transaction %(txn_name)s "
    _cmdr = _cmd + " --revision %(revision)s "

    def __cmd(self):
        """
        Command line for the transaction when ``txn_name`` is set, otherwise
        for the revision when ``revision`` is set, otherwise None.
        """
        if self.get('txn_name', None):
            return self._cmdt % self
        elif self.get('revision', None):
            return self._cmdr % self
        else:
            return None
    cmd = property(__cmd)
    # both properties run svnlook each time they are read
    msg = property(lambda self: self(cmd="log"))
    author = property(lambda self: self(cmd="author").rstrip())

    def _changed(self):
        """
        List of full repository paths changed in the txn or commit

        Each non-blank line of ``svnlook changed`` output is split into a
        status field and the path; the path may contain spaces.  A single
        leading "/" is removed.  Empty output yields an empty list.

        Raises AssertionError for a line without both a status and a path.
        """
        paths = []
        for line in self(cmd="changed").rstrip().split("\n"):
            if not line.strip():
                # no changes at all, or a stray blank line
                continue
            # split once only: everything after the status is the path
            ele = line.split(None, 1)
            assert len(ele) == 2, "SVNLook._changed unexpected ele %r " % ele
            path = ele[1]
            if path.startswith('/'):
                path = path[1:]
            paths.append(path)
        return paths
    changed = property(_changed)

    def __init__(self, *args, **kwargs):
        """
        Accepts the same arguments as ``dict``; ``repo_path`` is required
        and must be a directory containing a ``hooks`` subdirectory.

        The executable is taken from the ``SVNLOOK`` environment variable,
        falling back to ``svnlook``.

        Raises AssertionError when ``repo_path`` is not such a directory.
        """
        Cmd.__init__(self, *args, **kwargs)
        self.update(exepath=os.environ.get("SVNLOOK", "svnlook"), sudo=kwargs.get("sudo", ""), cmd="to_be_set")
        # the message previously referenced an undefined name, turning a
        # failed check into a NameError
        assert os.path.isdir(os.path.join(self['repo_path'], "hooks")), "Directory %s is not an SVN repository " % self['repo_path']
00129
00130
class Text(list):
    """
    Base class that holds lines of text as a list and
    provides facility for splitting the text and verifying
    can recreate the unsplit from the pieces

    Attributes:
      meta     -- dict of the keyword arguments given to the constructor
      divs     -- line indices found by the most recent ``split_``
      children -- pieces of this text; assigned by the subclasses
    """
    def __init__(self, *args, **kwargs):
        """First positional argument is the iterable of lines; keywords go into ``meta``."""
        list.__init__(self, args[0])
        self.meta = {}
        self.meta.update(kwargs)
        self.divs = []
        self.children = []

    def split_(self, cls, predicate=lambda line: True, offset=0):
        """
        Cut the lines into consecutive pieces and wrap each in ``cls``.

        A piece starts at ``n + offset`` for every line index ``n`` where
        ``predicate(line)`` is true and runs up to the start of the next
        piece, or to the end of the text for the last one.  Each piece is
        created with keywords ``index`` (0-based position), ``begin``
        (1-based first line) and ``end`` (1-based last line).

        Records the start indices in ``self.divs`` and returns the list of
        pieces; ``self.children`` is left for the caller to assign.
        """
        divs = [n + offset for n, line in enumerate(self) if predicate(line)]
        self.divs = divs
        # every piece ends where the next one starts, the last at end of text
        stops = divs[1:] + [len(self)]
        children = []
        for i, (n, m) in enumerate(zip(divs, stops)):
            children.append(cls(self[n:m], index=i, begin=n + 1, end=m))
        return children

    def _nchild(self):
        """Number of children that have a non-empty summary."""
        # counted with a generator so that it works whether ``filter``
        # returns a list or an iterator
        return sum(1 for c in self.children if len(c.smry) > 0)
    nchild = property(_nchild)

    def _smry(self):
        """Concatenation of the summaries of the children."""
        return "".join([c.smry for c in self.children])
    smry = property(_smry)

    def __str__(self):
        return "\n".join(self)

    def __repr__(self):
        try:
            label = "%(index)s[%(begin)s:%(end)s]" % self.meta
        except KeyError:
            # not created by split_ : show whatever metadata there is
            label = "%r" % self.meta
        return "%s %s (%d) %d [%s] " % (self.__class__.__name__, label, len(self), self.nchild, self.smry)

    # text rebuilt from the children, used by ``check``
    rejoin = property(lambda self: "\n".join([str(c) for c in self.children]))

    def check(self, verbose=False, hdr=None):
        """
        Check can put together the split text

        ``hdr``, when given, is text that belongs in front of the first
        child and is prepended before comparing.  Both versions are printed
        when ``verbose`` is set or when they differ.

        Raises AssertionError when the recombined text differs from the
        original.
        """
        rejo = self.rejoin
        if hdr:
            rejo = "\n".join([hdr, rejo])

        original = str(self)
        agree = original == rejo

        if verbose or not agree:
            # single-argument parenthesised prints behave the same whether
            # ``print`` is a statement or a function
            print("." * 50 + " original " + "." * 50)
            print(original)
            print("." * 50 + " recombined " + "." * 50)
            print(rejo)
            print("." * 100)
        assert agree, ("mismatch for %s %r " % (self.__class__.__name__, self))
00194
00195
class Hunk(Text):
    """
    A hunk is a run of consecutive diff lines that all begin with the same
    character.  That character is expected under the ``c`` key of ``meta``.
    """
    def _smry(self):
        """The hunk character when it is "+" or "-", otherwise an empty string."""
        first = self.meta['c']
        return first if first in "+-" else ""
    smry = property(_smry)
00209
00210
class Block(Text):
    """
    A region of change within one file: an ``@@ ... @@`` header line
    followed by context, addition and subtraction lines.  On construction
    the header is parsed into ``meta`` (``ablk`` and ``bblk``, the two range
    fields of the header) and the lines are grouped into Hunk children.
    """
    ptn = re.compile(r"^@@ (?P<ablk>[-\+,\d]*) (?P<bblk>[-\+,\d]*) @@$")
    hdr = property(lambda self: self[0])

    # Characters a line of the block may start with: context " ", the
    # end-of-input sentinel ">", the header "@", additions "+", removals
    # "-", the stand-in "." used for empty lines, and "\" which begins the
    # "\ No newline at end of file" marker.
    _firstchars = " >@+-.\\"

    def __init__(self, *args, **kwargs):
        """Same arguments as Text; parses header and body, then verifies the split."""
        Text.__init__(self, *args, **kwargs)
        self.parse_hdr()
        self.parse_body()
        self.check(verbose=False)

    def parse_hdr(self):
        """
        Match the first line against the header pattern and store the named
        groups in ``meta``.  Raises AssertionError when it does not match.
        """
        m = self.ptn.match(self[0])
        assert m, ("failed to match %s " % self[0])
        self.meta.update(m.groupdict())

    def parse_body(self, verbose=False):
        """
        Looks for contiguous Hunks of text with the same first character.
        records prior when transitions to new contiguous first char, avoids fake initial hunk

        Every line, the header included, ends up in exactly one Hunk, so the
        children rejoin to the full text.  The ``index`` keyword given to a
        Hunk counts the preceding hunks that had a non-empty summary.

        Raises AssertionError for a line starting with an unexpected character.
        """
        # "<" marks that no hunk has been opened yet
        last, start = "<", 0
        index = 0
        # the trailing ">" sentinel forces the final hunk to be recorded
        for i, line in enumerate(self + [">"]):
            c = line[0] if len(line) > 0 else "."
            assert c in self._firstchars, ("unexpected first character %r at line %d : %s " % (c, i + 1, line))
            if verbose:
                print("[%2d, %s,%s,%d] %s " % (i + 1, c, last, start + 1, line))
            if c == last:
                continue
            if last != "<":
                hnk = Hunk(self[start:i], c=last, begin=start + 1, end=i, index=index)
                self.children.append(hnk)
                if hnk.smry != "":
                    index += 1
            last = c
            start = i
00253
class Delta(Text):
    """
    Raw text of the difference for a single file: four header lines
    followed by the change regions, which are wrapped as Block children.

    The header patterns capture ``label`` and ``path`` from the first line,
    the divider from the second, and path, date and bracketed text from the
    "---" and "+++" lines.  adate and bdate are present for svnlook diffs.
    """
    # names that must have been captured once the header is parsed
    req = 'label path div apath bpath abracket bbracket'.split()
    # one pattern per header line, in line order
    ptn = (
        re.compile(r"^(?P<label>\S*): (?P<path>\S*)"),
        re.compile(r"^(?P<div>===================================================================)"),
        re.compile(r"^--- (?P<apath>\S*)" "\t" r"(?P<adate>.*)\((?P<abracket>.*)\)"),
        re.compile(r"^\+\+\+ (?P<bpath>\S*)" "\t" r"(?P<bdate>.*)\((?P<bbracket>.*)\)"),
    )
    hdr = property(lambda self: "\n".join(self[:4]))

    def __init__(self, *args, **kwargs):
        """Same arguments as Text; parses the header, splits into Blocks and verifies."""
        Text.__init__(self, *args, **kwargs)
        self.parse_hdr()
        self.children = self.split_(Block, Block.ptn.match, offset=0)
        self.check(hdr=self.hdr)

    def parse_hdr(self):
        """
        Match header line i against pattern i and collect the named groups
        in ``meta``.  The three captured paths must be equal; afterwards
        ``apath``, ``bpath`` and ``div`` are removed from ``meta``.

        Raises AssertionError on a failed match, a missing required name or
        a path mismatch.
        """
        for i, pattern in enumerate(self.ptn):
            line = self[i]
            found = pattern.match(line)
            assert found, ("failed to match %s " % line)
            self.meta.update(found.groupdict())
        for key in self.req:
            assert key in self.meta, "required match parameter not found %s " % key
        assert self.meta['apath'] == self.meta['bpath'] == self.meta['path'], ("path mismatch", self.meta)
        for key in ('apath', 'bpath', 'div'):
            del self.meta[key]

    def __repr__(self):
        return Text.__repr__(self) + self.meta['path'] + " " + self.basename

    # pieces of the file name taken from the path in the header
    basename = property(lambda self: os.path.basename(self.meta['path']))
    name = property(lambda self: os.path.splitext(self.basename)[0])
    ext = property(lambda self: os.path.splitext(self.basename)[1])
00295
00296
class Diff(Text):
    """
    Raw text of the full output of "svn diff" or "svnlook diff", cut into
    one Delta child per file.  A Delta starts on the line before each
    divider line.
    """
    def __init__(self, *args, **kwargs):
        """Same arguments as Text; splits into Deltas and verifies the split."""
        Text.__init__(self, *args, **kwargs)
        divider = Delta.ptn[1]
        # offset of -1 because the divider is the second line of each Delta
        self.children = self.split_(Delta, lambda l: divider.match(l), offset=-1)
        self.check()

    def dump(self):
        """
        Print the repr of this diff and of every Delta.  With a true
        ``verbose`` entry in ``meta`` also print each Block and those of its
        Hunks that have a non-empty summary.
        """
        print(repr(self))
        verbose = self.meta.get('verbose', False)
        for dlt in self.children:
            print(repr(dlt))
            if not verbose:
                continue
            for blk in dlt.children:
                print(repr(blk))
                for hnk in blk.children:
                    if hnk.smry:
                        print(repr(hnk))
00318
00319
if __name__ == '__main__':
    # changed paths of one revision as seen by svnlook on a local repository
    look = SVNLook(sudo="sudo ", repo_path="/var/scm/repos/newtest", revision="7")
    print(look.changed)

    # changed paths of one revision, via a working copy and then via a URL
    for target in ("~/DybPython", "http://dayabay.ihep.ac.cn/svn/dybsvn"):
        client = SVN(path=target, revision="11175")
        print(client.changed)
00329
00330
00331