Skip to content

Commit e9405ac

Browse files
authored
Merge pull request #474 from gitpython-developers/keep-raw-bytes-on-diffs
Store raw path bytes in Diff instances
2 parents 930d03f + 3ee291c commit e9405ac

File tree

4 files changed

+51
-18
lines changed

4 files changed

+51
-18
lines changed

‎doc/source/changes.rst‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ Changelog
55
2.0.6 - Fixes and Features
66
==========================
77

8+
* API: Diffs now have `a_rawpath`, `b_rawpath`, `raw_rename_from`,
9+
`raw_rename_to` properties, which are the raw-bytes equivalents of their
10+
unicode path counterparts.
811
* Fix: TypeError about passing keyword argument to string decode() on
912
Python 2.6.
1013
* Feature: `setUrl API on Remotes <https://github.com/gitpython-developers/GitPython/pull/446#issuecomment-224670539>`_

‎git/compat.py‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def mviter(d):
3535
returnd.values()
3636
range=xrange
3737
unicode=str
38+
binary_type=bytes
3839
else:
3940
FileType=file
4041
# usually, this is just ascii, which might not be enough for our encoding needs
@@ -44,6 +45,7 @@ def mviter(d):
4445
byte_ord=ord
4546
bchr=chr
4647
unicode=unicode
48+
binary_type=str
4749
range=xrange
4850
defmviter(d):
4951
returnd.itervalues()

‎git/diff.py‎

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
fromgitdb.utilimporthex_to_bin
99

10+
from .compatimportbinary_type
1011
from .objects.blobimportBlob
1112
from .objects.utilimportmode_str_to_int
1213

@@ -245,18 +246,20 @@ class Diff(object):
245246
NULL_HEX_SHA="0"*40
246247
NULL_BIN_SHA=b"\0"*20
247248

248-
__slots__= ("a_blob", "b_blob", "a_mode", "b_mode", "a_path", "b_path",
249-
"new_file", "deleted_file", "rename_from", "rename_to", "diff")
249+
__slots__= ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath",
250+
"new_file", "deleted_file", "raw_rename_from", "raw_rename_to", "diff")
250251

251-
def__init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
252-
b_mode, new_file, deleted_file, rename_from,
253-
rename_to, diff):
252+
def__init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
253+
b_mode, new_file, deleted_file, raw_rename_from,
254+
raw_rename_to, diff):
254255

255256
self.a_mode=a_mode
256257
self.b_mode=b_mode
257258

258-
self.a_path=a_path
259-
self.b_path=b_path
259+
asserta_rawpathisNoneorisinstance(a_rawpath, binary_type)
260+
assertb_rawpathisNoneorisinstance(b_rawpath, binary_type)
261+
self.a_rawpath=a_rawpath
262+
self.b_rawpath=b_rawpath
260263

261264
ifself.a_mode:
262265
self.a_mode=mode_str_to_int(self.a_mode)
@@ -266,19 +269,21 @@ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
266269
ifa_blob_idisNoneora_blob_id==self.NULL_HEX_SHA:
267270
self.a_blob=None
268271
else:
269-
self.a_blob=Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
272+
self.a_blob=Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)
270273

271274
ifb_blob_idisNoneorb_blob_id==self.NULL_HEX_SHA:
272275
self.b_blob=None
273276
else:
274-
self.b_blob=Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
277+
self.b_blob=Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)
275278

276279
self.new_file=new_file
277280
self.deleted_file=deleted_file
278281

279282
# be clear and use None instead of empty strings
280-
self.rename_from=rename_fromorNone
281-
self.rename_to=rename_toorNone
283+
assertraw_rename_fromisNoneorisinstance(raw_rename_from, binary_type)
284+
assertraw_rename_toisNoneorisinstance(raw_rename_to, binary_type)
285+
self.raw_rename_from=raw_rename_fromorNone
286+
self.raw_rename_to=raw_rename_toorNone
282287

283288
self.diff=diff
284289

@@ -344,6 +349,22 @@ def __str__(self):
344349
# end
345350
returnres
346351

352+
@property
353+
defa_path(self):
354+
returnself.a_rawpath.decode(defenc, 'replace') ifself.a_rawpathelseNone
355+
356+
@property
357+
defb_path(self):
358+
returnself.b_rawpath.decode(defenc, 'replace') ifself.b_rawpathelseNone
359+
360+
@property
361+
defrename_from(self):
362+
returnself.raw_rename_from.decode(defenc, 'replace') ifself.raw_rename_fromelseNone
363+
364+
@property
365+
defrename_to(self):
366+
returnself.raw_rename_to.decode(defenc, 'replace') ifself.raw_rename_toelseNone
367+
347368
@property
348369
defrenamed(self):
349370
""":returns: True if the blob of our diff has been renamed
@@ -388,6 +409,7 @@ def _index_from_patch_format(cls, repo, stream):
388409
new_file_mode, deleted_file_mode, \
389410
a_blob_id, b_blob_id, b_mode, \
390411
a_path, b_path=header.groups()
412+
391413
new_file, deleted_file=bool(new_file_mode), bool(deleted_file_mode)
392414

393415
a_path=cls._pick_best_path(a_path, rename_from, a_path_fallback)
@@ -404,15 +426,15 @@ def _index_from_patch_format(cls, repo, stream):
404426
a_mode=old_modeordeleted_file_modeor (a_pathand (b_modeornew_modeornew_file_mode))
405427
b_mode=b_modeornew_modeornew_file_modeor (b_pathanda_mode)
406428
index.append(Diff(repo,
407-
a_pathanda_path.decode(defenc, 'replace'),
408-
b_pathandb_path.decode(defenc, 'replace'),
429+
a_path,
430+
b_path,
409431
a_blob_idanda_blob_id.decode(defenc),
410432
b_blob_idandb_blob_id.decode(defenc),
411433
a_modeanda_mode.decode(defenc),
412434
b_modeandb_mode.decode(defenc),
413435
new_file, deleted_file,
414-
rename_fromandrename_from.decode(defenc, 'replace'),
415-
rename_toandrename_to.decode(defenc, 'replace'),
436+
rename_from,
437+
rename_to,
416438
None))
417439

418440
previous_header=header
@@ -438,8 +460,8 @@ def _index_from_raw_format(cls, repo, stream):
438460
meta, _, path=line[1:].partition('\t')
439461
old_mode, new_mode, a_blob_id, b_blob_id, change_type=meta.split(None, 4)
440462
path=path.strip()
441-
a_path=path
442-
b_path=path
463+
a_path=path.encode(defenc)
464+
b_path=path.encode(defenc)
443465
deleted_file=False
444466
new_file=False
445467
rename_from=None
@@ -455,6 +477,8 @@ def _index_from_raw_format(cls, repo, stream):
455477
new_file=True
456478
elifchange_type[0] =='R': # parses RXXX, where XXX is a confidence value
457479
a_path, b_path=path.split('\t', 1)
480+
a_path=a_path.encode(defenc)
481+
b_path=b_path.encode(defenc)
458482
rename_from, rename_to=a_path, b_path
459483
# END add/remove handling
460484

‎git/test/test_diff.py‎

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ def test_diff_with_rename(self):
9090
assert_true(diff.renamed)
9191
assert_equal(diff.rename_from, u'Jérôme')
9292
assert_equal(diff.rename_to, u'müller')
93+
assert_equal(diff.raw_rename_from, b'J\xc3\xa9r\xc3\xb4me')
94+
assert_equal(diff.raw_rename_to, b'm\xc3\xbcller')
9395
assertisinstance(str(diff), str)
9496

9597
output=StringProcessAdapter(fixture('diff_rename_raw'))
@@ -129,7 +131,7 @@ def test_diff_index_raw_format(self):
129131
output=StringProcessAdapter(fixture('diff_index_raw'))
130132
res=Diff._index_from_raw_format(None, output.stdout)
131133
assertres[0].deleted_file
132-
assertres[0].b_path==''
134+
assertres[0].b_pathisNone
133135

134136
deftest_diff_initial_commit(self):
135137
initial_commit=self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781')
@@ -162,7 +164,9 @@ def test_diff_unsafe_paths(self):
162164
self.assertEqual(res[7].b_path, u'path/with-question-mark?')
163165
self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯')
164166
self.assertEqual(res[9].b_path, u'path/💩.txt')
167+
self.assertEqual(res[9].b_rawpath, b'path/\xf0\x9f\x92\xa9.txt')
165168
self.assertEqual(res[10].b_path, u'path/�-invalid-unicode-path.txt')
169+
self.assertEqual(res[10].b_rawpath, b'path/\x80-invalid-unicode-path.txt')
166170

167171
# The "Moves"
168172
# NOTE: The path prefixes a/ and b/ here are legit! We're actually

0 commit comments

Comments
(0)