gcc-changelog: workaround for utf8 filenames
contrib/ChangeLog: * gcc-changelog/git_commit.py: Add decode_path function. * gcc-changelog/git_email.py: Use it in order to solve utf8 encoding filename issues. * gcc-changelog/git_repository.py: Likewise. * gcc-changelog/test_email.py: Test it.
This commit is contained in:
parent
ac3966e315
commit
57706dd7e0
4 changed files with 26 additions and 15 deletions
|
@ -174,6 +174,24 @@ REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
|
|||
DATE_FORMAT = '%Y-%m-%d'
|
||||
|
||||
|
||||
def decode_path(path):
    """Return PATH with git's double-quoted escaping undone.

    A PATH that does not both start and end with a double quote is
    returned unchanged.  Otherwise the enclosing quotes are removed and
    the backslash escapes inside are decoded, with escaped bytes being
    interpreted as UTF-8.
    """
    # When core.quotepath is true (default value), utf8 chars are encoded like:
    # "b/ko\304\215ka.txt"
    #
    # The upstream bug is fixed:
    # https://github.com/gitpython-developers/GitPython/issues/1099
    #
    # but we still need a workaround for older versions of the library.
    # Please take a look at the explanation of the transformation:
    # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string
    if not (path.startswith('"') and path.endswith('"')):
        return path

    # Remove exactly one quote from each side.  str.strip('"') would also
    # swallow the quote of a trailing \" escape, leaving a lone backslash
    # that makes the unicode-escape decoding below fail.
    quoted = path[1:-1]

    # unicode-escape maps every escaped byte to the code point of the same
    # value; encoding as latin-1 restores the raw bytes, which are then
    # decoded as UTF-8.
    return (quoted.encode('utf8').decode('unicode-escape')
            .encode('latin-1').decode('utf8'))
|
||||
|
||||
|
||||
class Error:
|
||||
def __init__(self, message, line=None):
|
||||
self.message = message
|
||||
|
@ -303,14 +321,6 @@ class GitCommit:
|
|||
'separately from normal commits'))
|
||||
return
|
||||
|
||||
# check for an encoded utf-8 filename
|
||||
hint = 'git config --global core.quotepath false'
|
||||
for modified, _ in self.info.modified_files:
|
||||
if modified.startswith('"') or modified.endswith('"'):
|
||||
self.errors.append(Error('Quoted UTF8 filename, please set: '
|
||||
f'"{hint}"', modified))
|
||||
return
|
||||
|
||||
all_are_ignored = (len(project_files) + len(ignored_files)
|
||||
== len(self.info.modified_files))
|
||||
self.parse_lines(all_are_ignored)
|
||||
|
|
|
@ -22,7 +22,7 @@ from itertools import takewhile
|
|||
|
||||
from dateutil.parser import parse
|
||||
|
||||
from git_commit import GitCommit, GitInfo
|
||||
from git_commit import GitCommit, GitInfo, decode_path
|
||||
|
||||
from unidiff import PatchSet, PatchedFile
|
||||
|
||||
|
@ -52,8 +52,8 @@ class GitEmail(GitCommit):
|
|||
modified_files = []
|
||||
for f in diff:
|
||||
# Strip "a/" and "b/" prefixes
|
||||
source = f.source_file[2:]
|
||||
target = f.target_file[2:]
|
||||
source = decode_path(f.source_file)[2:]
|
||||
target = decode_path(f.target_file)[2:]
|
||||
|
||||
if f.is_added_file:
|
||||
t = 'A'
|
||||
|
|
|
@ -26,7 +26,7 @@ except ImportError:
|
|||
print(' Debian, Ubuntu: python3-git')
|
||||
exit(1)
|
||||
|
||||
from git_commit import GitCommit, GitInfo
|
||||
from git_commit import GitCommit, GitInfo, decode_path
|
||||
|
||||
|
||||
def parse_git_revisions(repo_path, revisions, strict=True):
|
||||
|
@ -51,11 +51,11 @@ def parse_git_revisions(repo_path, revisions, strict=True):
|
|||
# Consider that renamed files are two operations:
|
||||
# the deletion of the original name
|
||||
# and the addition of the new one.
|
||||
modified_files.append((file.a_path, 'D'))
|
||||
modified_files.append((decode_path(file.a_path), 'D'))
|
||||
t = 'A'
|
||||
else:
|
||||
t = 'M'
|
||||
modified_files.append((file.b_path, t))
|
||||
modified_files.append((decode_path(file.b_path), t))
|
||||
|
||||
date = datetime.utcfromtimestamp(c.committed_date)
|
||||
author = '%s <%s>' % (c.author.name, c.author.email)
|
||||
|
|
|
@ -402,4 +402,5 @@ class TestGccChangelog(unittest.TestCase):
|
|||
|
||||
def test_bad_unicode_chars_in_filename(self):
|
||||
email = self.from_patch_glob('0001-Add-horse2.patch')
|
||||
assert email.errors[0].message.startswith('Quoted UTF8 filename')
|
||||
assert not email.errors
|
||||
assert email.changelog_entries[0].files == ['koníček.txt']
|
||||
|
|
Loading…
Add table
Reference in a new issue