# HG changeset patch # User Mads Kiilerich <mads@kiilerich.com> # Date 2020-11-14 14:20:40 # Node ID fd61f678577fe70d5f356ffebc233e922e2830a0 # Parent 817dcce30d67c026e144df58fc47372d44c10dfa diff: improved handling of Git diffs with " quoting Kallithea would intentionally and explicitly fail with an ugly exception when trying to parse Git diffs with quoted filenames. Improve this by parsing quotes ... and ignore them, as long as they are matching. The content inside the quotes might be \-escaped ... but that could potentially also be the case without quoting. We will fix that later. Adding some test cases that before would have failed to parse and raised an exception. Thanks to stypr of Flatt Security for raising this. diff --git a/kallithea/lib/diffs.py b/kallithea/lib/diffs.py --- a/kallithea/lib/diffs.py +++ b/kallithea/lib/diffs.py @@ -511,7 +511,7 @@ def _escaper(diff_line): _git_header_re = re.compile(br""" - ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n + ^diff[ ]--git[ ](?P<a_path_quote>"?)a/(?P<a_path>.+?)(?P=a_path_quote)[ ](?P<b_path_quote>"?)b/(?P<b_path>.+?)(?P=a_path_quote)\n (?:^old[ ]mode[ ](?P<old_mode>\d+)\n ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n @@ -522,8 +522,8 @@ def _escaper(diff_line): (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))? - (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))? - (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))? + (?:^---[ ](?P<a_file_quote>"?)(a/(?P<a_file>.+?)(?P=a_file_quote)|/dev/null)\t?(?:\n|$))? + (?:^\+\+\+[ ](?P<b_file_quote>"?)(b/(?P<b_file>.+?)(?P=b_file_quote)|/dev/null)\t?(?:\n|$))? """, re.VERBOSE | re.MULTILINE) diff --git a/kallithea/tests/fixtures/git_diff_quoting.diff b/kallithea/tests/fixtures/git_diff_quoting.diff new file mode 100644 --- /dev/null +++ b/kallithea/tests/fixtures/git_diff_quoting.diff @@ -0,0 +1,53 @@ +diff --git "a/\"foo\"" "b/\"foo\"" +new file mode 100644 +index 0000000..8b13789 +--- /dev/null ++++ "b/\"foo\"" +@@ -0,0 +1 @@ ++ +diff --git a/'foo' b/'foo' +new file mode 100644 +index 0000000..8b13789 +--- /dev/null ++++ b/'foo' +@@ -0,0 +1 @@ ++ +diff --git "a/'foo'\"foo\"" "b/'foo'\"foo\"" +new file mode 100644 +index 0000000..8b13789 +--- /dev/null ++++ "b/'foo'\"foo\"" +@@ -0,0 +1 @@ ++ +diff --git "a/a\r\nb" "b/a\r\nb" +new file mode 100644 +index 0000000..30d74d2 +--- /dev/null ++++ "b/a\r\nb" +@@ -0,0 +1 @@ ++test +\ No newline at end of file +diff --git "a/foo\rfoo" "b/foo\rfoo" +new file mode 100644 +index 0000000..e69de29 +diff --git a/foo bar b/foo bar +new file mode 100644 +index 0000000..219ea2b +--- /dev/null ++++ b/foo bar +@@ -0,0 +1 @@ ++foo bar +\ No newline at end of file +diff --git a/test b/test +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ b/test +@@ -0,0 +1 @@ ++test +diff --git "a/esc\033foo" "b/esc\033foo" +new file mode 100644 +index 0000000..e69de29 +diff --git "a/tab\tfoo" "b/tab\tfoo" +new file mode 100644 +index 0000000..e69de29 diff --git a/kallithea/tests/models/test_diff_parsers.py b/kallithea/tests/models/test_diff_parsers.py --- a/kallithea/tests/models/test_diff_parsers.py +++ b/kallithea/tests/models/test_diff_parsers.py @@ -268,6 +268,62 @@ DIFF_FIXTURES = { 'binary': False, 'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}), ], + 'git_diff_quoting.diff': [ + (r'\"foo\"', # TODO: quotes should not be escaped + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ("'foo'", + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ("'foo'" r'\"foo\"', # TODO: quotes should not be escaped + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + (r'a\r\nb', # TODO: escaped + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + (r'foo\rfoo', # TODO: escaped + 'added', + {'added': 0, + 'binary': True, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ('foo bar', + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ('test', + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + (r'esc\033foo', # TODO: escaped + 'added', + {'added': 0, + 'binary': True, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + (r'tab\tfoo', # TODO: escaped + 'added', + {'added': 0, + 'binary': True, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ], }