Changeset - fd61f678577f
[Not reviewed]
default
0 2 1
Mads Kiilerich (kiilerix) - 4 years ago 2020-11-14 14:20:40
mads@kiilerich.com
Grafted from: 9ae72340369d
diff: improved handling of Git diffs with " quoting

Kallithea would intentionally and explicitly fail with an ugly exception when
trying to parse Git diffs with quoted filenames.

Improve this by parsing quotes ... and ignore them, as long as they are
matching. The content inside the quotes might be \-escaped ... but that could
potentially also be the case without quoting. We will fix that later.

Adding some test cases that before would have failed to parse and raised an
exception.

Thanks to stypr of Flatt Security for raising this.
3 files changed with 112 insertions and 3 deletions:
0 comments (0 inline, 0 general) First comment
kallithea/lib/diffs.py
Show inline comments
 
@@ -511,7 +511,7 @@ def _escaper(diff_line):
 

	
 

	
 
_git_header_re = re.compile(br"""
 
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
    ^diff[ ]--git[ ](?P<a_path_quote>"?)a/(?P<a_path>.+?)(?P=a_path_quote)[ ](?P<b_path_quote>"?)b/(?P<b_path>.+?)(?P=a_path_quote)\n
 
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
@@ -522,8 +522,8 @@ def _escaper(diff_line):
 
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^---[ ](?P<a_file_quote>"?)(a/(?P<a_file>.+?)(?P=a_file_quote)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](?P<b_file_quote>"?)(b/(?P<b_file>.+?)(?P=b_file_quote)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
kallithea/tests/fixtures/git_diff_quoting.diff
Show inline comments
 
new file 100644
 
diff --git "a/\"foo\"" "b/\"foo\""
 
new file mode 100644
 
index 0000000..8b13789
 
--- /dev/null
 
+++ "b/\"foo\""
 
@@ -0,0 +1 @@
 
+
 
diff --git a/'foo' b/'foo'
 
new file mode 100644
 
index 0000000..8b13789
 
--- /dev/null
 
+++ b/'foo'
 
@@ -0,0 +1 @@
 
+
 
diff --git "a/'foo'\"foo\"" "b/'foo'\"foo\""
 
new file mode 100644
 
index 0000000..8b13789
 
--- /dev/null
 
+++ "b/'foo'\"foo\""
 
@@ -0,0 +1 @@
 
+
 
diff --git "a/a\r\nb" "b/a\r\nb"
 
new file mode 100644
 
index 0000000..30d74d2
 
--- /dev/null
 
+++ "b/a\r\nb"
 
@@ -0,0 +1 @@
 
+test
 
\ No newline at end of file
 
diff --git "a/foo\rfoo" "b/foo\rfoo"
 
new file mode 100644
 
index 0000000..e69de29
 
diff --git a/foo bar b/foo bar
 
new file mode 100644
 
index 0000000..219ea2b
 
--- /dev/null
 
+++ b/foo bar	
 
@@ -0,0 +1 @@
 
+foo  bar
 
\ No newline at end of file
 
diff --git a/test b/test
 
new file mode 100644
 
index 0000000..9daeafb
 
--- /dev/null
 
+++ b/test
 
@@ -0,0 +1 @@
 
+test
 
diff --git "a/esc\033foo" "b/esc\033foo"
 
new file mode 100644
 
index 0000000..e69de29
 
diff --git "a/tab\tfoo" "b/tab\tfoo"
 
new file mode 100644
 
index 0000000..e69de29
kallithea/tests/models/test_diff_parsers.py
Show inline comments
 
@@ -268,6 +268,62 @@ DIFF_FIXTURES = {
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
 
    ],
 
    'git_diff_quoting.diff': [
 
        (r'\"foo\"',  # TODO: quotes should not be escaped
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        ("'foo'",
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        ("'foo'" r'\"foo\"',  # TODO: quotes should not be escaped
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        (r'a\r\nb',  # TODO: escaped
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        (r'foo\rfoo',  # TODO: escaped
 
         'added',
 
        {'added': 0,
 
         'binary': True,
 
         'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        ('foo bar',
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        ('test',
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        (r'esc\033foo',  # TODO: escaped
 
         'added',
 
         {'added': 0,
 
          'binary': True,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        (r'tab\tfoo',  # TODO: escaped
 
         'added',
 
         {'added': 0,
 
          'binary': True,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
    ],
 
}
 

	
 

	
0 comments (0 inline, 0 general) First comment
You need to be logged in to comment. Login now