Add scripts and tests to reproduce fuzzer errors in pytest

- move fuzzing tests into the test folder
- create a script that runs the fuzzer and extracts the test case if the test fails
2023-11-21 14:53:47 +00:00 · 2023-11-21 14:53:47 +00:00 · 0af037b63c
commit 0af037b63c
--- a/.gitignore
+++ b/.gitignore
@ -21,3 +21,4 @@ src/icalendar.egg-info/
 !.github
 !.gitignore
 venv
+/ical_fuzzer.pkg.spec
--- a/src/icalendar/fuzzing/ical_fuzzer.py
+++ b/src/icalendar/fuzzing/ical_fuzzer.py
@ -16,9 +16,11 @@
 ################################################################################
 import atheris
 import sys
+import base64

 with atheris.instrument_imports():
    import icalendar
+    from icalendar.tests.fuzzed import fuzz_calendar_v1

 _value_error_matches = [
    "component", "parse", "Expected", "Wrong date format", "END encountered",
@ -30,28 +32,19 @@ _value_error_matches = [
 ]


-def _fuzz_calendar(cal: icalendar.Calendar, should_walk: bool):
-    if should_walk:
-        for event in cal.walk('VEVENT'):
-            event.to_ical()
-    else:
-        cal.to_ical()
-
-
@atheris.instrument_func
 def TestOneInput(data):
+    print("sys.argv: ", sys.argv)
    fdp = atheris.FuzzedDataProvider(data)
    try:
        multiple = fdp.ConsumeBool()
        should_walk = fdp.ConsumeBool()
+        calendar_string = fdp.ConsumeString(fdp.remaining_bytes())
+        print("--- start calendar ---")
+        print(base64.b64encode(calendar_string.encode("UTF-8")).decode("ASCII"))
+        print("--- end calendar ---")

-        cal = icalendar.Calendar.from_ical(fdp.ConsumeString(fdp.remaining_bytes()), multiple=multiple)
-
-        if multiple:
-            for c in cal:
-                _fuzz_calendar(c, should_walk)
-        else:
-            _fuzz_calendar(cal, should_walk)
+        fuzz_calendar_v1(icalendar.Calendar.from_ical, calendar_string, multiple, should_walk)
    except ValueError as e:
        if any(m in str(e) for m in _value_error_matches):
            return -1
@ -65,4 +58,3 @@ def main():

 if __name__ == "__main__":
    main()
-
--- a/src/icalendar/tests/conftest.py
+++ b/src/icalendar/tests/conftest.py
@ -19,9 +19,9 @@ class DataSource:
        """Return all the files that could be used."""
        return [file[:-4] for file in os.listdir(self._data_source_folder) if file.lower().endswith(".ics")]

-    def __getattr__(self, attribute):
+    def __getitem__(self, attribute):
        """Parse a file and return the result stored in the attribute."""
-        source_file = attribute.replace('-', '_') + '.ics'
+        source_file = attribute + '.ics'
        source_path = os.path.join(self._data_source_folder, source_file)
        if not os.path.isfile(source_path):
            raise AttributeError(f"{source_path} does not exist.")
@ -33,8 +33,8 @@ class DataSource:
        self.__dict__[attribute] = source
        return source

-    def __getitem__(self, key):
-        return getattr(self, key)
+    def __getattr__(self, key):
+        return self[key]  # attribute access falls back to item lookup via __getitem__

    def __repr__(self):
        return repr(self.__dict__)
@ -82,7 +82,7 @@ def in_timezone(request):
    return request.param


-@pytest.fixture(params=[
+ICS_FILES = [
    (data, key)
    for data in [CALENDARS, TIMEZONES, EVENTS]
    for key in data.keys() if key not in
@ -90,9 +90,17 @@ def in_timezone(request):
        "big_bad_calendar", "issue_104_broken_calendar", "small_bad_calendar",
        "multiple_calendar_components", "pr_480_summary_with_colon"
    )
-])
+]
+@pytest.fixture(params=ICS_FILES)
 def ics_file(request):
    """An example ICS file."""
    data, key = request.param
    print(key)
    return data[key]
+
+
+FUZZ_V1 = [os.path.join(CALENDARS_FOLDER, name) for name in os.listdir(CALENDARS_FOLDER) if "fuzz-testcase" in name]
+@pytest.fixture(params=FUZZ_V1)
+def fuzz_v1_calendar(request):
+    """Path of one file in CALENDARS_FOLDER whose name contains "fuzz-testcase"."""
+    return request.param
--- a/src/icalendar/tests/fuzzed/__init__.py
+++ b/src/icalendar/tests/fuzzed/__init__.py
@ -0,0 +1,25 @@
+"""This is a collection of test files that are generated from the fuzzer.
+
+The fuzzer finds the cases in which the icalendar module breaks.
+These test cases reproduce the failure.
+Some more tests can be added to make sure that the behavior works properly.
+"""
+
+def fuzz_calendar_v1(from_ical, calendar_string: str, multiple: bool, should_walk: bool):
+    """Take a from_ical function and reproduce the error.
+
+    The calendar_string is a fuzzed input.
+
+    If multiple is true, from_ical's result is iterated as several calendars.
+    If should_walk is true, each VEVENT is serialized instead of the calendar.
+    """
+    cal = from_ical(calendar_string, multiple=multiple)
+
+    # Normalize to a list so both modes share one code path.
+    calendars = cal if multiple else [cal]
+    for c in calendars:
+        if should_walk:
+            for event in c.walk('VEVENT'):
+                event.to_ical()
+        else:
+            c.to_ical()
--- a/src/icalendar/tests/fuzzed/generate_python_test_cases_from_downloaded_clusterfuzz_test_cases.sh
+++ b/src/icalendar/tests/fuzzed/generate_python_test_cases_from_downloaded_clusterfuzz_test_cases.sh
@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+#
+# This script generates a test case from a test case file that was downloaded.
+#
+# You will need to follow the setup instructions here:
+#   https://google.github.io/oss-fuzz/advanced-topics/reproducing/#reproduce-using-local-source-checkout
+#
+set -e
+
+HERE="`dirname \"$0\"`"
+OSS_FUZZ_DIRECTORY="$HOME/oss-fuzz"
+DOWNLOADS_DIRECTORY="$HOME/Downloads"
+LOCAL_ICALENDAR_DIRECTORY="$HERE/../../../../"
+PYTHON_TEST_CASE_DIRECTORY="$HERE/../calendars/"
+PROJECT_NAME="icalendar"
+
+echo "### Building Project $PROJECT_NAME"
+python "$OSS_FUZZ_DIRECTORY/infra/helper.py" build_fuzzers --sanitizer undefined "$PROJECT_NAME" "$LOCAL_ICALENDAR_DIRECTORY"
+
+# we capture the output in a temporary file that is removed on exit
+OUTPUT="`mktemp`"
+trap 'rm -f "$OUTPUT"' EXIT
+
+# test case files look like clusterfuzz-testcase-minimized-ical_fuzzer-4878676239712256
+for testcase in "$DOWNLOADS_DIRECTORY/clusterfuzz-testcase-"*
+do
+  echo "### Reproducing $testcase"
+  python "$OSS_FUZZ_DIRECTORY/infra/helper.py" reproduce "$PROJECT_NAME" ical_fuzzer "$testcase" | tee "$OUTPUT"
+  if [ "${PIPESTATUS[0]}" -eq 0 ]  # exit status of helper.py, not of tee
+  then
+    echo "### Testcase fixed! $testcase"
+    continue
+  fi
+  echo "### Testcase reproduced! $testcase"
+  TEST_FILE_CONTENT="`cat \"$OUTPUT\" | sed -n '/--- start calendar ---/,/--- end calendar ---/{/--- start calendar ---/b;/--- end calendar ---/b;p}'`"
+  if [ -z "$TEST_FILE_CONTENT" ]
+  then
+    echo "### No test file content for $testcase"
+    exit 1
+  fi
+  ICS_FILE="$PYTHON_TEST_CASE_DIRECTORY/`basename \"$testcase\"`.ics"
+  # decode and ignore garbage, see https://stackoverflow.com/a/15490765/1320237
+  echo "$TEST_FILE_CONTENT" | base64 -di > "$ICS_FILE"
+  echo "Created $ICS_FILE"
+done
--- a/src/icalendar/tests/fuzzed/test_fuzzed_calendars.py
+++ b/src/icalendar/tests/fuzzed/test_fuzzed_calendars.py
@ -0,0 +1,13 @@
+"""This test tests all fuzzed calendars."""
+from icalendar.tests.fuzzed import fuzz_calendar_v1
+import icalendar
+
+def test_fuzz_v1(fuzz_v1_calendar):
+    """Run fuzz_calendar_v1 on the bytes of one fuzzer test case file."""
+    with open(fuzz_v1_calendar, "rb") as stream:
+        fuzzed_input = stream.read()
+    fuzz_calendar_v1(
+        icalendar.Calendar.from_ical,
+        fuzzed_input,
+        multiple=True, should_walk=True
+    )