From 0af037b63c906a28d9c045ba3ccb9e4247a2a9e0 Mon Sep 17 00:00:00 2001
From: Nicco Kunzmann
Date: Tue, 21 Nov 2023 14:53:47 +0000
Subject: [PATCH] Add scripts and tests to reproduce fuzzer errors in pytest

- move fuzzing tests into the test folder
- create a script that runs the fuzzer and extracts the test case if the test fails
---
Review notes (this section is ignored by `git am`):
- fuzz_calendar_v1 no longer calls the removed _fuzz_calendar helper and
  serializes each parsed calendar instead of the list returned for multiple=True.
- the generator script writes the decoded calendar to $ICS_FILE instead of /dev/null.
- the blob ids on the `index` lines still refer to the originally posted content.

 .gitignore                                    |  1 +
 src/icalendar/fuzzing/ical_fuzzer.py          | 24 ++++------
 src/icalendar/tests/conftest.py               | 20 ++++++---
 src/icalendar/tests/fuzzed/__init__.py        | 25 +++++++++++
 ..._from_downloaded_clusterfuzz_test_cases.sh | 45 +++++++++++++++++++
 .../tests/fuzzed/test_fuzzed_calendars.py     | 13 ++++++
 6 files changed, 106 insertions(+), 22 deletions(-)
 create mode 100644 src/icalendar/tests/fuzzed/__init__.py
 create mode 100755 src/icalendar/tests/fuzzed/generate_python_test_cases_from_downloaded_clusterfuzz_test_cases.sh
 create mode 100644 src/icalendar/tests/fuzzed/test_fuzzed_calendars.py

diff --git a/.gitignore b/.gitignore
index 76c5611..61c8bde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ src/icalendar.egg-info/
 !.github
 !.gitignore
 venv
+/ical_fuzzer.pkg.spec
diff --git a/src/icalendar/fuzzing/ical_fuzzer.py b/src/icalendar/fuzzing/ical_fuzzer.py
index 8c73ddb..b4e3480 100644
--- a/src/icalendar/fuzzing/ical_fuzzer.py
+++ b/src/icalendar/fuzzing/ical_fuzzer.py
@@ -16,9 +16,11 @@
 ################################################################################
 import atheris
 import sys
+import base64
 
 with atheris.instrument_imports():
     import icalendar
+    from icalendar.tests.fuzzed import fuzz_calendar_v1
 
 _value_error_matches = [
     "component", "parse", "Expected", "Wrong date format", "END encountered",
@@ -30,28 +32,19 @@ _value_error_matches = [
 ]
 
 
-def _fuzz_calendar(cal: icalendar.Calendar, should_walk: bool):
-    if should_walk:
-        for event in cal.walk('VEVENT'):
-            event.to_ical()
-    else:
-        cal.to_ical()
-
-
 @atheris.instrument_func
 def TestOneInput(data):
+    print("sys.argv: ", sys.argv)
     fdp = atheris.FuzzedDataProvider(data)
     try:
         multiple = fdp.ConsumeBool()
         should_walk = fdp.ConsumeBool()
+        calendar_string = fdp.ConsumeString(fdp.remaining_bytes())
+        print("--- start calendar ---")
+        print(base64.b64encode(calendar_string.encode("UTF-8")).decode("ASCII"))
+        print("--- end calendar ---")
 
-        cal = icalendar.Calendar.from_ical(fdp.ConsumeString(fdp.remaining_bytes()), multiple=multiple)
-
-        if multiple:
-            for c in cal:
-                _fuzz_calendar(c, should_walk)
-        else:
-            _fuzz_calendar(cal, should_walk)
+        fuzz_calendar_v1(icalendar.Calendar.from_ical, calendar_string, multiple, should_walk)
     except ValueError as e:
         if any(m in str(e) for m in _value_error_matches):
             return -1
@@ -65,4 +58,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/src/icalendar/tests/conftest.py b/src/icalendar/tests/conftest.py
index 4a954a2..b9e12d0 100644
--- a/src/icalendar/tests/conftest.py
+++ b/src/icalendar/tests/conftest.py
@@ -19,9 +19,9 @@ class DataSource:
         """Return all the files that could be used."""
         return [file[:-4] for file in os.listdir(self._data_source_folder) if file.lower().endswith(".ics")]
 
-    def __getattr__(self, attribute):
+    def __getitem__(self, attribute):
         """Parse a file and return the result stored in the attribute."""
-        source_file = attribute.replace('-', '_') + '.ics'
+        source_file = attribute + '.ics'
         source_path = os.path.join(self._data_source_folder, source_file)
         if not os.path.isfile(source_path):
             raise AttributeError(f"{source_path} does not exist.")
@@ -33,8 +33,8 @@ class DataSource:
         self.__dict__[attribute] = source
         return source
 
-    def __getitem__(self, key):
-        return getattr(self, key)
+    def __getattr__(self, key):
+        return self[key]
 
     def __repr__(self):
         return repr(self.__dict__)
@@ -82,7 +82,7 @@ def in_timezone(request):
     return request.param
 
 
-@pytest.fixture(params=[
+ICS_FILES = [
     (data, key)
     for data in [CALENDARS, TIMEZONES, EVENTS]
     for key in data.keys() if key not in
@@ -90,9 +90,17 @@ def in_timezone(request):
         "big_bad_calendar", "issue_104_broken_calendar", "small_bad_calendar",
         "multiple_calendar_components", "pr_480_summary_with_colon"
     )
-])
+]
+@pytest.fixture(params=ICS_FILES)
 def ics_file(request):
     """An example ICS file."""
     data, key = request.param
     print(key)
     return data[key]
+
+
+FUZZ_V1 = [os.path.join(CALENDARS_FOLDER, key) for key in os.listdir(CALENDARS_FOLDER) if "fuzz-testcase" in key]
+@pytest.fixture(params=FUZZ_V1)
+def fuzz_v1_calendar(request):
+    """Clusterfuzz calendars."""
+    return request.param
diff --git a/src/icalendar/tests/fuzzed/__init__.py b/src/icalendar/tests/fuzzed/__init__.py
new file mode 100644
index 0000000..daf3470
--- /dev/null
+++ b/src/icalendar/tests/fuzzed/__init__.py
@@ -0,0 +1,25 @@
+"""This is a collection of test files that are generated from the fuzzer.
+
+The fuzzer finds the cases in which the icalendar module breaks.
+These test cases reproduce the failure.
+Some more tests can be added to make sure that the behavior works properly.
+"""
+
+def fuzz_calendar_v1(from_ical, calendar_string: str, multiple: bool, should_walk: bool):
+    """Take a from_ical function and reproduce the error.
+
+    The calendar_string is a fuzzed input.
+    With multiple, the parsed result is treated as a sequence of calendars.
+    With should_walk, every VEVENT is serialized on its own;
+    otherwise each calendar is serialized as a whole.
+    """
+    cal = from_ical(calendar_string, multiple=multiple)
+
+    # Wrap a single calendar so that both cases share one code path.
+    calendars = cal if multiple else [cal]
+    for c in calendars:
+        if should_walk:
+            for event in c.walk('VEVENT'):
+                event.to_ical()
+        else:
+            c.to_ical()
diff --git a/src/icalendar/tests/fuzzed/generate_python_test_cases_from_downloaded_clusterfuzz_test_cases.sh b/src/icalendar/tests/fuzzed/generate_python_test_cases_from_downloaded_clusterfuzz_test_cases.sh
new file mode 100755
index 0000000..1b5eb22
--- /dev/null
+++ b/src/icalendar/tests/fuzzed/generate_python_test_cases_from_downloaded_clusterfuzz_test_cases.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+#
+# This script generates a test case from a test case file that was downloaded.
+#
+# You will need to follow the setup instructions here:
+# https://google.github.io/oss-fuzz/advanced-topics/reproducing/#reproduce-using-local-source-checkout
+#
+set -e
+
+HERE="`dirname \"$0\"`"
+OSS_FUZZ_DIRECTORY="$HOME/oss-fuzz"
+DOWNLOADS_DIRECTORY="$HOME/Downloads"
+LOCAL_ICALENDAR_DIRECTORY="$HERE/../../../../"
+PYTHON_TEST_CASE_DIRECTORY="$HERE/../calendars/"
+PROJECT_NAME="icalendar"
+
+echo "### Building Project $PROJECT_NAME"
+python "$OSS_FUZZ_DIRECTORY/infra/helper.py" build_fuzzers --sanitizer undefined "$PROJECT_NAME" "$LOCAL_ICALENDAR_DIRECTORY"
+
+# we capture the output
+OUTPUT="`mktemp`"
+
+# test case files look like this:
+# clusterfuzz-testcase-minimized-ical_fuzzer-4878676239712256
+for testcase in "$DOWNLOADS_DIRECTORY/clusterfuzz-testcase-"*
+do
+  echo "### Reproducing $testcase"
+  python "$OSS_FUZZ_DIRECTORY/infra/helper.py" reproduce "$PROJECT_NAME" ical_fuzzer "$testcase" | tee "$OUTPUT"
+  if [ $PIPESTATUS -eq 0 ]
+  then
+    echo "### Testcase fixed! $testcase"
+    continue
+  fi
+  echo "### Testcase reproduced! $testcase"
+  TEST_FILE_CONTENT="`cat \"$OUTPUT\" | sed -n '/--- start calendar ---/,/--- end calendar ---/{/--- start calendar ---/b;/--- end calendar ---/b;p}'`"
+  if [ -z "$TEST_FILE_CONTENT" ]
+  then
+    echo "### No test file content for $testcase"
+    exit 1
+  fi
+  ICS_FILE="$PYTHON_TEST_CASE_DIRECTORY/`basename \"$testcase\"`.ics"
+  # decode and ignore garbage, see https://stackoverflow.com/a/15490765/1320237
+  echo "$TEST_FILE_CONTENT" | base64 -di > "$ICS_FILE"
+  echo "Created $ICS_FILE"
+done
diff --git a/src/icalendar/tests/fuzzed/test_fuzzed_calendars.py b/src/icalendar/tests/fuzzed/test_fuzzed_calendars.py
new file mode 100644
index 0000000..c38d36f
--- /dev/null
+++ b/src/icalendar/tests/fuzzed/test_fuzzed_calendars.py
@@ -0,0 +1,13 @@
+"""This test tests all fuzzed calendars."""
+from icalendar.tests.fuzzed import fuzz_calendar_v1
+import icalendar
+
+def test_fuzz_v1(fuzz_v1_calendar):
+    """Test a calendar."""
+    with open(fuzz_v1_calendar, "rb") as f:
+        fuzz_calendar_v1(
+            icalendar.Calendar.from_ical,
+            f.read(),
+            multiple=True,
+            should_walk=True
+        )