Skip to content

Commit a621ff0

Browse files
authored
Merge pull request #1950 from DaveLak/fix-fuzz-submodules-filename-exception
Fix Several Bugs in the `fuzz_submodule` Causing a lot of False Alarms in the OSS-Fuzz Bug Tracker
2 parents 855befa + 2ed3334 commit a621ff0

File tree

5 files changed

+118
-33
lines changed

5 files changed

+118
-33
lines changed

‎fuzzing/fuzz-targets/fuzz_submodule.py‎

Lines changed: 16 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,16 @@
33
importos
44
importtempfile
55
fromconfigparserimportParsingError
6-
fromutilsimportis_expected_exception_message, get_max_filename_length
7-
8-
ifgetattr(sys, "frozen", False) andhasattr(sys, "_MEIPASS"): # pragma: no cover
9-
path_to_bundled_git_binary=os.path.abspath(os.path.join(os.path.dirname(__file__), "git"))
10-
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] =path_to_bundled_git_binary
11-
6+
fromutilsimport (
7+
setup_git_environment,
8+
handle_exception,
9+
get_max_filename_length,
10+
)
11+
12+
# Setup the git environment
13+
setup_git_environment()
1214
fromgitimportRepo, GitCommandError, InvalidGitRepositoryError
1315

14-
ifnotsys.warnoptions: # pragma: no cover
15-
# The warnings filter below can be overridden by passing the -W option
16-
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
17-
importwarnings
18-
importlogging
19-
20-
# Fuzzing data causes some modules to generate a large number of warnings
21-
# which are not usually interesting and make the test output hard to read, so we ignore them.
22-
warnings.simplefilter("ignore")
23-
logging.getLogger().setLevel(logging.ERROR)
24-
2516

2617
defTestOneInput(data):
2718
fdp=atheris.FuzzedDataProvider(data)
@@ -35,12 +26,13 @@ def TestOneInput(data):
3526
sub_repo=Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool())
3627
sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512)))
3728

38-
submodule_name=f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}"
29+
submodule_name=fdp.ConsumeUnicodeNoSurrogates(
30+
fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir)))
31+
)
3932
submodule_path=os.path.join(repo.working_tree_dir, submodule_name)
40-
submodule_url=sub_repo.git_dir
4133

42-
submodule=repo.create_submodule(submodule_name, submodule_path, url=submodule_url)
43-
repo.index.commit(f"Added submodule{submodule_name}")
34+
submodule=repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir)
35+
repo.index.commit("Added submodule")
4436

4537
withsubmodule.config_writer() aswriter:
4638
key_length=fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes()))
@@ -88,18 +80,11 @@ def TestOneInput(data):
8880
BrokenPipeError,
8981
):
9082
return-1
91-
exceptValueErrorase:
92-
expected_messages= [
93-
"SHA is empty",
94-
"Reference at",
95-
"embedded null byte",
96-
"This submodule instance does not exist anymore",
97-
"cmd stdin was empty",
98-
]
99-
ifis_expected_exception_message(e, expected_messages):
83+
exceptExceptionase:
84+
ifisinstance(e, ValueError) and"embedded null byte"instr(e):
10085
return-1
10186
else:
102-
raisee
87+
returnhandle_exception(e)
10388

10489

10590
defmain():

‎fuzzing/fuzz-targets/utils.py‎

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
importatheris# pragma: no cover
22
importos# pragma: no cover
3-
fromtypingimportList# pragma: no cover
3+
importre# pragma: no cover
4+
importtraceback# pragma: no cover
5+
importsys# pragma: no cover
6+
fromtypingimportSet, Tuple, List# pragma: no cover
47

58

69
@atheris.instrument_func
@@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover
3538
int: The maximum filename length.
3639
"""
3740
returnos.pathconf(path, "PC_NAME_MAX")
41+
42+
43+
@atheris.instrument_func
44+
defread_lines_from_file(file_path: str) ->list:
45+
"""Read lines from a file and return them as a list."""
46+
try:
47+
withopen(file_path, "r") asf:
48+
return [line.strip() forlineinfifline.strip()]
49+
exceptFileNotFoundError:
50+
print(f"File not found: {file_path}")
51+
return []
52+
exceptIOErrorase:
53+
print(f"Error reading file {file_path}: {e}")
54+
return []
55+
56+
57+
@atheris.instrument_func
58+
defload_exception_list(file_path: str="explicit-exceptions-list.txt") ->Set[Tuple[str, str]]:
59+
"""Load and parse the exception list from a default or specified file."""
60+
try:
61+
bundle_dir=os.path.dirname(os.path.abspath(__file__))
62+
full_path=os.path.join(bundle_dir, file_path)
63+
lines=read_lines_from_file(full_path)
64+
exception_list: Set[Tuple[str, str]] =set()
65+
forlineinlines:
66+
match=re.match(r"(.+):(\d+):", line)
67+
ifmatch:
68+
file_path: str=match.group(1).strip()
69+
line_number: str=str(match.group(2).strip())
70+
exception_list.add((file_path, line_number))
71+
returnexception_list
72+
exceptExceptionase:
73+
print(f"Error loading exception list: {e}")
74+
returnset()
75+
76+
77+
@atheris.instrument_func
78+
defmatch_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) ->bool:
79+
"""Match exception traceback with the entries in the exception list."""
80+
forfilename, lineno, _, _intraceback.extract_tb(exc_traceback):
81+
forfile_pattern, line_patterninexception_list:
82+
# Ensure filename and line_number are strings for regex matching
83+
ifre.fullmatch(file_pattern, filename) andre.fullmatch(line_pattern, str(lineno)):
84+
returnTrue
85+
returnFalse
86+
87+
88+
@atheris.instrument_func
89+
defcheck_exception_against_list(exc_traceback, exception_file: str="explicit-exceptions-list.txt") ->bool:
90+
"""Check if the exception traceback matches any entry in the exception list."""
91+
exception_list=load_exception_list(exception_file)
92+
returnmatch_exception_with_traceback(exception_list, exc_traceback)
93+
94+
95+
@atheris.instrument_func
96+
defhandle_exception(e: Exception) ->int:
97+
"""Encapsulate exception handling logic for reusability."""
98+
exc_traceback=e.__traceback__
99+
ifcheck_exception_against_list(exc_traceback):
100+
return-1
101+
else:
102+
raisee
103+
104+
105+
@atheris.instrument_func
106+
defsetup_git_environment() ->None:
107+
"""Set up the environment variables for Git."""
108+
bundle_dir=os.path.dirname(os.path.abspath(__file__))
109+
ifgetattr(sys, "frozen", False) andhasattr(sys, "_MEIPASS"): # pragma: no cover
110+
bundled_git_binary_path=os.path.join(bundle_dir, "git")
111+
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] =bundled_git_binary_path
112+
113+
ifnotsys.warnoptions: # pragma: no cover
114+
# The warnings filter below can be overridden by passing the -W option
115+
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
116+
importwarnings
117+
importlogging
118+
119+
# Fuzzing data causes some modules to generate a large number of warnings
120+
# which are not usually interesting and make the test output hard to read, so we ignore them.
121+
warnings.simplefilter("ignore")
122+
logging.getLogger().setLevel(logging.ERROR)

‎fuzzing/oss-fuzz-scripts/build.sh‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \
1515

1616
# Build fuzzers in $OUT.
1717
find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 |while IFS= read -r -d '' fuzz_harness;do
18-
compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):."
18+
compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:."
1919
done

‎fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh‎

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora"
9191

9292
prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries""$SRC/gitpython/fuzzing"
9393

94+
pushd"$SRC/gitpython/"
95+
# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the
96+
# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'.
97+
# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise
98+
# anticipated exceptions to reduce false positive test failures.
99+
100+
git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**'>"$SRC/explicit-exceptions-list.txt"
101+
102+
popd
103+
104+
94105
# The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below.
95106
python3 -m pip install --upgrade pip
96107
# Upgrade to the latest versions known to work at the time the below changes were introduced:

‎pyproject.toml‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ lint.unfixable = [
7878
"test/**" = [
7979
"B018", # useless-expression
8080
]
81+
"fuzzing/fuzz-targets/**" = [
82+
"E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file
83+
]
84+
8185

8286
[tool.codespell]
8387
ignore-words-list="gud,doesnt"

0 commit comments

Comments
(0)