diff --git a/.gitignore b/.gitignore index 9c8e263..6df612a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,10 @@ target +external/GitAflplusplus +targets +**/fuzzing_campaigns* .idea .vscode **/inputs -**/traces \ No newline at end of file +**/traces +**/binutils-gdb +**/saved_corpus* \ No newline at end of file diff --git a/README.md b/README.md index 7c5fac8..6f93dcc 100644 --- a/README.md +++ b/README.md @@ -5,18 +5,22 @@ This repository contains the source code of the tool, **SeedUI: Understanding In A demonstration of SeedUI is available in [YouTube](https://youtu.be/qpPjutmIcTs). - SeedUI requires some [pre-requisites](#pre-requisites) to be installed in your system. -- To skip running AFL++ and gathering fuzzing corpus, you can directly follow the [quick instructions](#seedui-in-action-short) which uses saved corpus in the `examples` directory. -- If you would like to know the complete sequence of steps that can work on any target that can be fuzzed using AFL++, you can follow the [complete instructions](#seedui-in-action-long). +- To skip running fuzzing campaigns and gathering fuzzing corpus, you can directly follow the [quick instructions](#seedui-in-action-short) which uses saved corpus in the `examples` directory. +- If you would like to know the complete sequence of steps that can work on any target that can be fuzzed using AFL++ or Honggfuzz, you can follow the [complete instructions](#seedui-in-action-long). ## Pre-requisites: - [Rust](https://www.rust-lang.org/tools/install) - - [AflPlusPlus](https://github.com/AFLplusplus/AFLplusplus) compiled after applying `external/rename_seeds.patch`. + - [AflPlusPlus](https://github.com/AFLplusplus/AFLplusplus) compiled after applying `external/rename_seeds_afl.patch`. 
+ + [or] + + [Honggfuzz](https://github.com/google/honggfuzz.git) compiled after applying `external/rename_seeds_honggfuzz.patch` - [LLVM](https://github.com/llvm/llvm-project) >= 14.0.6 - [npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) ## SeedUI in action (short) -In general, a corpus should be extracted from AFL++ fuzzing campaigns (more information in [run-afl](#1-run-afl-fuzzing-campaign)). -However, to quickly look at SeedUI, we provide a fuzzing corpora that is extracted from two fuzzing campaigns of 5 minutes on `readelf`. +In general, a corpus should be extracted from fuzzing campaigns (more information in [run-fuzzers](#1-run-fuzzing-campaigns)). +However, to quickly look at SeedUI, we provide a fuzzing corpora that is extracted from 5 minutes fuzzing campaigns of AFL++ and Honggfuzz on `readelf`. We will use this saved corpus in the following steps. There are three steps to run SeedUI, as follows: @@ -40,7 +44,6 @@ There are three steps to run SeedUI, as follows: cd examples/readelf/ unzip saved_corpus.zip - mkdir traces python3 extract_binary_traces.py ``` @@ -54,7 +57,7 @@ There are three steps to run SeedUI, as follows: [or] cd server - cargo run -r --package server --bin server ../targets/config_ncurses_multiple_allowlist.ron + cargo run -r --package server --bin server ../examples/readelf.ron ``` ### 3. Client @@ -71,46 +74,15 @@ There are three steps to run SeedUI, as follows: ## SeedUI in action (long) There are four steps to run SeedUI, as follows: - 1. [Run AFL++ fuzzing campaign(s) on a target](#1-run-afl-fuzzing-campaign) + 1. [Run fuzzing campaign(s) on a target](#1-run-fuzzing-campaigns) 2. [Extract `drcov` traces for each of the seed saved in the corpus](#2-extract-line-coverage) 3. [Start SeedUI server](#3-server) 4. [Start SeedUI web client](#4-client) -### 1. Run AFL++ fuzzing campaign +### 1. Run fuzzing campaigns -SeedUI works on the output directory generated by AFL++. 
-After running the fuzzing campaign make sure you persist the `afl-out` directory. -In the following we explain the general knowledge of running AFL++ on a target. -We use `readelf` from `binutils-gdb` as a target. - -#### AFL++ on readelf -We have provided some example scripts for fuzzing `readelf` in the `examples` directory. -We recommend running the fuzzing campaign in a docker container using the following commands: - - Build the docker image (from the root directory of the repository): - `docker build -t readelf_seed_ui -f examples/readelf/docker/Dockerfile.readelf .` - - Start two docker containers in background one for each fuzzing run: - ``` - cd examples/readelf - docker-compose up -d - ``` - - Fuzz `readelf` for desired amount of time. Our example script `examples/readelf/docker/readelf.sh` timeouts after 5 minutes (you can edit the value for -V command line argument in line 22 and provide the desired amount of time in seconds). - ``` - # First fuzzing campaign - docker exec -it readelf_example_1 /bin/bash /home/readelf.sh /home/seeds/cmin-seeds-1 - # Second fuzzing campaign - docker exec -it readelf_example_2 /bin/bash /home/readelf.sh /home/seeds/cmin-seeds-2 - ``` - - Copy the generated corpus into host machines: - ``` - cd examples/readelf - mkdir inputs - docker cp readelf_example_1:/home/binutils-gdb/obj-afl/afl-out inputs/readelf_1 - docker cp readelf_example_2:/home/binutils-gdb/obj-afl/afl-out inputs/readelf_2 - ``` - - Stop the docker containers: - ``` - docker-compose down - ``` + - Please refer to [Afl++ documentation](./docs/aflplusplus.md) for instructions to run AFL++. + - Please refer to [Honggfuzz documentation](./docs/honggfuzz.md) for instructions to run Honggfuzz. ### 2. Extract line coverage - Compile AFL++ in your local machine. SeedUI uses `afl-qemu-trace` to extract `drcov` trace information. 
You can use our handy script in the `external` directory for this purpose or use the instructions mentioned in [AFL++ Wiki]((https://github.com/AFLplusplus/AFLplusplus/tree/stable/qemu_mode#12-coverage-information)). diff --git a/docs/aflplusplus.md b/docs/aflplusplus.md new file mode 100644 index 0000000..292a453 --- /dev/null +++ b/docs/aflplusplus.md @@ -0,0 +1,31 @@ +## SeedUI with AFL++ + +SeedUI expects the file names of the seeds generated during a fuzzing campaign to contain a unique identifier, generated time, and its parent information. +As part of this repository, we provided a patch file, in `external/rename_seeds_afl.patch`, that modifies AFL++ source code to provide this information. +You need to apply this patch to AFL++ before running the fuzzing campaign. +We provided a utility shell script, `external/compile_aflpp.sh`, for this purpose. + +SeedUI works on the output directory generated by AFL++. +After running the fuzzing campaign make sure you persist the `afl-out` directory. +In the following we explain the general knowledge of running AFL++ on a target. +We use `readelf` from `binutils-gdb` as a target. + +### AFL++ on readelf +We have provided some example scripts for fuzzing `readelf` in the `examples` directory. +We recommend running the fuzzing campaign in a docker container using the following commands: + - Build the docker image (from the root directory of the repository): + `docker build -t readelf_seed_ui -f examples/readelf/docker/Dockerfile.readelf .` + - Start a docker container and run fuzzing campaigns in background: + ``` + cd examples/readelf + docker-compose up -d + ``` + We have configured the default timeout of 5 minutes. One can change it to the desired timeout by updating the last command line parameter to the `readelf_afl.sh` script in `examples/readelf/docker-compose.yaml` file. + - After the fuzzing campaign, the generated corpus for the fuzzing run can be found in the `examples/readelf/saved_corpus`.
You may need to modify the folder permissions to access the files using the following command, + ``` + sudo chmod -R 775 saved_corpus/* + ``` + - Stop the docker containers: + ``` + docker-compose down + ``` \ No newline at end of file diff --git a/docs/honggfuzz.md b/docs/honggfuzz.md new file mode 100644 index 0000000..e30f3a2 --- /dev/null +++ b/docs/honggfuzz.md @@ -0,0 +1,40 @@ +## SeedUI with Honggfuzz + +### Run Honggfuzz inside Docker +In the following we explain the general knowledge of running Honggfuzz on a target. +We use `readelf` from `binutils-gdb` as a target. + +We have provided some example scripts for fuzzing `readelf` in the `examples` directory. +We recommend running the fuzzing campaign in a docker container using the following commands: + - The same docker image as AFL++ can be used as it also builds Honggfuzz. For convenience, we provide the docker build command here again: + `docker build -t readelf_seed_ui -f examples/readelf/docker/Dockerfile.readelf .` + - Start a docker container and run fuzzing campaigns in background: + ``` + cd examples/readelf + docker-compose up -d + ``` + We have configured the default timeout of 5 minutes. One can change it to the desired timeout by updating the last command line parameter to the `readelf_hfuzz.sh` script in `examples/readelf/docker-compose.yaml` file. + We have also configured the command to run `external/post_process_honggfuzz_inputs.py` script after the fuzzing campaign. So, you don't need to run it again. + - After the fuzzing campaign, the generated corpus for each run can be found in the `examples/readelf/saved_corpus`.
You may need to modify the folder permissions to access the files using the following command, + ``` + sudo chmod -R 775 saved_corpus/* + ``` + - Stop the docker containers: + ``` + docker-compose down + ``` + +### Run Honggfuzz locally + +SeedUI expects the file names of the seeds generated during a fuzzing campaign to contain a unique identifier, generated time, and its parent information. +As part of this repository, we provided a patch file, in `external/rename_seeds_honggfuzz.patch`, that modifies Honggfuzz source code to provide the identifier and generated time for each seed. +You need to apply this patch to Honggfuzz before running the fuzzing campaign. + +The parent information is logged in a file during the fuzzing campaign. +We provided another script, `external/post_process_honggfuzz_inputs.py`, that parses the log and adds the parent information to the seed filenames. +You need to run this script after the fuzzing campaign is finished as follows: +```bash +python3 post_process_honggfuzz_inputs.py --hfuzz-out=/path/to/hfuzz-out/ --hfuzz-log=/path/to/hfuzz.log --initial-seeds=/path/to/initial_seeds --output-folder=/path/to/store/renamed_seeds +``` + +After running the fuzzing campaign make sure you persist the `hfuzz-out` directory.
\ No newline at end of file diff --git a/examples/readelf.ron b/examples/readelf.ron index 403af96..1554853 100644 --- a/examples/readelf.ron +++ b/examples/readelf.ron @@ -8,15 +8,15 @@ UserConfig( fuzzer_infos: [ ( fuzzer_configuration_id: 1, - fuzzer_configuration: "run1", - traces_directory_path: "../examples/readelf/traces/readelf_1", - inputs_directory_path: "../examples/readelf/saved_corpus/readelf_1", + fuzzer_configuration: "afl++", + traces_directory_path: "../examples/readelf/saved_corpus/afl_traces", + inputs_directory_path: "../examples/readelf/saved_corpus/afl_run/afl-out/default/queue", ), ( fuzzer_configuration_id: 2, - fuzzer_configuration: "run2", - traces_directory_path: "../examples/readelf/traces/readelf_2", - inputs_directory_path: "../examples/readelf/saved_corpus/readelf_2", + fuzzer_configuration: "hfuzz", + traces_directory_path: "../examples/readelf/saved_corpus/hfuzz_traces", + inputs_directory_path: "../examples/readelf/saved_corpus/hfuzz_run/renamed_seeds", ) ] ) \ No newline at end of file diff --git a/examples/readelf/docker-compose.yaml b/examples/readelf/docker-compose.yaml index 64c0b27..4de6975 100644 --- a/examples/readelf/docker-compose.yaml +++ b/examples/readelf/docker-compose.yaml @@ -1,11 +1,36 @@ -version: '3.9' - services: - example: + afl_run: image: readelf_seed_ui - command: /bin/bash tty: true - scale: 2 + scale: 1 + volumes: + - ./saved_corpus/afl_run/:/home/binutils-gdb/obj-afl + command: bash -c " + cd /home/ && bash readelf_afl.sh /home/seeds/cmin-seeds-1 300 + " + ulimits: + nofile: + soft: 1000000 + hard: 1000000 + deploy: + resources: + limits: + cpus: "2" + memory: 2G + reservations: + cpus: "2" + memory: 2G + + hfuzz_run: + image: readelf_seed_ui + volumes: + - ./saved_corpus/hfuzz_run/:/home/binutils-gdb-hfuzz/obj-hfuzz + command: bash -c " + cd /home/ && bash readelf_hfuzz.sh /home/seeds/cmin-seeds-1 300 && + python3 post_process_honggfuzz_inputs.py --hfuzz-out=/home/binutils-gdb-hfuzz/obj-hfuzz/hfuzz-out 
--hfuzz-log=/home/binutils-gdb-hfuzz/obj-hfuzz/hfuzz.log --initial-seeds=/home/seeds/cmin-seeds-1 --output-folder=/home/binutils-gdb-hfuzz/obj-hfuzz/renamed_seeds + " + tty: true + scale: 1 ulimits: nofile: soft: 1000000 diff --git a/examples/readelf/docker/Dockerfile.readelf b/examples/readelf/docker/Dockerfile.readelf index 56f3449..bece132 100644 --- a/examples/readelf/docker/Dockerfile.readelf +++ b/examples/readelf/docker/Dockerfile.readelf @@ -42,10 +42,10 @@ RUN git clone https://github.com/AFLplusplus/AFLplusplus.git GitAflplusplus; \ cd GitAflplusplus; \ git checkout b89727bea903aec80d003b6764fb53c232d33d95 -COPY external/rename_seeds.patch /home/GitAflplusplus +COPY external/rename_seeds_afl.patch /home/GitAflplusplus RUN cd GitAflplusplus; \ - git apply rename_seeds.patch; \ + git apply rename_seeds_afl.patch; \ make # AFL++ settings @@ -56,10 +56,22 @@ ENV AFL_NO_UI=1 # ENV AFL_QUIET=1 ENV AFL_LLVM_INSTRUMENT=CLASSIC +RUN apt-get update && apt-get install -y binutils-dev libblocksruntime-dev libunwind-dev + +RUN git clone https://github.com/google/honggfuzz.git GitHonggfuzz; \ + cd GitHonggfuzz; \ + git checkout 7c495f834b3c4e25bd4d1b21239a404761bbb29c + +COPY external/rename_seeds_honggfuzz.patch /home/GitHonggfuzz + +RUN cd GitHonggfuzz; \ + git apply rename_seeds_honggfuzz.patch; \ + make + RUN apt-get update && \ apt install -y autoconf automake autotools-dev libtool libssl-dev libglib2.0-dev -RUN git clone git://sourceware.org/git/binutils-gdb.git +RUN git clone --depth 1 -b binutils-2_45 https://github.com/bminor/binutils-gdb.git RUN set -eux; \ cd binutils-gdb; \ @@ -68,9 +80,22 @@ RUN set -eux; \ CFLAGS="-Wno-error -DHAVE_SYS_STAT_H -DHAVE_SYS_WAIT_H -DHAVE_LIMITS_H -DHAVE_STDLIB_H -DHAVE_STRING_H -DHAVE_FCNTL_H" CC=/home/GitAflplusplus/afl-clang-fast CXX=/home/GitAflplusplus/afl-clang-fast++ ../configure --disable-shared --disable-gdb --disable-gdbserver --disable-gdbsupport --disable-libdecnumber --disable-ld --disable-gold --disable-gprof 
--disable-gprofng --disable-gas --disable-cpu --disable-intl --disable-libctf --disable-zlib --disable-texinfo --disable-sim --disable-readline --disable-libbacktrace; \ make clean && make +RUN git clone --depth 1 -b binutils-2_45 https://github.com/bminor/binutils-gdb.git binutils-gdb-hfuzz + +RUN apt-get install -y libc++-dev + +RUN set -eux; \ + cd binutils-gdb-hfuzz; \ + mkdir build; \ + cd build && \ + CFLAGS="-Wno-error -DHAVE_SYS_STAT_H -DHAVE_SYS_WAIT_H -DHAVE_LIMITS_H -DHAVE_STDLIB_H -DHAVE_STRING_H -DHAVE_FCNTL_H" CC=/home/GitHonggfuzz/hfuzz_cc/hfuzz-clang CXX=/home/GitHonggfuzz/hfuzz_cc/hfuzz-clang++ ../configure --disable-shared --disable-gdb --disable-gdbserver --disable-gdbsupport --disable-libdecnumber --disable-ld --disable-gold --disable-gprof --disable-gprofng --disable-gas --disable-cpu --disable-intl --disable-libctf --disable-zlib --disable-texinfo --disable-sim --disable-readline --disable-libbacktrace; \ + make clean && make # Seeds taken from https://github.com/HexHive/fuzzing-seed-selection/tree/main/fuzzing/readelf/seeds COPY examples/readelf/docker/seeds /home/seeds -COPY examples/readelf/docker/readelf.sh /home/ +COPY examples/readelf/docker/readelf_afl.sh /home/ +COPY examples/readelf/docker/readelf_hfuzz.sh /home/ +COPY external/post_process_honggfuzz_inputs.py /home/ -RUN chmod +x /home/readelf.sh \ No newline at end of file +RUN chmod +x /home/readelf_afl.sh +RUN chmod +x /home/readelf_hfuzz.sh \ No newline at end of file diff --git a/examples/readelf/docker/readelf.sh b/examples/readelf/docker/readelf_afl.sh similarity index 84% rename from examples/readelf/docker/readelf.sh rename to examples/readelf/docker/readelf_afl.sh index eef8aac..909d2fa 100644 --- a/examples/readelf/docker/readelf.sh +++ b/examples/readelf/docker/readelf_afl.sh @@ -19,4 +19,4 @@ export AFL_LLVM_INSTRUMENT=CLASSIC export DRIVER_DIR="${SUBJECT}/obj-afl" -$AFL/afl-fuzz -i "$1" -o "$DRIVER_DIR/afl-out" -V 300 -- "${SUBJECT}/build/binutils/readelf" -a @@ \ No 
newline at end of file +$AFL/afl-fuzz -i "$1" -o "$DRIVER_DIR/afl-out" -V $2 -- "${SUBJECT}/build/binutils/readelf" -a @@ \ No newline at end of file diff --git a/examples/readelf/docker/readelf_hfuzz.sh b/examples/readelf/docker/readelf_hfuzz.sh new file mode 100644 index 0000000..fa5b88c --- /dev/null +++ b/examples/readelf/docker/readelf_hfuzz.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +cd /home + +export SUBJECT="/home/binutils-gdb-hfuzz" + +cd "${SUBJECT}" + +rm -rf obj-hfuzz +mkdir -p obj-hfuzz + +export HFUZZ="/home/GitHonggfuzz" +export DRIVER_DIR="${SUBJECT}/obj-hfuzz" + +$HFUZZ/honggfuzz --logfile "$DRIVER_DIR/hfuzz.log" -i "$1" --run_time $2 -o "$DRIVER_DIR/hfuzz-out" -- "${SUBJECT}/build/binutils/readelf" -a ___FILE___ \ No newline at end of file diff --git a/examples/readelf/extract_binary_traces.py b/examples/readelf/extract_binary_traces.py index 846f50e..57a6db6 100644 --- a/examples/readelf/extract_binary_traces.py +++ b/examples/readelf/extract_binary_traces.py @@ -7,9 +7,9 @@ QEMU_BIN = Path("../../external/GitAflplusplus/afl-qemu-trace") QEMU_PLUGIN_DIR = Path(f"{QEMU_BIN.parent}/qemu_mode/qemuafl/build/contrib/plugins/libdrcov.so") -for item in ["readelf_1", "readelf_2"]: - INPUTS_DIR = Path(f"./saved_corpus/{item}") - TRACE_DIR = Path(f"./traces/{item}") +for item in [("./saved_corpus/afl_run/afl-out/default/queue", "./saved_corpus/afl_traces"), ("./saved_corpus/hfuzz_run/renamed_seeds", "./saved_corpus/hfuzz_traces")]: + INPUTS_DIR = Path(item[0]) + TRACE_DIR = Path(item[1]) if not TRACE_DIR.exists(): os.makedirs(TRACE_DIR, exist_ok=False) diff --git a/examples/readelf/local/compile_binutils_debug.sh b/examples/readelf/local/compile_binutils_debug.sh index 8bf5c4f..1e542ce 100644 --- a/examples/readelf/local/compile_binutils_debug.sh +++ b/examples/readelf/local/compile_binutils_debug.sh @@ -1,6 +1,6 @@ #!/bin/bash -git clone git://sourceware.org/git/binutils-gdb.git +git clone --depth 1 -b binutils-2_45 https://github.com/bminor/binutils-gdb.git set 
-eux; \ cd binutils-gdb; \ diff --git a/examples/readelf/saved_corpus.zip b/examples/readelf/saved_corpus.zip index 8386b04..5db613b 100644 Binary files a/examples/readelf/saved_corpus.zip and b/examples/readelf/saved_corpus.zip differ diff --git a/external/compile_aflpp.sh b/external/compile_aflpp.sh index b520ca7..43dc7b8 100644 --- a/external/compile_aflpp.sh +++ b/external/compile_aflpp.sh @@ -3,8 +3,8 @@ git clone https://github.com/AFLplusplus/AFLplusplus.git GitAflplusplus cd GitAflplusplus git checkout b89727bea903aec80d003b6764fb53c232d33d95 -cp ../rename_seeds.patch . -git apply rename_seeds.patch +cp ../rename_seeds_afl.patch . +git apply rename_seeds_afl.patch CC=clang CXX=clang++ make cd qemu_mode diff --git a/external/post_process_honggfuzz_inputs.py b/external/post_process_honggfuzz_inputs.py new file mode 100644 index 0000000..b91118d --- /dev/null +++ b/external/post_process_honggfuzz_inputs.py @@ -0,0 +1,111 @@ +import argparse +from pathlib import Path +import shutil +import os + +class RenameSeeds: + def __init__(self, hfuzz_out, hfuzz_log, initial_seeds, output_folder): + self.hfuzz_out = Path(hfuzz_out) + self.hfuzz_log = Path(hfuzz_log) + self.initial_seeds = Path(initial_seeds) + self.output_folder = Path(output_folder) + self.initial_seeds_map = {} + self.filename_source_map = {} + + def parse_initial_seeds(self): + initial_seed_id = 1 + for root, dirnames, filenames in os.walk(self.hfuzz_out): + initial_seed_id += len(filenames) + + for f in self.initial_seeds.glob("*"): + self.initial_seeds_map[f.name] = {'id': initial_seed_id, 'edges_found': 0, 'executed_on': 0} + initial_seed_id += 1 + + def get_equivalent_source(self, seed_src): + seed_src_split = Path(seed_src).name.split(",") + for filename in self.filename_source_map.keys(): + filename_split = Path(filename).name.split(",") + similar_elements = set(seed_src_split).intersection(set(filename_split)) + if len(similar_elements) == len(seed_src_split) - 1: + return filename + return 
seed_src + + def rename(self): + self.parse_initial_seeds() + log_lines = None + with open(self.hfuzz_log, "r") as hlog: + log_lines = hlog.readlines() + total_seeds = 0 + for log_line in log_lines: + if log_line.startswith("Adding file"): + seed_file_name = log_line.split(" '")[1].split("' ")[0] + seed_file_name_split = Path(seed_file_name).stem.split(',') + source_file_name = log_line.split("from source '")[-1].split("'")[0] + curr_executed_on = int(seed_file_name_split[1].split(':')[-1]) + curr_edges_found = int(seed_file_name_split[2].split(':')[-1]) + assert(seed_file_name not in self.filename_source_map.keys()) + self.filename_source_map[seed_file_name] = { + 'src': source_file_name, + 'id': int(seed_file_name_split[0].split(':')[-1]), + 'executed_on': curr_executed_on, + 'edges_found': curr_edges_found, + } + + total_seeds += 1 + + if source_file_name in self.initial_seeds_map.keys(): + if self.initial_seeds_map[source_file_name]['executed_on'] == 0: + self.initial_seeds_map[source_file_name]['executed_on'] = curr_executed_on + self.initial_seeds_map[source_file_name]['edges_found'] = curr_edges_found + elif self.initial_seeds_map[source_file_name]['executed_on'] > curr_executed_on: + self.initial_seeds_map[source_file_name]['executed_on'] = curr_executed_on + self.initial_seeds_map[source_file_name]['edges_found'] = curr_edges_found + + assert(total_seeds == len(self.filename_source_map.keys())) + + if self.output_folder.exists(): + shutil.rmtree(self.output_folder) + + self.output_folder.mkdir(parents=True) + for initial_seed, details in self.initial_seeds_map.items(): + source_file = self.initial_seeds / initial_seed + destination_file = self.output_folder / f"id:{details['id']},executed_on:{details['executed_on']},edges_found:{details['edges_found']},orig:{details['id']}" + shutil.copy(source_file, destination_file) + + not_found = [] + for descendant, details in self.filename_source_map.items(): + source_path = Path(descendant) + source_id = 0 + if 
details['src'] in self.initial_seeds_map.keys(): + source_id = self.initial_seeds_map[details['src']]['id'] + else: + seed_src = f"{self.hfuzz_out.absolute()}/{details['src']}" + if seed_src not in self.filename_source_map: + seed_src = self.get_equivalent_source(seed_src) + source_id = self.filename_source_map[seed_src]['id'] + assert(details['id'] != source_id) + destination_path = self.output_folder / f"cycle:1,id:{details['id']},executed_on:{details['executed_on']},src:{source_id},edges_found:{details['edges_found']}" + shutil.copy(source_path, destination_path) + + with open(f"{self.hfuzz_log.parent}/renaming_errors.log", "w+") as err_file: + for nf in not_found: + err_file.write(f"Seed not converted: {nf}\n") + + print(f"total_seeds: {total_seeds}, errors: {len(not_found)}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Reduce data") + parser.add_argument("--hfuzz-out", type=str, required=True, help="Folder where honggfuzz output files are present") + parser.add_argument("--hfuzz-log", type=str, required=True, help="Log file from honggfuzz") + parser.add_argument("--initial-seeds", type=str, required=True, help="Folder where initial seeds are present") + parser.add_argument("--output-folder", type=str, required=True, help="Folder to store the renamed seeds") + args = parser.parse_args() + + experiment = RenameSeeds( + hfuzz_out=args.hfuzz_out, + hfuzz_log=args.hfuzz_log, + initial_seeds=args.initial_seeds, + output_folder=args.output_folder, + ) + experiment.rename() diff --git a/external/rename_seeds.patch b/external/rename_seeds_afl.patch similarity index 100% rename from external/rename_seeds.patch rename to external/rename_seeds_afl.patch diff --git a/external/rename_seeds_honggfuzz.patch b/external/rename_seeds_honggfuzz.patch new file mode 100644 index 0000000..58f3d6d --- /dev/null +++ b/external/rename_seeds_honggfuzz.patch @@ -0,0 +1,133 @@ +diff --git a/.gitignore b/.gitignore +index c72e57c3..30671e57 100644 +--- 
a/.gitignore ++++ b/.gitignore +@@ -14,3 +14,5 @@ libs + obj + examples/badcode/targets/badcode1 + examples/badcode/targets/badcode2 ++kts_examples ++kts_examples/binutils-gdb +\ No newline at end of file +diff --git a/input.c b/input.c +index a1063a38..e78108eb 100644 +--- a/input.c ++++ b/input.c +@@ -22,6 +22,7 @@ + + #include "input.h" + ++#include + #include + #include + #include +@@ -320,28 +321,44 @@ bool input_parseBlacklist(honggfuzz_t* hfuzz) { + return true; + } + +-static void input_generateFileName(dynfile_t* dynfile, const char* dir, char fname[PATH_MAX]) { ++long long current_timestamp() { ++ struct timespec spec; ++ clock_gettime(CLOCK_REALTIME, &spec); ++ return (long long)spec.tv_sec * 1000 + (long long)spec.tv_nsec / 1000000; ++} ++ ++static void input_generateFileName( ++ dynfile_t* dynfile, const char* dir, char fname[PATH_MAX], run_t* run) { + uint64_t crc64f = util_CRC64(dynfile->data, dynfile->size); + uint64_t crc64r = util_CRC64Rev(dynfile->data, dynfile->size); + if (dir) { +- snprintf(fname, PATH_MAX, "%s/%016" PRIx64 "%016" PRIx64 ".%08" PRIx32 ".honggfuzz.cov", +- dir, crc64f, crc64r, (uint32_t)dynfile->size); ++ snprintf(fname, PATH_MAX, ++ "%s/" ++ "id:%zu,executed_on:%lld,edges_found:%lu,%" ++ "016" PRIx64 "%016" PRIx64 ".%08" PRIx32 ".honggfuzz.cov", ++ dir, dynfile->idx, current_timestamp(), ++ run->global->feedback.hwCnts.softCntEdge, crc64f, crc64r, ++ (uint32_t)dynfile->size); + } else { +- snprintf(fname, PATH_MAX, "%016" PRIx64 "%016" PRIx64 ".%08" PRIx32 ".honggfuzz.cov", +- crc64f, crc64r, (uint32_t)dynfile->size); ++ snprintf(fname, PATH_MAX, ++ "id:%zu,executed_on:%lld,edges_found:%lu,%" ++ "016" PRIx64 "%016" PRIx64 ".%08" PRIx32 ".honggfuzz.cov", ++ dynfile->idx, current_timestamp(), ++ run->global->feedback.hwCnts.softCntEdge, crc64f, crc64r, ++ (uint32_t)dynfile->size); + } + } + +-bool input_writeCovFile(const char* dir, dynfile_t* dynfile) { ++bool input_writeCovFile(const char* dir, dynfile_t* dynfile, run_t* run) 
{ + char fname[PATH_MAX]; +- input_generateFileName(dynfile, dir, fname); ++ input_generateFileName(dynfile, dir, fname, run); + + if (files_exists(fname)) { + LOG_D("File '%s' already exists in the output corpus directory '%s'", fname, dir); + return true; + } + +- LOG_D("Adding file '%s' to the corpus directory '%s'", fname, dir); ++ LOG_I("Adding file '%s' to the corpus directory '%s' from source '%s'", fname, dir, run->dynfile->path); + + if (!files_writeBufToFile( + fname, dynfile->data, dynfile->size, O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC)) { +@@ -384,11 +401,9 @@ void input_addDynamicInput(run_t* run) { + } + dynfile->phase = fuzz_getState(run->global); + dynfile->timedout = run->tmOutSignaled; +- input_generateFileName(dynfile, NULL, dynfile->path); + +- MX_SCOPED_RWLOCK_WRITE(&run->global->mutex.dynfileq); + +- dynfile->idx = ATOMIC_PRE_INC(run->global->io.dynfileqCnt); ++ + + run->global->feedback.maxCov[0] = HF_MAX(run->global->feedback.maxCov[0], dynfile->cov[0]); + run->global->feedback.maxCov[1] = HF_MAX(run->global->feedback.maxCov[1], dynfile->cov[1]); +@@ -396,6 +411,12 @@ void input_addDynamicInput(run_t* run) { + run->global->feedback.maxCov[3] = HF_MAX(run->global->feedback.maxCov[3], dynfile->cov[3]); + + run->global->io.dynfileqMaxSz = HF_MAX(run->global->io.dynfileqMaxSz, dynfile->size); ++ ++ MX_SCOPED_RWLOCK_WRITE(&run->global->mutex.dynfileq); ++ ++ dynfile->idx = ATOMIC_PRE_INC(run->global->io.dynfileqCnt); ++ ++ input_generateFileName(dynfile, NULL, dynfile->path, run); + + /* Sort it by coverage - put better coverage earlier in the list */ + dynfile_t* iter = NULL; +@@ -416,7 +437,7 @@ void input_addDynamicInput(run_t* run) { + + const char* outDir = + run->global->io.outputDir ? 
run->global->io.outputDir : run->global->io.inputDir; +- if (!input_writeCovFile(outDir, dynfile)) { ++ if (!input_writeCovFile(outDir, dynfile, run)) { + LOG_E("Couldn't save the coverage data to '%s'", run->global->io.outputDir); + } + +@@ -427,7 +448,7 @@ void input_addDynamicInput(run_t* run) { + + ATOMIC_POST_INC(run->global->io.newUnitsAdded); + +- if (run->global->io.covDirNew && !input_writeCovFile(run->global->io.covDirNew, dynfile)) { ++ if (run->global->io.covDirNew && !input_writeCovFile(run->global->io.covDirNew, dynfile, run)) { + LOG_E("Couldn't save the new coverage data to '%s'", run->global->io.covDirNew); + } + } +diff --git a/input.h b/input.h +index d812036c..eb705a78 100644 +--- a/input.h ++++ b/input.h +@@ -38,7 +38,7 @@ extern bool input_init(honggfuzz_t* hfuzz); + extern bool input_parseDictionary(honggfuzz_t* hfuzz); + extern void input_freeDictionary(honggfuzz_t* hfuzz); + extern bool input_parseBlacklist(honggfuzz_t* hfuzz); +-extern bool input_writeCovFile(const char* dir, dynfile_t* dynfile); ++extern bool input_writeCovFile(const char* dir, dynfile_t* dynfile, run_t* run); + extern void input_addDynamicInput(run_t* run); + extern bool input_inDynamicCorpus(run_t* run, const char* fname, size_t len); + extern void input_renumerateInputs(honggfuzz_t* hfuzz);