From 8b84c3a7b872aa7ee159770621b7af4d4580e6e3 Mon Sep 17 00:00:00 2001 From: Simon Resch Date: Fri, 7 Nov 2025 11:01:00 +0100 Subject: [PATCH 1/3] feat: sometimes interpret char[] mutations as single bytes When mutating char[], randomly interpret the bytes from libFuzzer as individual (single byte) chars. This helps to make use of libFuzzer's table of recent compares (encoded as CESU-8) if the char[] is used as a String inside the fuzz test. --- .../lang/PrimitiveArrayMutatorFactory.java | 34 ++++++++++++++++--- tests/BUILD.bazel | 10 ++++++ .../java/com/example/CharArrayFuzzer.java | 29 ++++++++++++++++ 3 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 tests/src/test/java/com/example/CharArrayFuzzer.java diff --git a/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java b/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java index dd321c72d..f5d4ab6b0 100644 --- a/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java +++ b/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java @@ -43,6 +43,7 @@ import java.lang.reflect.AnnotatedType; import java.nio.ByteBuffer; import java.util.Optional; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.Predicate; @@ -81,6 +82,7 @@ public static final class PrimitiveArrayMutator extends SerializingMutator private final SerializingMutator innerMutator; private final Function toPrimitive; private final Function toBytes; + private final BiFunction toPrimitiveAfterMutate; @SuppressWarnings("unchecked") public PrimitiveArrayMutator(AnnotatedType type) { @@ -92,6 +94,8 @@ public PrimitiveArrayMutator(AnnotatedType type) { innerMutator = (SerializingMutator) LibFuzzerMutatorFactory.tryCreate(innerByteArray).get(); toPrimitive = (Function) makeBytesToPrimitiveArrayConverter(elementType); + 
toPrimitiveAfterMutate = + (BiFunction) makeBytesToPrimitiveArrayAfterMutate(elementType); toBytes = (Function) makePrimitiveArrayToBytesConverter(elementType); } @@ -128,14 +132,13 @@ public T init(PseudoRandom prng) { @Override public T mutate(T value, PseudoRandom prng) { - return (T) toPrimitive.apply(innerMutator.mutate(toBytes.apply(value), prng)); + return toPrimitiveAfterMutate.apply(innerMutator.mutate(toBytes.apply(value), prng), prng); } @Override public T crossOver(T value, T otherValue, PseudoRandom prng) { - return (T) - toPrimitive.apply( - innerMutator.crossOver(toBytes.apply(value), toBytes.apply(otherValue), prng)); + return toPrimitive.apply( + innerMutator.crossOver(toBytes.apply(value), toBytes.apply(otherValue), prng)); } private void extractRange(AnnotatedType type) { @@ -250,6 +253,29 @@ private static AnnotatedType convertWithLength(AnnotatedType type, AnnotatedType } } + // Randomly maps the byte array from libFuzzer directly onto char[] or converts each byte into a + // 2 byte char. This helps in cases where a String is constructed out of char[] and libFuzzer + // inserts CESU8 encoded bytes into the byte[]. 
+ public char[] postMutateChars(byte[] bytes, PseudoRandom prng) { + if (prng.choice()) { + return (char[]) toPrimitive.apply(bytes); + } else { + char[] chars = new char[bytes.length]; + for (int i = 0; i < chars.length; i++) { + chars[i] = (char) bytes[i]; + } + return chars; + } + } + + public BiFunction makeBytesToPrimitiveArrayAfterMutate( + AnnotatedType type) { + if (type.getType().getTypeName().equals("char")) { + return this::postMutateChars; + } + return (bytes, ignored) -> makeBytesToPrimitiveArrayConverter(type).apply(bytes); + } + public static Function makePrimitiveArrayToBytesConverter(AnnotatedType type) { switch (type.getType().getTypeName()) { case "int": diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index 7289f9d3a..88df62574 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -1136,6 +1136,16 @@ java_fuzz_target_test( ], ) +java_fuzz_target_test( + name = "CharArrayFuzzer", + srcs = [ + "src/test/java/com/example/CharArrayFuzzer.java", + ], + allowed_findings = ["java.lang.RuntimeException"], + target_class = "com.example.CharArrayFuzzer", + verify_crash_reproducer = False, +) + filegroup( name = "fuzz_test_lister_classes", srcs = ["src/test/data/fuzz_test_lister_test"], diff --git a/tests/src/test/java/com/example/CharArrayFuzzer.java b/tests/src/test/java/com/example/CharArrayFuzzer.java new file mode 100644 index 000000000..685ef0589 --- /dev/null +++ b/tests/src/test/java/com/example/CharArrayFuzzer.java @@ -0,0 +1,29 @@ +/* + * Copyright 2025 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example; + +public class CharArrayFuzzer { + public static void fuzzerTestOneInput(char[] data) { + if (data == null) { + return; + } + String expression = new String(data); + if (expression.contains("jazzer")) { + throw new RuntimeException("found jazzer"); + } + } +} From 96e19d687037cc11aa1c72356995a1455f42ce9b Mon Sep 17 00:00:00 2001 From: Khaled Yakdan Date: Tue, 11 Nov 2025 15:11:32 +0100 Subject: [PATCH 2/3] feat: decode bytes as CESU-8 when converting to char[] --- .../mutator/lang/PrimitiveArrayMutatorFactory.java | 12 +++++++----- tests/src/test/java/com/example/CharArrayFuzzer.java | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java b/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java index f5d4ab6b0..bca31037d 100644 --- a/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java +++ b/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java @@ -42,6 +42,7 @@ import java.lang.reflect.AnnotatedArrayType; import java.lang.reflect.AnnotatedType; import java.nio.ByteBuffer; +import java.nio.charset.Charset; import java.util.Optional; import java.util.function.BiFunction; import java.util.function.Function; @@ -71,6 +72,7 @@ public Optional> tryCreate( public static final class PrimitiveArrayMutator extends SerializingMutator { private static final int DEFAULT_MIN_LENGTH = 0; private static final int DEFAULT_MAX_LENGTH = 1000; + private static final Charset FUZZED_DATA_CHARSET = Charset.forName("CESU-8"); private long minRange; private long maxRange; private boolean allowNaN; @@ -253,16 +255,16 @@ private static AnnotatedType convertWithLength(AnnotatedType type, AnnotatedType } } - // 
Randomly maps the byte array from libFuzzer directly onto char[] or converts each byte into a - // 2 byte char. This helps in cases where a String is constructed out of char[] and libFuzzer - // inserts CESU8 encoded bytes into the byte[]. + // The strings we pass to native callbacks to trace data flow are CESU-8 encoded. + // As a result, libFuzzer's table of recent compares (TORC) contains CESU-8 encoded strings. + // Therefore, 50% of the time we decode the byte array as a CESU-8 string. public char[] postMutateChars(byte[] bytes, PseudoRandom prng) { if (prng.choice()) { return (char[]) toPrimitive.apply(bytes); } else { - char[] chars = new char[bytes.length]; + char[] chars = new String(bytes, FUZZED_DATA_CHARSET).toCharArray(); for (int i = 0; i < chars.length; i++) { - chars[i] = (char) bytes[i]; + chars[i] = (char) forceInRange(chars[i], minRange, maxRange); } return chars; } diff --git a/tests/src/test/java/com/example/CharArrayFuzzer.java b/tests/src/test/java/com/example/CharArrayFuzzer.java index 685ef0589..8a63800c1 100644 --- a/tests/src/test/java/com/example/CharArrayFuzzer.java +++ b/tests/src/test/java/com/example/CharArrayFuzzer.java @@ -22,8 +22,8 @@ public static void fuzzerTestOneInput(char[] data) { return; } String expression = new String(data); - if (expression.contains("jazzer")) { - throw new RuntimeException("found jazzer"); + if (expression.equals("中 Bös3r \uD801\uDC00 C0d3 中")) { + throw new RuntimeException("Found evil code"); } } } From ef7ea967f951ee11b978d151d185e53a9d9d01bd Mon Sep 17 00:00:00 2001 From: Khaled Yakdan Date: Wed, 19 Nov 2025 13:14:40 +0100 Subject: [PATCH 3/3] feat: ensure correct length constraints for char[] mutation --- .../src/test/java/com/example/BUILD.bazel | 17 ++++++++ .../example/CharArrayWithLengthFuzzTest.java | 35 ++++++++++++++++ .../lang/PrimitiveArrayMutatorFactory.java | 42 +++++++++++++++---- .../lang/PrimitiveArrayMutatorTest.java | 4 +- 4 files changed, 89 insertions(+), 9 deletions(-) create mode 100644 
examples/junit/src/test/java/com/example/CharArrayWithLengthFuzzTest.java diff --git a/examples/junit/src/test/java/com/example/BUILD.bazel b/examples/junit/src/test/java/com/example/BUILD.bazel index e1fc1d5e9..a4d119678 100644 --- a/examples/junit/src/test/java/com/example/BUILD.bazel +++ b/examples/junit/src/test/java/com/example/BUILD.bazel @@ -224,6 +224,23 @@ java_fuzz_target_test( ], ) +java_fuzz_target_test( + name = "CharArrayWithLengthFuzzTest", + srcs = ["CharArrayWithLengthFuzzTest.java"], + allowed_findings = ["java.lang.RuntimeException"], + tags = ["no-jdk8"], + target_class = "com.example.CharArrayWithLengthFuzzTest", + verify_crash_reproducer = False, + runtime_deps = [ + ":junit_runtime", + ], + deps = [ + "//src/main/java/com/code_intelligence/jazzer/junit:fuzz_test", + "@maven//:org_junit_jupiter_junit_jupiter_api", + "@maven//:org_mockito_mockito_core", + ], +) + java_fuzz_target_test( name = "MutatorFuzzTest", srcs = ["MutatorFuzzTest.java"], diff --git a/examples/junit/src/test/java/com/example/CharArrayWithLengthFuzzTest.java b/examples/junit/src/test/java/com/example/CharArrayWithLengthFuzzTest.java new file mode 100644 index 000000000..17276b8fe --- /dev/null +++ b/examples/junit/src/test/java/com/example/CharArrayWithLengthFuzzTest.java @@ -0,0 +1,35 @@ +/* + * Copyright 2025 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example; + +import com.code_intelligence.jazzer.junit.FuzzTest; +import com.code_intelligence.jazzer.mutation.annotation.NotNull; +import com.code_intelligence.jazzer.mutation.annotation.WithLength; +import java.nio.charset.Charset; + +public class CharArrayWithLengthFuzzTest { + @FuzzTest + public void fuzzCharArray(char @NotNull @WithLength(max = 5) [] data) { + String expression = new String(data); + // Each '中' character is encoded using three bytes with CESU8. To satisfy this check, the + // underlying CESU8-encoded byte array should have at least 15 bytes. + if (expression.equals("中中中中中")) { + assert expression.getBytes(Charset.forName("CESU-8")).length == 15; + throw new RuntimeException("Found evil code"); + } + } +} diff --git a/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java b/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java index bca31037d..fa7c33a4c 100644 --- a/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java +++ b/src/main/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorFactory.java @@ -43,6 +43,7 @@ import java.lang.reflect.AnnotatedType; import java.nio.ByteBuffer; import java.nio.charset.Charset; +import java.util.Arrays; import java.util.Optional; import java.util.function.BiFunction; import java.util.function.Function; @@ -75,6 +76,8 @@ public static final class PrimitiveArrayMutator extends SerializingMutator private static final Charset FUZZED_DATA_CHARSET = Charset.forName("CESU-8"); private long minRange; private long maxRange; + private int minLength; + private int maxLength; private boolean allowNaN; private float minFloatRange; private float maxFloatRange; @@ -90,6 +93,7 @@ public static final class PrimitiveArrayMutator extends SerializingMutator public PrimitiveArrayMutator(AnnotatedType type) { elementType = ((AnnotatedArrayType) 
type).getAnnotatedGenericComponentType(); extractRange(elementType); + extractLength(type); AnnotatedType innerByteArray = forwardAnnotations( type, convertWithLength(type, new TypeHolder() {}.annotatedType())); @@ -209,11 +213,15 @@ private void extractRange(AnnotatedType type) { } } - private static AnnotatedType convertWithLength(AnnotatedType type, AnnotatedType newType) { - AnnotatedType elementType = ((AnnotatedArrayType) type).getAnnotatedGenericComponentType(); + private void extractLength(AnnotatedType type) { Optional withLength = Optional.ofNullable(type.getAnnotation(WithLength.class)); - int minLength = withLength.map(WithLength::min).orElse(DEFAULT_MIN_LENGTH); - int maxLength = withLength.map(WithLength::max).orElse(DEFAULT_MAX_LENGTH); + withLength.ifPresent(System.err::println); + minLength = withLength.map(WithLength::min).orElse(DEFAULT_MIN_LENGTH); + maxLength = withLength.map(WithLength::max).orElse(DEFAULT_MAX_LENGTH); + } + + private AnnotatedType convertWithLength(AnnotatedType type, AnnotatedType newType) { + AnnotatedType elementType = ((AnnotatedArrayType) type).getAnnotatedGenericComponentType(); switch (elementType.getType().getTypeName()) { case "int": case "float": @@ -222,8 +230,13 @@ private static AnnotatedType convertWithLength(AnnotatedType type, AnnotatedType case "double": return withLength(newType, minLength * 8, maxLength * 8); case "short": - case "char": return withLength(newType, minLength * 2, maxLength * 2); + case "char": + // CESU-8 encoding uses at maximum 6 bytes to encode a character. This value represents + // the maximum size needed for the underlying byte array to hold the corresponding + // char array with the specified length range. 
After the conversion to a char array in the + // mutator, we should ensure the exact length constraints + return withLength(newType, minLength * 6, maxLength * 6); case "boolean": case "byte": return withLength(newType, minLength, maxLength); @@ -241,7 +254,7 @@ private static AnnotatedType convertWithLength(AnnotatedType type, AnnotatedType case "short": return getShortPrimitiveArray(minRange, maxRange); case "char": - return getCharPrimitiveArray(minRange, maxRange); + return getCharPrimitiveArray(minRange, maxRange, minLength, maxLength); case "float": return getFloatPrimitiveArray(minFloatRange, maxFloatRange, allowNaN); case "double": @@ -263,9 +276,16 @@ public char[] postMutateChars(byte[] bytes, PseudoRandom prng) { return (char[]) toPrimitive.apply(bytes); } else { char[] chars = new String(bytes, FUZZED_DATA_CHARSET).toCharArray(); + for (int i = 0; i < chars.length; i++) { chars[i] = (char) forceInRange(chars[i], minRange, maxRange); } + + if (chars.length < minLength) { + return Arrays.copyOf(chars, minLength); + } else if (chars.length > maxLength) { + return Arrays.copyOf(chars, maxLength); + } return chars; } } @@ -407,10 +427,18 @@ public static Function getShortPrimitiveArray(long minRange, lo }; } - public static Function getCharPrimitiveArray(long minRange, long maxRange) { + public static Function getCharPrimitiveArray( + long minRange, long maxRange, int minLength, int maxLength) { int nBytes = 2; return (byte[] byteArray) -> { if (byteArray == null) return null; + + if (byteArray.length < minLength * 2) { + byteArray = Arrays.copyOf(byteArray, minLength * 2); + } else if (byteArray.length > maxLength * 2) { + byteArray = Arrays.copyOf(byteArray, maxLength * 2); + } + char extraBytes = (char) (byteArray.length % nBytes); char[] result = new char[byteArray.length / nBytes + (extraBytes > 0 ? 
1 : 0)]; ByteBuffer buffer = ByteBuffer.wrap(byteArray); diff --git a/src/test/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorTest.java b/src/test/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorTest.java index 219414470..1cadd9713 100644 --- a/src/test/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorTest.java +++ b/src/test/java/com/code_intelligence/jazzer/mutation/mutator/lang/PrimitiveArrayMutatorTest.java @@ -86,7 +86,7 @@ public class PrimitiveArrayMutatorTest { static Function charsToBytes = (Function) makePrimitiveArrayToBytesConverter(annotatedType_char); static Function bytesToChars = - getCharPrimitiveArray(Character.MIN_VALUE, Character.MAX_VALUE); + getCharPrimitiveArray(Character.MIN_VALUE, Character.MAX_VALUE, 0, 1000); static Function booleansToBytes = (Function) makePrimitiveArrayToBytesConverter(annotatedType_boolean); @@ -305,7 +305,7 @@ static Stream bytes2charsTestCases() { @ParameterizedTest @MethodSource("bytes2charsTestCases") void testArrayConversion_bytes2chars(byte[] input, char[] expected, long min, long max) { - Function fn = getCharPrimitiveArray(min, max); + Function fn = getCharPrimitiveArray(min, max, 0, 100); assertThat(fn.apply(input)).isEqualTo(expected); }