From 6a1b6f8794b348bfd73c1b778d100c1dfd405fa9 Mon Sep 17 00:00:00 2001 From: tonicsoft Date: Tue, 20 Sep 2016 00:41:38 +0100 Subject: [PATCH 1/4] Issue #35 - printToString doesn't encode to bytes For any AbstractCharBasedFormatter, printToString can print directly to a String and not have to convert to bytes and back again. This avoids call to ByteArrayOutputStream.toString() at ProtobufFormatter:91 which uses JVM default encoding instead of ProtobufFormatter.defaultCharset. (I discovered this issue when trying to create JSON with a broad range of unicode characters on a machine where the default Charset did not cover the full Unicode range. --- .../protobuf/format/AbstractCharBasedFormatter.java | 12 ++++++++++++ .../googlecode/protobuf/format/JsonFormatTest.java | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/main/java/com/googlecode/protobuf/format/AbstractCharBasedFormatter.java b/src/main/java/com/googlecode/protobuf/format/AbstractCharBasedFormatter.java index ad5c653..af36413 100644 --- a/src/main/java/com/googlecode/protobuf/format/AbstractCharBasedFormatter.java +++ b/src/main/java/com/googlecode/protobuf/format/AbstractCharBasedFormatter.java @@ -46,6 +46,18 @@ public void print(UnknownFieldSet fields, OutputStream output, Charset cs) abstract public void print(UnknownFieldSet fields, Appendable output) throws IOException; + @Override + public String printToString(Message message) { + StringBuilder output = new StringBuilder(); + try { + print(message, output); + } catch (IOException e) { + throw new RuntimeException("Writing to a StringBuilder threw an IOException (should never happen).", + e); + } + return output.toString(); + } + @Override public void merge(InputStream input, Charset cs, ExtensionRegistry extensionRegistry, Builder builder) throws IOException { diff --git a/src/test/java/com/googlecode/protobuf/format/JsonFormatTest.java b/src/test/java/com/googlecode/protobuf/format/JsonFormatTest.java index ad48fe4..396f7d3 100644 --- a/src/test/java/com/googlecode/protobuf/format/JsonFormatTest.java +++ b/src/test/java/com/googlecode/protobuf/format/JsonFormatTest.java @@ -9,6 +9,7 @@ import org.testng.annotations.Test; import org.testng.reporters.Files; import protobuf_unittest.UnittestProto; +import protobuf_unittest.UnittestProto.OneString; import sun.nio.cs.StandardCharsets; import java.io.IOException; @@ -86,4 +87,15 @@ public void testDeserializeNullFieldFromJson() throws Exception { System.out.println(actual); assertThat(actual, equalTo(UnittestProto.TestNullField.newBuilder().build())); } + + @Test + public void testSerializeToStringDoesNotRequireAnyEncoding() { + String aChineseCharacter = "\u2F76"; + Charset encodingThatDoesntSupportChineseCharacters = Charset.forName("ASCII"); + JsonFormat jsonFomat = new JsonFormat(); + jsonFomat.setDefaultCharset(encodingThatDoesntSupportChineseCharacters); + OneString message = OneString.newBuilder().setData(aChineseCharacter).build(); + + assertThat(jsonFomat.printToString(message), is("{\"data\": \""+aChineseCharacter + "\"}")); + } } From 2b42bb49007a1c09e330270b293684eb7abcc134 Mon Sep 17 00:00:00 2001 From: tonicsoft Date: Fri, 24 Feb 2017 23:34:49 +0000 Subject: [PATCH 2/4] Issue #35 - use the same char set when converting from characters to bytes and back again. - Remove usage of deprecated jsonFactory API in order to respect charset when printing to an OutputStream using the JsonJacksonFormat. - Added test class hierarchy to match production code hierarchy so tests do not need to be duplicated for each ProtobufFormatter. --- .../protobuf/format/JsonJacksonFormat.java | 10 ++-- .../protobuf/format/ProtobufFormatter.java | 2 +- .../protobuf/format/JsonFormatTest.java | 7 ++- .../format/JsonJacksonFormatTest.java | 7 ++- .../format/ProtobufFormatterTest.java | 52 +++++++++++++++++++ 5 files changed, 72 insertions(+), 6 deletions(-) create mode 100644 src/test/java/com/googlecode/protobuf/format/ProtobufFormatterTest.java diff --git a/src/main/java/com/googlecode/protobuf/format/JsonJacksonFormat.java b/src/main/java/com/googlecode/protobuf/format/JsonJacksonFormat.java index 7ecc658..384d009 100644 --- a/src/main/java/com/googlecode/protobuf/format/JsonJacksonFormat.java +++ b/src/main/java/com/googlecode/protobuf/format/JsonJacksonFormat.java @@ -33,6 +33,7 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.OutputStreamWriter; import java.math.BigInteger; import java.nio.charset.Charset; import java.util.Iterator; @@ -78,7 +79,7 @@ public class JsonJacksonFormat extends ProtobufFormatter { * original Protocol Buffer system) */ public void print(final Message message, OutputStream output, Charset cs) throws IOException { - JsonGenerator generator = createGenerator(output); + JsonGenerator generator = createGenerator(output, cs); print(message, generator); generator.close(); } @@ -144,9 +145,12 @@ public void merge(JsonParser parser, } - protected JsonGenerator createGenerator(OutputStream output) throws IOException { - JsonGenerator generator = jsonFactory.createJsonGenerator(output, JsonEncoding.UTF8); + return createGenerator(output, Charset.forName(JsonEncoding.UTF8.getJavaName())); + } + + private JsonGenerator createGenerator(OutputStream output, Charset cs) throws IOException { + JsonGenerator generator = jsonFactory.createGenerator(new OutputStreamWriter(output, cs)); generator.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); return generator; } diff --git a/src/main/java/com/googlecode/protobuf/format/ProtobufFormatter.java b/src/main/java/com/googlecode/protobuf/format/ProtobufFormatter.java index 95288f3..b3f1493 100644 --- a/src/main/java/com/googlecode/protobuf/format/ProtobufFormatter.java +++ b/src/main/java/com/googlecode/protobuf/format/ProtobufFormatter.java @@ -88,7 +88,7 @@ public String printToString(final Message message) { ByteArrayOutputStream out = new ByteArrayOutputStream(); print(message, out, defaultCharset); out.flush(); - return out.toString(); + return out.toString(defaultCharset.name()); } catch (IOException e) { throw new RuntimeException("Writing to a StringBuilder threw an IOException (should never happen).", e); diff --git a/src/test/java/com/googlecode/protobuf/format/JsonFormatTest.java b/src/test/java/com/googlecode/protobuf/format/JsonFormatTest.java index 396f7d3..178d663 100644 --- a/src/test/java/com/googlecode/protobuf/format/JsonFormatTest.java +++ b/src/test/java/com/googlecode/protobuf/format/JsonFormatTest.java @@ -25,7 +25,7 @@ * @author scr on 10/13/15. */ @Test -public class JsonFormatTest { +public class JsonFormatTest extends ProtobufFormatterTest { private static final String RESOURCE_PATH = "/expectations/JsonFormatTest/"; private static final FormatFactory FORMAT_FACTORY = new FormatFactory(); private static final JsonFormat JSON_FORMATTER = @@ -35,6 +35,11 @@ private static String getExpected(String name) throws IOException { return Files.readFile(JsonFormatTest.class.getResourceAsStream(RESOURCE_PATH + name)).trim(); } + @Override + protected JsonFormat getFormatterUnderTest() { + return new JsonFormat(); + } + @DataProvider(name = "data") public static Object[][] data() throws IOException { return new Object[][]{ diff --git a/src/test/java/com/googlecode/protobuf/format/JsonJacksonFormatTest.java b/src/test/java/com/googlecode/protobuf/format/JsonJacksonFormatTest.java index cf5d856..5d96114 100644 --- a/src/test/java/com/googlecode/protobuf/format/JsonJacksonFormatTest.java +++ b/src/test/java/com/googlecode/protobuf/format/JsonJacksonFormatTest.java @@ -21,9 +21,14 @@ * @author scr on 10/13/15. */ @Test -public class JsonJacksonFormatTest { +public class JsonJacksonFormatTest extends ProtobufFormatterTest { private static final JsonFactory JSON_FACTORY = new JsonFactory(); + @Override + protected JsonJacksonFormat getFormatterUnderTest() { + return new JsonJacksonFormat(); + } + @Test public void testPrettyPrint() throws Exception { StringWriter writer = new StringWriter(); diff --git a/src/test/java/com/googlecode/protobuf/format/ProtobufFormatterTest.java b/src/test/java/com/googlecode/protobuf/format/ProtobufFormatterTest.java new file mode 100644 index 0000000..fa00809 --- /dev/null +++ b/src/test/java/com/googlecode/protobuf/format/ProtobufFormatterTest.java @@ -0,0 +1,52 @@ +package com.googlecode.protobuf.format; + +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; +import protobuf_unittest.UnittestProto; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.Charset; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.testng.Assert.assertNotEquals; + +public abstract class ProtobufFormatterTest { + private T protobufFormatter; + private Charset platformDefaultCharset = Charset.defaultCharset(); + private Charset nonDefaultCharset = Charset.forName("UTF-16"); + private String testData = "A"; + private final String testDataAsJson = "\"" + testData + "\""; + private UnittestProto.OneString testProto = UnittestProto.OneString.newBuilder().setData(testData).build(); + + protected abstract T getFormatterUnderTest(); + + @BeforeMethod + public void setUp() throws Exception { + protobufFormatter = getFormatterUnderTest(); + } + + @Test + public void validateTestSetUp() { + //test is redundant unless default and non default charsets produce different results + assertNotEquals(platformDefaultCharset, nonDefaultCharset); + byte[] nonDefaultBytes = testData.getBytes(nonDefaultCharset); + assertNotEquals(testData.getBytes(platformDefaultCharset), nonDefaultBytes); + assertNotEquals(testData, new String(nonDefaultBytes, platformDefaultCharset)); + } + + @Test + public void canSerialiseToBytesUsingNonDefaultCharset() throws IOException { + ByteArrayOutputStream output = new ByteArrayOutputStream(); + protobufFormatter.print(testProto, output, nonDefaultCharset); + assertThat(output.toString(nonDefaultCharset.name()), containsString(testDataAsJson)); + } + + @Test + public void canSerialiseUsingNonDefaultCharSet() { + protobufFormatter.setDefaultCharset(nonDefaultCharset); + UnittestProto.OneString message = UnittestProto.OneString.newBuilder().setData(testData).build(); + assertThat(protobufFormatter.printToString(message), containsString(testDataAsJson)); + } +} \ No newline at end of file From 42ae46af57196967f2d5300ebe08d5014effac28 Mon Sep 17 00:00:00 2001 From: tonicsoft Date: Fri, 24 Feb 2017 23:46:08 +0000 Subject: [PATCH 3/4] Issue #35 - update release notes --- RELEASE-NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 7cc1f7a..fa167ca 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,5 +1,6 @@ # Release Notes for protobuf-java-format * 1.5 + * [#35](https://github.com/bivas/protobuf-java-format/issues/35) - Fix character encoding bugs when printing to strings and byte arrays. * [#37](https://github.com/bivas/protobuf-java-format/pull/37) - Fix NPE during json deserialization. * 1.4 * [#15](https://github.com/bivas/protobuf-java-format/issues/15) - Fix unsigned values for JacksonFormatter. From 49652670a4adf93dc8f6ec3bb5f3b5e025749c20 Mon Sep 17 00:00:00 2001 From: tonicsoft Date: Tue, 28 Feb 2017 21:33:05 +0000 Subject: [PATCH 4/4] Issue #35 - use standard charsets --- .../java/com/googlecode/protobuf/format/JsonJacksonFormat.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/googlecode/protobuf/format/JsonJacksonFormat.java b/src/main/java/com/googlecode/protobuf/format/JsonJacksonFormat.java index 384d009..7bd202e 100644 --- a/src/main/java/com/googlecode/protobuf/format/JsonJacksonFormat.java +++ b/src/main/java/com/googlecode/protobuf/format/JsonJacksonFormat.java @@ -36,6 +36,7 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT import java.io.OutputStreamWriter; import java.math.BigInteger; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -146,7 +147,7 @@ public void merge(JsonParser parser, protected JsonGenerator createGenerator(OutputStream output) throws IOException { - return createGenerator(output, Charset.forName(JsonEncoding.UTF8.getJavaName())); + return createGenerator(output, StandardCharsets.UTF_8); } private JsonGenerator createGenerator(OutputStream output, Charset cs) throws IOException {