Skip to content

Commit 28f7c5a

Browse files
#11: Added Avro Nested Data Support (#12)
Fixes #11. Fixes #5. Co-authored-by: Anastasiia Sergienko <46891819+AnastasiiaSergienko@users.noreply.github.com>
1 parent a9fb5ae commit 28f7c5a

File tree

14 files changed

+649
-51
lines changed

14 files changed

+649
-51
lines changed

doc/changes/changes_0.1.1.md

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,26 @@
33
## Features
44

55
* #9: Added SLF4J Logging Library as Common Dependency (PR #10)
6+
* #11: Added Support for Complex Avro Types (Array, Map, Record) (PR #12)
7+
8+
## Documentation
9+
10+
* #7: Added developer guide (PR #8)
611

712
## Dependency Updates
813

914
### Runtime Dependency Updates
1015

11-
* Updated ``sbt.version`` from `1.3.13` to `1.4.1`.
12-
* Updated ``com.fasterxml.jackson.core:jackson-databind`` from `2.11.2` to `2.11.3`.
16+
* Updated `sbt.version` from `1.3.13` to `1.4.1`.
17+
* Added `com.fasterxml.jackson.module:jackson-module-scala` version `2.11.3`.
18+
* Updated `com.fasterxml.jackson.core:jackson-databind` from `2.11.2` to `2.11.3`.
1319

1420
### Test Dependency Updates
1521

16-
* Updated ``org.mockito:mockito-core`` from `3.5.10` to `3.5.15`.
22+
* Updated `org.mockito:mockito-core` from `3.5.10` to `3.5.15`.
1723

1824
### Plugin Updates
1925

20-
* Updated ``com.github.cb372:sbt-explicit-dependencies`` from `0.2.13` to `0.2.14`.
26+
* Updated `com.github.cb372:sbt-explicit-dependencies` from `0.2.13` to `0.2.15`.
27+
* Updated `org.wartremover:sbt-wartremover` from `2.4.10` to `2.4.11`.
28+
* Updated `org.wartremover:sbt-wartremover-contrib` from `1.3.8` to `1.3.9`.

project/Dependencies.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ object Dependencies {
3434
ExclusionRule(organization = "com.fasterxml.jackson.core"),
3535
ExclusionRule(organization = "com.fasterxml.jackson.module")
3636
),
37-
"com.fasterxml.jackson.core" % "jackson-databind" % JacksonVersion
37+
"com.fasterxml.jackson.core" % "jackson-databind" % JacksonVersion,
38+
"com.fasterxml.jackson.module" %% "jackson-module-scala" % JacksonVersion
3839
)
3940

4041
lazy val TestDependencies: Seq[ModuleID] = Seq(

project/plugins.sbt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// Adds `wartremover`, a flexible Scala code linting tool
22
// http://github.com/puffnfresh/wartremover
3-
addSbtPlugin("org.wartremover" % "sbt-wartremover" % "2.4.10")
3+
addSbtPlugin("org.wartremover" % "sbt-wartremover" % "2.4.11")
44

55
// Adds Contrib Warts
66
// http://github.com/wartremover/wartremover-contrib/
7-
addSbtPlugin("org.wartremover" % "sbt-wartremover-contrib" % "1.3.8")
7+
addSbtPlugin("org.wartremover" % "sbt-wartremover-contrib" % "1.3.9")
88

99
// Adds most common doc api mappings
1010
// https://github.com/ThoughtWorksInc/sbt-api-mappings
@@ -50,7 +50,7 @@ addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0")
5050

5151
// Adds a `sbt-explicit-dependencies` plugin
5252
// https://github.com/cb372/sbt-explicit-dependencies
53-
addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.2.14")
53+
addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.2.15")
5454

5555
// Setup this and project/project/plugins.sbt for formatting
5656
// project/*.scala files with scalafmt
Lines changed: 87 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
package com.exasol.common.avro
22

33
import java.nio.ByteBuffer
4+
import java.util.{Map => JMap}
5+
import java.util.Collection
46

57
import com.exasol.common.data.Row
8+
import com.exasol.common.json.JsonMapper
69

710
import org.apache.avro.Schema
811
import org.apache.avro.generic.GenericFixed
912
import org.apache.avro.generic.GenericRecord
13+
import org.apache.avro.generic.IndexedRecord
1014
import org.apache.avro.util.Utf8
1115

1216
/**
@@ -15,40 +19,63 @@ import org.apache.avro.util.Utf8
1519
*/
1620
object AvroRow {
1721

22+
/**
23+
* Converts an Avro record into an internal [[com.exasol.common.data.Row]].
24+
*
25+
* @param avroRecord a generic Avro record
26+
* @return a Row representation of the given Avro record
27+
*/
1828
def apply(avroRecord: GenericRecord): Row = {
19-
val size = avroRecord.getSchema.getFields.size
20-
val values = Array.ofDim[Any](size)
2129
val fields = avroRecord.getSchema().getFields()
22-
for { index <- 0 until fields.size } {
23-
values.update(index, getAvroRecordValue(avroRecord.get(index), fields.get(index).schema))
30+
val size = fields.size()
31+
val values = Array.ofDim[Any](size)
32+
for { i <- 0 until size } {
33+
values.update(i, getAvroFieldValue(fields.get(i).schema(), avroRecord.get(i)))
2434
}
2535
Row(values.toSeq)
2636
}
2737

38+
private[this] def getAvroFieldValue(schema: Schema, value: Any): Any = {
39+
val fieldValue = getAvroValue(value, schema)
40+
if (isPrimitiveAvroType(schema.getType())) {
41+
fieldValue
42+
} else {
43+
JsonMapper.toJson(fieldValue)
44+
}
45+
}
46+
47+
private[this] def isPrimitiveAvroType(avroType: Schema.Type): Boolean =
48+
avroType match {
49+
case Schema.Type.ARRAY => false
50+
case Schema.Type.MAP => false
51+
case Schema.Type.RECORD => false
52+
case _ => true
53+
}
54+
2855
@SuppressWarnings(Array("org.wartremover.warts.Return", "org.wartremover.warts.ToString"))
29-
private[this] def getAvroRecordValue(value: Any, field: Schema): Any = {
56+
private[this] def getAvroValue(value: Any, field: Schema): Any = {
3057
if (value == null) {
3158
return null // scalastyle:ignore return
32-
3359
}
34-
field.getType match {
60+
field.getType() match {
3561
case Schema.Type.NULL => value
3662
case Schema.Type.BOOLEAN => value
3763
case Schema.Type.INT => value
3864
case Schema.Type.LONG => value
3965
case Schema.Type.FLOAT => value
4066
case Schema.Type.DOUBLE => value
41-
case Schema.Type.STRING => getAvroValueAsString(value, field)
42-
case Schema.Type.FIXED => getAvroValueAsString(value, field)
43-
case Schema.Type.BYTES => getAvroValueAsString(value, field)
67+
case Schema.Type.STRING => getStringValue(value, field)
68+
case Schema.Type.FIXED => getStringValue(value, field)
69+
case Schema.Type.BYTES => getStringValue(value, field)
4470
case Schema.Type.ENUM => value.toString
45-
case Schema.Type.UNION => getAvroUnionValue(value, field)
46-
case field =>
47-
throw new IllegalArgumentException(s"Avro ${field.getName} type is not supported!")
71+
case Schema.Type.UNION => getUnionValue(value, field)
72+
case Schema.Type.ARRAY => getArrayValue(value, field)
73+
case Schema.Type.MAP => getMapValue(value, field)
74+
case Schema.Type.RECORD => getRecordValue(value)
4875
}
4976
}
5077

51-
private[this] def getAvroValueAsString(value: Any, field: Schema): String =
78+
private[this] def getStringValue(value: Any, field: Schema): String =
5279
value match {
5380
case str: String => str
5481
case utf: Utf8 => utf.toString
@@ -61,16 +88,16 @@ object AvroRow {
6188
)
6289
}
6390

64-
private[this] def getAvroUnionValue(value: Any, field: Schema): Any = {
91+
private[this] def getUnionValue(value: Any, field: Schema): Any = {
6592
val types = field.getTypes()
6693
val typesSize = types.size()
6794
typesSize match {
68-
case 1 => getAvroRecordValue(value, types.get(0))
95+
case 1 => getAvroValue(value, types.get(0))
6996
case 2 =>
7097
if (types.get(0).getType() == Schema.Type.NULL) {
71-
getAvroRecordValue(value, types.get(1))
98+
getAvroValue(value, types.get(1))
7299
} else if (types.get(1).getType() == Schema.Type.NULL) {
73-
getAvroRecordValue(value, types.get(0))
100+
getAvroValue(value, types.get(0))
74101
} else {
75102
throw new IllegalArgumentException(
76103
"Avro Union type should contain a primitive and null!"
@@ -81,4 +108,46 @@ object AvroRow {
81108
}
82109
}
83110

111+
private[this] def getArrayValue(value: Any, field: Schema): Array[Any] = value match {
112+
case array: Array[_] => array.map(getAvroValue(_, field.getElementType()))
113+
case list: Collection[_] =>
114+
val result = new Array[Any](list.size)
115+
var i = 0
116+
list.stream().forEach { element =>
117+
val _ = result.update(i, getAvroValue(element, field.getElementType()))
118+
i += 1
119+
}
120+
result
121+
case other =>
122+
throw new IllegalArgumentException(
123+
s"Unsupported Avro Array type '${other.getClass.getName()}'."
124+
)
125+
}
126+
127+
private[this] def getMapValue(map: Any, field: Schema): JMap[String, Any] = {
128+
val result = new java.util.HashMap[String, Any]()
129+
map.asInstanceOf[JMap[String, _]].forEach { (key, value) =>
130+
val _ = result.put(key, getAvroValue(value, field.getValueType()))
131+
}
132+
result
133+
}
134+
135+
private[this] def getRecordValue(value: Any): JMap[String, Any] = value match {
136+
case record: IndexedRecord =>
137+
val size = record.getSchema().getFields().size
138+
val fields = record.getSchema().getFields()
139+
val result = new java.util.HashMap[String, Any]()
140+
var i = 0
141+
while (i < size) {
142+
val _ =
143+
result.put(fields.get(i).name, getAvroValue(record.get(i), fields.get(i).schema))
144+
i += 1
145+
}
146+
result
147+
case other =>
148+
throw new IllegalArgumentException(
149+
s"Unsupported Avro Record type '${other.getClass.getName()}'."
150+
)
151+
}
152+
84153
}

src/main/scala/com/exasol/common/Row.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ final case class Row(protected[data] val values: Seq[Any]) {
1313
*
1414
* If the value is null, null is returned.
1515
*/
16+
@throws[IndexOutOfBoundsException]("When index is out of bounds")
1617
def get(index: Int): Any = values(index)
1718

1819
/** Returns the value at position {@code index} cast to the type. */
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package com.exasol.common.json
2+
3+
import com.fasterxml.jackson.databind.ObjectMapper
4+
import com.fasterxml.jackson.module.scala.DefaultScalaModule
5+
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
6+
7+
@SuppressWarnings(Array("org.wartremover.warts.NonUnitStatements"))
8+
object JsonMapper {
9+
private[this] val mapper = new ObjectMapper with ScalaObjectMapper
10+
mapper.registerModule(DefaultScalaModule)
11+
12+
def toJson[T](value: T): String = mapper.writeValueAsString(value)
13+
14+
def parseJson[T: Manifest](jsonString: String): T =
15+
mapper.readValue[T](jsonString)
16+
}

0 commit comments

Comments
 (0)