Skip to content

Commit cbbed3d

Browse files
committed
Fix tests for Scala 2.13
- Add explicit mutable sequences to numerous tests - ViT: Use lazy model for test - Fix PerceptronApproach MapView
1 parent 38cbe71 commit cbbed3d

File tree

14 files changed

+64
-48
lines changed

14 files changed

+64
-48
lines changed

src/main/scala/com/johnsnowlabs/nlp/HasBatchedAnnotate.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ trait HasBatchedAnnotate[M <: Model[M]] {
6161
private def processBatchRows(batchedRows: Seq[Row]): Seq[Row] = {
6262
val inputAnnotations = batchedRows.map(row => {
6363
getInputCols.flatMap(inputCol => {
64-
row.getAs[mutable.Seq[Row]](inputCol).map(Annotation(_)) // TODO fix which mutable and immutable
64+
row
65+
.getAs[mutable.Seq[Row]](inputCol)
66+
.map(Annotation(_)) // TODO fix which mutable and immutable
6567
})
6668
})
6769
val outputAnnotations = batchAnnotate(inputAnnotations.toSeq)

src/main/scala/com/johnsnowlabs/nlp/HasBatchedAnnotateAudio.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import org.apache.spark.ml.Model
2020
import org.apache.spark.ml.param.IntParam
2121
import org.apache.spark.sql.Row
2222

23+
import scala.collection.mutable
24+
2325
trait HasBatchedAnnotateAudio[M <: Model[M]] {
2426

2527
this: RawAnnotator[M] =>
@@ -54,7 +56,7 @@ trait HasBatchedAnnotateAudio[M <: Model[M]] {
5456
val inputAnnotations = batchedRows.map(row => {
5557
getInputCols.flatMap(inputCol => {
5658
row
57-
.getAs[Seq[Row]](inputCol)
59+
.getAs[mutable.Seq[Row]](inputCol)
5860
.map(r =>
5961
AnnotationAudio(
6062
r.getString(0),

src/main/scala/com/johnsnowlabs/nlp/HasBatchedAnnotateImage.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import org.apache.spark.ml.Model
2020
import org.apache.spark.ml.param.IntParam
2121
import org.apache.spark.sql.Row
2222

23+
import scala.collection.mutable
24+
2325
trait HasBatchedAnnotateImage[M <: Model[M]] {
2426

2527
this: RawAnnotator[M] =>
@@ -55,7 +57,7 @@ trait HasBatchedAnnotateImage[M <: Model[M]] {
5557
val inputAnnotations = batchedRows.map(row => {
5658
getInputCols.flatMap(inputCol => {
5759
row
58-
.getAs[Seq[Row]](inputCol)
60+
.getAs[mutable.Seq[Row]](inputCol)
5961
.map(r =>
6062
AnnotationImage(
6163
r.getString(0),

src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -425,16 +425,18 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean =
425425

426426
def annotate(target: String, optionalTarget: String = ""): Map[String, Seq[String]] = {
427427
val annotations = fullAnnotate(target, optionalTarget)
428-
annotations.view.mapValues(_.map {
429-
case annotation: Annotation =>
430-
annotation.annotatorType match {
431-
case AnnotatorType.WORD_EMBEDDINGS | AnnotatorType.SENTENCE_EMBEDDINGS
432-
if parseEmbeddings =>
433-
annotation.embeddings.mkString(" ")
434-
case _ => annotation.result
435-
}
436-
case _ => ""
437-
}).toMap
428+
annotations.view
429+
.mapValues(_.map {
430+
case annotation: Annotation =>
431+
annotation.annotatorType match {
432+
case AnnotatorType.WORD_EMBEDDINGS | AnnotatorType.SENTENCE_EMBEDDINGS
433+
if parseEmbeddings =>
434+
annotation.embeddings.mkString(" ")
435+
case _ => annotation.result
436+
}
437+
case _ => ""
438+
})
439+
.toMap
438440
}
439441

440442
def annotate(

src/main/scala/com/johnsnowlabs/nlp/annotators/common/Tagged.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import com.johnsnowlabs.nlp.annotators.common.Annotated.{NerTaggedSentence, PosT
2323
import org.apache.spark.sql.{Dataset, Row}
2424

2525
import java.util
26-
import scala.collection.Map
26+
import scala.collection.{Map, mutable}
2727
import scala.util.Random
2828

2929
trait Tagged[T >: TaggedSentence <: TaggedSentence] extends Annotated[T] {
@@ -114,7 +114,7 @@ trait Tagged[T >: TaggedSentence <: TaggedSentence] extends Annotated[T] {
114114
}
115115

116116
def getAnnotations(row: Row, colNum: Int): Seq[Annotation] = {
117-
row.getAs[Seq[Row]](colNum).map(obj => Annotation(obj))
117+
row.getAs[mutable.Seq[Row]](colNum).map(obj => Annotation(obj)).toSeq
118118
}
119119

120120
protected def getLabelsFromSentences(

src/main/scala/com/johnsnowlabs/nlp/annotators/er/EntityRulerApproach.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -475,31 +475,31 @@ class EntityRulerApproach(override val uid: String)
475475
cleanPatternsDataFrame(regexPatternsDataFrame, idFieldExist)
476476

477477
cleanedRegexPatternsDataFrame.rdd.toLocalIterator.foreach { row =>
478-
val patterns = row.getAs[Seq[String]]("flatten_patterns")
478+
val patterns = row.getAs[mutable.Seq[String]]("flatten_patterns")
479479
val entity =
480480
if (idFieldExist) row.getAs[String]("label_id") else row.getAs[String]("label")
481481
storageReadWriter.getOrElse(None) match {
482482
case patternsWriter: PatternsReadWriter =>
483483
storePatterns(patterns.toIterator, entity, patternsWriter)
484484
case regexPatternsWriter: RegexPatternsReadWriter =>
485-
storeRegexPattern(patterns, entity, regexPatternsWriter)
486-
case None => computePatterns(patterns, isRegex = true, entity)
485+
storeRegexPattern(patterns.toSeq, entity, regexPatternsWriter)
486+
case None => computePatterns(patterns.toSeq, isRegex = true, entity)
487487
}
488488
}
489489

490490
val keywordsDataFrame = patternsDataFrame.filter(col("regex") === false)
491491
val cleanedKeywordsDataFrame = cleanPatternsDataFrame(keywordsDataFrame, idFieldExist)
492492

493493
cleanedKeywordsDataFrame.rdd.toLocalIterator.foreach { row =>
494-
val patterns = row.getAs[Seq[String]]("flatten_patterns")
494+
val patterns = row.getAs[mutable.Seq[String]]("flatten_patterns")
495495
if (idFieldExist) {
496496
val labelId = row.getAs[String]("label_id")
497497
val label = labelId.split(",")(0)
498498
val id = labelId.split(",")(1)
499-
keywordsPatterns.append(EntityPattern(label, patterns, Some(id), Some(true)))
499+
keywordsPatterns.append(EntityPattern(label, patterns.toSeq, Some(id), Some(true)))
500500
} else {
501501
val label = row.getAs[String]("label")
502-
keywordsPatterns.append(EntityPattern(label, patterns, None, Some(true)))
502+
keywordsPatterns.append(EntityPattern(label, patterns.toSeq, None, Some(true)))
503503
}
504504

505505
}

src/main/scala/com/johnsnowlabs/nlp/annotators/pos/perceptron/PerceptronApproachDistributed.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ class PerceptronApproachDistributed(override val uid: String)
159159
val tagFrequenciesByWord = taggedSentences
160160
.flatMap(_.taggedWords)
161161
.groupByKey(tw => tw.word.toLowerCase)
162-
.mapGroups { (lw, tw) => (lw, tw.toSeq.groupBy(_.tag).view.mapValues(_.length)) }
162+
.mapGroups { (lw, tw) => (lw, tw.toSeq.groupBy(_.tag).view.mapValues(_.length).toMap) }
163163
.filter { lwtw =>
164164
val (_, mode) = lwtw._2.maxBy(t => t._2)
165165
val n = lwtw._2.values.sum

src/main/scala/com/johnsnowlabs/nlp/annotators/sentence_detector_dl/SentenceDetectorDLModel.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,8 @@ class SentenceDetectorDLModel(override val uid: String)
576576
})
577577

578578
outputAnnotations
579-
.filter(anno => anno.result.length >= getMinLength && anno.result.length <= getMaxLength).toSeq
579+
.filter(anno => anno.result.length >= getMinLength && anno.result.length <= getMaxLength)
580+
.toSeq
580581
}
581582

582583
override protected def afterAnnotate(dataset: DataFrame): DataFrame = {

src/main/scala/com/johnsnowlabs/nlp/embeddings/WordEmbeddingsModel.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ import org.apache.spark.ml.util.Identifiable
3232
import org.apache.spark.sql.functions.{col, udf}
3333
import org.apache.spark.sql.{DataFrame, Dataset, Row}
3434

35+
import scala.collection.mutable
36+
3537
/** Word Embeddings lookup annotator that maps tokens to vectors
3638
*
3739
* This is the instantiated model of [[WordEmbeddings]].
@@ -334,7 +336,7 @@ trait EmbeddingsCoverage {
334336
val words = dataset
335337
.select(embeddingsCol)
336338
.flatMap(row => {
337-
val annotations = row.getAs[Seq[Row]](embeddingsCol)
339+
val annotations = row.getAs[mutable.Seq[Row]](embeddingsCol)
338340
annotations.map(annotation =>
339341
Tuple2(
340342
annotation.getAs[Map[String, String]]("metadata")("token"),

src/test/scala/com/johnsnowlabs/nlp/annotators/ChunkerBehaviors.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@ import com.johnsnowlabs.nlp.training.POS
2222
import com.johnsnowlabs.nlp.util.io.ResourceHelper
2323
import com.johnsnowlabs.nlp.{AnnotatorBuilder, DocumentAssembler, Finisher, SparkAccessor}
2424
import com.johnsnowlabs.tags.FastTest
25-
2625
import org.apache.spark.ml.{Pipeline, PipelineModel}
2726
import org.apache.spark.sql.{Dataset, Row}
28-
2927
import org.scalatest.flatspec.AnyFlatSpec
28+
import scala.collection.mutable
3029

3130
trait ChunkerBehaviors {
3231
this: AnyFlatSpec =>
@@ -189,7 +188,7 @@ trait ChunkerBehaviors {
189188
.transform(testData)
190189
.select("finished_chunks")
191190
.collect()
192-
.map(row => row.get(0).asInstanceOf[Seq[String]].toList)
191+
.map(row => row.get(0).asInstanceOf[mutable.Seq[String]].toList)
193192
finished_chunks.map(row => assert(row.isEmpty))
194193
}
195194
}
@@ -246,7 +245,7 @@ trait ChunkerBehaviors {
246245
.transform(dataset)
247246
.select("finished_chunks")
248247
.collect()
249-
.map(row => row.get(0).asInstanceOf[Seq[String]].toList)
248+
.map(row => row.get(0).asInstanceOf[mutable.Seq[String]].toList)
250249
finished_chunks.map(row => assert(row.nonEmpty))
251250

252251
}

0 commit comments

Comments (0)