typdep

Type Members

class ConllData extends AnyRef
class DependencyArcList extends AnyRef
class DependencyInstance extends Serializable
class DependencyPipe extends Serializable
class LocalFeatureData extends AnyRef
class LowRankTensor extends AnyRef
class Options extends Serializable
class Parameters extends Serializable
class PredictionParameters extends AnyRef
trait ReadablePretrainedTypedDependency extends ParamsAndFeaturesReadable[TypedDependencyParserModel] with HasPretrained[TypedDependencyParserModel]
class TrainDependencies extends Serializable
case class TrainFile(path: String, conllFormat: String) extends Product with Serializable
class TypedDependencyParser extends Serializable

class TypedDependencyParserApproach extends AnnotatorApproach[TypedDependencyParserModel]

Labeled parser that finds a grammatical relation between two words in a sentence.

Labeled parser that finds a grammatical relation between two words in a sentence. Its input is a dataset in either the CoNLL 2009 or the CoNLL-U format.

For instantiated/pretrained models, see TypedDependencyParserModel.

Dependency parsers provide information about word relationships. For example, dependency parsing can tell you what the subjects and objects of a verb are, as well as which words are modifying (describing) the subject. This can help you find precise answers to specific questions.

The parser requires the dependent tokens to be computed beforehand, e.g. with DependencyParser. The required training data can be set in two different ways (only one can be chosen for a particular model):

Dataset in the CoNLL 2009 format set with setConll2009
Dataset in the CoNLL-U format set with setConllU

Apart from that, no additional training data is needed.

See TypedDependencyParserApproachTestSpec for further reference on this API.

Example

import spark.implicits._
import com.johnsnowlabs.nlp.base.DocumentAssembler
import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector
import com.johnsnowlabs.nlp.annotators.Tokenizer
import com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronModel
import com.johnsnowlabs.nlp.annotators.parser.dep.DependencyParserModel
import com.johnsnowlabs.nlp.annotators.parser.typdep.TypedDependencyParserApproach
import org.apache.spark.ml.Pipeline

val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

val sentence = new SentenceDetector()
  .setInputCols("document")
  .setOutputCol("sentence")

val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

val posTagger = PerceptronModel.pretrained()
  .setInputCols("sentence", "token")
  .setOutputCol("pos")

val dependencyParser = DependencyParserModel.pretrained()
  .setInputCols("sentence", "pos", "token")
  .setOutputCol("dependency")

val typedDependencyParser = new TypedDependencyParserApproach()
  .setInputCols("dependency", "pos", "token")
  .setOutputCol("dependency_type")
  .setConllU("src/test/resources/parser/labeled/train_small.conllu.txt")
  .setNumberOfIterations(1)

val pipeline = new Pipeline().setStages(Array(
  documentAssembler,
  sentence,
  tokenizer,
  posTagger,
  dependencyParser,
  typedDependencyParser
))

// Additional training data is not needed; the typed dependency parser is trained from the CoNLL-U file only.
val emptyDataSet = Seq.empty[String].toDF("text")
val pipelineModel = pipeline.fit(emptyDataSet)

class TypedDependencyParserModel extends AnnotatorModel[TypedDependencyParserModel] with HasSimpleAnnotate[TypedDependencyParserModel]

Labeled parser that finds a grammatical relation between two words in a sentence.

Labeled parser that finds a grammatical relation between two words in a sentence. Its input is a dataset in either the CoNLL 2009 or the CoNLL-U format.

The parser requires the dependent tokens to be computed beforehand, e.g. with DependencyParser.

Pretrained models can be loaded with pretrained of the companion object:

val typedDependencyParser = TypedDependencyParserModel.pretrained()
  .setInputCols("dependency", "pos", "token")
  .setOutputCol("dependency_type")

If no name is provided, the default model is "dependency_typed_conllu". For available pretrained models please see the Models Hub.

For extended examples of usage, see the Spark NLP Workshop and the TypedDependencyModelTestSpec.

Example

import spark.implicits._
import com.johnsnowlabs.nlp.base.DocumentAssembler
import com.johnsnowlabs.nlp.annotators.Tokenizer
import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector
import com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronModel
import com.johnsnowlabs.nlp.annotators.parser.dep.DependencyParserModel
import com.johnsnowlabs.nlp.annotators.parser.typdep.TypedDependencyParserModel
import org.apache.spark.ml.Pipeline

val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

val sentence = new SentenceDetector()
  .setInputCols("document")
  .setOutputCol("sentence")

val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

val posTagger = PerceptronModel.pretrained()
  .setInputCols("sentence", "token")
  .setOutputCol("pos")

val dependencyParser = DependencyParserModel.pretrained()
  .setInputCols("sentence", "pos", "token")
  .setOutputCol("dependency")

val typedDependencyParser = TypedDependencyParserModel.pretrained()
  .setInputCols("dependency", "pos", "token")
  .setOutputCol("dependency_type")

val pipeline = new Pipeline().setStages(Array(
  documentAssembler,
  sentence,
  tokenizer,
  posTagger,
  dependencyParser,
  typedDependencyParser
))

val data = Seq(
  "Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent " +
    "firm Federal Mogul."
).toDF("text")
val result = pipeline.fit(data).transform(data)

result.selectExpr("explode(arrays_zip(token.result, dependency.result, dependency_type.result)) as cols")
  .selectExpr("cols['0'] as token", "cols['1'] as dependency", "cols['2'] as dependency_type")
  .show(8, truncate = false)
+------------+------------+---------------+
|token       |dependency  |dependency_type|
+------------+------------+---------------+
|Unions      |ROOT        |root           |
|representing|workers     |amod           |
|workers     |Unions      |flat           |
|at          |Turner      |case           |
|Turner      |workers     |flat           |
|Newall      |say         |nsubj          |
|say         |Unions      |parataxis      |
|they        |disappointed|nsubj          |
+------------+------------+---------------+

Value Members

object TypedDependencyParserApproach extends DefaultParamsReadable[TypedDependencyParserApproach] with Serializable

This is the companion object of TypedDependencyParserApproach.
This is the companion object of TypedDependencyParserApproach. Please refer to that class for the documentation.
object TypedDependencyParserModel extends ReadablePretrainedTypedDependency with Serializable

This is the companion object of TypedDependencyParserModel.
This is the companion object of TypedDependencyParserModel. Please refer to that class for the documentation.
package feature
package io
package util

package typdep

Type Members

class ConllData extends AnyRef

class DependencyArcList extends AnyRef

class DependencyInstance extends Serializable

class DependencyPipe extends Serializable

class LocalFeatureData extends AnyRef

class LowRankTensor extends AnyRef

class Options extends Serializable

class Parameters extends Serializable

class PredictionParameters extends AnyRef

trait ReadablePretrainedTypedDependency extends ParamsAndFeaturesReadable[TypedDependencyParserModel] with HasPretrained[TypedDependencyParserModel]

class TrainDependencies extends Serializable

case class TrainFile(path: String, conllFormat: String) extends Product with Serializable

class TypedDependencyParser extends Serializable

class TypedDependencyParserApproach extends AnnotatorApproach[TypedDependencyParserModel]

Example

class TypedDependencyParserModel extends AnnotatorModel[TypedDependencyParserModel] with HasSimpleAnnotate[TypedDependencyParserModel]

Example

Value Members

object TypedDependencyParserApproach extends DefaultParamsReadable[TypedDependencyParserApproach] with Serializable

object TypedDependencyParserModel extends ReadablePretrainedTypedDependency with Serializable

package feature

package io

package util

Ungrouped