CluProcessor

Instance Constructors

new CluProcessor(config: Config = ...)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def annotate(doc: Document): Document

Definition Classes
CluProcessor → Processor
def annotate(text: String, keepText: Boolean = false): Document

Definition Classes
Processor
def annotateFromSentences(sentences: Iterable[String], keepText: Boolean = false): Document

Definition Classes
Processor
def annotateFromTokens(sentences: Iterable[Iterable[String]], keepText: Boolean = false): Document

Definition Classes
Processor
final def asInstanceOf[T0]: T0

Definition Classes
Any
def chunking(doc: Document): Unit

Shallow parsing; modifies the document in place
Shallow parsing; modifies the document in place

Definition Classes
CluProcessor → Processor
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
val config: Config
lazy val depParser: Parser
def discourse(doc: Document): Unit

Discourse parsing; modifies the document in place
Discourse parsing; modifies the document in place

Definition Classes
CluProcessor → Processor
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def getArgBoolean(argPath: String, defaultValue: Option[Boolean]): Boolean

Attributes
protected
Definition Classes
Configured
def getArgInt(argPath: String, defaultValue: Option[Int]): Int

Attributes
protected
Definition Classes
Configured
def getArgString(argPath: String, defaultValue: Option[String]): String

Attributes
protected
Definition Classes
Configured
def getArgStrings(argPath: String, defaultValue: Option[Seq[String]]): Seq[String]

Attributes
protected
Definition Classes
Configured
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getConf: Config

Definition Classes
CluProcessor → Configured
def hashCode(): Int

Definition Classes
AnyRef → Any
val internStrings: Boolean
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def lemmatize(doc: Document): Unit

Lemmatization; modifies the document in place
Lemmatization; modifies the document in place

Definition Classes
CluProcessor → Processor
def mkDocument(text: String, keepText: Boolean = false): Document

Constructs a document of tokens from free text; includes sentence splitting and tokenization
Constructs a document of tokens from free text; includes sentence splitting and tokenization

Definition Classes
CluProcessor → Processor
def mkDocumentFromSentences(sentences: Iterable[String], keepText: Boolean = false, charactersBetweenSentences: Int = 1): Document

Constructs a document of tokens from a collection of untokenized sentences
Constructs a document of tokens from a collection of untokenized sentences

Definition Classes
CluProcessor → Processor
def mkDocumentFromTokens(sentences: Iterable[Iterable[String]], keepText: Boolean = false, charactersBetweenSentences: Int = 1, charactersBetweenTokens: Int = 1): Document

Constructs a document of tokens from a collection of tokenized sentences
Constructs a document of tokens from a collection of tokenized sentences

Definition Classes
CluProcessor → Processor
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def parse(doc: Document): Unit

Syntactic parsing; modifies the document in place
Syntactic parsing; modifies the document in place

Definition Classes
CluProcessor → Processor
lazy val posPostProcessor: Option[SentencePostProcessor]
lazy val posTagger: PartOfSpeechTagger
def preprocessSentences(origSentences: Iterable[String]): Iterable[String]

Runs preprocessText on each sentence
Runs preprocessText on each sentence

Definition Classes
Processor
def preprocessText(origText: String): String

Hook to allow the preprocessing of input text. This is useful for domain-specific corrections, such as the ones in BioNLPProcessor, where we remove Table and Fig references. Note that this is allowed to change character offsets.
Hook to allow the preprocessing of input text. This is useful for domain-specific corrections, such as the ones in BioNLPProcessor, where we remove Table and Fig references. Note that this is allowed to change character offsets.
origText
The original input text
returns
The preprocessed text

Definition Classes
CluProcessor → Processor
def preprocessTokens(origSentences: Iterable[Iterable[String]]): Iterable[Iterable[String]]

Runs preprocessText on each token
Runs preprocessText on each token

Definition Classes
Processor
def recognizeNamedEntities(doc: Document): Unit

NER; modifies the document in place
NER; modifies the document in place

Definition Classes
CluProcessor → Processor
def resolveCoreference(doc: Document): Unit

Coreference resolution; modifies the document in place
Coreference resolution; modifies the document in place

Definition Classes
CluProcessor → Processor
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def tagPartsOfSpeech(doc: Document): Unit

Part-of-speech tagging
Part-of-speech tagging

Definition Classes
CluProcessor → Processor
def toString(): String

Definition Classes
AnyRef → Any
lazy val tokenizer: Tokenizer
lazy val tokenizerPostProcessor: Option[TokenizerPostProcessor]
lazy val tokenizerPreProcessor: Option[TokenizerPreProcessor]
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Docs: object CluProcessor | package clu

class CluProcessor extends Processor with Configured

Instance Constructors

new CluProcessor(config: Config = ...)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

def annotate(doc: Document): Document

def annotate(text: String, keepText: Boolean = false): Document

def annotateFromSentences(sentences: Iterable[String], keepText: Boolean = false): Document

def annotateFromTokens(sentences: Iterable[Iterable[String]], keepText: Boolean = false): Document

final def asInstanceOf[T0]: T0

def chunking(doc: Document): Unit

def clone(): AnyRef

val config: Config

lazy val depParser: Parser

def discourse(doc: Document): Unit

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

def getArgBoolean(argPath: String, defaultValue: Option[Boolean]): Boolean

def getArgInt(argPath: String, defaultValue: Option[Int]): Int

def getArgString(argPath: String, defaultValue: Option[String]): String

def getArgStrings(argPath: String, defaultValue: Option[Seq[String]]): Seq[String]

final def getClass(): Class[_]

def getConf: Config

def hashCode(): Int

val internStrings: Boolean

final def isInstanceOf[T0]: Boolean

def lemmatize(doc: Document): Unit

def mkDocument(text: String, keepText: Boolean = false): Document

def mkDocumentFromSentences(sentences: Iterable[String], keepText: Boolean = false, charactersBetweenSentences: Int = 1): Document

def mkDocumentFromTokens(sentences: Iterable[Iterable[String]], keepText: Boolean = false, charactersBetweenSentences: Int = 1, charactersBetweenTokens: Int = 1): Document

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def parse(doc: Document): Unit

lazy val posPostProcessor: Option[SentencePostProcessor]

lazy val posTagger: PartOfSpeechTagger

def preprocessSentences(origSentences: Iterable[String]): Iterable[String]

def preprocessText(origText: String): String

def preprocessTokens(origSentences: Iterable[Iterable[String]]): Iterable[Iterable[String]]

def recognizeNamedEntities(doc: Document): Unit

def resolveCoreference(doc: Document): Unit

final def synchronized[T0](arg0: ⇒ T0): T0

def tagPartsOfSpeech(doc: Document): Unit

def toString(): String

lazy val tokenizer: Tokenizer

lazy val tokenizerPostProcessor: Option[TokenizerPostProcessor]

lazy val tokenizerPreProcessor: Option[TokenizerPreProcessor]

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Configured

Inherited from Processor

Inherited from AnyRef

Inherited from Any

Ungrouped