ShallowNLPProcessor

Instance Constructors

new ShallowNLPProcessor(internStrings: Boolean = true)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def annotate(doc: Document): Document

Definition Classes
Processor
def annotate(text: String, keepText: Boolean = false): Document

Definition Classes
Processor
def annotateFromSentences(sentences: Iterable[String], keepText: Boolean = false): Document

Definition Classes
Processor
def annotateFromTokens(sentences: Iterable[Iterable[String]], keepText: Boolean = false): Document

Definition Classes
Processor
def arrayOrNone[T](b: ArrayBuffer[T])(implicit arg0: ClassTag[T]): Option[Array[T]]
final def asInstanceOf[T0]: T0

Definition Classes
Any
def basicSanityCheck(doc: Document, checkAnnotation: Boolean = true): Option[Annotation]
lazy val chunker: CRFChunker
def chunking(doc: Document): Unit

Shallow parsing; modifies the document in place
Shallow parsing; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def discourse(doc: Document): Unit

Discourse parsing; modifies the document in place
Discourse parsing; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
def in(s: String): String
val internStrings: Boolean
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def labelSemanticRoles(doc: Document): Unit

SRL; modifies the document in place
SRL; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
def lemmatize(doc: Document): Unit

Lemmatization; modifies the document in place
Lemmatization; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
lazy val lemmatizer: StanfordCoreNLP
def mkChunker: CRFChunker
def mkDocument(text: String, keepText: Boolean): Document

Constructs a document of tokens from free text; includes sentence splitting and tokenization
Constructs a document of tokens from free text; includes sentence splitting and tokenization

Definition Classes
ShallowNLPProcessor → Processor
def mkDocumentFromSentences(sentences: Iterable[String], keepText: Boolean, charactersBetweenSentences: Int = 1): Document

Constructs a document of tokens from an array of untokenized sentences
Constructs a document of tokens from an array of untokenized sentences

Definition Classes
ShallowNLPProcessor → Processor
def mkDocumentFromTokens(sentences: Iterable[Iterable[String]], keepText: Boolean, charactersBetweenSentences: Int = 1, charactersBetweenTokens: Int = 1): Document

Constructs a document of tokens from an array of tokenized sentences
Constructs a document of tokens from an array of tokenized sentences

Definition Classes
ShallowNLPProcessor → Processor
def mkLemmatizer: StanfordCoreNLP
def mkNer: StanfordCoreNLP
def mkPosTagger: StanfordCoreNLP
def mkSentence(annotation: CoreMap): Sentence
def mkTokenizerWithSentenceSplitting: StanfordCoreNLP
def mkTokenizerWithoutSentenceSplitting: StanfordCoreNLP
def namedEntitySanityCheck(doc: Document): Option[Annotation]
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
lazy val ner: StanfordCoreNLP
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def parse(doc: Document): Unit

Syntactic parsing; modifies the document in place
Syntactic parsing; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
lazy val posTagger: StanfordCoreNLP
def postprocessTags(annotation: Annotation): Unit

Hook to allow postprocessing of CoreNLP POS tagging *in place*, overwriting the original POS tags. This is useful for domain-specific corrections.
Hook to allow postprocessing of CoreNLP POS tagging *in place*, overwriting the original POS tags. This is useful for domain-specific corrections.
annotation
The CoreNLP annotation
def postprocessTokens(sentence: CoreMap): List[CoreLabel]

Attributes
protected
def postprocessTokens(originalTokens: Array[CoreLabel]): Array[CoreLabel]

Hook to allow postprocessing of CoreNLP tokenization. This is useful for domain-specific corrections, such as the ones in BioNLPProcessor. If you change the tokens, make sure to store them back in the sentence!
Hook to allow postprocessing of CoreNLP tokenization. This is useful for domain-specific corrections, such as the ones in BioNLPProcessor. If you change the tokens, make sure to store them back in the sentence!
originalTokens
The original CoreNLP tokens of the input sentence
returns
The modified tokens
def preprocessSentences(origSentences: Iterable[String]): Iterable[String]

Runs preprocessText on each sentence
Runs preprocessText on each sentence

Definition Classes
Processor
def preprocessText(origText: String): String

Hook to allow the preprocessing of input text. This is useful for domain-specific corrections, such as the ones in BioNLPProcessor, where we remove Table and Fig references. Note that this is allowed to change character offsets.
Hook to allow the preprocessing of input text. This is useful for domain-specific corrections, such as the ones in BioNLPProcessor, where we remove Table and Fig references. Note that this is allowed to change character offsets.
origText
The original input text
returns
The preprocessed text

Definition Classes
Processor
def preprocessTokens(origSentences: Iterable[Iterable[String]]): Iterable[Iterable[String]]

Runs preprocessText on each token
Runs preprocessText on each token

Definition Classes
Processor
def recognizeNamedEntities(doc: Document): Unit

NER; modifies the document in place
NER; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
def resolveCoreference(doc: Document): Unit

Coreference resolution; modifies the document in place
Coreference resolution; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def tagPartsOfSpeech(doc: Document): Unit

Part-of-speech tagging. This modifies the document in place, which is not too elegant.
Part-of-speech tagging. This modifies the document in place, which is not too elegant. But there are two reasons for this: (a) Some annotators (e.g., Stanford's CoreNLP) require some state (i.e., their Annotation object) to be passed between operations; (b) This is more efficient during annotate() where all the possible operations are chained.

Definition Classes
ShallowNLPProcessor → Processor
def toString(): String

Definition Classes
AnyRef → Any
lazy val tokenizerWithSentenceSplitting: StanfordCoreNLP
lazy val tokenizerWithoutSentenceSplitting: StanfordCoreNLP
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Doc: package shallownlp

class ShallowNLPProcessor extends Processor

Instance Constructors

new ShallowNLPProcessor(internStrings: Boolean = true)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

def annotate(doc: Document): Document

def annotate(text: String, keepText: Boolean = false): Document

def annotateFromSentences(sentences: Iterable[String], keepText: Boolean = false): Document

def annotateFromTokens(sentences: Iterable[Iterable[String]], keepText: Boolean = false): Document

def arrayOrNone[T](b: ArrayBuffer[T])(implicit arg0: ClassTag[T]): Option[Array[T]]

final def asInstanceOf[T0]: T0

def basicSanityCheck(doc: Document, checkAnnotation: Boolean = true): Option[Annotation]

lazy val chunker: CRFChunker

def chunking(doc: Document): Unit

def clone(): AnyRef

def discourse(doc: Document): Unit

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

def in(s: String): String

val internStrings: Boolean

final def isInstanceOf[T0]: Boolean

def labelSemanticRoles(doc: Document): Unit

def lemmatize(doc: Document): Unit

lazy val lemmatizer: StanfordCoreNLP

def mkChunker: CRFChunker

def mkDocument(text: String, keepText: Boolean): Document

def mkDocumentFromSentences(sentences: Iterable[String], keepText: Boolean, charactersBetweenSentences: Int = 1): Document

def mkDocumentFromTokens(sentences: Iterable[Iterable[String]], keepText: Boolean, charactersBetweenSentences: Int = 1, charactersBetweenTokens: Int = 1): Document

def mkLemmatizer: StanfordCoreNLP

def mkNer: StanfordCoreNLP

def mkPosTagger: StanfordCoreNLP

def mkSentence(annotation: CoreMap): Sentence

def mkTokenizerWithSentenceSplitting: StanfordCoreNLP

def mkTokenizerWithoutSentenceSplitting: StanfordCoreNLP

def namedEntitySanityCheck(doc: Document): Option[Annotation]

final def ne(arg0: AnyRef): Boolean

lazy val ner: StanfordCoreNLP

final def notify(): Unit

final def notifyAll(): Unit

def parse(doc: Document): Unit

lazy val posTagger: StanfordCoreNLP

def postprocessTags(annotation: Annotation): Unit

def postprocessTokens(sentence: CoreMap): List[CoreLabel]

def postprocessTokens(originalTokens: Array[CoreLabel]): Array[CoreLabel]

def preprocessSentences(origSentences: Iterable[String]): Iterable[String]

def preprocessText(origText: String): String

def preprocessTokens(origSentences: Iterable[Iterable[String]]): Iterable[Iterable[String]]

def recognizeNamedEntities(doc: Document): Unit

def resolveCoreference(doc: Document): Unit

final def synchronized[T0](arg0: ⇒ T0): T0

def tagPartsOfSpeech(doc: Document): Unit

def toString(): String

lazy val tokenizerWithSentenceSplitting: StanfordCoreNLP

lazy val tokenizerWithoutSentenceSplitting: StanfordCoreNLP

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Processor

Inherited from AnyRef

Inherited from Any

Ungrouped