BioNLPProcessor

Instance Constructors

new BioNLPProcessor(internStrings: Boolean = true, withNER: Boolean = true, withDiscourse: Boolean = false, maxSentenceLength: Int = 100, removeFigTabReferences: Boolean = true)

Type Members

class MatchException extends RuntimeException

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def addBioTokenizerOptions(props: Properties): Unit
def annotate(doc: Document): Document

Definition Classes
Processor
def annotate(text: String): Document

Definition Classes
Processor
def annotateFromSentences(sentences: Iterable[String]): Document

Definition Classes
Processor
def annotateFromTokens(sentences: Iterable[Iterable[String]]): Document

Definition Classes
Processor
def arrayOrNone[T](b: ArrayBuffer[T])(implicit arg0: ClassTag[T]): Option[Array[T]]

Definition Classes
ShallowNLPProcessor
final def asInstanceOf[T0]: T0

Definition Classes
Any
lazy val banner: BannerWrapper
val basicDependencies: Boolean

Definition Classes
CoreNLPProcessor
def basicSanityCheck(doc: Document, checkAnnotation: Boolean = true): Option[Annotation]

Definition Classes
ShallowNLPProcessor
def chunking(doc: Document): Unit

Shallow parsing; modifies the document in place
Shallow parsing; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
lazy val coref: StanfordCoreNLP

Definition Classes
CoreNLPProcessor
def discourse(doc: Document): Unit

Discourse parsing; modifies the document in place
Discourse parsing; modifies the document in place

Definition Classes
CoreNLPProcessor → ShallowNLPProcessor → Processor
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
lazy val gsf: GrammaticalStructureFactory

Definition Classes
CoreNLPProcessor
def hashCode(): Int

Definition Classes
AnyRef → Any
lazy val headFinder: SemanticHeadFinder

Definition Classes
CoreNLPProcessor
def in(s: String): String

Definition Classes
ShallowNLPProcessor
val internStrings: Boolean

Definition Classes
ShallowNLPProcessor
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def labelSemanticRoles(doc: Document): Unit

SRL; modifies the document in place
SRL; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
def lemmatize(doc: Document): Unit

Lemmatization; modifies the document in place
Lemmatization; modifies the document in place

Definition Classes
ShallowNLPProcessor → Processor
lazy val lemmatizer: StanfordCoreNLP

Definition Classes
ShallowNLPProcessor
val maxSentenceLength: Int

Definition Classes
CoreNLPProcessor
def mkCoref: StanfordCoreNLP

Definition Classes
CoreNLPProcessor
def mkDocument(origText: String): Document

Constructs a document of tokens from free text; includes sentence splitting and tokenization
Constructs a document of tokens from free text; includes sentence splitting and tokenization

Definition Classes
ShallowNLPProcessor → Processor
def mkDocumentFromSentences(origSentences: Iterable[String], charactersBetweenSentences: Int = 1): Document

Constructs a document of tokens from an array of untokenized sentences
Constructs a document of tokens from an array of untokenized sentences

Definition Classes
ShallowNLPProcessor → Processor
def mkDocumentFromTokens(sentences: Iterable[Iterable[String]], charactersBetweenSentences: Int = 1, charactersBetweenTokens: Int = 1): Document

Constructs a document of tokens from an array of tokenized sentences
Constructs a document of tokens from an array of tokenized sentences

Definition Classes
ShallowNLPProcessor → Processor
def mkGSF: GrammaticalStructureFactory

Definition Classes
CoreNLPProcessor
def mkLemmatizer: StanfordCoreNLP

Definition Classes
ShallowNLPProcessor
def mkLexicalizedParser: LexicalizedParser

Definition Classes
CoreNLPProcessor
def mkNer: StanfordCoreNLP

Definition Classes
ShallowNLPProcessor
def mkPosTagger: StanfordCoreNLP

Definition Classes
ShallowNLPProcessor
def mkSentence(annotation: CoreMap): Sentence

Definition Classes
ShallowNLPProcessor
def mkTokenizerWithSentenceSplitting: StanfordCoreNLP

Definition Classes
BioNLPProcessor → ShallowNLPProcessor
def mkTokenizerWithoutSentenceSplitting: StanfordCoreNLP

Definition Classes
BioNLPProcessor → ShallowNLPProcessor
def namedEntitySanityCheck(doc: Document): Option[Annotation]

Definition Classes
ShallowNLPProcessor
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
lazy val ner: StanfordCoreNLP

Definition Classes
ShallowNLPProcessor
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def parse(doc: Document): Unit

Syntactic parsing; modifies the document in place
Syntactic parsing; modifies the document in place

Definition Classes
CoreNLPProcessor → ShallowNLPProcessor → Processor
lazy val posTagger: StanfordCoreNLP

Definition Classes
ShallowNLPProcessor
lazy val postProcessor: BioNLPTokenizerPostProcessor
def postprocessTags(annotation: Annotation): Unit

Hook to allow postprocessing of CoreNLP POS tagging *in place*, overwriting original POS tags. This is useful for domain-specific corrections.
Hook to allow postprocessing of CoreNLP POS tagging *in place*, overwriting original POS tags. This is useful for domain-specific corrections.
annotation
The CoreNLP annotation

Definition Classes
BioNLPProcessor → ShallowNLPProcessor
def postprocessTokens(originalTokens: Array[CoreLabel]): Array[CoreLabel]

Implements the bio-specific post-processing steps from McClosky et al.
Implements the bio-specific post-processing steps from McClosky et al. (2011)
originalTokens
Input CoreNLP sentence
returns
The modified tokens

Definition Classes
BioNLPProcessor → ShallowNLPProcessor
def preprocessText(origText: String): String

Removes Figure and Table references that appear within parentheses
Removes Figure and Table references that appear within parentheses
origText
The original input text
returns
The preprocessed text

Definition Classes
BioNLPProcessor → ShallowNLPProcessor
def recognizeNamedEntities(doc: Document): Unit

NER; modifies the document in place
NER; modifies the document in place

Definition Classes
BioNLPProcessor → ShallowNLPProcessor → Processor
def removeFigTabRefs(pattern: Pattern, text: String): String
def resolveCoreference(doc: Document): Unit

Coreference resolution; modifies the document in place
Coreference resolution; modifies the document in place

Definition Classes
BioNLPProcessor → CoreNLPProcessor → ShallowNLPProcessor → Processor
lazy val rstConstituentParser: RSTParser

Definition Classes
CoreNLPProcessor
def runBioNer(sentence: Sentence): Array[String]

Runs the bio-specific NER and returns an array of BIO (begin-inside-outside) labels for the sentence
Runs the bio-specific NER and returns an array of BIO (begin-inside-outside) labels for the sentence
sentence
Our own sentence, containing words, lemmas, and POS tags
returns
an array of BIO labels
def stanfordParse(sentence: CoreMap): Tree

Definition Classes
CoreNLPProcessor
lazy val stanfordParser: LexicalizedParser

Definition Classes
CoreNLPProcessor
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def tagPartsOfSpeech(doc: Document): Unit

Part-of-speech tagging. This modifies the document in place, which is not too elegant.
Part-of-speech tagging. This modifies the document in place, which is not too elegant. But there are two reasons for this: (a) Some annotators (e.g., Stanford's CoreNLP) require some state (i.e., their Annotation object) to be passed between operations; (b) This is more efficient during annotate() where all the possible operations are chained.

Definition Classes
ShallowNLPProcessor → Processor
def toDirectedGraph(sa: CoreMap): DirectedGraph[String]

Definition Classes
CoreNLPProcessor
def toString(): String

Definition Classes
AnyRef → Any
lazy val tokenizerWithSentenceSplitting: StanfordCoreNLP

Definition Classes
ShallowNLPProcessor
lazy val tokenizerWithoutSentenceSplitting: StanfordCoreNLP

Definition Classes
ShallowNLPProcessor
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
val withDiscourse: Boolean

Definition Classes
CoreNLPProcessor

Related Docs: object BioNLPProcessor | package bionlp

class BioNLPProcessor extends CoreNLPProcessor

Instance Constructors

new BioNLPProcessor(internStrings: Boolean = true, withNER: Boolean = true, withDiscourse: Boolean = false, maxSentenceLength: Int = 100, removeFigTabReferences: Boolean = true)

Type Members

class MatchException extends RuntimeException

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

def addBioTokenizerOptions(props: Properties): Unit

def annotate(doc: Document): Document

def annotate(text: String): Document

def annotateFromSentences(sentences: Iterable[String]): Document

def annotateFromTokens(sentences: Iterable[Iterable[String]]): Document

def arrayOrNone[T](b: ArrayBuffer[T])(implicit arg0: ClassTag[T]): Option[Array[T]]

final def asInstanceOf[T0]: T0

lazy val banner: BannerWrapper

val basicDependencies: Boolean

def basicSanityCheck(doc: Document, checkAnnotation: Boolean = true): Option[Annotation]

def chunking(doc: Document): Unit

def clone(): AnyRef

lazy val coref: StanfordCoreNLP

def discourse(doc: Document): Unit

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

lazy val gsf: GrammaticalStructureFactory

def hashCode(): Int

lazy val headFinder: SemanticHeadFinder

def in(s: String): String

val internStrings: Boolean

final def isInstanceOf[T0]: Boolean

def labelSemanticRoles(doc: Document): Unit

def lemmatize(doc: Document): Unit

lazy val lemmatizer: StanfordCoreNLP

val maxSentenceLength: Int

def mkCoref: StanfordCoreNLP

def mkDocument(origText: String): Document

def mkDocumentFromSentences(origSentences: Iterable[String], charactersBetweenSentences: Int = 1): Document

def mkDocumentFromTokens(sentences: Iterable[Iterable[String]], charactersBetweenSentences: Int = 1, charactersBetweenTokens: Int = 1): Document

def mkGSF: GrammaticalStructureFactory

def mkLemmatizer: StanfordCoreNLP

def mkLexicalizedParser: LexicalizedParser

def mkNer: StanfordCoreNLP

def mkPosTagger: StanfordCoreNLP

def mkSentence(annotation: CoreMap): Sentence

def mkTokenizerWithSentenceSplitting: StanfordCoreNLP

def mkTokenizerWithoutSentenceSplitting: StanfordCoreNLP

def namedEntitySanityCheck(doc: Document): Option[Annotation]

final def ne(arg0: AnyRef): Boolean

lazy val ner: StanfordCoreNLP

final def notify(): Unit

final def notifyAll(): Unit

def parse(doc: Document): Unit

lazy val posTagger: StanfordCoreNLP

lazy val postProcessor: BioNLPTokenizerPostProcessor

def postprocessTags(annotation: Annotation): Unit

def postprocessTokens(originalTokens: Array[CoreLabel]): Array[CoreLabel]

def preprocessText(origText: String): String

def recognizeNamedEntities(doc: Document): Unit

def removeFigTabRefs(pattern: Pattern, text: String): String

def resolveCoreference(doc: Document): Unit

lazy val rstConstituentParser: RSTParser

def runBioNer(sentence: Sentence): Array[String]

def stanfordParse(sentence: CoreMap): Tree

lazy val stanfordParser: LexicalizedParser

final def synchronized[T0](arg0: ⇒ T0): T0

def tagPartsOfSpeech(doc: Document): Unit

def toDirectedGraph(sa: CoreMap): DirectedGraph[String]

def toString(): String

lazy val tokenizerWithSentenceSplitting: StanfordCoreNLP

lazy val tokenizerWithoutSentenceSplitting: StanfordCoreNLP

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

val withDiscourse: Boolean

Inherited from CoreNLPProcessor