trees

Type Members

case class AnnotatedLabel(label: String, headTag: Option[String] = None, parents: IndexedSeq[String] = IndexedSeq.empty, siblings: IndexedSeq[Either[String, String]] = IndexedSeq.empty, features: Set[Annotation] = Set.empty, index: Int = 1) extends Feature with CachedHashCode with Product with Serializable

The standard label used in the parser (used to be String).
Useful for Klein-and-Manning style annotated labels and other explicit-annotation strategies.

Annotations
@SerialVersionUID()
trait Annotation extends Serializable

Something we can throw in an AnnotatedLabel.

Annotations
@SerialVersionUID()
sealed trait BinarizedTree[+L] extends Tree[L]
final case class BinaryRule[+L](parent: L, left: L, right: L) extends Rule[L] with Product with Serializable

Annotations
@SerialVersionUID()
case class BinaryTree[+L](label: L, leftChild: BinarizedTree[L], rightChild: BinarizedTree[L], span: Span) extends BinarizedTree[L] with Product with Serializable
trait Debinarizer[L] extends (BinarizedTree[L]) ⇒ Tree[L] with Serializable

Class that turns BinarizedTrees into normal trees. Should replace unary chains in addition to removing intermediates.
case class DependencyTree[+L, +W](dependencies: IndexedSeq[(L, Int)], words: IndexedSeq[W]) extends Product with Serializable

The root index is words.length.
case class FunctionalTag(tag: String) extends Annotation with Product with Serializable
case class HeadDB[B](symbolArityHeadChildCounts: Counter2[(B, Int), Int, Int], ruleHeadChildCounts: Counter2[(B, Seq[B]), Int, Int], defaultToLeft: Boolean = true) extends Product with Serializable
trait HeadFinder[L] extends AnyRef
case class HeadRule[L](dir: Dir, dis: Boolean, heads: Seq[L]) extends Product with Serializable

Based on Aria's comments:
Basically, you're looking for the head label by searching in Dir for each parent -> rule expansion.
Dir is whether to look left to right or right to left. Dis determines whether you are looking for the first match of any of the categories, or for any match of the first category, then the second, and so on.
trait HeadRules[L] extends Serializable

Annotations
@SerialVersionUID()
final case class LexicalProduction[+L, +W](parent: L, word: W) extends Production[L, W] with Product with Serializable
case class NaryTree[L](label: L, children: IndexedSeq[Tree[L]], span: Span) extends Tree[L] with Product with Serializable
case class NullRule[+L](parent: L) extends Production[L, Nothing] with Product with Serializable
case class NullaryTree[+L](label: L, span: Span) extends BinarizedTree[L] with Product with Serializable
case class PartialTreeProcessor() extends Product with Serializable
class PennTreeReader extends Iterator[(Tree[String], IndexedSeq[String])]

PennTreeReader due to Adam Pauls.
This reader returns empty categories as leaves of the tree below the -NONE-. These leaves span 0 words.
For example, (TOP (S-IMP (NP-SBJ (-NONE- *PRO*)) (VP (VB Look) (PP-DIR (IN at) (NP (DT that)))) (. /.)))
will return (TOP[0:4] (S-IMP[0:4] (NP-SBJ[0:0] (-NONE-[0:0] (*PRO*[0:0]))) (VP[0:4]...)
case class ProcessedTreebank(path: File, maxLength: Int = 10000, includeDevInTrain: Boolean = false, binarization: String = "head", treebankType: String = "penn", numSentences: Int = Int.MaxValue, keepUnaryChainsFromTrain: Boolean = true, debuckwalterize: Boolean = false, supervisedHeadFinderPtbPath: String = "", supervisedHeadFinderConllPath: String = "") extends Product with Serializable

Represents a treebank with attendant spans, binarization, etc. Used in all the parser trainers.

Annotations
@Help()
sealed trait Production[+L, +W] extends Feature
sealed trait Rule[+L] extends Production[L, Nothing]
class RuleBasedHeadFinder[L] extends HeadFinder[L] with Serializable

Can annotate a tree with the head word. Usually you should just use HeadFinder.collinsHeadFinder.

Annotations
@SerialVersionUID()
class SimpleTreebank extends Treebank[String]

A SimpleTreebank can be easily specified by paths to the trees in Penn treebank format
final class Span extends AnyVal with Serializable
case class StandardTreeProcessor(headFinder: HeadFinder[AnnotatedLabel] = HeadFinder.collins, removeTraces: Boolean = true) extends (Tree[AnnotatedLabel]) ⇒ BinarizedTree[AnnotatedLabel] with Product with Serializable
class SubsampledTreebank extends Treebank[String]

A Treebank that uses only a small number of training and test sentences.
class SupervisedHeadFinder[L] extends HeadFinder[L]
trait SupervisedHeadFinderInnards[L, B] extends Serializable
class TraceRemover[T, W] extends (Tree[T]) ⇒ Tree[T]

Removes all traces from the word sequence, deleting all empty categories while it's at it.
class TraceToSlashCategoryConverter extends (Tree[AnnotatedLabel]) ⇒ Tree[AnnotatedLabel]

Removes traces from the word sequence, and makes the tree have empty spans
trait Tree[+L] extends Serializable

Annotations
@SerialVersionUID()
case class TreeInstance[L, +W](id: String, tree: BinarizedTree[L], words: IndexedSeq[W]) extends Example[BinarizedTree[L], IndexedSeq[W]] with Product with Serializable
trait Treebank[L] extends AnyRef

A Treebank contains a train set, a test set, and a dev set, which are "Portions". Portions are made up of sections, which have the trees.
final case class UnaryRule[+L](parent: L, child: L, chain: IndexedSeq[String]) extends Rule[L] with Product with Serializable

Annotations
@SerialVersionUID()
case class UnaryTree[+L](label: L, child: BinarizedTree[L], chain: IndexedSeq[String], span: Span) extends BinarizedTree[L] with Product with Serializable

Value Members

object AnnotatedLabel extends Serializable
object BinaryRule extends Serializable
object Debinarizer extends Serializable
object DependencyTree extends Serializable
object HeadFinder

Implements head finding as in the Collins parser. You can use HeadFinder.left[L] or HeadFinder.right[L] if you do not want to use any head rules.
Based on Aria's code.
object HeadRules extends Serializable
object SimpleTreebank
object Span extends Serializable
object StandardTreeProcessor extends Serializable
object SupervisedHeadFinder
object SupervisedHeadFinderInnards extends Serializable
object TraceToSlashCategoryConverter
object Tree extends Serializable
object Treebank
object Trees
object TstTreebank
object UnaryChainCollapser

Removes unary chains A -> B -> ... -> C, replacing them with A -> C and modifying the tree to know about the unaries.
object UnaryRule extends Serializable
package annotations
package util

package trees

Type Members

trait Annotation extends Serializable

sealed trait BinarizedTree[+L] extends Tree[L]

final case class BinaryRule[+L](parent: L, left: L, right: L) extends Rule[L] with Product with Serializable

case class BinaryTree[+L](label: L, leftChild: BinarizedTree[L], rightChild: BinarizedTree[L], span: Span) extends BinarizedTree[L] with Product with Serializable

trait Debinarizer[L] extends (BinarizedTree[L]) ⇒ Tree[L] with Serializable

case class DependencyTree[+L, +W](dependencies: IndexedSeq[(L, Int)], words: IndexedSeq[W]) extends Product with Serializable

case class FunctionalTag(tag: String) extends Annotation with Product with Serializable

case class HeadDB[B](symbolArityHeadChildCounts: Counter2[(B, Int), Int, Int], ruleHeadChildCounts: Counter2[(B, Seq[B]), Int, Int], defaultToLeft: Boolean = true) extends Product with Serializable

trait HeadFinder[L] extends AnyRef

case class HeadRule[L](dir: Dir, dis: Boolean, heads: Seq[L]) extends Product with Serializable

trait HeadRules[L] extends Serializable

final case class LexicalProduction[+L, +W](parent: L, word: W) extends Production[L, W] with Product with Serializable

case class NaryTree[L](label: L, children: IndexedSeq[Tree[L]], span: Span) extends Tree[L] with Product with Serializable

case class NullRule[+L](parent: L) extends Production[L, Nothing] with Product with Serializable

case class NullaryTree[+L](label: L, span: Span) extends BinarizedTree[L] with Product with Serializable

case class PartialTreeProcessor() extends Product with Serializable

class PennTreeReader extends Iterator[(Tree[String], IndexedSeq[String])]

sealed trait Production[+L, +W] extends Feature

sealed trait Rule[+L] extends Production[L, Nothing]

class RuleBasedHeadFinder[L] extends HeadFinder[L] with Serializable

class SimpleTreebank extends Treebank[String]

final class Span extends AnyVal with Serializable

case class StandardTreeProcessor(headFinder: HeadFinder[AnnotatedLabel] = HeadFinder.collins, removeTraces: Boolean = true) extends (Tree[AnnotatedLabel]) ⇒ BinarizedTree[AnnotatedLabel] with Product with Serializable

class SubsampledTreebank extends Treebank[String]

class SupervisedHeadFinder[L] extends HeadFinder[L]

trait SupervisedHeadFinderInnards[L, B] extends Serializable

class TraceRemover[T, W] extends (Tree[T]) ⇒ Tree[T]

class TraceToSlashCategoryConverter extends (Tree[AnnotatedLabel]) ⇒ Tree[AnnotatedLabel]

trait Tree[+L] extends Serializable

case class TreeInstance[L, +W](id: String, tree: BinarizedTree[L], words: IndexedSeq[W]) extends Example[BinarizedTree[L], IndexedSeq[W]] with Product with Serializable

trait Treebank[L] extends AnyRef

final case class UnaryRule[+L](parent: L, child: L, chain: IndexedSeq[String]) extends Rule[L] with Product with Serializable

case class UnaryTree[+L](label: L, child: BinarizedTree[L], chain: IndexedSeq[String], span: Span) extends BinarizedTree[L] with Product with Serializable

Value Members

object AnnotatedLabel extends Serializable

object BinaryRule extends Serializable

object Debinarizer extends Serializable

object DependencyTree extends Serializable

object HeadFinder

object HeadRules extends Serializable

object SimpleTreebank

object Span extends Serializable

object StandardTreeProcessor extends Serializable

object SupervisedHeadFinder

object SupervisedHeadFinderInnards extends Serializable

object TraceToSlashCategoryConverter

object Tree extends Serializable

object Treebank

object Trees

object TstTreebank

object UnaryChainCollapser

object UnaryRule extends Serializable

package annotations

package util

Ungrouped