public class TermUtils
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static java.util.Comparator<Term> |
frequencyComparator
Most frequent first
|
Constructor and Description |
---|
TermUtils() |
Modifier and Type | Method and Description |
---|---|
static java.lang.String |
collapseText(java.lang.String coveredText) |
static Term |
findBiggestPrefix(TermIndex termIndex,
java.util.List<TermWord> words)
|
static Term |
findBiggestSuffix(TermIndex termIndex,
java.util.List<TermWord> words)
|
static TermFormGetter |
formGetter(TermIndex termIndex,
boolean downcaseForms) |
static Term |
getExtensionAffix(TermIndex termIndex,
Term base,
Term extension)
Finds in a
TermIndex the biggest extension affix term of a term depending
on a base term. |
static double |
getExtensionGain(Term extension,
Term extensionAffix) |
static int |
getGeneralFrequency(Lang l,
Term t) |
static int |
getPosition(Term subTerm,
Term term)
Finds the index of appearance of a term's sub-term.
|
static java.util.List<Term> |
getSingleWordTerms(TermIndex termIndex,
Term term)
Finds in an input term all single-word terms it is made of.
|
static double |
getStrictness(Term t1,
Term t2)
Returns the strictness of t1 based on t2, i.e. the ratio of occurrences
of t1 that do not overlap with an occurrence of t2.
|
static boolean |
isIncludedIn(Term term,
Term inTerm) |
static boolean |
isPrefixOf(Term term,
Term ofTerm) |
static boolean |
isSuffixOf(Term term,
Term ofTerm) |
static void |
showCompounds(TermIndex index,
java.io.PrintStream out,
int threshhold) |
static void |
showContextVector(ContextVector contextVector,
int topN) |
static void |
showIndex(TermIndex index,
java.io.PrintStream stream) |
static void |
showIndex(TermIndex index,
java.io.PrintStream stream,
com.google.common.base.Optional<java.util.regex.Pattern> watchExpression) |
static void |
showTopNTermsBy(TermIndex index,
TermMeasure measure,
java.io.PrintStream out,
int n) |
static java.util.List<java.util.Set<Component>> |
toComponentSets(java.lang.Iterable<Word> words)
Transforms a term into a list of component sets.
|
static java.lang.String |
toGroupingKey(TermWord termWord) |
public static java.util.Comparator<Term> frequencyComparator
public static TermFormGetter formGetter(TermIndex termIndex, boolean downcaseForms)
public static void showIndex(TermIndex index, java.io.PrintStream stream)
public static void showIndex(TermIndex index, java.io.PrintStream stream, com.google.common.base.Optional<java.util.regex.Pattern> watchExpression)
public static void showTopNTermsBy(TermIndex index, TermMeasure measure, java.io.PrintStream out, int n)
public static void showCompounds(TermIndex index, java.io.PrintStream out, int threshhold)
public static java.util.List<Term> getSingleWordTerms(TermIndex termIndex, Term term)
TermIndexes.SINGLE_WORD_LEMMA.
Parameters:
termIndex - The TermIndex in which single word terms must be found.
term - The input term.
See Also:
Term#asComponentIterator(boolean)
public static java.lang.String collapseText(java.lang.String coveredText)
public static void showContextVector(ContextVector contextVector, int topN)
public static double getStrictness(Term t1, Term t2)
Parameters:
t1 - the term to analyze
t2 - the base term
public static Term getExtensionAffix(TermIndex termIndex, Term base, Term extension)
Finds in a TermIndex the biggest extension affix term of a term depending
on a base term.
For example, the term "offshore wind turbine" is an extension of
"wind turbine". The extension affix is the term "offshore".
Parameters:
termIndex - The term index that both terms belong to.
base - The base term
extension - The extension term
Returns:
the extension affix found in termIndex, null if none has been found.
Throws:
java.lang.IllegalArgumentException - if extension is not an extension of the term base.
public static Term findBiggestPrefix(TermIndex termIndex, java.util.List<TermWord> words)
public static Term findBiggestSuffix(TermIndex termIndex, java.util.List<TermWord> words)
public static int getPosition(Term subTerm, Term term)
Parameters:
subTerm - the inner term, must be included in term
term - the container term.
Returns:
the index of subTerm in term. -1 otherwise.
public static int getGeneralFrequency(Lang l, Term t)
Parameters:
l -
t -
public static java.lang.String toGroupingKey(TermWord termWord)