public class BasicAnnotationOrthography extends Object implements AnnotationOrthography
Modifier and Type | Field and Description |
---|---|
protected static org.apache.log4j.Logger | log |
Constructor and Description |
---|
BasicAnnotationOrthography(String personType, boolean extLists, String unknownType, URL nicknameFile, Double minimumNicknameLikelihood, String encoding) |
Modifier and Type | Method and Description |
---|---|
boolean | allNonStopTokensInOtherAnnot(List<Annotation> firstName, List<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME, boolean caseSensitive) |
Set<String> | buildTables(AnnotationSet nameAllAnnots) — Tables for name-match information (used by the name-match rules). |
boolean | fuzzyMatch(String s1, String s2) |
String | getStringForAnnotation(Annotation a, Document d) — Returns the normalized content of an annotation, with extra white space removed. |
protected Map<String,Set<String>> | initNicknames(String nicknameFileEncoding, URL fileURL) |
boolean | isUnknownGender(String gender) |
boolean | matchedAlready(Annotation annot1, Annotation annot2, List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots) |
String | stripPersonTitle(String annotString, Annotation annot, Document doc, Map<Integer,List<Annotation>> tokensMap, Map<Integer,List<Annotation>> normalizedTokensMap, AnnotationSet nameAllAnnots) — Returns a person name without its title. |
void | updateMatches(Annotation newAnnot, Annotation prevAnnot, List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots) |
Annotation | updateMatches(Annotation newAnnot, String annotString, Map<Integer,String> processedAnnots, AnnotationSet nameAllAnnots, List<List<Integer>> matchesDocFeature) |
protected static final org.apache.log4j.Logger log
public String getStringForAnnotation(Annotation a, Document d) throws ExecutionException
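Description copied from interface: AnnotationOrthography
Returns the normalized content of an annotation, with extra white space removed.
Specified by: getStringForAnnotation in interface AnnotationOrthography
Throws: ExecutionException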
public boolean fuzzyMatch(String s1, String s2)
Specified by: fuzzyMatch in interface AnnotationOrthography
public boolean allNonStopTokensInOtherAnnot(List<Annotation> firstName, List<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME, boolean caseSensitive)
Specified by: allNonStopTokensInOtherAnnot in interface AnnotationOrthography
public String stripPersonTitle(String annotString, Annotation annot, Document doc, Map<Integer,List<Annotation>> tokensMap, Map<Integer,List<Annotation>> normalizedTokensMap, AnnotationSet nameAllAnnots) throws ExecutionException
Specified by: stripPersonTitle in interface AnnotationOrthography
Throws: ExecutionException
public boolean matchedAlready(Annotation annot1, Annotation annot2, List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots)
Specified by: matchedAlready in interface AnnotationOrthography
public Annotation updateMatches(Annotation newAnnot, String annotString, Map<Integer,String> processedAnnots, AnnotationSet nameAllAnnots, List<List<Integer>> matchesDocFeature)
Specified by: updateMatches in interface AnnotationOrthography
public void updateMatches(Annotation newAnnot, Annotation prevAnnot, List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots)
Specified by: updateMatches in interface AnnotationOrthography
public Set<String> buildTables(AnnotationSet nameAllAnnots)
Specified by: buildTables in interface AnnotationOrthography
public boolean isUnknownGender(String gender)
Specified by: isUnknownGender in interface AnnotationOrthography
protected Map<String,Set<String>> initNicknames(String nicknameFileEncoding, URL fileURL) throws IOException
Throws: IOException