gate.util
Class CorpusBenchmarkTool

java.lang.Object
  extended by gate.util.CorpusBenchmarkTool

public class CorpusBenchmarkTool
extends Object


Constructor Summary
CorpusBenchmarkTool()
           
 
Method Summary
protected  String avgPrint(double value, int count)
           
protected  void calculateAvgTotal()
           
protected  int countWords(Document annotDoc)
          Count all Token.kind=word annotations in the document
protected  void evaluateAllThree(Document persDoc, Document cleanDoc, Document markedDoc, File errDir)
           
protected  void evaluateCorpus(File fileDir, File processedDir, File markedDir, File errorDir)
           
protected  void evaluateDocuments(Document persDoc, Document cleanDoc, Document markedDoc, File errDir)
           
protected  void evaluateMarkedClean(File markedDir, File cleanDir, File errDir)
           
protected  void evaluateMarkedStored(File markedDir, File storedDir, File errDir)
           
protected  void evaluateTwoDocs(Document keyDoc, Document respDoc, File errDir)
           
 void execute()
           
 void execute(File dir)
           
protected  void generateCorpus(File fileDir, File outputDir)
           
 Set getDiffFeaturesList()
           
 double getFMeasureAverage()
           
 double getFmeasureAverageCalc()
           
 double getFMeasureAverageProc()
           
 boolean getGenerateMode()
           
 boolean getMarkedClean()
           
 boolean getMarkedDS()
           
 boolean getMarkedStored()
           
 boolean getMoreInfo()
           
 double getPrecisionAverage()
          Returns the average precision over the entire set of processed documents.
 double getPrecisionAverageCalc()
           
 double getPrecisionAverageProc()
          Returns the average precision for processed documents.
 double getRecallAverage()
          Returns the average recall over the entire set of processed documents.
 double getRecallAverageCalc()
           
 double getRecallAverageProc()
           
 File getStartDirectory()
           
 double getThreshold()
           
 boolean getVerboseMode()
           
 void init()
           
 void initPRs()
           
 boolean isGenerateMode()
           
static void main(String[] args)
           
protected  AnnotationDiffer measureDocs(Document keyDoc, Document respDoc, String annotType)
           
protected  void printAnnotations(AnnotationDiffer annotDiff, Document keyDoc, Document respDoc)
           
protected  void printAnnotations(Set set, Document doc)
           
 void printStatistics()
           
protected  void printStatsForType(String annotType)
           
protected  void printTableHeader()
           
protected  void processDocument(Document doc)
           
 void setApplicationFile(File newAppFile)
           
 void setDiffFeaturesList(Set features)
           
 void setGenerateMode(boolean mode)
           
 void setMarkedClean(boolean mode)
           
 void setMarkedDS(boolean mode)
           
 void setMarkedStored(boolean mode)
           
 void setMoreInfo(boolean mode)
           
 void setStartDirectory(File dir)
           
 void setThreshold(double newValue)
           
 void setVerboseMode(boolean mode)
           
protected  void storeAnnotations(String type, AnnotationDiffer annotDiffer, Document keyDoc, Document respDoc, Writer errFileWriter)
           
protected  void storeAnnotations(String type, Set set, Document doc, Writer file)
           
 void unloadPRs()
           
protected  void updateStatistics(AnnotationDiffer annotDiffer, String annotType)
           
protected  void updateStatisticsProc(AnnotationDiffer annotDiffer, String annotType)
          Updates statistics for processed documents. This is the same procedure as updateStatistics, but it uses different hash tables.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

CorpusBenchmarkTool

public CorpusBenchmarkTool()
Method Detail

initPRs

public void initPRs()

unloadPRs

public void unloadPRs()

execute

public void execute()

init

public void init()

execute

public void execute(File dir)

main

public static void main(String[] args)
                 throws GateException
Throws:
GateException

setGenerateMode

public void setGenerateMode(boolean mode)

getGenerateMode

public boolean getGenerateMode()

getVerboseMode

public boolean getVerboseMode()

setVerboseMode

public void setVerboseMode(boolean mode)

setMoreInfo

public void setMoreInfo(boolean mode)

getMoreInfo

public boolean getMoreInfo()

setDiffFeaturesList

public void setDiffFeaturesList(Set features)

getDiffFeaturesList

public Set getDiffFeaturesList()

setMarkedStored

public void setMarkedStored(boolean mode)

getMarkedStored

public boolean getMarkedStored()

setMarkedClean

public void setMarkedClean(boolean mode)

getMarkedClean

public boolean getMarkedClean()

setMarkedDS

public void setMarkedDS(boolean mode)

getMarkedDS

public boolean getMarkedDS()

setApplicationFile

public void setApplicationFile(File newAppFile)

getPrecisionAverage

public double getPrecisionAverage()
Returns the average precision over the entire set of processed documents.

If the tool has been evaluating the original documents against the previously-stored automatically annotated ones, then the precision will be the average precision on those two sets.

If the tool was run in -marked mode, i.e., was evaluating the stored automatically processed ones against the human-annotated ones, then the precision will be the average precision on those two sets of documents.


getRecallAverage

public double getRecallAverage()
Returns the average recall over the entire set of processed documents.

If the tool has been evaluating the original documents against the previously-stored automatically annotated ones, then the recall will be the average recall on those two sets.

If the tool was run in -marked mode, i.e., was evaluating the stored automatically processed ones against the human-annotated ones, then the recall will be the average recall on those two sets of documents.


getFMeasureAverage

public double getFMeasureAverage()

getPrecisionAverageProc

public double getPrecisionAverageProc()
Returns the average precision for processed documents.


getRecallAverageProc

public double getRecallAverageProc()

getFMeasureAverageProc

public double getFMeasureAverageProc()

isGenerateMode

public boolean isGenerateMode()

getThreshold

public double getThreshold()

setThreshold

public void setThreshold(double newValue)

getStartDirectory

public File getStartDirectory()

setStartDirectory

public void setStartDirectory(File dir)

generateCorpus

protected void generateCorpus(File fileDir,
                              File outputDir)

evaluateCorpus

protected void evaluateCorpus(File fileDir,
                              File processedDir,
                              File markedDir,
                              File errorDir)

evaluateMarkedStored

protected void evaluateMarkedStored(File markedDir,
                                    File storedDir,
                                    File errDir)

evaluateMarkedClean

protected void evaluateMarkedClean(File markedDir,
                                   File cleanDir,
                                   File errDir)

processDocument

protected void processDocument(Document doc)

evaluateDocuments

protected void evaluateDocuments(Document persDoc,
                                 Document cleanDoc,
                                 Document markedDoc,
                                 File errDir)
                          throws ResourceInstantiationException
Throws:
ResourceInstantiationException

countWords

protected int countWords(Document annotDoc)
Count all Token.kind=word annotations in the document


evaluateAllThree

protected void evaluateAllThree(Document persDoc,
                                Document cleanDoc,
                                Document markedDoc,
                                File errDir)
                         throws ResourceInstantiationException
Throws:
ResourceInstantiationException

evaluateTwoDocs

protected void evaluateTwoDocs(Document keyDoc,
                               Document respDoc,
                               File errDir)
                        throws ResourceInstantiationException
Throws:
ResourceInstantiationException

printTableHeader

protected void printTableHeader()

updateStatistics

protected void updateStatistics(AnnotationDiffer annotDiffer,
                                String annotType)

updateStatisticsProc

protected void updateStatisticsProc(AnnotationDiffer annotDiffer,
                                    String annotType)
Updates statistics for processed documents. This is the same procedure as updateStatistics, but it uses different hash tables.


printStatistics

public void printStatistics()

printStatsForType

protected void printStatsForType(String annotType)

avgPrint

protected String avgPrint(double value,
                          int count)

getPrecisionAverageCalc

public double getPrecisionAverageCalc()

getRecallAverageCalc

public double getRecallAverageCalc()

getFmeasureAverageCalc

public double getFmeasureAverageCalc()

calculateAvgTotal

protected void calculateAvgTotal()

measureDocs

protected AnnotationDiffer measureDocs(Document keyDoc,
                                       Document respDoc,
                                       String annotType)
                                throws ResourceInstantiationException
Throws:
ResourceInstantiationException

storeAnnotations

protected void storeAnnotations(String type,
                                AnnotationDiffer annotDiffer,
                                Document keyDoc,
                                Document respDoc,
                                Writer errFileWriter)

storeAnnotations

protected void storeAnnotations(String type,
                                Set set,
                                Document doc,
                                Writer file)
                         throws IOException
Throws:
IOException

printAnnotations

protected void printAnnotations(AnnotationDiffer annotDiff,
                                Document keyDoc,
                                Document respDoc)

printAnnotations

protected void printAnnotations(Set set,
                                Document doc)