|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object gate.util.AbstractFeatureBearer gate.creole.AbstractResource gate.creole.AbstractProcessingResource gate.creole.AbstractLanguageAnalyser gate.creole.splitter.RegexSentenceSplitter
public class RegexSentenceSplitter
A fast sentence splitter replacement based on regular expressions.
Nested Class Summary |
---|
Nested classes/interfaces inherited from class gate.creole.AbstractProcessingResource |
---|
AbstractProcessingResource.InternalStatusListener, AbstractProcessingResource.IntervalProgressListener |
Field Summary | |
---|---|
protected String |
encoding
Encoding used when reading config files |
protected URL |
externalSplitListURL
URL pointing to a file with regex patterns for external sentence splits. |
protected Pattern |
externalSplitsPattern
|
protected URL |
internalSplitListURL
URL pointing to a file with regex patterns for internal sentence splits. |
protected Pattern |
internalSplitsPattern
|
protected URL |
nonSplitListURL
URL pointing to a file with regex patterns for non sentence splits. |
protected Pattern |
nonSplitsPattern
|
protected String |
outputASName
Output annotation set name. |
static String |
SPLIT_DOCUMENT_PARAMETER_NAME
Parameter name |
static String |
SPLIT_ENCODING_PARAMETER_NAME
Parameter name |
static String |
SPLIT_INPUT_AS_PARAMETER_NAME
Parameter name |
static String |
SPLIT_NON_SPLIT_LIST_PARAMETER_NAME
Parameter name |
static String |
SPLIT_OUTPUT_AS_PARAMETER_NAME
Parameter name |
static String |
SPLIT_SPLIT_LIST_PARAMETER_NAME
Parameter name |
Fields inherited from class gate.creole.AbstractLanguageAnalyser |
---|
corpus, document |
Fields inherited from class gate.creole.AbstractProcessingResource |
---|
interrupted |
Fields inherited from class gate.creole.AbstractResource |
---|
name |
Fields inherited from class gate.util.AbstractFeatureBearer |
---|
features |
Constructor Summary | |
---|---|
RegexSentenceSplitter()
|
Method Summary | |
---|---|
protected Pattern |
compilePattern(URL paternsListUrl,
String encoding)
|
void |
execute()
Run the resource. |
String |
getEncoding()
|
URL |
getExternalSplitListURL()
|
URL |
getInternalSplitListURL()
|
Pattern |
getInternalSplitsPattern()
|
URL |
getNonSplitListURL()
|
String |
getOutputASName()
|
Resource |
init()
Initialise this resource, and return it. |
void |
setEncoding(String encoding)
|
void |
setExternalSplitListURL(URL externalSplitListURL)
|
void |
setInternalSplitListURL(URL internalSplitListURL)
|
void |
setInternalSplitsPattern(Pattern internalSplitsPattern)
|
void |
setNonSplitListURL(URL nonSplitListURL)
|
void |
setOutputASName(String outputASName)
|
Methods inherited from class gate.creole.AbstractLanguageAnalyser |
---|
getCorpus, getDocument, setCorpus, setDocument |
Methods inherited from class gate.creole.AbstractProcessingResource |
---|
addProgressListener, addStatusListener, cleanup, fireProcessFinished, fireProgressChanged, fireStatusChanged, getRuntimeParameterValues, getRuntimeParameterValues, interrupt, isInterrupted, reInit, removeProgressListener, removeStatusListener |
Methods inherited from class gate.creole.AbstractResource |
---|
checkParameterValues, getBeanInfo, getInitParameterValues, getInitParameterValues, getName, getParameterValue, getParameterValue, getParameterValues, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners |
Methods inherited from class gate.util.AbstractFeatureBearer |
---|
getFeatures, setFeatures |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Methods inherited from interface gate.ProcessingResource |
---|
reInit |
Methods inherited from interface gate.Resource |
---|
cleanup, getParameterValue, setParameterValue, setParameterValues |
Methods inherited from interface gate.util.FeatureBearer |
---|
getFeatures, setFeatures |
Methods inherited from interface gate.util.NameBearer |
---|
getName, setName |
Methods inherited from interface gate.Executable |
---|
interrupt, isInterrupted |
Field Detail |
---|
public static final String SPLIT_DOCUMENT_PARAMETER_NAME
public static final String SPLIT_INPUT_AS_PARAMETER_NAME
public static final String SPLIT_OUTPUT_AS_PARAMETER_NAME
public static final String SPLIT_ENCODING_PARAMETER_NAME
public static final String SPLIT_SPLIT_LIST_PARAMETER_NAME
public static final String SPLIT_NON_SPLIT_LIST_PARAMETER_NAME
protected String outputASName
protected String encoding
protected URL internalSplitListURL
protected URL externalSplitListURL
protected URL nonSplitListURL
protected Pattern internalSplitsPattern
protected Pattern externalSplitsPattern
protected Pattern nonSplitsPattern
Constructor Detail |
---|
public RegexSentenceSplitter()
Method Detail |
---|
protected Pattern compilePattern(URL paternsListUrl, String encoding) throws UnsupportedEncodingException, IOException
UnsupportedEncodingException
IOException
public void execute() throws ExecutionException
AbstractProcessingResource
execute
in interface Executable
execute
in class AbstractProcessingResource
ExecutionException
public Resource init() throws ResourceInstantiationException
AbstractProcessingResource
init
in interface Resource
init
in class AbstractProcessingResource
ResourceInstantiationException
public String getOutputASName()
public void setOutputASName(String outputASName)
outputASName
- the outputASName to set
public String getEncoding()
public void setEncoding(String encoding)
encoding
- the encoding to set
public URL getInternalSplitListURL()
public void setInternalSplitListURL(URL internalSplitListURL)
internalSplitListURL
- the internalSplitListURL to set
public URL getExternalSplitListURL()
public void setExternalSplitListURL(URL externalSplitListURL)
externalSplitListURL
- the externalSplitListURL to set
public URL getNonSplitListURL()
public void setNonSplitListURL(URL nonSplitListURL)
nonSplitListURL
- the nonSplitListURL to set
public Pattern getInternalSplitsPattern()
public void setInternalSplitsPattern(Pattern internalSplitsPattern)
internalSplitsPattern
- the internalSplitsPattern to set
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |