public class TXTCorpus extends FileSystemCorpus implements TextualCorpus
Modifier and Type | Field and Description |
---|---|
static java.lang.String | TXT_EXTENSION |
static java.lang.String | TXT_PATTERN |
Constructor and Description |
---|
TXTCorpus(Lang lang, java.nio.file.Path rootDirectory) |
Modifier and Type | Method and Description |
---|---|
java.lang.String | cleanRawText(java.lang.String rawText) |
java.util.stream.Stream<Document> | documents() |
java.lang.String | readDocumentText(Document doc) |
Methods inherited from class FileSystemCorpus: getEncoding, getExtension, getLang, getPattern, getRootDirectory, pathWalker, readFileContent, setEncoding, setExtension, setLang, setPattern, setRootDirectory, toString
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
Methods inherited from interface TextualCorpus: getLang
public static final java.lang.String TXT_PATTERN
public static final java.lang.String TXT_EXTENSION
public TXTCorpus(Lang lang, java.nio.file.Path rootDirectory)
public java.util.stream.Stream<Document> documents()
Specified by: documents in interface TextualCorpus
public java.lang.String readDocumentText(Document doc)
Specified by: readDocumentText in interface TextualCorpus
public java.lang.String cleanRawText(java.lang.String rawText)