-
- All Implemented Interfaces:
public class Textricator
Textricator. This is the primary starting point.
-
-
Field Summary
Fields
Modifier and Type | Field | Description
private final String
DEFAULT_PDF_PARSER
private final String
TEXT_OUTPUT_FORMAT_CSV
private final String
TEXT_OUTPUT_FORMAT_JSON
private final String
RECORD_OUTPUT_FORMAT_CSV
private final String
RECORD_OUTPUT_FORMAT_JSON
private final String
RECORD_OUTPUT_FORMAT_JSON_FLAT
private final String
RECORD_OUTPUT_FORMAT_NULL
public final static Textricator
INSTANCE
-
Method Summary
-
-
Method Detail
-
extractText
final static Unit extractText(InputStream input, String inputFormat, OutputStream output, String outputFormat, Function1<Integer, Boolean> pageFilter, TextExtractorOptions textExtractorOptions, Float maxRowDistance)
-
extractText
final static Unit extractText(TextExtractor extractor, TextOutput output, Float maxRowDistance, Function1<Integer, Boolean> pageFilter)
-
extractText
final static Sequence<Text> extractText(TextExtractor extractor, Float maxRowDistance, Function1<Integer, Boolean> pageFilter)
-
parseForm
final static Unit parseForm(File inputFile, File outputFile, FormParseConfig config)
-
parseForm
final static Unit parseForm(InputStream input, String inputFormat, OutputStream output, String outputFormat, FormParseConfig config, FormParseEventListener eventListener)
-
parseForm
final static Unit parseForm(TextExtractor extractor, RecordOutput recordOutput, FormParseConfig config, FormParseEventListener eventListener)
-
parseForm
final static Sequence<Record> parseForm(TextExtractor extractor, FormParseConfig config, FormParseEventListener eventListener)
-
parseTable
final static Unit parseTable(File inputFile, File outputFile, File configFile)
-
parseTable
final static Unit parseTable(InputStream input, String inputFormat, OutputStream output, String outputFormat, TableParseConfig config)
-
parseTable
final static Unit parseTable(TextExtractor extractor, RecordOutput output, TableParseConfig config)
-
parseTable
final static Sequence<Record> parseTable(TextExtractor extractor, TableParseConfig config)
-
getExtractor
final static TextExtractor getExtractor(InputStream input, String inputFormat, TextExtractorOptions options)
-
extract
final static Sequence<Text> extract(TextExtractor extractor, Function1<Integer, Boolean> pageFilter)
-
extractPages
final static Sequence<Page> extractPages(TextExtractor extractor, Function1<Integer, Boolean> pageFilter)
-
getDEFAULT_PDF_PARSER
final String getDEFAULT_PDF_PARSER()
-
getTEXT_OUTPUT_FORMAT_CSV
final String getTEXT_OUTPUT_FORMAT_CSV()
-
getTEXT_OUTPUT_FORMAT_JSON
final String getTEXT_OUTPUT_FORMAT_JSON()
-
getRECORD_OUTPUT_FORMAT_CSV
final String getRECORD_OUTPUT_FORMAT_CSV()
-
getRECORD_OUTPUT_FORMAT_JSON
final String getRECORD_OUTPUT_FORMAT_JSON()
-
getRECORD_OUTPUT_FORMAT_JSON_FLAT
final String getRECORD_OUTPUT_FORMAT_JSON_FLAT()
-
getRECORD_OUTPUT_FORMAT_NULL
final String getRECORD_OUTPUT_FORMAT_NULL()
-
-
-
-