Serialized Form
-
Package org.apache.tika.config
-
Package org.apache.tika.detect
-
Class org.apache.tika.detect.CompositeDetector extends Object implements Serializable
- serialVersionUID:
- 5980683158436430252L
-
Serialized Fields
-
detectors
List<Detector> detectors
-
registry
MediaTypeRegistry registry
-
-
Class org.apache.tika.detect.CompositeEncodingDetector extends Object implements Serializable
- serialVersionUID:
- 5980683158436430252L
-
Serialized Fields
-
detectors
List<EncodingDetector> detectors
-
-
Class org.apache.tika.detect.DefaultDetector extends CompositeDetector implements Serializable
- serialVersionUID:
- -8170114575326908027L
-
Class org.apache.tika.detect.DefaultEncodingDetector extends CompositeEncodingDetector implements Serializable
-
Class org.apache.tika.detect.DefaultProbDetector extends CompositeDetector implements Serializable
- serialVersionUID:
- -8836240060532323352L
-
Class org.apache.tika.detect.EmptyDetector extends Object implements Serializable
-
Class org.apache.tika.detect.MagicDetector extends Object implements Serializable
-
Serialized Fields
-
isRegex
boolean isRegex
True if pattern is a regular expression, false otherwise. -
isStringIgnoreCase
boolean isStringIgnoreCase
True if we're doing a case-insensitive string match, false otherwise. -
length
int length
Length of the comparison window. -
mask
byte[] mask
Bit mask that is applied to the source bytes before pattern matching. -
offsetRangeBegin
int offsetRangeBegin
First offset (inclusive) of the comparison window within the document input stream. Greater than or equal to zero. -
offsetRangeEnd
int offsetRangeEnd
Last offset (inclusive) of the comparison window within the document input stream. Greater than or equal to the first offset.
Note that this is not the offset of the last byte read from the document stream. Instead, the last window of bytes to be compared starts at this offset.
-
pattern
byte[] pattern
The magic match pattern. If this byte pattern is equal to the possibly bit-masked bytes from the input stream, then the type detection succeeds and the configured MagicDetector.type
is returned. -
patternLength
int patternLength
Length of the pattern, which in the case of regular expressions will not be the same as the comparison window length. -
type
MediaType type
The matching media type. Returned by the MagicDetector.detect(InputStream, Metadata)
method if a match is found.
-
-
-
Class org.apache.tika.detect.NameDetector extends Object implements Serializable
-
Class org.apache.tika.detect.NNExampleModelDetector extends TrainedModelDetector implements Serializable
- serialVersionUID:
- 1L
-
Class org.apache.tika.detect.NonDetectingEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
charset
Charset charset
-
-
-
Class org.apache.tika.detect.OverrideDetector extends Object implements Serializable
-
Class org.apache.tika.detect.TextDetector extends Object implements Serializable
- serialVersionUID:
- 4774601079503507765L
-
Serialized Fields
-
bytesToTest
int bytesToTest
-
-
Class org.apache.tika.detect.TrainedModelDetector extends Object implements Serializable
- serialVersionUID:
- 1L
-
Serialized Fields
-
MODEL_MAP
Map<MediaType,TrainedModel> MODEL_MAP
-
-
Class org.apache.tika.detect.TypeDetector extends Object implements Serializable
-
Class org.apache.tika.detect.ZeroSizeFileDetector extends Object implements Serializable
-
-
Package org.apache.tika.embedder
-
Class org.apache.tika.embedder.ExternalEmbedder extends Object implements Serializable
- serialVersionUID:
- -2828829275642475697L
-
Serialized Fields
-
command
String[] command
The external command to invoke. - See Also:
Runtime.exec(String[])
-
commandAppendOperator
String commandAppendOperator
-
commandAssignmentDelimeter
String commandAssignmentDelimeter
-
commandAssignmentOperator
String commandAssignmentOperator
-
metadataCommandArguments
Map<Property,String[]> metadataCommandArguments
Mapping of Tika metadata to command line parameters. -
quoteAssignmentValues
boolean quoteAssignmentValues
-
supportedEmbedTypes
Set<MediaType> supportedEmbedTypes
Media types supported by the external program. -
tmp
TemporaryResources tmp
-
-
-
Package org.apache.tika.exception
-
Class org.apache.tika.exception.AccessPermissionException extends TikaException implements Serializable
-
Class org.apache.tika.exception.CorruptedFileException extends TikaException implements Serializable
-
Class org.apache.tika.exception.EncryptedDocumentException extends TikaException implements Serializable
-
Class org.apache.tika.exception.TikaConfigException extends TikaException implements Serializable
-
Class org.apache.tika.exception.TikaException extends Exception implements Serializable
-
Class org.apache.tika.exception.TikaMemoryLimitException extends TikaException implements Serializable
-
Class org.apache.tika.exception.UnsupportedFormatException extends TikaException implements Serializable
-
Class org.apache.tika.exception.ZeroByteFileException extends TikaException implements Serializable
-
-
Package org.apache.tika.extractor
-
Class org.apache.tika.extractor.EmbeddedDocumentUtil extends Object implements Serializable
-
Serialized Fields
-
context
ParseContext context
-
detector
Detector detector
-
embeddedDocumentExtractor
EmbeddedDocumentExtractor embeddedDocumentExtractor
-
mimeTypes
MimeTypes mimeTypes
-
tikaConfig
TikaConfig tikaConfig
-
-
-
Class org.apache.tika.extractor.ParserContainerExtractor extends Object implements Serializable
- serialVersionUID:
- 2261131045580861514L
-
-
Package org.apache.tika.fork
-
Class org.apache.tika.fork.ForkParser extends AbstractParser implements Serializable
- serialVersionUID:
- -4962742892274663950L
-
Serialized Fields
-
currentlyInUse
int currentlyInUse
-
java
List<String> java
Java command line -
loader
ClassLoader loader
-
maxFilesProcessedPerClient
int maxFilesProcessedPerClient
-
parser
Parser parser
-
parserFactoryFactory
ParserFactoryFactory parserFactoryFactory
-
pool
Queue<org.apache.tika.fork.ForkClient> pool
-
poolSize
int poolSize
Process pool size -
serverParseTimeoutMillis
long serverParseTimeoutMillis
-
serverPulseMillis
long serverPulseMillis
-
serverWaitTimeoutMillis
long serverWaitTimeoutMillis
-
tikaBin
Path tikaBin
-
-
Class org.apache.tika.fork.ParserFactoryFactory extends Object implements Serializable
- serialVersionUID:
- 4710974869988895410L
-
-
Package org.apache.tika.io
-
Class org.apache.tika.io.EndianUtils.BufferUnderrunException extends TikaException implements Serializable
- serialVersionUID:
- 8358288231138076276L
-
Class org.apache.tika.io.IOExceptionWithCause extends IOException implements Serializable
- serialVersionUID:
- 1L
-
Class org.apache.tika.io.TaggedIOException extends IOExceptionWithCause implements Serializable
-
Serialized Fields
-
tag
Object tag
The object reference used to tag the exception.
-
-
-
-
Package org.apache.tika.metadata
-
Class org.apache.tika.metadata.Metadata extends Object implements Serializable
- serialVersionUID:
- 5623926545693153182L
-
Class org.apache.tika.metadata.PropertyTypeException extends IllegalArgumentException implements Serializable
-
-
Package org.apache.tika.mime
-
Class org.apache.tika.mime.MediaType extends Object implements Serializable
- serialVersionUID:
- -3831000556189036392L
-
Serialized Fields
-
parameters
Map<String,String> parameters
Immutable sorted map of media type parameters. -
semicolon
int semicolon
Location of the first ";" character separating the type part of MediaType.string
from possible parameters. Length of MediaType.string
in case there are no parameters. -
slash
int slash
Location of the "/" character separating the type and the subtype tokens in MediaType.string. -
string
String string
Canonical string representation of this media type.
-
-
Class org.apache.tika.mime.MediaTypeRegistry extends Object implements Serializable
- serialVersionUID:
- 4710974869988895410L
-
Serialized Fields
-
inheritance
Map<MediaType,MediaType> inheritance
Known type inheritance relationships. The mapping is from a media type to the closest supertype. -
registry
Map<MediaType,MediaType> registry
Registry of known media types, including type aliases. A canonical media type is handled as an identity mapping, while an alias is stored as a mapping from the alias to the corresponding canonical type.
-
-
Class org.apache.tika.mime.MimeType extends Object implements Serializable
- serialVersionUID:
- 4357830439860729201L
-
Serialized Fields
-
acronym
String acronym
The MimeType acronym -
description
String description
Description of this media type. -
extensions
List<String> extensions
All known file extensions of this type, in order of preference (best first). -
isInterpreted
boolean isInterpreted
Whether this mime-type is used for server-side scripts, and thus cannot reliably be used for filename-based type detection -
links
List<URI> links
Documentation Links -
magics
List<org.apache.tika.mime.Magic> magics
The magics associated to this Mime-Type -
minLength
int minLength
The minimum length of data to provide for magic analysis -
rootXML
List<org.apache.tika.mime.MimeType.RootXML> rootXML
The root-XML associated to this Mime-Type -
type
MediaType type
The normalized media type name. -
uti
String uti
The Uniform Type Identifier (UTI); see http://en.wikipedia.org/wiki/Uniform_Type_Identifier
-
-
Class org.apache.tika.mime.MimeTypeException extends TikaException implements Serializable
-
Class org.apache.tika.mime.MimeTypes extends Object implements Serializable
- serialVersionUID:
- -1350863170146349036L
-
Serialized Fields
-
htmlMimeType
MimeType htmlMimeType
html type, text/html -
magics
List<org.apache.tika.mime.Magic> magics
Sorted list of all registered magics -
patterns
org.apache.tika.mime.Patterns patterns
The patterns matcher -
registry
MediaTypeRegistry registry
Registered media types and their aliases. -
rootMimeType
MimeType rootMimeType
Root type, application/octet-stream. -
rootMimeTypeL
List<MimeType> rootMimeTypeL
-
textMimeType
MimeType textMimeType
Text type, text/plain. -
types
Map<MediaType,MimeType> types
All the registered MimeTypes indexed on their canonical names -
xmlMimeType
MimeType xmlMimeType
xml type, application/xml -
xmls
List<MimeType> xmls
Sorted list of all registered rootXML
-
-
Class org.apache.tika.mime.ProbabilisticMimeDetectionSelector extends Object implements Serializable
- serialVersionUID:
- 224589862960269260L
-
Serialized Fields
-
changeRate
float changeRate
-
extension_neg
float extension_neg
-
extension_trust
float extension_trust
-
magic_neg
float magic_neg
-
magic_trust
float magic_trust
-
meta_neg
float meta_neg
-
meta_trust
float meta_trust
-
mimeTypes
MimeTypes mimeTypes
-
priorExtensionFileType
float priorExtensionFileType
-
priorMagicFileType
float priorMagicFileType
-
priorMetaFileType
float priorMetaFileType
-
rootMediaType
MediaType rootMediaType
-
threshold
float threshold
-
-
-
Package org.apache.tika.parser
-
Class org.apache.tika.parser.AbstractEncodingDetectorParser extends AbstractParser implements Serializable
-
Serialized Fields
-
encodingDetector
EncodingDetector encodingDetector
-
-
-
Class org.apache.tika.parser.AbstractParser extends Object implements Serializable
- serialVersionUID:
- 7186985395903074255L
-
Class org.apache.tika.parser.AutoDetectParser extends CompositeParser implements Serializable
- serialVersionUID:
- 6110455808615143122L
-
Serialized Fields
-
detector
Detector detector
The type detector used by this parser to auto-detect the type of a document.
-
-
Class org.apache.tika.parser.CompositeParser extends AbstractParser implements Serializable
- serialVersionUID:
- 2192845797749627824L
-
Serialized Fields
-
fallback
Parser fallback
The fallback parser, used when no better parser is available. -
parsers
List<Parser> parsers
List of component parsers. -
registry
MediaTypeRegistry registry
Media type registry.
-
-
Class org.apache.tika.parser.CryptoParser extends DelegatingParser implements Serializable
- serialVersionUID:
- -3507995752666557731L
-
Class org.apache.tika.parser.DefaultParser extends CompositeParser implements Serializable
- serialVersionUID:
- 3612324825403757520L
-
Class org.apache.tika.parser.DelegatingParser extends AbstractParser implements Serializable
-
Class org.apache.tika.parser.DigestingParser extends ParserDecorator implements Serializable
-
Serialized Fields
-
digester
DigestingParser.Digester digester
-
-
-
Class org.apache.tika.parser.EmptyParser extends AbstractParser implements Serializable
- serialVersionUID:
- -4218649699095732123L
-
Class org.apache.tika.parser.ErrorParser extends AbstractParser implements Serializable
- serialVersionUID:
- 7727423956957641824L
-
Class org.apache.tika.parser.NetworkParser extends AbstractParser implements Serializable
-
Class org.apache.tika.parser.ParseContext extends Object implements Serializable
- serialVersionUID:
- -5921436862145826534L
-
Class org.apache.tika.parser.ParserDecorator extends AbstractParser implements Serializable
- serialVersionUID:
- -3861669115439125268L
-
Serialized Fields
-
parser
Parser parser
The decorated parser instance.
-
-
Class org.apache.tika.parser.ParserPostProcessor extends ParserDecorator implements Serializable
-
Class org.apache.tika.parser.RecursiveParserWrapper extends ParserDecorator implements Serializable
- serialVersionUID:
- 9086536568120690938L
-
Serialized Fields
-
catchEmbeddedExceptions
boolean catchEmbeddedExceptions
-
contentHandlerFactory
ContentHandlerFactory contentHandlerFactory
Deprecated. This should be passed in via the RecursiveParserWrapperHandler
-
lastParseState
org.apache.tika.parser.RecursiveParserWrapper.ParserState lastParseState
Deprecated. This is here only for legacy behavior; it will be removed in 2.0 and/or 1.20 -
maxEmbeddedResources
int maxEmbeddedResources
Deprecated. This is here only for legacy behavior; it will be removed in 2.0 and/or 1.20. Set this on the RecursiveParserWrapperHandler instead
-
-
-
Package org.apache.tika.parser.external
-
Class org.apache.tika.parser.external.CompositeExternalParser extends CompositeParser implements Serializable
- serialVersionUID:
- 6962436916649024024L
-
Class org.apache.tika.parser.external.ExternalParser extends AbstractParser implements Serializable
- serialVersionUID:
- -1079128990650687037L
-
Serialized Fields
-
command
String[] command
The external command to invoke. - See Also:
Runtime.exec(String[])
-
ignoredLineConsumer
ExternalParser.LineConsumer ignoredLineConsumer
A consumer for ignored Lines -
metadataPatterns
Map<Pattern,String> metadataPatterns
Regular Expressions to run over STDOUT to extract Metadata. -
supportedTypes
Set<MediaType> supportedTypes
Media types supported by the external program.
-
-
-
Package org.apache.tika.sax
-
Class org.apache.tika.sax.AbstractRecursiveParserWrapperHandler extends DefaultHandler implements Serializable
-
Serialized Fields
-
contentHandlerFactory
ContentHandlerFactory contentHandlerFactory
-
embeddedDepth
int embeddedDepth
-
embeddedResources
int embeddedResources
-
maxEmbeddedResources
int maxEmbeddedResources
-
-
-
Class org.apache.tika.sax.BasicContentHandlerFactory extends Object implements Serializable
-
Serialized Fields
-
type
BasicContentHandlerFactory.HANDLER_TYPE type
-
writeLimit
int writeLimit
-
-
-
Class org.apache.tika.sax.RecursiveParserWrapperHandler extends AbstractRecursiveParserWrapperHandler implements Serializable
-
Class org.apache.tika.sax.TaggedSAXException extends SAXException implements Serializable
-
Serialized Fields
-
tag
Object tag
The object reference used to tag the exception.
-
-
-
-
Package org.apache.tika.utils
-
Class org.apache.tika.utils.XMLReaderUtils extends Object implements Serializable
- serialVersionUID:
- 6110455808615143122L
-