Package org.apache.tika.parser.microsoft
Class HSLFExtractor
java.lang.Object
org.apache.tika.parser.microsoft.HSLFExtractor
-
Field Summary
Fields
Modifier and Type | Field | Description
protected final org.apache.tika.parser.ParseContext | context |
protected final OfficeParserConfig | officeParserConfig |
protected final org.apache.tika.metadata.Metadata | parentMetadata |
-
Constructor Summary
Constructors
Constructor | Description
HSLFExtractor(org.apache.tika.parser.ParseContext context, org.apache.tika.metadata.Metadata metadata) |
-
Method Summary
Modifier and Type | Method | Description
protected org.apache.tika.detect.Detector | getDetector() |
protected String | getPassword() | Returns the password to be used for this file, or null if no / default password should be used
protected org.apache.tika.config.TikaConfig | getTikaConfig() |
protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) | Handle an office document that's embedded at the POIFS level
protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) | Handle an office document that's embedded at the POIFS level
protected void | handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) |
protected void | handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) |
protected void | handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, org.apache.tika.metadata.Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) |
protected void | parse(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml) |
protected void | parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml) |
static String | tryToGetMsgTitle(org.apache.poi.poifs.filesystem.DirectoryEntry node, String defaultVal) |
-
Field Details
-
parentMetadata
protected final org.apache.tika.metadata.Metadata parentMetadata -
officeParserConfig
protected final OfficeParserConfig officeParserConfig
-
context
protected final org.apache.tika.parser.ParseContext context
-
-
Constructor Details
-
HSLFExtractor
public HSLFExtractor(org.apache.tika.parser.ParseContext context, org.apache.tika.metadata.Metadata metadata)
-
-
Method Details
-
parse
protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml) throws IOException, SAXException, org.apache.tika.exception.TikaException - Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
parse
protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml) throws IOException, SAXException, org.apache.tika.exception.TikaException - Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
getTikaConfig
protected org.apache.tika.config.TikaConfig getTikaConfig() -
getDetector
protected org.apache.tika.detect.Detector getDetector() -
getPassword
protected String getPassword()
Returns the password to be used for this file, or null if no / default password should be used -
handleEmbeddedResource
protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException - Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedResource
protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException - Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedResource
protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, org.apache.tika.metadata.Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException - Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedOfficeDoc
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Handle an office document that's embedded at the POIFS level
- Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedOfficeDoc
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Handle an office document that's embedded at the POIFS level
- Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
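tryToGetMsgTitle
static String tryToGetMsgTitle(org.apache.poi.poifs.filesystem.DirectoryEntry node, String defaultVal)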
-