Package org.apache.tika.parser.microsoft
Class HSLFExtractor
- java.lang.Object
-
- org.apache.tika.parser.microsoft.HSLFExtractor
-
public class HSLFExtractor extends Object
-
-
Field Summary
Fields
Modifier and Type | Field | Description
protected org.apache.tika.parser.ParseContext | context |
protected OfficeParserConfig | officeParserConfig |
protected org.apache.tika.metadata.Metadata | parentMetadata |
-
Constructor Summary
Constructors
Constructor | Description
HSLFExtractor(org.apache.tika.parser.ParseContext context, org.apache.tika.metadata.Metadata metadata) |
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
protected org.apache.tika.detect.Detector | getDetector() |
protected String | getPassword() | Returns the password to be used for this file, or null if no / default password should be used
protected org.apache.tika.config.TikaConfig | getTikaConfig() |
protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) | Handle an office document that's embedded at the POIFS level
protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.metadata.Metadata metadata, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) | Handle an office document that's embedded at the POIFS level
protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) | Handle an office document that's embedded at the POIFS level
protected void | handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) |
protected void | handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) |
protected void | handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, org.apache.tika.metadata.Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) |
protected void | parse(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml) |
protected void | parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml) |
static String | tryToGetMsgTitle(org.apache.poi.poifs.filesystem.DirectoryEntry node, String defaultVal) |
-
-
-
Field Detail
-
parentMetadata
protected final org.apache.tika.metadata.Metadata parentMetadata
-
officeParserConfig
protected final OfficeParserConfig officeParserConfig
-
context
protected final org.apache.tika.parser.ParseContext context
-
-
Method Detail
-
parse
protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
parse
protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
getTikaConfig
protected org.apache.tika.config.TikaConfig getTikaConfig()
-
getDetector
protected org.apache.tika.detect.Detector getDetector()
-
getPassword
protected String getPassword()
Returns the password to be used for this file, or null if no password or the default password should be used.
-
handleEmbeddedResource
protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedResource
protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedResource
protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, org.apache.tika.metadata.Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedOfficeDoc
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Handle an office document that's embedded at the POIFS level.
Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedOfficeDoc
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Handle an office document that's embedded at the POIFS level.
Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
handleEmbeddedOfficeDoc
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.metadata.Metadata metadata, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
Handle an office document that's embedded at the POIFS level.
Throws:
IOException
SAXException
org.apache.tika.exception.TikaException
-
-