Class HSLFExtractor


  • public class HSLFExtractor
    extends Object
    • Constructor Summary

      Constructors 
      Constructor Description
      HSLFExtractor​(org.apache.tika.parser.ParseContext context, org.apache.tika.metadata.Metadata metadata)  
    • Method Summary

      All Methods Static Methods Instance Methods Concrete Methods 
      Modifier and Type Method Description
      protected org.apache.tika.detect.Detector getDetector()  
      protected String getPassword()
      Returns the password to be used for this file, or null if no password or the default password should be used
      protected org.apache.tika.config.TikaConfig getTikaConfig()  
      protected void handleEmbeddedOfficeDoc​(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)
      Handle an office document that's embedded at the POIFS level
      protected void handleEmbeddedOfficeDoc​(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.metadata.Metadata metadata, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)
      Handle an office document that's embedded at the POIFS level
      protected void handleEmbeddedOfficeDoc​(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)
      Handle an office document that's embedded at the POIFS level
      protected void handleEmbeddedResource​(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)  
      protected void handleEmbeddedResource​(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)  
      protected void handleEmbeddedResource​(org.apache.tika.io.TikaInputStream resource, org.apache.tika.metadata.Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)  
      protected void parse​(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml)  
      protected void parse​(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml)  
      static String tryToGetMsgTitle​(org.apache.poi.poifs.filesystem.DirectoryEntry node, String defaultVal)  
    • Field Detail

      • parentMetadata

        protected final org.apache.tika.metadata.Metadata parentMetadata
      • context

        protected final org.apache.tika.parser.ParseContext context
    • Constructor Detail

      • HSLFExtractor

        public HSLFExtractor​(org.apache.tika.parser.ParseContext context,
                             org.apache.tika.metadata.Metadata metadata)
    • Method Detail

      • parse

        protected void parse​(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
                             org.apache.tika.sax.XHTMLContentHandler xhtml)
                      throws IOException,
                             SAXException,
                             org.apache.tika.exception.TikaException
        Throws:
        IOException
        SAXException
        org.apache.tika.exception.TikaException
      • parse

        protected void parse​(org.apache.poi.poifs.filesystem.DirectoryNode root,
                             org.apache.tika.sax.XHTMLContentHandler xhtml)
                      throws IOException,
                             SAXException,
                             org.apache.tika.exception.TikaException
        Throws:
        IOException
        SAXException
        org.apache.tika.exception.TikaException
      • getTikaConfig

        protected org.apache.tika.config.TikaConfig getTikaConfig()
      • getDetector

        protected org.apache.tika.detect.Detector getDetector()
      • getPassword

        protected String getPassword()
        Returns the password to be used for this file, or null if no password or the default password should be used
      • handleEmbeddedResource

        protected void handleEmbeddedResource​(org.apache.tika.io.TikaInputStream resource,
                                              String filename,
                                              String relationshipID,
                                              String mediaType,
                                              org.apache.tika.sax.XHTMLContentHandler xhtml,
                                              boolean outputHtml)
                                       throws IOException,
                                              SAXException,
                                              org.apache.tika.exception.TikaException
        Throws:
        IOException
        SAXException
        org.apache.tika.exception.TikaException
      • handleEmbeddedResource

        protected void handleEmbeddedResource​(org.apache.tika.io.TikaInputStream resource,
                                              String filename,
                                              String relationshipID,
                                              org.apache.poi.hpsf.ClassID storageClassID,
                                              String mediaType,
                                              org.apache.tika.sax.XHTMLContentHandler xhtml,
                                              boolean outputHtml)
                                       throws IOException,
                                              SAXException,
                                              org.apache.tika.exception.TikaException
        Throws:
        IOException
        SAXException
        org.apache.tika.exception.TikaException
      • handleEmbeddedResource

        protected void handleEmbeddedResource​(org.apache.tika.io.TikaInputStream resource,
                                              org.apache.tika.metadata.Metadata embeddedMetadata,
                                              String filename,
                                              String relationshipID,
                                              org.apache.poi.hpsf.ClassID storageClassID,
                                              String mediaType,
                                              org.apache.tika.sax.XHTMLContentHandler xhtml,
                                              boolean outputHtml)
                                       throws IOException,
                                              SAXException,
                                              org.apache.tika.exception.TikaException
        Throws:
        IOException
        SAXException
        org.apache.tika.exception.TikaException
      • handleEmbeddedOfficeDoc

        protected void handleEmbeddedOfficeDoc​(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
                                               org.apache.tika.sax.XHTMLContentHandler xhtml,
                                               boolean outputHtml)
                                        throws IOException,
                                               SAXException,
                                               org.apache.tika.exception.TikaException
        Handle an office document that's embedded at the POIFS level
        Throws:
        IOException
        SAXException
        org.apache.tika.exception.TikaException
      • handleEmbeddedOfficeDoc

        protected void handleEmbeddedOfficeDoc​(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
                                               String resourceName,
                                               org.apache.tika.sax.XHTMLContentHandler xhtml,
                                               boolean outputHtml)
                                        throws IOException,
                                               SAXException,
                                               org.apache.tika.exception.TikaException
        Handle an office document that's embedded at the POIFS level
        Throws:
        IOException
        SAXException
        org.apache.tika.exception.TikaException
      • handleEmbeddedOfficeDoc

        protected void handleEmbeddedOfficeDoc​(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
                                               org.apache.tika.metadata.Metadata metadata,
                                               String resourceName,
                                               org.apache.tika.sax.XHTMLContentHandler xhtml,
                                               boolean outputHtml)
                                        throws IOException,
                                               SAXException,
                                               org.apache.tika.exception.TikaException
        Handle an office document that's embedded at the POIFS level
        Throws:
        IOException
        SAXException
        org.apache.tika.exception.TikaException
      • tryToGetMsgTitle

        public static String tryToGetMsgTitle​(org.apache.poi.poifs.filesystem.DirectoryEntry node,
                                              String defaultVal)