Class HSLFExtractor

java.lang.Object
org.apache.tika.parser.microsoft.HSLFExtractor

public class HSLFExtractor extends Object
  • Field Summary

    Fields
    Modifier and Type
    Field
    Description
    protected final org.apache.tika.parser.ParseContext
    context
     
    protected final OfficeParserConfig
    officeParserConfig
     
    protected final org.apache.tika.metadata.Metadata
    parentMetadata
     
  • Constructor Summary

    Constructors
    Constructor
    Description
    HSLFExtractor(org.apache.tika.parser.ParseContext context, org.apache.tika.metadata.Metadata metadata)
     
  • Method Summary

    Modifier and Type
    Method
    Description
    protected org.apache.tika.detect.Detector
    getDetector()
     
    protected String
    getPassword()
    Returns the password to be used for this file, or null if no password or the default password should be used
    protected org.apache.tika.config.TikaConfig
    getTikaConfig()
     
    protected void
    handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)
    Handle an office document that's embedded at the POIFS level
    protected void
    handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)
    Handle an office document that's embedded at the POIFS level
    protected void
    handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)
     
    protected void
    handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)
     
    protected void
    handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, org.apache.tika.metadata.Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml)
     
    protected void
    parse(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml)
     
    protected void
    parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml)
     
    static String
    tryToGetMsgTitle(org.apache.poi.poifs.filesystem.DirectoryEntry node, String defaultVal)
     

    Methods inherited from class java.lang.Object

    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
  • Field Details

    • parentMetadata

      protected final org.apache.tika.metadata.Metadata parentMetadata
    • officeParserConfig

      protected final OfficeParserConfig officeParserConfig
    • context

      protected final org.apache.tika.parser.ParseContext context
  • Constructor Details

    • HSLFExtractor

      public HSLFExtractor(org.apache.tika.parser.ParseContext context, org.apache.tika.metadata.Metadata metadata)
  • Method Details

    • parse

      protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
      Throws:
      IOException
      SAXException
      org.apache.tika.exception.TikaException
    • parse

      protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
      Throws:
      IOException
      SAXException
      org.apache.tika.exception.TikaException
    • getTikaConfig

      protected org.apache.tika.config.TikaConfig getTikaConfig()
    • getDetector

      protected org.apache.tika.detect.Detector getDetector()
    • getPassword

      protected String getPassword()
      Returns the password to be used for this file, or null if no password or the default password should be used
    • handleEmbeddedResource

      protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
      Throws:
      IOException
      SAXException
      org.apache.tika.exception.TikaException
    • handleEmbeddedResource

      protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
      Throws:
      IOException
      SAXException
      org.apache.tika.exception.TikaException
    • handleEmbeddedResource

      protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, org.apache.tika.metadata.Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
      Throws:
      IOException
      SAXException
      org.apache.tika.exception.TikaException
    • handleEmbeddedOfficeDoc

      protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
      Handle an office document that's embedded at the POIFS level
      Throws:
      IOException
      SAXException
      org.apache.tika.exception.TikaException
    • handleEmbeddedOfficeDoc

      protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, org.apache.tika.exception.TikaException
      Handle an office document that's embedded at the POIFS level
      Throws:
      IOException
      SAXException
      org.apache.tika.exception.TikaException
    • tryToGetMsgTitle

      public static String tryToGetMsgTitle(org.apache.poi.poifs.filesystem.DirectoryEntry node, String defaultVal)