Class

edu.psu.sagnik.research.pdsimplify.text.impl

ProcessText

Related Doc: package impl

Permalink

class ProcessText extends PDFTextStripper

Created by schoudhury on 6/27/16.

Linear Supertypes
PDFTextStripper, PDFTextStreamEngine, PDFStreamEngine, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. ProcessText
  2. PDFTextStripper
  3. PDFTextStreamEngine
  4. PDFStreamEngine
  5. AnyRef
  6. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new ProcessText(page: PDPage)

    Permalink

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. final def addOperator(arg0: OperatorProcessor): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  5. def applyTextAdjustment(arg0: Float, arg1: Float): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  6. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  7. def beginText(): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  8. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  9. var currentChars: List[PDChar]

    Permalink
  10. var currentParagraphs: List[PDParagraph]

    Permalink
  11. var currentTextLines: List[PDTextLine]

    Permalink
  12. var currentWords: List[PDWord]

    Permalink
  13. def endArticle(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  14. def endDocument(arg0: PDDocument): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  15. def endPage(arg0: PDPage): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  16. def endText(): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  17. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  18. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  19. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  20. def getAddMoreFormatting(): Boolean

    Permalink
    Definition Classes
    PDFTextStripper
  21. def getAppearance(arg0: PDAnnotation): PDAppearanceStream

    Permalink
    Definition Classes
    PDFStreamEngine
  22. def getArticleEnd(): String

    Permalink
    Definition Classes
    PDFTextStripper
  23. def getArticleStart(): String

    Permalink
    Definition Classes
    PDFTextStripper
  24. def getAverageCharTolerance(): Float

    Permalink
    Definition Classes
    PDFTextStripper
  25. def getCharactersByArticle(): List[List[TextPosition]]

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  26. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  27. def getCurrentPage(): PDPage

    Permalink
    Definition Classes
    PDFStreamEngine
  28. def getCurrentPageNo(): Int

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  29. def getDropThreshold(): Float

    Permalink
    Definition Classes
    PDFTextStripper
  30. def getEndBookmark(): PDOutlineItem

    Permalink
    Definition Classes
    PDFTextStripper
  31. def getEndPage(): Int

    Permalink
    Definition Classes
    PDFTextStripper
  32. def getGraphicsStackSize(): Int

    Permalink
    Definition Classes
    PDFStreamEngine
  33. def getGraphicsState(): PDGraphicsState

    Permalink
    Definition Classes
    PDFStreamEngine
  34. def getIndentThreshold(): Float

    Permalink
    Definition Classes
    PDFTextStripper
  35. def getInitialMatrix(): Matrix

    Permalink
    Definition Classes
    PDFStreamEngine
  36. def getLineSeparator(): String

    Permalink
    Definition Classes
    PDFTextStripper
  37. def getListItemPatterns(): List[Pattern]

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  38. def getOutput(): Writer

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  39. def getPageEnd(): String

    Permalink
    Definition Classes
    PDFTextStripper
  40. def getPageStart(): String

    Permalink
    Definition Classes
    PDFTextStripper
  41. def getParagraphEnd(): String

    Permalink
    Definition Classes
    PDFTextStripper
  42. def getParagraphStart(): String

    Permalink
    Definition Classes
    PDFTextStripper
  43. def getResources(): PDResources

    Permalink
    Definition Classes
    PDFStreamEngine
  44. def getSeparateByBeads(): Boolean

    Permalink
    Definition Classes
    PDFTextStripper
  45. def getSortByPosition(): Boolean

    Permalink
    Definition Classes
    PDFTextStripper
  46. def getSpacingTolerance(): Float

    Permalink
    Definition Classes
    PDFTextStripper
  47. def getStartBookmark(): PDOutlineItem

    Permalink
    Definition Classes
    PDFTextStripper
  48. def getStartPage(): Int

    Permalink
    Definition Classes
    PDFTextStripper
  49. def getSuppressDuplicateOverlappingText(): Boolean

    Permalink
    Definition Classes
    PDFTextStripper
  50. def getText(arg0: PDDocument): String

    Permalink
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  51. def getTextLineMatrix(): Matrix

    Permalink
    Definition Classes
    PDFStreamEngine
  52. def getTextMatrix(): Matrix

    Permalink
    Definition Classes
    PDFStreamEngine
  53. def getWordSeparator(): String

    Permalink
    Definition Classes
    PDFTextStripper
  54. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  55. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  56. lazy val logger: Logger

    Permalink
  57. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  58. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  59. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  60. def operatorException(arg0: Operator, arg1: List[COSBase], arg2: IOException): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  61. def processAnnotation(arg0: PDAnnotation, arg1: PDAppearanceStream): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  62. def processChildStream(arg0: PDContentStream, arg1: PDPage): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  63. def processOperator(arg0: Operator, arg1: List[COSBase]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  64. def processOperator(arg0: String, arg1: List[COSBase]): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  65. def processPage(arg0: PDPage): Unit

    Permalink
    Definition Classes
    PDFTextStripper → PDFTextStreamEngine → PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  66. def processPages(arg0: PDPageTree): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  67. def processSoftMask(arg0: PDTransparencyGroup): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  68. def processTextPosition(arg0: TextPosition): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper → PDFTextStreamEngine
  69. final def processTilingPattern(arg0: PDTilingPattern, arg1: PDColor, arg2: PDColorSpace, arg3: Matrix): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  70. final def processTilingPattern(arg0: PDTilingPattern, arg1: PDColor, arg2: PDColorSpace): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  71. def processTransparencyGroup(arg0: PDTransparencyGroup): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  72. def processType3Stream(arg0: PDType3CharProc, arg1: Matrix): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  73. final def restoreGraphicsStack(arg0: Stack[PDGraphicsState]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
  74. def restoreGraphicsState(): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  75. final def saveGraphicsStack(): Stack[PDGraphicsState]

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
  76. def saveGraphicsState(): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  77. def setAddMoreFormatting(arg0: Boolean): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  78. def setArticleEnd(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  79. def setArticleStart(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  80. def setAverageCharTolerance(arg0: Float): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  81. def setDropThreshold(arg0: Float): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  82. def setEndBookmark(arg0: PDOutlineItem): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  83. def setEndPage(arg0: Int): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  84. def setIndentThreshold(arg0: Float): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  85. def setLineDashPattern(arg0: COSArray, arg1: Int): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  86. def setLineSeparator(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  87. def setListItemPatterns(arg0: List[Pattern]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  88. def setPageEnd(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  89. def setPageStart(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  90. def setParagraphEnd(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  91. def setParagraphStart(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  92. def setShouldSeparateByBeads(arg0: Boolean): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  93. def setSortByPosition(arg0: Boolean): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  94. def setSpacingTolerance(arg0: Float): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  95. def setStartBookmark(arg0: PDOutlineItem): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  96. def setStartPage(arg0: Int): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  97. def setSuppressDuplicateOverlappingText(arg0: Boolean): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  98. def setTextLineMatrix(arg0: Matrix): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  99. def setTextMatrix(arg0: Matrix): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  100. def setWordSeparator(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  101. def showAnnotation(arg0: PDAnnotation): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  102. def showFontGlyph(arg0: Matrix, arg1: PDFont, arg2: Int, arg3: String, arg4: Vector): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  103. def showForm(arg0: PDFormXObject): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  104. def showGlyph(arg0: Matrix, arg1: PDFont, arg2: Int, arg3: String, arg4: Vector): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStreamEngine → PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  105. def showText(arg0: Array[Byte]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  106. def showTextString(arg0: Array[Byte]): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  107. def showTextStrings(arg0: COSArray): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  108. def showTransparencyGroup(arg0: PDTransparencyGroup): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  109. def showType3Glyph(arg0: Matrix, arg1: PDType3Font, arg2: Int, arg3: String, arg4: Vector): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  110. def startArticle(arg0: Boolean): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  111. def startArticle(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  112. def startDocument(arg0: PDDocument): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  113. def startPage(arg0: PDPage): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  114. def stripPage(pdPageNum: Int, document: PDDocument): List[PDParagraph]

    Permalink
  115. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  116. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  117. def transformWidth(arg0: Float): Float

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
  118. def transformedPoint(arg0: Float, arg1: Float): Point2D.Float

    Permalink
    Definition Classes
    PDFStreamEngine
  119. def unsupportedOperator(arg0: Operator, arg1: List[COSBase]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  120. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  121. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  122. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  123. def wordFromTextPositions(tPs: List[TextPosition]): Option[PDWord]

    Permalink
    Attributes
    protected
  124. def writeCharacters(arg0: TextPosition): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  125. def writeLineSeparator(): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    ProcessText → PDFTextStripper
    Annotations
    @Override() @throws( ... )
  126. def writePage(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  127. def writePageEnd(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  128. def writePageStart(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  129. def writeParagraphEnd(): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    ProcessText → PDFTextStripper
    Annotations
    @Override() @throws( ... )
  130. def writeParagraphSeparator(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  131. def writeParagraphStart(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  132. def writeString(s: String, textPositions: List[TextPosition]): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    ProcessText → PDFTextStripper
    Annotations
    @Override() @throws( ... )
  133. def writeString(arg0: String): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  134. def writeText(arg0: PDDocument, arg1: Writer): Unit

    Permalink
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  135. def writeWordSeparator(): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    ProcessText → PDFTextStripper
    Annotations
    @Override() @throws( ... )

Deprecated Value Members

  1. def registerOperatorProcessor(arg0: String, arg1: OperatorProcessor): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @Deprecated @deprecated
    Deprecated

    (Since version: not specified.) See the corresponding Javadoc for more information.

Inherited from PDFTextStripper

Inherited from PDFTextStreamEngine

Inherited from PDFStreamEngine

Inherited from AnyRef

Inherited from Any

Ungrouped