|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
org.apache.pdfbox.pdfparser.BaseParser
org.apache.pdfbox.pdfparser.ConformingPDFParser
public class ConformingPDFParser
Field Summary | |
---|---|
protected RandomAccess |
inputFile
|
Fields inherited from class org.apache.pdfbox.pdfparser.BaseParser |
---|
DEF, document, ENDOBJ, ENDSTREAM, FORCE_PARSING, forceParsing, pdfSource |
Constructor Summary | |
---|---|
ConformingPDFParser(File inputFile)
Constructor. |
Method Summary | |
---|---|
protected byte |
consumeWhitespace()
This will read all bytes until a non-whitespace character is found. |
protected byte |
consumeWhitespaceBackwards()
This will read all bytes (backwards) until a non-whitespace character is found. |
COSDocument |
getDocument()
This will get the document that was parsed. |
COSBase |
getObject(long objectNumber,
long generation)
|
PDDocument |
getPDDocument()
This will get the PD document that was parsed. |
boolean |
isRecursivlyRead()
|
void |
parse()
This will parse the stream and populate the COSDocument object. |
protected COSNumber |
parseNumber(String number)
|
protected long |
parseTrailerInformation()
|
protected COSBase |
processCosObject(String string)
|
protected String |
readBackwardUntilWhitespace()
|
protected byte |
readByte()
|
protected byte |
readByteBackwards()
|
protected COSDictionary |
readDictionaryBackwards()
|
protected int |
readInt()
This will read an integer from the stream. |
protected String |
readLine()
This will read a line starting with the byte at offset and going forward until it finds a newline. |
protected String |
readLineBackwards()
This will read a line starting with the byte at offset and going backwards until it finds a newline. |
protected long |
readLongBackwards()
This will consume any whitespace, read in bytes until whitespace is found again and then parse the characters which have been read as a long. |
protected COSName |
readNameBackwards()
|
protected COSNumber |
readNumber()
This will read in a number and return the COS version of the number (be it a COSInteger or a COSFloat). |
protected COSBase |
readObject()
This actually reads the object data. |
COSBase |
readObject(long objectNumber,
long generation)
This will read an object from the inputFile at whatever our currentOffset is. |
protected COSBase |
readObjectBackwards()
|
protected String |
readString()
This will read the next string from the stream. |
protected String |
readWord()
|
void |
setRecursivlyRead(boolean recursivlyRead)
|
Methods inherited from class org.apache.pdfbox.pdfparser.BaseParser |
---|
isClosing, isClosing, isEndOfName, isEOL, isEOL, isWhitespace, isWhitespace, parseBoolean, parseCOSArray, parseCOSDictionary, parseCOSName, parseCOSStream, parseCOSString, parseDirObject, readExpectedString, readString, setDocument, skipSpaces |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected RandomAccess inputFile
Constructor Detail |
---|
public ConformingPDFParser(File inputFile) throws IOException
inputFile
- The input file that contains the PDF document.
IOException
- If there is an error initializing the stream.
Method Detail |
---|
public void parse() throws IOException
IOException
- If there is an error reading from the stream or corrupt data
is found.
public COSDocument getDocument() throws IOException
IOException
- If there is an error getting the document.
public PDDocument getPDDocument() throws IOException
IOException
- If there is an error getting the document.
protected long parseTrailerInformation() throws IOException, NumberFormatException
IOException
NumberFormatException
protected byte readByteBackwards() throws IOException
IOException
protected byte readByte() throws IOException
IOException
protected String readBackwardUntilWhitespace() throws IOException
IOException
protected byte consumeWhitespaceBackwards() throws IOException
IOException
- if there is an error reading from the file
protected byte consumeWhitespace() throws IOException
IOException
- if there is an error reading from the file
protected long readLongBackwards() throws IOException, NumberFormatException
IOException
- if there is an error reading from the file
NumberFormatException
- if the bytes read cannot be converted to a number
protected int readInt() throws IOException
BaseParser
readInt
in class BaseParser
IOException
- If there is an error reading from the stream.
protected COSNumber readNumber() throws IOException
IOException
protected COSNumber parseNumber(String number) throws IOException
IOException
protected COSBase processCosObject(String string) throws IOException
IOException
protected COSBase readObjectBackwards() throws IOException
IOException
protected COSName readNameBackwards() throws IOException
IOException
public COSBase getObject(long objectNumber, long generation) throws IOException
IOException
public COSBase readObject(long objectNumber, long generation) throws IOException
objectNumber
- the object number you expect to read
generation
- the generation you expect this object to be
IOException
protected COSBase readObject() throws IOException
IOException
protected String readString() throws IOException
readString
in class BaseParser
IOException
- If there is an error reading from the stream.
protected COSDictionary readDictionaryBackwards() throws IOException
IOException
protected String readLineBackwards() throws IOException
offset
- the location of the file where we should start reading
IOException
- if there was an error reading data from the file
protected String readLineBackwards() throws IOException
readLine
in class BaseParser
offset
- the location of the file where we should start reading
IOException
- if there was an error reading data from the file
protected String readWord() throws IOException
IOException
public boolean isRecursivlyRead()
public void setRecursivlyRead(boolean recursivlyRead)
recursivlyRead
- the recursivlyRead to set
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |