WARCWriterChainProcessor
@Deprecated public class WARCWriterProcessor extends BaseWARCWriterProcessor implements org.archive.io.warc.WARCWriterPoolSettings
TODO: Remove ANVLRecord. Rename NameValue or use RFC822 (commons-httpclient?) or find something else.
generator, stats, urlsWritten
ANNOTATION_UNWRITTEN, compress, directory, frequentFlushes, maxFileSizeBytes, maxTotalBytesToWrite, maxWaitForIdleMs, poolMaxActive, prefix, serverCache, skipIdenticalDigests, startNewFilesOnCheckpoint, storePaths, template, writeBufferSize
Constructor and Description |
---|
WARCWriterProcessor()
Deprecated.
|
Modifier and Type | Method and Description |
---|---|
protected void |
fromCheckpointJson(org.json.JSONObject json)
Deprecated.
Restore internal state from the JSONObject stored at an earlier
checkpoint time.
|
boolean |
getWriteMetadata()
Deprecated.
|
boolean |
getWriteRequests()
Deprecated.
|
protected ProcessResult |
innerProcessResult(CrawlURI curi)
Deprecated.
Writes a CrawlURI and its associated data to store file.
|
protected URI |
qualifyRecordID(URI base,
String key,
String value)
Deprecated.
|
protected void |
saveHeader(CrawlURI curi,
org.archive.util.anvl.ANVLRecord warcHeaders,
String origName,
String newName)
Deprecated.
Saves a header from the given HTTP operation into the
provider headers under a new name
|
void |
setWriteMetadata(boolean writeMetadata)
Deprecated.
|
void |
setWriteRequests(boolean writeRequests)
Deprecated.
|
void |
setWriteRevisitForIdenticalDigests(boolean writeRevisits)
Deprecated.
|
void |
setWriteRevisitForNotModified(boolean writeRevisits)
Deprecated.
|
protected org.json.JSONObject |
toCheckpointJson()
Deprecated.
Return a JSONObject of current state that can be consulted
on recovery to restore necessary values.
|
protected ProcessResult |
write(String lowerCaseScheme,
CrawlURI curi)
Deprecated.
|
protected void |
writeDnsRecords(CrawlURI curi,
org.archive.io.warc.WARCWriter w,
URI baseid,
String timestamp)
Deprecated.
|
protected URI |
writeFtpControlConversation(org.archive.io.warc.WARCWriter w,
String timestamp,
URI baseid,
CrawlURI curi,
org.archive.util.anvl.ANVLRecord headers,
String controlConversation)
Deprecated.
|
protected void |
writeFtpRecords(org.archive.io.warc.WARCWriter w,
CrawlURI curi,
URI baseid,
String timestamp)
Deprecated.
|
protected void |
writeHttpRecords(CrawlURI curi,
org.archive.io.warc.WARCWriter w,
URI baseid,
String timestamp)
Deprecated.
|
protected URI |
writeMetadata(org.archive.io.warc.WARCWriter w,
String timestamp,
URI baseid,
CrawlURI curi,
org.archive.util.anvl.ANVLRecord namedFields)
Deprecated.
|
protected URI |
writeRequest(org.archive.io.warc.WARCWriter w,
String timestamp,
String mimetype,
URI baseid,
CrawlURI curi,
org.archive.util.anvl.ANVLRecord namedFields)
Deprecated.
|
protected URI |
writeResource(org.archive.io.warc.WARCWriter w,
String timestamp,
String mimetype,
URI baseid,
CrawlURI curi,
org.archive.util.anvl.ANVLRecord namedFields)
Deprecated.
|
protected URI |
writeResponse(org.archive.io.warc.WARCWriter w,
String timestamp,
String mimetype,
URI baseid,
CrawlURI curi,
org.archive.util.anvl.ANVLRecord suppliedFields)
Deprecated.
|
protected URI |
writeRevisit(org.archive.io.warc.WARCWriter w,
String timestamp,
String mimetype,
URI baseid,
CrawlURI curi,
org.archive.util.anvl.ANVLRecord headers)
Deprecated.
|
protected URI |
writeRevisit(org.archive.io.warc.WARCWriter w,
String timestamp,
String mimetype,
URI baseid,
CrawlURI curi,
org.archive.util.anvl.ANVLRecord headers,
long contentLength)
Deprecated.
|
protected void |
writeWhoisRecords(org.archive.io.warc.WARCWriter w,
CrawlURI curi,
URI baseid,
String timestamp)
Deprecated.
|
addIfNotBlank, addStats, copyStats, getDefaultMaxFileSize, getDefaultStorePaths, getMetadata, getRecordID, getRecordIDGenerator, getStats, report, setRecordIDGenerator, setupPool, updateMetadataAfterWrite
calcOutputDirs, checkBytesWritten, copyForwardWriteTagIfDupe, doCheckpoint, getCompress, getDirectory, getFrequentFlushes, getHostAddress, getMaxFileSizeBytes, getMaxTotalBytesToWrite, getMaxWaitForIdleMs, getMetadataProvider, getPool, getPoolMaxActive, getPrefix, getSerialNo, getServerCache, getSkipIdenticalDigests, getStartNewFilesOnCheckpoint, getStorePaths, getTemplate, getTotalBytesWritten, getWriteBufferSize, innerProcess, innerRejectProcess, setCompress, setDirectory, setFrequentFlushes, setMaxFileSizeBytes, setMaxTotalBytesToWrite, setMaxWaitForIdleMs, setMetadataProvider, setPool, setPoolMaxActive, setPrefix, setServerCache, setSkipIdenticalDigests, setStartNewFilesOnCheckpoint, setStorePaths, setTemplate, setTotalBytesWritten, setWriteBufferSize, shouldProcess, shouldWrite, start, stop
finishCheckpoint, flattenVia, getBeanName, getEnabled, getKeyedProperties, getRecordedSize, getShouldProcessRule, getURICount, hasHttpAuthenticationCredential, isRunning, isSuccess, process, setBeanName, setEnabled, setRecoveryCheckpoint, setShouldProcessRule, startCheckpoint
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getRecordIDGenerator
calcOutputDirs, getCompress, getFrequentFlushes, getMaxFileSizeBytes, getMetadata, getPrefix, getTemplate, getWriteBufferSize
finishCheckpoint, setRecoveryCheckpoint, startCheckpoint
public boolean getWriteRequests()
public void setWriteRequests(boolean writeRequests)
public boolean getWriteMetadata()
public void setWriteMetadata(boolean writeMetadata)
@Deprecated public void setWriteRevisitForIdenticalDigests(boolean writeRevisits)
@Deprecated public void setWriteRevisitForNotModified(boolean writeRevisits)
protected ProcessResult innerProcessResult(CrawlURI curi)
innerProcessResult
in class WriterPoolProcessor
curi
- CrawlURI to process.
protected ProcessResult write(String lowerCaseScheme, CrawlURI curi) throws IOException
IOException
protected void writeDnsRecords(CrawlURI curi, org.archive.io.warc.WARCWriter w, URI baseid, String timestamp) throws IOException
IOException
protected void writeWhoisRecords(org.archive.io.warc.WARCWriter w, CrawlURI curi, URI baseid, String timestamp) throws IOException
IOException
protected void writeHttpRecords(CrawlURI curi, org.archive.io.warc.WARCWriter w, URI baseid, String timestamp) throws IOException
IOException
protected void writeFtpRecords(org.archive.io.warc.WARCWriter w, CrawlURI curi, URI baseid, String timestamp) throws IOException
IOException
protected URI writeFtpControlConversation(org.archive.io.warc.WARCWriter w, String timestamp, URI baseid, CrawlURI curi, org.archive.util.anvl.ANVLRecord headers, String controlConversation) throws IOException
IOException
protected URI writeRequest(org.archive.io.warc.WARCWriter w, String timestamp, String mimetype, URI baseid, CrawlURI curi, org.archive.util.anvl.ANVLRecord namedFields) throws IOException
IOException
protected URI writeResponse(org.archive.io.warc.WARCWriter w, String timestamp, String mimetype, URI baseid, CrawlURI curi, org.archive.util.anvl.ANVLRecord suppliedFields) throws IOException
IOException
protected URI writeResource(org.archive.io.warc.WARCWriter w, String timestamp, String mimetype, URI baseid, CrawlURI curi, org.archive.util.anvl.ANVLRecord namedFields) throws IOException
IOException
protected URI writeRevisit(org.archive.io.warc.WARCWriter w, String timestamp, String mimetype, URI baseid, CrawlURI curi, org.archive.util.anvl.ANVLRecord headers) throws IOException
IOException
protected URI writeRevisit(org.archive.io.warc.WARCWriter w, String timestamp, String mimetype, URI baseid, CrawlURI curi, org.archive.util.anvl.ANVLRecord headers, long contentLength) throws IOException
IOException
protected void saveHeader(CrawlURI curi, org.archive.util.anvl.ANVLRecord warcHeaders, String origName, String newName)
protected URI writeMetadata(org.archive.io.warc.WARCWriter w, String timestamp, URI baseid, CrawlURI curi, org.archive.util.anvl.ANVLRecord namedFields) throws IOException
IOException
protected URI qualifyRecordID(URI base, String key, String value) throws IOException
IOException
protected org.json.JSONObject toCheckpointJson() throws org.json.JSONException
Processor
toCheckpointJson
in class WriterPoolProcessor
org.json.JSONException
protected void fromCheckpointJson(org.json.JSONObject json) throws org.json.JSONException
Processor
fromCheckpointJson
in class WriterPoolProcessor
json
- JSONObject
org.json.JSONException
Copyright © 2003–2021 Internet Archive. All rights reserved.