Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DUPLICATE | "dupByHash" |
public static final String | DUPLICATECOUNT | "dupByHashCount" |
public static final String | NOTMODIFIED | "notModified" |
public static final String | NOTMODIFIEDCOUNT | "notModifiedCount" |
public static final String | NOVEL | "novel" |
public static final String | NOVELCOUNT | "novelCount" |
public static final String | OTHERDUPLICATE | "otherDup" |
public static final String | OTHERDUPLICATECOUNT | "otherDupCount" |
public static final String | WARC_NOVEL_CONTENT_BYTES | "warcNovelContentBytes" |
public static final String | WARC_NOVEL_URLS | "warcNovelUrls" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_ANNOTATIONS | "annotations" |
public static final String | A_CONTENT_TYPE | "content-type" |
public static final String | A_CREDENTIALS_KEY | "credentials" |
public static final String | A_DELAY_FACTOR | "delay-factor" |
public static final String | A_DISTANCE_FROM_SEED | "distance-from-seed" |
public static final String | A_DNS_FETCH_TIME | "dns-fetch-time" |
public static final String | A_FETCH_BEGAN_TIME | "fetch-began-time" |
public static final String | A_FETCH_COMPLETED_TIME | "fetch-completed-time" |
public static final String | A_FORCE_RETIRE | "force-retire" |
public static final String | A_FTP_CONTROL_CONVERSATION | "ftp-control-conversation" |
public static final String | A_FTP_FETCH_STATUS | "ftp-fetch-status" |
public static final String | A_HERITABLE_KEYS | "heritable" |
public static final String | A_HTML_BASE | "html-base-href" |
public static final String | A_HTTP_AUTH_CHALLENGES | "http-auth-challenges" |
public static final String | A_HTTP_PROXY_HOST | "http-proxy-host" |
public static final String | A_HTTP_PROXY_PORT | "http-proxy-port" |
public static final String | A_HTTP_RESPONSE_HEADERS | "http-response-headers" |
public static final String | A_MINIMUM_DELAY | "minimum-delay" |
public static final String | A_MIRROR_PATH | "mirror-path" |
public static final String | A_NONFATAL_ERRORS | "nonfatal-errors" |
public static final String | A_PRECALC_PRECEDENCE | "precalc-precedence" |
public static final String | A_PREREQUISITE_URI | "prerequisite-uri" |
public static final String | A_RETRY_DELAY | "retry-delay" |
public static final String | A_RRECORD_SET_LABEL | "dns-records" |
public static final String | A_RUNTIME_EXCEPTION | "runtime-exception" |
public static final String | A_SERVER_IP | "server-ip" |
public static final String | A_SOURCE_TAG | "source" |
public static final String | A_SUBMIT_DATA | "submit-data" |
public static final String | A_SUBMIT_ENCTYPE | "submit-enctype" |
public static final String | A_WARC_RESPONSE_HEADERS | "warc-response-headers" |
public static final String | A_WARC_STATS | "warc-stats" |
public static final String | HEADER_TRUNC | "headerTrunc" |
public static final String | LENGTH_TRUNC | "lenTrunc" |
public static final String | TIMER_TRUNC | "timeTrunc" |
public static final String | TRUNC_SUFFIX | "Trunc" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_FETCH_HISTORY | "fetch-history" |
public static final int | UNCALCULATED | -1 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | HIGH | 1 |
public static final int | HIGHEST | 0 |
public static final int | MEDIUM | 2 |
public static final int | NORMAL | 3 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | HEADER_PREDICTS_MISSING | -1 |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String |
CSS_BACKSLASH_ESCAPE |
"\\\\([,\'\"\\(\\)\\s])" |
protected static final String |
CSS_URI_EXTRACTOR |
"(?i)(?:@import (?:url[(]|)|url[(])\\s*([\\\"\']?)([^\\\"\'].{0,2083}?)\\1\\s*[);]" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_FORM_OFFSETS | "form-offsets" |
public static final String | A_META_ROBOTS | "meta-robots" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String |
JAVASCRIPT_STRING_EXTRACTOR |
"(\\\\{0,8}+(?:[\'\"]|u002[27]))([^\'\"]{0,2083})(?:\\1)" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | ANNOTATION_IS_SITEMAP | "isSitemap" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | JSSTRING | "javascript:" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | ABS_HTTP_URI_PATTERN | "^https?://[^\\s<>]*$" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | MAX_COOKIES_FOR_DOMAIN | 50 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | HEADER_TRUNC | "headerTrunc" |
public static final String | LENGTH_TRUNC | "lenTrunc" |
public static final String | TIMER_TRUNC | "timeTrunc" |
public static final String | TRUNC_SUFFIX | "Trunc" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | HTTP_BIND_ADDRESS | "httpBindAddress" |
public static final String | HTTP_SCHEME | "http" |
public static final String | HTTPS_SCHEME | "https" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | FETCH_DISREGARDS | "fetchDisregards" |
public static final String | FETCH_FAILURES | "fetchFailures" |
public static final String | FETCH_NONRESPONSES | "fetchNonResponses" |
public static final String | FETCH_RESPONSES | "fetchResponses" |
public static final String | FETCH_SUCCESSES | "fetchSuccesses" |
public static final String | ROBOTS_DENIALS | "robotsDenials" |
public static final String | SUCCESS_BYTES | "successBytes" |
public static final String | TOTAL_BYTES | "totalBytes" |
public static final String | TOTAL_SCHEDULED | "totalScheduled" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | S_BLOCKED_BY_CUSTOM_PROCESSOR | -5002 |
public static final int | S_BLOCKED_BY_QUOTA | -5003 |
public static final int | S_BLOCKED_BY_RUNTIME_LIMIT | -5004 |
public static final int | S_BLOCKED_BY_USER | -5001 |
public static final int | S_CONNECT_FAILED | -2 |
public static final int | S_CONNECT_LOST | -3 |
public static final int | S_DEEMED_CHAFF | -4000 |
public static final int | S_DEEMED_NOT_FOUND | -404 |
public static final int | S_DEFERRED | -50 |
public static final int | S_DELETED_BY_USER | -6000 |
public static final int | S_DNS_SUCCESS | 1 |
public static final int | S_DOMAIN_PREREQUISITE_FAILURE | -6 |
public static final int | S_DOMAIN_UNRESOLVABLE | -1 |
public static final int | S_GETBYNAME_SUCCESS | 1001 |
public static final int | S_NOT_FOUND | 404 |
public static final int | S_OTHER_PREREQUISITE_FAILURE | -62 |
public static final int | S_OUT_OF_SCOPE | -5000 |
public static final int | S_PREREQUISITE_UNSCHEDULABLE_FAILURE | -63 |
public static final int | S_PROCESSING_THREAD_KILLED | -7000 |
public static final int | S_ROBOTS_PRECLUDED | -9998 |
public static final int | S_ROBOTS_PREREQUISITE_FAILURE | -61 |
public static final int | S_RUNTIME_EXCEPTION | -5 |
public static final int | S_SERIOUS_ERROR | -3000 |
public static final int | S_TIMEOUT | -4 |
public static final int | S_TOO_MANY_EMBED_HOPS | -4002 |
public static final int | S_TOO_MANY_LINK_HOPS | -4001 |
public static final int | S_TOO_MANY_RETRIES | -8 |
public static final int | S_UNATTEMPTED | 0 |
public static final int | S_UNFETCHABLE_URI | -7 |
public static final int | S_UNQUEUEABLE | -60 |
public static final int | S_WHOIS_GENERIC_FINISHED | 2002 |
public static final int | S_WHOIS_SUCCESS | 2001 |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | DEFAULT_IP_WHOIS_SERVER | "whois.arin.net" |
public static final String | IP_ADDRESS_REGEX | "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}" |
protected static final String | ULTRA_SUFFIX_WHOIS_SERVER | "whois.iana.org" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_HTML_FORM_OBJECTS | "html-form-objects" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final long | IP_NEVER_EXPIRES | -1L |
public static final long | IP_NEVER_LOOKED_UP | -2L |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final long | MIN_ROBOTS_RETRIES | 3L |
public static final long | ROBOTS_NOT_FETCHED | -1L |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final int | MAX_SIZE | 512000 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | URI_HISTORY_DBNAME | "uri_history" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_CONTENT_DIGEST | "content-digest" |
public static final String | A_CONTENT_DIGEST_COUNT | "content-digest-count" |
public static final String | A_CONTENT_DIGEST_HISTORY | "content-digest-history" |
public static final String | A_ETAG_HEADER | "etag" |
public static final String | A_FETCH_HISTORY | "fetch-history" |
public static final String | A_LAST_MODIFIED_HEADER | "last-modified" |
public static final String | A_ORIGINAL_DATE | "content-written-date" |
public static final String | A_ORIGINAL_URL | "original-url" |
public static final String | A_REFERENCE_LENGTH | "reference-length" |
public static final String | A_STATUS | "status" |
public static final String | A_WARC_FILE_OFFSET | "warc-file-offset" |
public static final String | A_WARC_FILENAME | "warc-filename" |
public static final String | A_WARC_RECORD_ID | "warc-record-id" |
public static final String | A_WRITE_TAG | "write-tag" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | ARCHIVE_TIME_KEY | "HTTP-Archive-Time" |
public static final String | COLLECTION_KEY | "HTTP-Collection" |
public static final String | CONTENT_LENGTH_KEY | "HTTP-Content-Length" |
public static final String | CONTENT_MD5_KEY | "HTTP-Content-MD5" |
public static final String | CONTENT_TYPE_KEY | "Content-Type" |
public static final String | HARVESTER_KEY | "HTTP-Harvester" |
public static final String | HEADER_LENGTH_KEY | "HTTP-Header-Length" |
public static final String | HEADER_MD5_KEY | "HTTP-Header-MD5" |
public static final String | IP_ADDRESS_KEY | "HTTP-IP-Address" |
public static final String | STATUS_CODE_KEY | "HTTP-Status-Code" |
public static final String | URL_KEY | "HTTP-URL" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | ATTR_MAX_BYTES_WRITTEN | "total-bytes-to-write" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_MIRROR_PATH | "mirror-path" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | ANNOTATION_UNWRITTEN | "unwritten" |
Copyright © 2003–2022 Internet Archive. All rights reserved.