public class GenericXmlReader<D extends org.apache.uima.jcas.cas.TOP> extends Object
Consider, as an example, the XML fragment
<s><det>the</det> <n>dog</n></s>.
In the JCas, this will be represented as the document text "the dog", with
three annotations of the type XMLElement: one annotation covers the
entire string (and has the tag name "s" as a feature), one annotation
covers "the" (tag name: "det"), and one annotation covers "dog"
(tag name: "n"). In addition, we store a CSS selector
for each annotation, which allows finding the element in the DOM tree. After
the initial conversion, rules are applied to convert some XML elements to
other UIMA annotations. Rules are expressed in CSS-like syntax.
See Selector for a detailed description. Classes
implementing de.unistuttgart.quadrama.io.core.AbstractDramaUrlReader
contain usage examples.

Modifier and Type | Class | Description |
---|---|---|
static class | GenericXmlReader.Rule<T extends org.apache.uima.jcas.cas.TOP> | This class represents the rules we apply. |
Constructor | Description |
---|---|
GenericXmlReader(Class<D> documentClass) |
Modifier and Type | Method | Description |
---|---|---|
<T extends org.apache.uima.jcas.cas.TOP> |
addGlobalRule(String selector,
Class<T> targetClass,
java.util.function.BiConsumer<T,org.jsoup.nodes.Element> callback) |
|
void |
addGlobalRule(String selector,
java.util.function.BiConsumer<D,org.jsoup.nodes.Element> callback) |
|
void |
addRule(GenericXmlReader.Rule<?> rule) |
|
<T extends org.apache.uima.jcas.cas.TOP> |
addRule(String selector,
Class<T> targetClass) |
|
<T extends org.apache.uima.jcas.cas.TOP> |
addRule(String selector,
Class<T> targetClass,
java.util.function.BiConsumer<T,org.jsoup.nodes.Element> callback) |
|
protected <T extends org.apache.uima.jcas.cas.TOP> |
applyRule(org.apache.uima.jcas.JCas jcas,
org.jsoup.nodes.Element rootElement,
Map<String,XMLElement> annoMap,
GenericXmlReader.Rule<T> mapping) |
|
boolean |
exists(String id) |
|
Map.Entry<org.jsoup.nodes.Element,org.apache.uima.cas.FeatureStructure> |
getAnnotation(String id) |
|
org.jsoup.nodes.Document |
getDocument() |
|
protected <T extends org.apache.uima.jcas.cas.TOP> |
getFeatureStructure(org.apache.uima.jcas.JCas jcas,
XMLElement hAnno,
org.jsoup.nodes.Element elm,
GenericXmlReader.Rule<T> mapping) |
|
static <T extends org.apache.uima.jcas.cas.TOP> |
getOrCreate(org.apache.uima.jcas.JCas jcas,
Class<T> targetClass) |
|
String |
getTextRootSelector() |
|
boolean |
isPreserveWhitespace() |
|
org.apache.uima.jcas.JCas |
read(org.apache.uima.jcas.JCas jcas,
InputStream xmlStream) |
|
void |
setPreserveWhitespace(boolean preserveWhitespace) |
|
void |
setTextRootSelector(String textRootSelector) |
public org.apache.uima.jcas.JCas read(org.apache.uima.jcas.JCas jcas, InputStream xmlStream) throws IOException
Throws: IOException
public void addRule(GenericXmlReader.Rule<?> rule)
public <T extends org.apache.uima.jcas.cas.TOP> void addRule(String selector, Class<T> targetClass)
public <T extends org.apache.uima.jcas.cas.TOP> void addRule(String selector, Class<T> targetClass, java.util.function.BiConsumer<T,org.jsoup.nodes.Element> callback)
public void addGlobalRule(String selector, java.util.function.BiConsumer<D,org.jsoup.nodes.Element> callback)
public <T extends org.apache.uima.jcas.cas.TOP> void addGlobalRule(String selector, Class<T> targetClass, java.util.function.BiConsumer<T,org.jsoup.nodes.Element> callback)
public Map.Entry<org.jsoup.nodes.Element,org.apache.uima.cas.FeatureStructure> getAnnotation(String id)
public boolean exists(String id)
protected <T extends org.apache.uima.jcas.cas.TOP> T getFeatureStructure(org.apache.uima.jcas.JCas jcas, XMLElement hAnno, org.jsoup.nodes.Element elm, GenericXmlReader.Rule<T> mapping)
protected <T extends org.apache.uima.jcas.cas.TOP> void applyRule(org.apache.uima.jcas.JCas jcas, org.jsoup.nodes.Element rootElement, Map<String,XMLElement> annoMap, GenericXmlReader.Rule<T> mapping)
public String getTextRootSelector()
public void setTextRootSelector(String textRootSelector)
public org.jsoup.nodes.Document getDocument()
public boolean isPreserveWhitespace()
public void setPreserveWhitespace(boolean preserveWhitespace)
public static <T extends org.apache.uima.jcas.cas.TOP> T getOrCreate(org.apache.uima.jcas.JCas jcas, Class<T> targetClass)
Copyright © 2018. All rights reserved.