JSONUtils

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
def JSONRDDToDataFrame(flattenedPaths: Array[JSONPath], sqlContext: SQLContext, rdd: RDD[String]): DataFrame

A utility which transforms a JSON org.apache.spark.rdd.RDD to an org.apache.spark.sql.DataFrame of flattened JSON from a provided array of JSONPaths.
A utility which transforms a JSON org.apache.spark.rdd.RDD to an org.apache.spark.sql.DataFrame of flattened JSON from a provided array of JSONPaths.
NOTE: The resulting org.apache.spark.sql.DataFrame is suitable for loading to a target table that has additional columns with defaults (including TIMESTAMP default CURRENT_TIME and computed columns).
For instance, given JSON blobs of the form { "a" : value1, "b" : { "c" : value2, "d" : value3 } }, the paths
```
Array(JSONPath("a"), JSONPath("b","c"), JSONPath("b","d"))
```
will produce a DataFrame like
```
   +--------+--------+--------+
   | a      | b_c    | b_d    |
   +--------+--------+--------+
   | value1 | value2 | value3 |
   +--------+--------+--------+
```
For non-leaf paths, you will get the flattened JSON as a scala.Predef.String. For instance,
```
Array(JSONPath("b"))
```
will yield
```
   +--------------------------+
   | b                        |
   +--------------------------+
   | {"c":value2, "d":value3} |
   +--------------------------+
```
Any nonexistent paths will yield null. Malformed JSON will throw a runtime com.fasterxml.jackson.core.JsonParseException on the executors. This utility currently does not support flattening JSON arrays.
def JSONRDDToRows(flattenedPaths: Array[JSONPath], rdd: RDD[String]): RDD[Row]

Equivalent to JSONRDDToDataFrame.rdd, but does not create a DataFrame.
Equivalent to JSONRDDToDataFrame.rdd, but does not create an org.apache.spark.sql.DataFrame.
flattenedPaths
A scala.Array of JSONPaths to extract.
rdd
The org.apache.spark.rdd.RDD to parse as JSON.
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

object JSONUtils

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

def JSONRDDToDataFrame(flattenedPaths: Array[JSONPath], sqlContext: SQLContext, rdd: RDD[String]): DataFrame

def JSONRDDToRows(flattenedPaths: Array[JSONPath], rdd: RDD[String]): RDD[Row]

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from AnyRef

Inherited from Any

Ungrouped