object
Gru
Value Members
def apply(dt: DataType)(
    input: Tensor[dt.type],
    weight: Tensor[dt.type],
    recurrence: Tensor[dt.type],
    bias0: Option[Tensor[dt.type]],
    sequenceLens: Option[Tensor[Int32.type]],
    initialH: Option[Tensor[dt.type]],
    activationAlpha: List[Float],
    activationBeta: List[Float],
    activations: List[ActivationFn],
    clip: Float,
    direction: Direction,
    hiddenSize: Int,
    linearBeforeReset: Boolean
): Try[Output[dt.type]]
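A minimal call sketch, not taken from the library's documentation: it assumes a DataType value named Float32, ActivationFn values Sigmoid and Tanh, a Direction value Forward, and a hypothetical zerosTensor helper standing in for whatever tensor constructor the library actually provides; library imports are assumed to be in scope. Shapes follow the ONNX GRU spec linked below.

import scala.util.Try

// ONNX GRU layout: X is [seq_length, batch_size, input_size],
// W[zrh] is [num_directions, 3*hidden_size, input_size],
// R[zrh] is [num_directions, 3*hidden_size, hidden_size].
val seqLength     = 4
val batchSize     = 2
val inputSize     = 8
val hiddenSize    = 16
val numDirections = 1 // forward-only

// Hypothetical helper; replace with the library's actual tensor constructor.
def zerosTensor(dt: DataType)(shape: Int*): Tensor[dt.type] = ???

val x = zerosTensor(Float32)(seqLength, batchSize, inputSize)
val w = zerosTensor(Float32)(numDirections, 3 * hiddenSize, inputSize)
val r = zerosTensor(Float32)(numDirections, 3 * hiddenSize, hiddenSize)

val result: Try[Output[Float32.type]] = Gru(Float32)(
  input = x,
  weight = w,
  recurrence = r,
  bias0 = None,                      // optional Wb[zrh] ++ Rb[zrh]
  sequenceLens = None,               // defaults to seqLength for every batch entry
  initialH = None,                   // defaults to zeros
  activationAlpha = Nil,
  activationBeta = Nil,
  activations = List(Sigmoid, Tanh), // f and g for the single direction (assumed value names)
  clip = 0.0f,
  direction = Forward,
  hiddenSize = hiddenSize,
  linearBeforeReset = false
)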
GRU
Computes a one-layer GRU. This operator is usually supported via some custom implementation such as CuDNN.
See https://github.com/onnx/onnx/blob/master/docs/Operators.md#GRU
Notations:
- X - input tensor
- z - update gate
- r - reset gate
- h - hidden gate
- t - time step (t-1 means previous time step)
- W[zrh] - W parameter weight matrix for update, reset, and hidden gates
- R[zrh] - R recurrence weight matrix for update, reset, and hidden gates
- Wb[zrh] - W bias vectors for update, reset, and hidden gates
- Rb[zrh] - R bias vectors for update, reset, and hidden gates
- WB[zrh] - W parameter weight matrix for backward update, reset, and hidden gates
- RB[zrh] - R recurrence weight matrix for backward update, reset, and hidden gates
- WBb[zrh] - W bias vectors for backward update, reset, and hidden gates
- RBb[zrh] - R bias vectors for backward update, reset, and hidden gates
- H - Hidden state
- num_directions - 2 if direction == bidirectional else 1
Activation functions (first 3 required, rest optional):
- Relu(x) - max(0, x)
- Tanh(x) - (1 - e^{-2x})/(1 + e^{-2x})
- Sigmoid(x) - 1/(1 + e^{-x})
(optional below this line)
- Affine(x) - alpha*x + beta
- LeakyRelu(x) - x if x >= 0 else alpha*x
- ThresholdedRelu(x) - x if x >= alpha else 0
- ScaledTanh(x) - alpha*Tanh(beta*x)
- HardSigmoid(x) - min(max(alpha*x + beta, 0), 1)
- Elu(x) - x if x >= 0 else alpha*(e^x - 1)
- Softsign(x) - x/(1 + |x|)
- Softplus(x) - log(1 + e^x)
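A compact reference sketch of these activation functions in plain Scala, with Float arithmetic; it is not part of this API, but it shows how alpha and beta (the activationAlpha / activationBeta entries) parameterize each function.

object Activations {
  def relu(x: Float): Float    = math.max(0f, x)
  def tanh(x: Float): Float    = math.tanh(x).toFloat
  def sigmoid(x: Float): Float = 1f / (1f + math.exp(-x).toFloat)

  // Optional activations; alpha and beta come from activationAlpha / activationBeta.
  def affine(x: Float, alpha: Float, beta: Float): Float      = alpha * x + beta
  def leakyRelu(x: Float, alpha: Float): Float                = if (x >= 0f) x else alpha * x
  def thresholdedRelu(x: Float, alpha: Float): Float          = if (x >= alpha) x else 0f
  def scaledTanh(x: Float, alpha: Float, beta: Float): Float  = alpha * math.tanh(beta * x).toFloat
  def hardSigmoid(x: Float, alpha: Float, beta: Float): Float = math.min(math.max(alpha * x + beta, 0f), 1f)
  def elu(x: Float, alpha: Float): Float                      = if (x >= 0f) x else alpha * (math.exp(x).toFloat - 1f)
  def softsign(x: Float): Float                               = x / (1f + math.abs(x))
  def softplus(x: Float): Float                               = math.log(1.0 + math.exp(x)).toFloat
}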
Equations (Default: f=Sigmoid, g=Tanh):
- zt = f(Xt*(Wz^T) + Ht-1*(Rz^T) + Wbz + Rbz)
- rt = f(Xt*(Wr^T) + Ht-1*(Rr^T) + Wbr + Rbr)
- ht = g(Xt*(Wh^T) + (rt (.) Ht-1)*(Rh^T) + Rbh + Wbh)   # default, when linear_before_reset = 0
- ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh) # when linear_before_reset != 0
- Ht = (1 - zt) (.) ht + zt (.) Ht-1
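To make the update rule concrete, here is a plain-Scala sketch of one GRU time step for a single direction, using Arrays for vectors and Array[Array[Float]] for weight matrices. It is illustrative only (not how the operator is implemented), but it follows the equations above, including both linear_before_reset variants.

object GruCellSketch {
  private def matVec(m: Array[Array[Float]], v: Array[Float]): Array[Float] =
    m.map(row => row.zip(v).map { case (a, b) => a * b }.sum)

  private def sigmoid(x: Float): Float = 1f / (1f + math.exp(-x).toFloat)
  private def tanh(x: Float): Float    = math.tanh(x).toFloat

  /** One forward time step. wz/wr/wh: [hidden_size, input_size];
    * rz/rr/rh: [hidden_size, hidden_size]; biases: [hidden_size]. Returns Ht. */
  def step(
      xt: Array[Float], hPrev: Array[Float],
      wz: Array[Array[Float]], wr: Array[Array[Float]], wh: Array[Array[Float]],
      rz: Array[Array[Float]], rr: Array[Array[Float]], rh: Array[Array[Float]],
      wbz: Array[Float], wbr: Array[Float], wbh: Array[Float],
      rbz: Array[Float], rbr: Array[Float], rbh: Array[Float],
      linearBeforeReset: Boolean
  ): Array[Float] = {
    val n = hPrev.length
    val wzX = matVec(wz, xt); val rzH = matVec(rz, hPrev)
    val wrX = matVec(wr, xt); val rrH = matVec(rr, hPrev)

    // zt = f(Xt*(Wz^T) + Ht-1*(Rz^T) + Wbz + Rbz)
    val zt = Array.tabulate(n)(i => sigmoid(wzX(i) + rzH(i) + wbz(i) + rbz(i)))
    // rt = f(Xt*(Wr^T) + Ht-1*(Rr^T) + Wbr + Rbr)
    val rt = Array.tabulate(n)(i => sigmoid(wrX(i) + rrH(i) + wbr(i) + rbr(i)))

    val whX = matVec(wh, xt)
    val ht =
      if (linearBeforeReset) {
        // ht = g(Xt*(Wh^T) + rt (.) (Ht-1*(Rh^T) + Rbh) + Wbh): rt gates the recurrence after the linear map
        val rhH = matVec(rh, hPrev)
        Array.tabulate(n)(i => tanh(whX(i) + rt(i) * (rhH(i) + rbh(i)) + wbh(i)))
      } else {
        // ht = g(Xt*(Wh^T) + (rt (.) Ht-1)*(Rh^T) + Rbh + Wbh): rt gates Ht-1 before the linear map
        val rhGated = matVec(rh, Array.tabulate(n)(i => rt(i) * hPrev(i)))
        Array.tabulate(n)(i => tanh(whX(i) + rhGated(i) + rbh(i) + wbh(i)))
      }

    // Ht = (1 - zt) (.) ht + zt (.) Ht-1
    Array.tabulate(n)(i => (1f - zt(i)) * ht(i) + zt(i) * hPrev(i))
  }
}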