Title: | Unnest Hierarchical Data Structures |
---|---|
Description: | Fast flattening of hierarchical data structures (e.g. JSON, XML) into data.frames with a flexible spec language. |
Authors: | Vitalie Spinu [aut, cre] |
Maintainer: | Vitalie Spinu <[email protected]> |
License: | GPL (>= 2) |
Version: | 0.0.4.9000 |
Built: | 2024-10-31 22:11:26 UTC |
Source: | https://github.com/vspinu/unnest |
An unnest spec is a nested list with the same structure as the
nested JSON. It specifies how the deeply nested lists ought to be
unnested. spec() is a handy constructor for spec lists. s() is a
shorthand alias for spec().
spec( selector = NULL, ..., as = NULL, children = NULL, groups = NULL, include = NULL, exclude = NULL, stack = NULL, process = NULL, default = NULL ) s( selector = NULL, ..., as = NULL, children = NULL, groups = NULL, include = NULL, exclude = NULL, stack = NULL, process = NULL, default = NULL )
spec( selector = NULL, ..., as = NULL, children = NULL, groups = NULL, include = NULL, exclude = NULL, stack = NULL, process = NULL, default = NULL ) s( selector = NULL, ..., as = NULL, children = NULL, groups = NULL, include = NULL, exclude = NULL, stack = NULL, process = NULL, default = NULL )
selector |
A shorthand syntax for an
|
as |
name for this field in the extracted data.frame |
children, ... |
Unnamed list of children specs. |
groups |
Named list of specs to be processed in parallel. The return
value is a named list of unnested data.frames. The result is the same as
when each spec is |
include, exclude |
A list, a numeric vector or a character vector specifying components to include or exclude. A list can combine numeric indexes and character elements to extract. |
stack |
Whether to stack this node (TRUE) or to spread it (FALSE). When
|
process |
Extra processing step for this element. Either NULL for no processing (the default), "as_is" to return the entire element in a list column, "paste" to paste elements together into a character column. |
default |
Default value to insert if the |
s(): a canonical spec - a list consumed by C++ unnesting routines.
s("a") s("a//c2") s("a/2/c2,cid")
s("a") s("a//c2") s("a/2/c2,cid")
Unnest nested lists into flat data.frames.
unnest( x, spec = NULL, dedupe = FALSE, stack_atomic = NULL, process_atomic = NULL, process_unnamed_lists = NULL, cross_join = TRUE )
unnest( x, spec = NULL, dedupe = FALSE, stack_atomic = NULL, process_atomic = NULL, process_unnamed_lists = NULL, cross_join = TRUE )
x |
a nested list to unnest |
spec |
spec to use for unnesting. See spec(). |
dedupe |
whether to dedupe repeated elements. If TRUE, if a node is
visited for a second time and is not explicitly declared in the |
stack_atomic |
Whether atomic leaf vectors should be stacked or not. If NULL, the default, data.frame vectors are stacked, all others are spread. |
process_atomic |
Process spec for atomic leaf vectors. Either NULL for no processing (the default), "as_is" to return the entire element in a list column, "paste" to paste elements together into a character column. |
process_unnamed_lists |
How to process unnamed lists. Can be one of "as_is" - return a list column, "exclude" - drop these elements unless they are explicitly included in the spec, "paste" - return a character column, "stack" - automatically stack. If NULL (the default), do nothing - process them normally according to the specs. |
cross_join |
Specifies how the results from sibling nodes are joined
( |
A data.frame, data.table or a tibble, as specified by the option
unnest.return.type. Defaults to data.frame.
x <- list(a = list(b = list(x = 1, y = 1:2, z = 10), c = list(x = 2, y = 100:102))) xxx <- list(x, x, x) ## spreading unnest(x, s("a")) unnest(x, s("a"), stack_atomic = TRUE) unnest(x, s("a/b"), stack_atomic = TRUE) unnest(x, s("a/c"), stack_atomic = TRUE) unnest(x, s("a"), stack_atomic = TRUE, cross_join = TRUE) unnest(x, s("a//x")) unnest(x, s("a//x,z")) unnest(x, s("a/2/x,y")) ## stacking unnest(x, s("a/", stack = TRUE)) unnest(x, s("a/", stack = TRUE, as = "A")) unnest(x, s("a/", stack = TRUE, as = "A"), stack_atomic = TRUE) unnest(x, s("a/", stack = "id"), stack_atomic = TRUE) unnest(x, s("a/", stack = "id", as = ""), stack_atomic = TRUE) unnest(xxx, s(stack = "id")) unnest(xxx, s(stack = "id"), stack_atomic = TRUE) unnest(xxx, s(stack = "id", s("a/b/y/", stack = TRUE))) ## exclusion unnest(x, s("a/b/", exclude = "x")) ## dedupe unnest(x, s("a", s("b/y"), s("b")), stack_atomic = TRUE) unnest(x, s("a", s("b/y"), s("b")), dedupe = TRUE, stack_atomic = TRUE) ## grouping unnest(xxx, stack_atomic = TRUE, s(stack = TRUE, groups = list(first = s("a/b/x,y"), second = s("a/b")))) unnest(xxx, stack_atomic = TRUE, dedupe = TRUE, s(stack = TRUE, groups = list(first = s("a/b/x,y"), second = s("a/b")))) ## processing as_is str(unnest(xxx, s(stack = "id", s("a/b/y", process = "as_is"), s("a/c", process = "as_is")))) str(unnest(xxx, s(stack = "id", s("a/b/", process = "as_is")))) str(unnest(xxx, s(stack = "id", s("a/b", process = "as_is")))) ## processing paste str(unnest(x, s("a/b/y", process = "paste"))) str(unnest(xxx, s(stack = TRUE, s("a/b/", process = "paste")))) str(unnest(xxx, s(stack = TRUE, s("a/b", process = "paste")))) ## default unnest(x, s("a/b/c/", s("b", default = 100))) unnest(x, s("a/b/c/", stack = "ix", s("b", default = 100)))
x <- list(a = list(b = list(x = 1, y = 1:2, z = 10), c = list(x = 2, y = 100:102))) xxx <- list(x, x, x) ## spreading unnest(x, s("a")) unnest(x, s("a"), stack_atomic = TRUE) unnest(x, s("a/b"), stack_atomic = TRUE) unnest(x, s("a/c"), stack_atomic = TRUE) unnest(x, s("a"), stack_atomic = TRUE, cross_join = TRUE) unnest(x, s("a//x")) unnest(x, s("a//x,z")) unnest(x, s("a/2/x,y")) ## stacking unnest(x, s("a/", stack = TRUE)) unnest(x, s("a/", stack = TRUE, as = "A")) unnest(x, s("a/", stack = TRUE, as = "A"), stack_atomic = TRUE) unnest(x, s("a/", stack = "id"), stack_atomic = TRUE) unnest(x, s("a/", stack = "id", as = ""), stack_atomic = TRUE) unnest(xxx, s(stack = "id")) unnest(xxx, s(stack = "id"), stack_atomic = TRUE) unnest(xxx, s(stack = "id", s("a/b/y/", stack = TRUE))) ## exclusion unnest(x, s("a/b/", exclude = "x")) ## dedupe unnest(x, s("a", s("b/y"), s("b")), stack_atomic = TRUE) unnest(x, s("a", s("b/y"), s("b")), dedupe = TRUE, stack_atomic = TRUE) ## grouping unnest(xxx, stack_atomic = TRUE, s(stack = TRUE, groups = list(first = s("a/b/x,y"), second = s("a/b")))) unnest(xxx, stack_atomic = TRUE, dedupe = TRUE, s(stack = TRUE, groups = list(first = s("a/b/x,y"), second = s("a/b")))) ## processing as_is str(unnest(xxx, s(stack = "id", s("a/b/y", process = "as_is"), s("a/c", process = "as_is")))) str(unnest(xxx, s(stack = "id", s("a/b/", process = "as_is")))) str(unnest(xxx, s(stack = "id", s("a/b", process = "as_is")))) ## processing paste str(unnest(x, s("a/b/y", process = "paste"))) str(unnest(xxx, s(stack = TRUE, s("a/b/", process = "paste")))) str(unnest(xxx, s(stack = TRUE, s("a/b", process = "paste")))) ## default unnest(x, s("a/b/c/", s("b", default = 100))) unnest(x, s("a/b/c/", stack = "ix", s("b", default = 100)))