Create hierarchical json file with nesting data and attach leaves to next parent node which is not empty

32 views Asked by At

I want to convert my given data into a hierarchical json file. On other Stack Overflow pages I have at least found what I am looking for, that the nesting of the data is exactly as I want it to be, except for one small detail.

I want every 'leaf' to be attached to the next parent node which has no empty, NA, value.

My current code for creating the nesting:

library(dplyr)
library(tidyr)
library(jsonlite)

data <- read.csv(input_file_path, header = TRUE)

nested_data <- data %>% 
  group_by(Level1, Level2, Level3, Name, Id) %>% 
  summarise(across(everything(), list), .groups = "drop")

# deepest level in the json
nested_data <- nested_data %>%  
  nest(children = !c(Level1, Level2, Level3))

nested_data <- nested_data %>%  
  nest(children = !c(Level1, Level2))

# highest level in the json
nested_data <- nested_data %>% 
  nest(children = !c(Level1))

json_output <- toJSON(unbox(fromJSON(toJSON(nested_data))), pretty = TRUE)
write(json_output, output_file_path)

My input data:

       Level1 Level2 Level3  Name Id
1      A      B   <NA>  Anna  1
2      A      B     C1  Otto  2
3      A      B     C1   Tom  3
4      A      B     C2 Maria  4

The output I currently get:

{
    "Level1": "A",
    "children": [
      {
        "Level2": "B",
        "children": [
          {
            "Level3": "C1",
            "children": [
              {
                "Name": "Otto",
                "Id": 2
              },
              {
                "Name": "Tom",
                "Id": 3
              }
            ]
          },
          {
            "Level3": "C2",
            "children": [
              {
                "Name": "Maria",
                "Id": 4
              }
            ]
          },
          {
            "children": [
              {
                "Name": "Anna",
                "Id": 1
              }
            ]
          }
        ]
      }
    ]
  }

BUT what I want is:

{
    "Level1": "A",
    "children": [
      {
        "Level2": "B",
        "children": [
          {
            "Level3": "C1",
            "children": [
              {
                "Name": "Otto",
                "Id": 2
              },
              {
                "Name": "Tom",
                "Id": 3
              }
            ]
          },
          {
            "Level3": "C2",
            "children": [
              {
                "Name": "Maria",
                "Id": 4
              }
            ]
          },
          {
           "Name": "Anna",
            "Id": 1
          }
        ]
      }
    ]
  }

The difference is that if the deepest level, Level3, is empty, then the leaf (Name, Id) should instead be attached to the next higher parent node were the value is not empty. And only if the Level3 is not empty then the leaf should be attached there.

What I already tried was to manually nest the deepest level by writing a function that creates a list of 'Name' and 'Id'. But unfortunately the output is still not what I want.

Here is my not really working approach:

library(dplyr)
library(tidyr)
library(jsonlite)
library(purrr)

data <- read.csv(input_file_path, header = TRUE)

create_children <- function(row) {
  if (!is.na(row["Level3"])) {
    children_list <- as.list(row)
    children_list <- children_list[!(names(children_list) %in% c("Level1", "Level2", "Level3"))]
    children_tbl <- as_tibble(children_list)
    return(as.list(children_tbl))
  } else {
    return(list())
  }
}

nested_data_test <- data %>%
  mutate(children = apply(., 1, create_children))

multiple_level3_rows <- nested_data_test %>%
  filter(!is.na(Level3)) %>%
  group_by(Level1, Level2, Level3) %>%
  filter(n() > 1)

grouped_rows <- multiple_level3_rows %>%
  group_by(Level1, Level2, Level3) %>%
  summarise(
    children = list(
      lapply(1:length(Name), function(i) {
        tibble(Name = Name[i], Id = Id[i])
      })
    )
  )

final_df <- bind_rows(grouped_rows, nested_data_test %>%
                        anti_join(multiple_level3_rows, by = c("Level1", "Level2", "Level3")))

final_df <- final_df %>%
  mutate(Name = ifelse(!map_lgl(children, is_empty), NA, Name),
         Id = ifelse(!map_lgl(children, is_empty), NA, Id))

final_df <- subset(final_df, select = -c(Level3) )

final_df <- final_df %>%  
  nest(children = !c(Level1, Level2))

# highest level in the json
final_df <- final_df %>% 
  nest(children = !c(Level1))

json_output <- toJSON(unbox(fromJSON(toJSON(final_df))), pretty = TRUE)
write(json_output, output_file_path)

Output of my approach:

{
    "Level1": "A",
    "children": [
      {
        "Level2": "B",
        "children": [
          {
            "children": [
              [
                {
                  "Name": "Otto",
                  "Id": 2
                }
              ],
              [
                {
                  "Name": "Tom",
                  "Id": 3
                }
              ]
            ]
          },
          {
            "children": [],
            "Name": "Anna",
            "Id": 1
          },
          {
            "children": {
              "Name": ["Maria"],
              "Id": ["4"]
            }
          }
        ]
      }
    ]
  }

What would probably also help me is the data structure generated by nest() so that I can simulate it correctly.

For two elements in children: I already tied it with list(list(), list()) but that didn't work because it is then [ [ { }, { } ] ] in the json. I've also tried list(c(), c()), but that doesn't work either. And list(as.tibble(), as.tibble()) didn't work either.

0

There are 0 answers