Problem converting JSON to CSV with a streaming parser in Python

75 views Asked by At

I am trying to convert a large JSON file (~100 GB) to CSV using the ijson library in Python. This is my code:

import ijson
import csv

# Source JSON (a dump file, per the original note) and destination CSV paths
input_file_path = 'path_to_json_file'
output_file_path = 'path_to_the_csv_file.csv'

# Columns to copy from each JSON document; also used as the CSV header order
desired_fieldnames = [
    "col_1",
    "col_2",
    "col_3",
    ...,
]

# Rows accumulate in rows_buffer and are written out once buffer_size is reached
rows_buffer = []
buffer_size = 10 ** 6

def write_buffer(writer, buffer):
    """Write every row in *buffer* through *writer*.

    Args:
        writer: A CSV writer object providing ``writerows`` (the script
            passes a ``csv.DictWriter``).
        buffer: Iterable of rows in whatever form *writer* accepts
            (dicts for a ``DictWriter``).
    """
    # One writerows call replaces the per-row Python loop.
    writer.writerows(buffer)

# Stream documents out of the JSON input and write the selected fields as CSV.
# The input is opened in binary mode and handed to ijson, which yields one
# object per match of the 'rows.item.doc' prefix, so the whole file is never
# loaded at once.
with open(input_file_path, 'rb') as input_file, \
        open(output_file_path, 'w', newline='', encoding='utf-8') as output_file:
    objects = ijson.items(input_file, 'rows.item.doc')
    writer = csv.DictWriter(output_file, fieldnames=desired_fieldnames)
    writer.writeheader()

    try:
        for item in objects:
            # Keep only the desired fields; missing ones become empty cells
            filtered_item = {field: item.get(field, '') for field in desired_fieldnames}
            rows_buffer.append(filtered_item)

            if len(rows_buffer) >= buffer_size:
                # Detach the full batch before writing so a failed write is
                # never retried (and duplicated) by the final flush below.
                full_batch, rows_buffer = rows_buffer, []
                write_buffer(writer, full_batch)
    finally:
        # Flush whatever is still buffered even when iteration raises (for
        # example ijson.common.IncompleteJSONError on malformed input), so
        # rows parsed before the failure are not lost. The exception itself
        # still propagates.
        # NOTE(review): a parse error at `}},  ]}` looks like a trailing comma
        # before the closing bracket; if so, the input file needs repairing.
        if rows_buffer:
            write_buffer(writer, rows_buffer)
            rows_buffer = []



And it results in this error:

Traceback (most recent call last):
  File "path_to_python_program", line 64, in <module>
    for item in objects:
ijson.common.IncompleteJSONError: parse error: unallowed token at this point in JSON text
          ~version":"sahdhdhash=="}},  ]}
                     (right here) ------^
0

There are 0 answers