5.4. CSV DictReader

5.4.1. Rationale

  • csv.DictReader: reads each row as a dict (field name → value); list(reader) gives list[dict]

5.4.2. Example

>>> import csv
>>>
>>> FILE = r'_temporary.csv'
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.4,3.9,1.3,0.4,setosa
... 5.9,3.0,5.1,1.8,virginica
... 6.0,3.4,4.5,1.6,versicolor"""
>>>
>>> with open(FILE, mode='w') as file:
...     _ = file.write(DATA)
>>>
>>>
>>> with open(FILE) as file:
...     result = csv.DictReader(file)
...
...     for line in result:
...         print(line)
{'sepal_length': '5.4', 'sepal_width': '3.9', 'petal_length': '1.3', 'petal_width': '0.4', 'species': 'setosa'}
{'sepal_length': '5.9', 'sepal_width': '3.0', 'petal_length': '5.1', 'petal_width': '1.8', 'species': 'virginica'}
{'sepal_length': '6.0', 'sepal_width': '3.4', 'petal_length': '4.5', 'petal_width': '1.6', 'species': 'versicolor'}

Read data from a CSV file using csv.DictReader(). When you pass custom field names, note that the first line of the file (typically a header) will be treated like normal data. Therefore we skip it using header = file.readline():

sepal_length,sepal_width,petal_length,petal_width,species
5.4,3.9,1.3,0.4,setosa
5.9,3.0,5.1,1.8,virginica
6.0,3.4,4.5,1.6,versicolor
>>> import csv
>>>
>>> FILE = r'_temporary.csv'
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.4,3.9,1.3,0.4,setosa
... 5.9,3.0,5.1,1.8,virginica
... 6.0,3.4,4.5,1.6,versicolor"""
>>>
>>> with open(FILE, mode='w') as file:
...     _ = file.write(DATA)
>>>
>>>
>>> FIELDNAMES = ['Sepal Length', 'Sepal Width',
...               'Petal Length', 'Petal Width', 'Species']
>>>
>>>
>>> with open(FILE) as file:
...     result = csv.DictReader(file, fieldnames=FIELDNAMES, delimiter=',')
...     header = file.readline()  # skip the first line (old header)
...
...     for line in result:
...         print(line)
{'Sepal Length': '5.4', 'Sepal Width': '3.9', 'Petal Length': '1.3', 'Petal Width': '0.4', 'Species': 'setosa'}
{'Sepal Length': '5.9', 'Sepal Width': '3.0', 'Petal Length': '5.1', 'Petal Width': '1.8', 'Species': 'virginica'}
{'Sepal Length': '6.0', 'Sepal Width': '3.4', 'Petal Length': '4.5', 'Petal Width': '1.6', 'Species': 'versicolor'}

5.4.3. Use Cases

'sepal_length';'sepal_width';'petal_length';'petal_width';'species'
'5,4';'3,9';'1,3';'0,4';'setosa'
'5,9';'3,0';'5,1';'1,8';'virginica'
'6,0';'3,4';'4,5';'1,6';'versicolor'
>>> import csv
>>>
>>>
>>> FILE = r'_temporary.csv'
>>> DATA = """'sepal_length';'sepal_width';'petal_length';'petal_width';'species'
... '5,4';'3,9';'1,3';'0,4';'setosa'
... '5,9';'3,0';'5,1';'1,8';'virginica'
... '6,0';'3,4';'4,5';'1,6';'versicolor'"""
>>>
>>> with open(FILE, mode='w') as file:
...     _ = file.write(DATA)
>>>
>>>
>>> def isnumeric(value):
...     try:
...         float(value)
...         return True
...     except ValueError:
...         return False
>>>
>>>
>>> def clean(line):
...     """Return a copy of the row with decimal-comma numbers converted to float.
...
...     Values that are not numeric (also after swapping ',' for '.') are
...     returned unchanged, and empty values are kept, so every row has
...     the same keys.
...     """
...     result = {}
...     for key, value in line.items():
...         number = value.replace(',', '.')  # '5,4' -> '5.4'
...         result[key] = float(number) if isnumeric(number) else value
...     return result
>>>
>>>
>>> with open(FILE) as file:
...     result = csv.DictReader(file, delimiter=';', quotechar="'")
...
...     for line in result:
...         print(clean(line))
{'sepal_length': 5.4, 'sepal_width': 3.9, 'petal_length': 1.3, 'petal_width': 0.4, 'species': 'setosa'}
{'sepal_length': 5.9, 'sepal_width': 3.0, 'petal_length': 5.1, 'petal_width': 1.8, 'species': 'virginica'}
{'sepal_length': 6.0, 'sepal_width': 3.4, 'petal_length': 4.5, 'petal_width': 1.6, 'species': 'versicolor'}

5.4.4. Assignments

Code 5.12. Solution
"""
* Assignment: CSV DictReader Iris
* Complexity: easy
* Lines of code: 5 lines
* Time: 8 min

English:
    1. Using `csv.DictReader` read the `FILE` content
    2. Use explicit `encoding`, `delimiter` and `quotechar`
    3. Replace column names with `FIELDNAMES`
    4. Skip the first line (header)
    5. Add rows to `result: list[dict]`
    6. Run doctests - all must succeed

Polish:
    1. Korzystając z `csv.DictReader` wczytaj zawartość pliku `FILE`
    2. Podaj jawnie `encoding`, `delimiter` oraz `quotechar`
    3. Podmień nazwy kolumn na `FIELDNAMES`
    4. Pomiń pierwszą linię (nagłówek)
    5. Dodaj wiersze do `result: list[dict]`
    6. Uruchom doctesty - wszystkie muszą się powieść

Hint:
    * For Python before 3.8: `dict(OrderedDict)`

Tests:
    >>> import sys; sys.tracebacklimit = 0

    >>> type(result)
    <class 'list'>
    >>> result  # doctest: +NORMALIZE_WHITESPACE
    [{'Sepal Length': '5.8', 'Sepal Width': '2.7', 'Petal Length': '5.1',
      'Petal Width': '1.9', 'Species': 'virginica'},
     {'Sepal Length': '5.1', 'Sepal Width': '3.5', 'Petal Length': '1.4',
      'Petal Width': '0.2', 'Species': 'setosa'},
     {'Sepal Length': '5.7', 'Sepal Width': '2.8', 'Petal Length': '4.1',
      'Petal Width': '1.3', 'Species': 'versicolor'}]
    >>> from os import remove
    >>> remove(FILE)
"""

import csv


# Raw CSV content used as the exercise fixture: a header row plus three records.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""

# Column names that should replace the header stored in the file.
FIELDNAMES = ['Sepal Length', 'Sepal Width',
              'Petal Length', 'Petal Width', 'Species']

# Path of the scratch file; the doctests remove it after checking `result`.
FILE = r'_temporary.csv'

# Write the fixture with an explicit encoding, so the bytes on disk do not
# depend on the platform's locale default (the exercise asks for an explicit
# `encoding` when reading the file back).
with open(FILE, mode='w', encoding='utf-8') as file:
    file.write(DATA)

# Placeholder for the solution: one dict per data row, keyed by FIELDNAMES.
result: list = []