Package covidseverity

Python package defend-covid-severity to generate severity-prediction views of the datasets provided by FAPESP COVID-19 DataSharing/BR.

Expand source code
"""
Python package `defend-covid-severity` to generate severity-prediction views of
the datasets provided by FAPESP COVID-19 DataSharing/BR.
"""

import pandas as pd


def read_patients(filepath_or_buffer, sep='|'):
    """
    Reads and organizes patients file.
    """
    data = pd.read_table(filepath_or_buffer, sep=sep)
    assert data.shape[1] == 7

    data.rename(str.upper, axis='columns', inplace=True)
    data.rename(lambda x: x[3:], axis='columns', inplace=True)

    data.drop(columns=['PAIS', 'UF', 'MUNICIPIO', 'CEPREDUZIDO'], inplace=True)
    data['NASCIMENTO'] = pd.to_numeric(
        data['NASCIMENTO'], errors='coerce', downcast='integer')

    data.drop_duplicates(inplace=True)

    return data


def read_tests(filepath_or_buffer, sep='|'):
    """
    Reads and organizes tests file.
    """
    data = pd.read_table(filepath_or_buffer, sep=sep)
    assert data.shape[1] == 9

    data.rename(str.upper, axis='columns', inplace=True)
    data.rename(lambda x: x[3:], axis='columns', inplace=True)
    data.rename({'COLETA': 'DATA_COLETA'}, axis='columns', inplace=True)

    data.drop(columns=['ORIGEM', 'VALOR_REFERENCIA'], inplace=True)

    for column in ['EXAME', 'ANALITO', 'RESULTADO', 'UNIDADE']:
        data[column] = data[column].str.strip()

    date_columns = [col for col in data if col.startswith('DATA_')]
    for column in date_columns:
        data[column] = pd.to_datetime(data[column], format='%d/%m/%Y')

    data.drop_duplicates(inplace=True)

    return data


def read_outcomes(filepath_or_buffer, sep='|'):
    """
    Reads and organizes outcomes file.
    """
    data = pd.read_table(filepath_or_buffer, sep=sep)
    assert data.shape[1] == 8

    data.rename(str.upper, axis='columns', inplace=True)
    data.drop(columns=['ID_CLINICA', 'DE_CLINICA'], inplace=True)

    for column in ['DT_DESFECHO', 'DT_ATENDIMENTO']:
        data.loc[data[column] == 'DDMMAA', column] = None

    data['DT_ATENDIMENTO'] = pd.to_datetime(
        data['DT_ATENDIMENTO'], format='%d/%m/%Y')
    data['DT_DESFECHO'] = pd.to_datetime(
        data['DT_DESFECHO'], format='%d/%m/%Y')

    data['XX_DIAS_ATE_DESFECHO'] = (
        data['DT_DESFECHO'] - data['DT_ATENDIMENTO']
    ).dt.days

    data.drop(columns=['DT_DESFECHO'], inplace=True)
    data.rename({'DT_ATENDIMENTO': 'DT_DATA_ATENDIMENTO'},
                axis='columns', inplace=True)
    data.rename(lambda x: x[3:], axis='columns', inplace=True)

    data.drop_duplicates(inplace=True)

    data['CLASSE'] = assess_severity(data)
    data.drop(columns=['DIAS_ATE_DESFECHO'], inplace=True)

    return data


def assess_sample_severity(sample):
    if sample['DIAS_ATE_DESFECHO'] >= 10 and sample['TIPO_ATENDIMENTO'] == 'Internado':
        return 'Grave'

    if "Óbito" in sample['DESFECHO']:
        return 'Grave'

    return 'Leve'


def assess_severity(data):
    """
    Computes severity for each sample in `data`.

    Parameters
    ----------
    data: DataFrame (n_samples, 9)
        Data with outcomes, see `read_outcomes`.
    """
    return data.apply(assess_sample_severity, axis=1)

Functions

def assess_sample_severity(sample)
Expand source code
def assess_sample_severity(sample):
    if sample['DIAS_ATE_DESFECHO'] >= 10 and sample['TIPO_ATENDIMENTO'] == 'Internado':
        return 'Grave'

    if "Óbito" in sample['DESFECHO']:
        return 'Grave'

    return 'Leve'
def assess_severity(data)

Computes severity for each sample in data.

Parameters

data : DataFrame (n_samples, 9)
Data with outcomes, see read_outcomes().
Expand source code
def assess_severity(data):
    """
    Computes severity for each sample in `data`.

    Parameters
    ----------
    data: DataFrame (n_samples, 9)
        Data with outcomes, see `read_outcomes`.
    """
    return data.apply(assess_sample_severity, axis=1)
def read_outcomes(filepath_or_buffer, sep='|')

Reads and organizes outcomes file.

Expand source code
def read_outcomes(filepath_or_buffer, sep='|'):
    """
    Reads and organizes outcomes file.
    """
    data = pd.read_table(filepath_or_buffer, sep=sep)
    assert data.shape[1] == 8

    data.rename(str.upper, axis='columns', inplace=True)
    data.drop(columns=['ID_CLINICA', 'DE_CLINICA'], inplace=True)

    for column in ['DT_DESFECHO', 'DT_ATENDIMENTO']:
        data.loc[data[column] == 'DDMMAA', column] = None

    data['DT_ATENDIMENTO'] = pd.to_datetime(
        data['DT_ATENDIMENTO'], format='%d/%m/%Y')
    data['DT_DESFECHO'] = pd.to_datetime(
        data['DT_DESFECHO'], format='%d/%m/%Y')

    data['XX_DIAS_ATE_DESFECHO'] = (
        data['DT_DESFECHO'] - data['DT_ATENDIMENTO']
    ).dt.days

    data.drop(columns=['DT_DESFECHO'], inplace=True)
    data.rename({'DT_ATENDIMENTO': 'DT_DATA_ATENDIMENTO'},
                axis='columns', inplace=True)
    data.rename(lambda x: x[3:], axis='columns', inplace=True)

    data.drop_duplicates(inplace=True)

    data['CLASSE'] = assess_severity(data)
    data.drop(columns=['DIAS_ATE_DESFECHO'], inplace=True)

    return data
def read_patients(filepath_or_buffer, sep='|')

Reads and organizes patients file.

Expand source code
def read_patients(filepath_or_buffer, sep='|'):
    """
    Reads and organizes patients file.
    """
    data = pd.read_table(filepath_or_buffer, sep=sep)
    assert data.shape[1] == 7

    data.rename(str.upper, axis='columns', inplace=True)
    data.rename(lambda x: x[3:], axis='columns', inplace=True)

    data.drop(columns=['PAIS', 'UF', 'MUNICIPIO', 'CEPREDUZIDO'], inplace=True)
    data['NASCIMENTO'] = pd.to_numeric(
        data['NASCIMENTO'], errors='coerce', downcast='integer')

    data.drop_duplicates(inplace=True)

    return data
def read_tests(filepath_or_buffer, sep='|')

Reads and organizes tests file.

Expand source code
def read_tests(filepath_or_buffer, sep='|'):
    """
    Reads and organizes tests file.
    """
    data = pd.read_table(filepath_or_buffer, sep=sep)
    assert data.shape[1] == 9

    data.rename(str.upper, axis='columns', inplace=True)
    data.rename(lambda x: x[3:], axis='columns', inplace=True)
    data.rename({'COLETA': 'DATA_COLETA'}, axis='columns', inplace=True)

    data.drop(columns=['ORIGEM', 'VALOR_REFERENCIA'], inplace=True)

    for column in ['EXAME', 'ANALITO', 'RESULTADO', 'UNIDADE']:
        data[column] = data[column].str.strip()

    date_columns = [col for col in data if col.startswith('DATA_')]
    for column in date_columns:
        data[column] = pd.to_datetime(data[column], format='%d/%m/%Y')

    data.drop_duplicates(inplace=True)

    return data