Source code for csverve.core.csverve_output_data_frame

from typing import List, Dict

import pandas as pd  # type: ignore
from csverve.core import CsverveOutput
from csverve.errors import CsverveWriterError


[docs]class CsverveOutputDataFrame(CsverveOutput): def __init__( self, df: pd.DataFrame, filepath: str, dtypes: Dict[str, str], skip_header: bool = False, na_rep: str = 'NaN', sep: str = ',', ) -> None: """ CSV file and all related metadata. @param filepath: CSV file path. @param dtypes: Dictionary of pandas dtypes, where key = column name, value = dtype. @param header: boolean, True = write header, False = don't write header. @param na_rep: replace NaN with this value. @param columns: List of column names. """ self.df = df columns: List[str] = list(self.df.columns.values) super().__init__( filepath, dtypes, columns, skip_header=skip_header, na_rep=na_rep, sep=sep ) self._cast_df() def _cast_df(self): """ Cast dataframe dtypes. @return: """ for column_name in self.df.columns.values: try: self.df[column_name] = self.df[column_name].astype(self.dtypes[column_name]) except ValueError: raise CsverveWriterError( 'Unable to cast the column {} to {}'.format(column_name, self.dtypes[column_name]) )
[docs] def write_df(self) -> None: """ Write out dataframe to CSV. @param df: Pandas DataFrames. @param chunks: bool. @return: """ if isinstance(self.df, pd.io.parsers.TextFileReader): for i, data in enumerate(self.df): if i == 0: data.to_csv( self.filepath, sep=self.sep, na_rep=self.na_rep, index=False, compression='gzip', header=(not self.skip_header), mode='w' ) else: data.to_csv( self.filepath, sep=self.sep, na_rep=self.na_rep, index=False, compression='gzip', header=False, mode='a' ) elif isinstance(self.df, pd.DataFrame): self.df.to_csv( self.filepath, sep=self.sep, na_rep=self.na_rep, index=False, compression='gzip', header=(not self.skip_header) ) else: raise CsverveWriterError('Invalid df provided as input') self.write_yaml()