# Source code for pylablib.core.fileio.savefile
"""
Utilities for writing data files.
"""
from . import datafile
from . import location
from . import dict_entry
from ..utils import string as string_utils
from ..utils import dictionary
import numpy as np
import pandas as pd
import datetime
def _is_file(value):
    """Check whether `value` is a :class:`.datafile.DataFile` object."""
    file_class = datafile.DataFile
    return isinstance(value, file_class)
def _is_table(value, allow_1D=False):
    """
    Check whether `value` is a table.

    A table is either a pandas DataFrame or a 2D numpy array;
    if `allow_1D` is true, 1D numpy arrays are accepted as well.
    """
    if isinstance(value, pd.DataFrame):
        return True
    if not isinstance(value, np.ndarray):
        return False
    if value.ndim == 2:
        return True
    return bool(allow_1D) and value.ndim == 1
def _table_row_iterator(value):
    """
    Return an iterable over the rows of a table.

    A pandas DataFrame is iterated row by row as tuples (the index is not included);
    any other value is returned unchanged.
    """
    if not isinstance(value, pd.DataFrame):
        return value
    return value.itertuples(index=False)
##### FILE FORMAT #####
class IOutputFileFormat:
    """
    Base class for all output file formats.

    Args:
        format_name (str): The name of the format (to be defined in subclasses).
    """
    def __init__(self, format_name):
        self.format_name = format_name

    def write_file(self, location_file, to_save):
        """Write the data file object `to_save` into `location_file` (must be overridden in subclasses)."""
        raise NotImplementedError("IOutputFileFormat.write_file")

    def write_data(self, location_file, data):
        """Write the bare `data` into `location_file` (must be overridden in subclasses)."""
        raise NotImplementedError("IOutputFileFormat.write_data")

    def write(self, location_file, data):
        """
        Save `data` into `location_file`.

        If `data` is not a :class:`.datafile.DataFile` object, it is wrapped into one before being passed to :meth:`write_file`.
        """
        to_save = data if _is_file(data) else datafile.DataFile(data)
        self.write_file(location_file, to_save)
class ITextOutputFileFormat(IOutputFileFormat):  # pylint: disable=abstract-method
    """
    Generic class for a text output file format.

    Args:
        format_name (str): The name of the format (to be defined in subclasses).
        save_props (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its props metainfo.
        save_comments (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its comments metainfo.
        save_time (bool): If ``True``, append the file creation time in the end.
        new_time (bool): If saving :class:`.datafile.DataFile` object, determines if the time should be updated to the current time.
    """
    def __init__(self, format_name, save_props=True, save_comments=True, save_time=True, new_time=True):
        IOutputFileFormat.__init__(self, format_name)
        self.save_props = save_props
        self.save_comments = save_comments
        self.save_time = save_time
        self.new_time = new_time

    @staticmethod
    def write_line(stream, line):
        """
        Write a single text line (followed by a newline character) into `stream`.

        If `line` is ``None``, nothing is written; this lets callers pass optional lines
        (e.g., a disabled column header line) unconditionally.
        """
        if line is not None:
            stream.write(line + "\n")

    def make_comment_line(self, comment):
        """Build a text line (starting with ``"# "``) representing a single comment."""
        return "# " + string_utils.escape_string(comment, location="parameter")

    def make_prop_line(self, name, value):
        """Build a text line (starting with ``"# "``) representing a single property with the given name and value."""
        return "# {0} :\t{1}".format(name, string_utils.to_string(value, "parameter"))

    def make_savetime_line(self, time):
        """Build a text line with the save time; `time` is formatted as ``YYYY/MM/DD HH:MM:SS``."""
        return "# Saved on {0}".format(time.strftime("%Y/%m/%d %H:%M:%S"))

    def write_comments(self, stream, comments):
        """Write an empty line followed by all comment lines; does nothing if comments saving is disabled or `comments` is empty."""
        if self.save_comments and len(comments) > 0:
            self.write_line(stream, "")
            for c in comments:
                self.write_line(stream, self.make_comment_line(c))

    def write_props(self, stream, props):
        """Write an empty line followed by all property lines; does nothing if props saving is disabled or `props` is empty."""
        if self.save_props and len(props) > 0:
            self.write_line(stream, "")
            for name, value in props.items():
                self.write_line(stream, self.make_prop_line(name, value))

    def write_savetime(self, stream, time):
        """Write an empty line followed by the save time line; does nothing if time saving is disabled."""
        if self.save_time:
            self.write_line(stream, "")
            self.write_line(stream, self.make_savetime_line(time))

    def write_file(self, location_file, to_save):
        """
        Write a data file object into the location.

        The location is opened in the ``"w"`` mode; the data is written first (using :meth:`write_data`),
        followed by the props, the comments, and the save time (subject to the corresponding ``save_*`` attributes).

        Args:
            location_file: Location of the destination.
            to_save: Data file object; its ``data``, ``props``, ``comments`` and (if `new_time` is false) ``creation_time`` attributes are used.
        """
        with location_file.open("w") as stream:
            self.write_data(location_file, to_save.data)
            self.write_props(stream, to_save.props)
            self.write_comments(stream, to_save.comments)
            # either stamp the file with the current time, or keep the time stored in the data file object
            save_time = datetime.datetime.now() if self.new_time else to_save.creation_time
            self.write_savetime(stream, save_time)
class CSVTableOutputFileFormat(ITextOutputFileFormat):
    """
    Output file format for CSV tables.

    Args:
        delimiters (str): Used to separate entries in a row.
        value_formats (str): If not ``None``, defines value formats to be passed to :func:`.utils.string.to_string` function.
        use_rep_classes (bool): If ``True``, use representation classes for Dictionary entries (e.g., numpy arrays will be represented as ``"array([1, 2, 3])"`` instead of just ``"[1, 2, 3]"``);
            This improves storage fidelity, but makes result harder to parse (e.g., by external string parsers).
        save_columns (bool): If ``True``, save column names as a comment line in the beginning of the file.
        save_props (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its props metainfo.
        save_comments (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its comments metainfo.
        save_time (bool): If ``True``, append the file creation time in the end.
    """
    def __init__(self, delimiters="\t", value_formats=None, use_rep_classes=False, save_columns=True, save_props=True, save_comments=True, save_time=True):
        ITextOutputFileFormat.__init__(self, "csv", save_props=save_props, save_comments=save_comments, save_time=save_time)
        self.save_columns = save_columns
        self.use_rep_classes = use_rep_classes
        self.value_formats = value_formats
        self.delimiters = delimiters

    def get_table_line(self, line):
        """Convert every element of the row `line` into a string and join the results using the delimiters."""
        cells = [
            string_utils.to_string(cell, "entry", value_formats=self.value_formats, use_classes=self.use_rep_classes)
            for cell in line
        ]
        return self.delimiters.join(cells)

    def get_columns_line(self, columns):
        """Build the comment line with the column names, or return ``None`` if column saving is disabled."""
        if not self.save_columns:
            return None
        return "# " + self.get_table_line(columns)

    def write_data(self, location_file, data):
        """
        Write a table into a CSV file.

        Args:
            location_file: Location of the destination.
            data: Data to be saved. Can be a pandas DataFrame or an arbitrary 2D array (numpy array, 2D list, etc.);
                anything which is neither a DataFrame nor a 2D numpy array is first converted into a DataFrame
                using the standard constructor (i.e., 2D list is interpreted as a list of rows)
        """
        table = data if _is_table(data) else pd.DataFrame(data)
        out = location_file.stream
        # only DataFrames carry column names; plain numpy arrays get no header line
        if isinstance(table, pd.DataFrame):
            header = self.get_columns_line(table.columns)
            self.write_line(out, header)
        for row in _table_row_iterator(table):
            self.write_line(out, self.get_table_line(row))
class DictionaryOutputFileFormat(ITextOutputFileFormat):
    """
    Output file format for Dictionary files.

    Args:
        param_formats (str): If not ``None``, defines value formats to be passed to :func:`.utils.string.to_string` function when writing Dictionary entries.
        use_rep_classes (bool): If ``True``, use representation classes for Dictionary entries (e.g., numpy arrays will be represented as ``"array([1, 2, 3])"`` instead of just ``"[1, 2, 3]"``);
            This improves storage fidelity, but makes result harder to parse (e.g., by external string parsers).
        table_format (str): Default format for table (numpy arrays or pandas DataFrames) entries. Can be
            ``'inline'`` (table is written inside the file),
            ``'csv'`` (external CSV file) or
            ``'bin'`` (external binary file).
        inline_delimiters (str): Used to separate entries in a row for inline tables.
        inline_formats (str): If not ``None``, defines value formats to be passed to :func:`.utils.string.to_string` function when writing inline tables.
        save_props (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its props metainfo.
        save_comments (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its comments metainfo.
        save_time (bool): If ``True``, append the file creation time in the end.
    """
    def __init__(self, param_formats=None, use_rep_classes=False, table_format="inline", inline_delimiters="\t", inline_formats=None, save_props=True, save_comments=True, save_time=True):
        ITextOutputFileFormat.__init__(self, "dict", save_props=save_props, save_comments=save_comments, save_time=save_time)
        self.use_rep_classes = use_rep_classes
        self.param_formats = param_formats
        self.table_format = table_format
        self.inline_delimiters = inline_delimiters
        self.inline_formats = inline_formats

    def get_dictionary_line(self, path, value):
        """Build a single entry line: the escaped ``/``-joined path and the string representation of the value, separated by a tab."""
        joined_path = "/".join(path)
        key_text = string_utils.escape_string(joined_path, location="entry", escape_convertible=False)
        value_text = string_utils.to_string(value, "parameter", value_formats=self.param_formats, use_classes=self.use_rep_classes)
        return "{0}\t{1}".format(key_text, value_text)

    def _write_table_inline(self, stream, table):
        """Write the table row by row, surrounded by the table start and table end marker lines."""
        self.write_line(stream, "## Table start ##")
        for row in _table_row_iterator(table):
            cells = [
                string_utils.to_string(cell, "entry", value_formats=self.inline_formats, use_classes=self.use_rep_classes)
                for cell in row
            ]
            self.write_line(stream, self.inline_delimiters.join(cells))
        self.write_line(stream, "## Table end ##")

    def write_data(self, location_file, data):
        """
        Write data to a Dictionary file.

        Args:
            location_file: Location of the destination.
            data: Data to be saved. Should be object of class :class:`.Dictionary`.

        Raises:
            ValueError: if `data` is not a dictionary.
        """
        if not dictionary.is_dictionary(data):
            raise ValueError("format '{0}' can't save data {1}".format(self.format_name, data))
        loc = location_file.loc
        stream = location_file.stream
        table_builder = dict_entry.table_entry_builder(self.table_format)
        for path, value in data.iternodes(ordered=True, to_visit="leafs", include_path=True):
            # plain values: a single "path<TAB>value" line
            if string_utils.is_convertible(value):
                self.write_line(stream, self.get_dictionary_line(path, value))
                continue
            # inline tables: a "table" marker line followed by the table body
            if isinstance(value, dict_entry.InlineTable):
                self.write_line(stream, self.get_dictionary_line(path, "table"))
                self._write_table_inline(stream, value.table)
                continue
            # everything else: try to represent the value as a special dictionary entry
            rel_path = path[len(data.get_path()):]
            dict_ptr = data.branch_pointer(rel_path)
            table_entry = dict_entry.from_data(value, [table_builder])
            if table_entry is None:
                self.write_line(stream, self.get_dictionary_line(path, value))
                continue
            entry_dict = table_entry.to_dict(dict_ptr, loc)
            # temporarily substitute the entry with its dictionary representation, write that branch recursively,
            # and restore the original entry afterwards (even if writing fails)
            original_branch = data.detach(rel_path)
            data.add_entry(rel_path, entry_dict, branch_option="attach")
            try:
                self.write_data(location_file, data.branch_pointer(rel_path))
            finally:
                data.detach(rel_path)
                data.add_entry(rel_path, original_branch, branch_option="attach")
class IBinaryOutputFileFormat(IOutputFileFormat):  # pylint: disable=abstract-method
    """Generic class for a binary output file format."""
    def get_preamble(self, location_file, data):  # pylint: disable=unused-argument
        """Return a dictionary describing the file format; this base implementation returns an empty dictionary."""
        preamble = dictionary.Dictionary()
        return preamble
class TableBinaryOutputFileFormat(IBinaryOutputFileFormat):
    """
    Class for binary output file format.

    Args:
        dtype: a string with numpy dtype (e.g., ``"<f8"``) used to save the data. By default, use little-endian (``"<"``) variant kind of the supplied data array dtype
        transposed (bool): If ``False``, write the data row-wise; otherwise, write it column-wise.
    """
    def __init__(self, dtype=None, transposed=False):
        IBinaryOutputFileFormat.__init__(self, "bin")
        self.dtype = dtype
        self.transposed = transposed

    def get_dtype(self, table):
        """
        Get the dtype used to save `table`.

        Return the `dtype` attribute if it is not ``None``;
        otherwise, return the string code of the little-endian variant of the table dtype.
        """
        if self.dtype is None:
            return np.asarray(table).dtype.newbyteorder("<").str
        return self.dtype

    def get_preamble(self, location_file, data):
        """
        Generate a preamble (dictionary describing the file format).

        The parameters are ``'dtype'``, ``'packing'`` (``'transposed'`` or ``'flatten'``, depending on the `transposed` attribute),
        ``'ncols'`` (number of columns) and ``'nrows'`` (number of rows).
        """
        preamble = dictionary.Dictionary()
        preamble["nrows"] = data.shape[0]
        preamble["ncols"] = data.shape[1]
        preamble["dtype"] = self.get_dtype(data)
        preamble["packing"] = "transposed" if self.transposed else "flatten"
        return preamble

    def write_data(self, location_file, data):
        """
        Write data to a binary file.

        Args:
            location_file: Location of the destination.
            data: Data to be saved. Can be a pandas DataFrame or an arbitrary 2D array (numpy array, 2D list, etc.)
                Converted to numpy array before saving.
        """
        stream = location_file.stream
        data = np.asarray(data)
        dtype = self.get_dtype(data)
        if self.transposed:
            data = data.transpose()
        # ``tofile`` is used in binary mode (empty separator), where its ``format`` argument (a text format string) is not used;
        # it is deliberately not supplied, since passing a dtype there fails for anything which is not a plain string
        data.flatten().astype(dtype).tofile(stream)

    def write_file(self, location_file, to_save):
        """
        Write a data file object into the location opened in the ``"wb"`` mode.

        Only the data is saved (no props, comments, or save time).

        Raises:
            ValueError: if the data is neither a 2D numpy array nor a pandas DataFrame.
        """
        data = to_save.data
        if not _is_table(data):
            raise ValueError("can't save data {}".format(data))
        with location_file.open("wb"):
            self.write_data(location_file, data)
def get_output_format(data, output_format, **kwargs):
    """
    Prepare the data and the file format object for saving.

    Args:
        data: Data to be saved.
        output_format: Either an already prepared :class:`IOutputFileFormat` object (returned as is together with unchanged `data`),
            or one of the format names ``'csv'``, ``'csv_desc'``, ``'dict'``, ``'bin'``, or ``'bin_desc'``.

    For ``'csv'``, ``'dict'`` and ``'bin'`` `**kwargs` are passed to the file format constructor;
    for ``'csv_desc'`` and ``'bin_desc'`` they are passed to the table dictionary entry constructor instead.

    Returns:
        tuple ``(data, output_format)`` with the (possibly wrapped) data and the file format object.

    Raises:
        ValueError: if the format name is not recognized.
    """
    if isinstance(output_format, IOutputFileFormat):
        return data, output_format
    if output_format == "csv":
        return data, CSVTableOutputFileFormat(**kwargs)
    if output_format == "dict":
        # NOTE(review): `data` is handed to as_dictionary as is, even if it is a DataFile wrapper - confirm that this is handled there
        return dictionary.as_dictionary(data), DictionaryOutputFileFormat(**kwargs)
    if output_format == "bin":
        return data, TableBinaryOutputFileFormat(**kwargs)
    # the "described" formats wrap the table into a single-entry dictionary stored in a Dictionary file
    if output_format == "csv_desc":
        entry = dict_entry.InlineTableDictionaryEntry(data, **kwargs)
    elif output_format == "bin_desc":
        entry = dict_entry.ExternalBinTableDictionaryEntry(data, name="data|bin", force_name=True, **kwargs)
    else:
        raise ValueError("unknown output file format: {0}".format(output_format))
    wrapped = dictionary.Dictionary({"__data__": entry})
    return wrapped, DictionaryOutputFileFormat()
def save_csv(data, path, delimiters="\t", value_formats=None, use_rep_classes=False, save_columns=True, save_props=True, save_comments=True, save_time=True, loc="file", encoding=None):
    """
    Save data to a CSV file.

    Args:
        data: Data to be saved (2D numpy array, pandas DataFrame, or a :class:`.datafile.DataFile` object containing this data).
        path (str): Path to the file or a file-like object.
        delimiters (str): Used to separate entries in a row.
        value_formats (str): If not ``None``, defines value formats to be passed to :func:`.utils.string.to_string` function.
        use_rep_classes (bool): If ``True``, use representation classes for Dictionary entries (e.g., numpy arrays will be represented as ``"array([1, 2, 3])"`` instead of just ``"[1, 2, 3]"``);
            This improves storage fidelity, but makes result harder to parse (e.g., by external string parsers).
        save_columns (bool): If ``True``, save column names as a comment line in the beginning of the file.
        save_props (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its props metainfo.
        save_comments (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its comments metainfo.
        save_time (bool): If ``True``, append the file creation time in the end.
        loc (str): Location type.
        encoding: if a new file location is opened, this specifies the encoding.
    """
    format_kwargs = dict(
        delimiters=delimiters,
        value_formats=value_formats,
        use_rep_classes=use_rep_classes,
        save_columns=save_columns,
        save_props=save_props,
        save_comments=save_comments,
        save_time=save_time,
    )
    data, writer = get_output_format(data, "csv", **format_kwargs)
    destination = location.get_location(path, loc, encoding=encoding)
    target = location.LocationFile(destination)
    writer.write(target, data)
def save_csv_desc(data, path, loc="file", encoding=None):
    """
    Save data table to a dictionary file with an inlined table.

    Compared to :func:`save_csv`, supports more pandas features (index, column multi-index), but can only be directly read by pylablib.

    Args:
        data: Data to be saved (2D numpy array, pandas DataFrame, or a :class:`.datafile.DataFile` object containing this data).
        path (str): Path to the file or a file-like object.
        loc (str): Location type.
        encoding: if a new file location is opened, this specifies the encoding.
    """
    data, writer = get_output_format(data, "csv_desc")
    destination = location.get_location(path, loc, encoding=encoding)
    target = location.LocationFile(destination)
    writer.write(target, data)
def save_bin(data, path, dtype=None, transposed=False, loc="file", encoding=None):
    """
    Save data to a binary file.

    Args:
        data: Data to be saved (2D numpy array, pandas DataFrame, or a :class:`.datafile.DataFile` object containing this data).
        path (str): Path to the file or a file-like object.
        dtype: :class:`numpy.dtype` describing the data. By default, use little-endian (``"<"``) variant kind of the supplied data array dtype.
        transposed (bool): If ``False``, write the data row-wise; otherwise, write it column-wise.
        loc (str): Location type.
        encoding: if a new file location is opened, this specifies the encoding.
    """
    format_kwargs = dict(dtype=dtype, transposed=transposed)
    data, writer = get_output_format(data, "bin", **format_kwargs)
    destination = location.get_location(path, loc, encoding=encoding)
    target = location.LocationFile(destination)
    writer.write(target, data)
def save_bin_desc(data, path, loc="file", encoding=None):
    """
    Save data to a binary file with an additional description file, which contains all of the data related to loading (shape, dtype, columns, etc.)

    Args:
        data: Data to be saved (2D numpy array, pandas DataFrame, or a :class:`.datafile.DataFile` object containing this data).
        path (str): Path to the file or a file-like object.
        loc (str): Location type.
        encoding: if a new file location is opened, this specifies the encoding.
    """
    data, writer = get_output_format(data, "bin_desc")
    destination = location.get_location(path, loc, encoding=encoding)
    target = location.LocationFile(destination)
    writer.write(target, data)
def save_dict(data, path, param_formats=None, use_rep_classes=False, table_format="inline", inline_delimiters="\t", inline_formats=None, save_props=True, save_comments=True, save_time=True, loc="file", encoding=None):
    """
    Save dictionary to a text file.

    Args:
        data: Data to be saved.
        path (str): Path to the file or a file-like object.
        param_formats (str): If not ``None``, defines value formats to be passed to :func:`.utils.string.to_string` function when writing Dictionary entries.
        use_rep_classes (bool): If ``True``, use representation classes for Dictionary entries (e.g., numpy arrays will be represented as ``"array([1, 2, 3])"`` instead of just ``"[1, 2, 3]"``);
            This improves storage fidelity, but makes result harder to parse (e.g., by external string parsers).
        table_format (str): Default format for table (numpy arrays or pandas DataFrames) entries. Can be
            ``'inline'`` (table is written inside the file),
            ``'csv'`` (external CSV file) or
            ``'bin'`` (external binary file).
        inline_delimiters (str): Used to separate entries in a row for inline tables.
        inline_formats (str): If not ``None``, defines value formats to be passed to :func:`.utils.string.to_string` function when writing inline tables.
        save_props (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its props metainfo.
        save_comments (bool): If ``True`` and saving :class:`.datafile.DataFile` object, save its comments metainfo.
        save_time (bool): If ``True``, append the file creation time in the end.
        loc (str): Location type.
        encoding: if a new file location is opened, this specifies the encoding.
    """
    format_kwargs = dict(
        param_formats=param_formats,
        use_rep_classes=use_rep_classes,
        table_format=table_format,
        inline_delimiters=inline_delimiters,
        inline_formats=inline_formats,
        save_props=save_props,
        save_comments=save_comments,
        save_time=save_time,
    )
    data, writer = get_output_format(data, "dict", **format_kwargs)
    destination = location.get_location(path, loc, encoding=encoding)
    target = location.LocationFile(destination)
    writer.write(target, data)
def save_generic(data, path, output_format=None, loc="file", encoding=None, **kwargs):
    """
    Save data to a file.

    Args:
        data: Data to be saved.
        path (str): Path to the file or a file-like object.
        output_format (str): Output file format. Can be either
            ``None`` (defaults to ``'csv'`` for table data and ``'dict'`` for Dictionary data),
            a string with one of the default format names, or
            an already prepared :class:`IOutputFileFormat` object.
        loc (str): Location type.
        encoding: if a new file location is opened, this specifies the encoding.

    `**kwargs` are passed to the file formatter constructor
    (see :class:`CSVTableOutputFileFormat`, :class:`DictionaryOutputFileFormat` and :class:`TableBinaryOutputFileFormat` for the possible arguments).

    The default format names are:

        - ``'csv'``: CSV file, corresponds to :class:`CSVTableOutputFileFormat` and :func:`save_csv`;
        - ``'csv_desc'``: CSV file with an additional dictionary containing format description, corresponds to :class:`DictionaryOutputFileFormat` and :func:`save_csv_desc`;
        - ``'bin'``: Binary file, corresponds to :class:`TableBinaryOutputFileFormat` and :func:`save_bin`;
        - ``'bin_desc'``: Binary file with an additional dictionary containing format description, corresponds to :class:`DictionaryOutputFileFormat` and :func:`save_bin_desc`;
        - ``'dict'``: Dictionary file, corresponds to :class:`DictionaryOutputFileFormat` and :func:`save_dict`

    Raises:
        ValueError: if `output_format` is ``None`` and the format can not be determined from the data,
            or if the format name is not recognized.
    """
    if output_format is None:
        # 1D arrays are accepted both bare and wrapped in a DataFile: a bare array is wrapped into a DataFile
        # before writing anyway, so both cases go through exactly the same CSV writing path
        if _is_table(data, allow_1D=True) or (_is_file(data) and _is_table(data.data, allow_1D=True)):
            output_format = "csv"
        elif dictionary.is_dictionary(data) or isinstance(data, dict) or (_is_file(data) and (dictionary.is_dictionary(data.data) or isinstance(data.data, dict))):
            output_format = "dict"
        else:
            raise ValueError("can't determine output file format for data: {}".format(data))
    data, output_format = get_output_format(data, output_format, **kwargs)
    loc = location.get_location(path, loc, encoding=encoding)
    f = location.LocationFile(loc)
    output_format.write(f, data)