Source code for maidenhair.classification.unite

# coding=utf-8
"""
"""
__author__ = 'Alisue <lambdalisue@hashnote.net>'
import os
from maidenhair.loaders.base import unite_dataset as _unite_dataset
from maidenhair.compat import OrderedDict


def default_unite_function(data):
    """
    Build a classification name by dropping the middle extensions of a filename

    The filename is taken from ``data[0]``. Everything between the first dot
    of the basename and its final extension is removed, while the directory
    part and the final extension are kept.

    Parameters
    ----------
    data : list
        A data entry whose first item is the filename of the data

    Returns
    -------
    str
        The filename without the middle extensions

    Examples
    --------
    >>> # [<filename>] is mimicking `data`
    >>> default_unite_function(['./foo/foo.bar.hoge.piyo'])
    './foo/foo.piyo'
    >>> default_unite_function(['./foo/foo.piyo'])
    './foo/foo.piyo'
    >>> default_unite_function(['./foo/foo'])
    './foo/foo'
    """
    directory, leaf = os.path.split(data[0])
    stem, suffix = os.path.splitext(leaf)
    # keep only the text in front of the first dot; a stem without any dot
    # is returned unchanged by partition
    head = stem.partition('.')[0]
    return os.path.join(directory, head + suffix)
def unite_dataset(dataset, basecolumn, fn=None):
    """
    Classify the dataset via `fn` and unite the data of each class

    Every data entry is classified by the string that `fn` returns for it.
    The entries which share a classification string (with their first item
    stripped) are united via :func:`maidenhair.loaders.base.unite_dataset`.

    Parameters
    ----------
    dataset : list
        A list of data. The first item of each data is dropped before
        uniting and replaced by the classification string in the result
    basecolumn : int
        A column number passed to
        :func:`maidenhair.loaders.base.unite_dataset` as it is
    fn : function
        A function which receives a data and returns a classification
        string. If it is None, :func:`default_unite_function` is used

    Returns
    -------
    list
        A united dataset. Each item is a list which starts with the
        classification string followed by the united data of that class,
        in the order in which the classes first appear in `dataset`
    """
    classify = default_unite_function if fn is None else fn
    # gather the data (without the leading item) per classification string
    groups = OrderedDict()
    for data in dataset:
        groups.setdefault(classify(data), []).append(data[1:])
    # unite each group and prepend its classification string
    result = []
    for key, members in groups.items():
        united = _unite_dataset(members, basecolumn)[0]
        result.append([key] + united)
    return result