From 45de6e3ac43bbe7a3b974bde33abc5d03f2a4d84 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Tue, 15 Oct 2019 11:59:20 +0200 Subject: [PATCH 1/5] improved unit tests for Session objects --- larray/tests/test_session.py | 329 +++++++++++++++-------------------- 1 file changed, 141 insertions(+), 188 deletions(-) diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index d4d70bda3..929f6a75b 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -10,7 +10,7 @@ from larray.tests.common import assert_array_nan_equal, inputpath, tmp_path, meta, needs_xlwings from larray import (Session, Axis, Array, Group, isnan, zeros_like, ndtest, ones_like, ones, full, local_arrays, global_arrays, arrays) -from larray.util.misc import pickle +from larray.util.misc import pickle, PY2 def equal(o1, o2): @@ -29,27 +29,30 @@ def assertObjListEqual(got, expected): a = Axis('a=a0..a2') +a2 = Axis('a=a0..a4') +anonymous = Axis(4) a01 = a['a0,a1'] >> 'a01' -b = Axis('b=b0..b2') -b12 = b['b1,b2'] >> 'b12' +ano01 = a['a0,a1'] +b = Axis('b=0..4') +b024 = b[[0, 2, 4]] >> 'b024' c = 'c' d = {} -e = ndtest([(2, 'a0'), (3, 'a1')]) +e = ndtest([(2, 'a'), (3, 'b')]) _e = ndtest((3, 3)) -e2 = ndtest(('a=a0..a2', 'b=b0..b2')) -f = ndtest([(3, 'a0'), (2, 'a1')]) -g = ndtest([(2, 'a0'), (4, 'a1')]) +f = ndtest((Axis(3), Axis(2))) +g = ndtest([(2, 'a'), (4, 'b')]) +h = ndtest(('a=a0..a2', 'b=b0..b4')) @pytest.fixture() def session(): - return Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('c', c), ('d', d), ('e', e), ('g', g), ('f', f)]) + return Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('c', c), ('d', d), ('e', e), ('g', g), ('f', f), ('h', h)]) def test_init_session(meta): - s = Session(b, b12, a, a01, c=c, d=d, e=e, f=f, g=g) - assert s.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g'] + s = Session(b, b024, a, a01, a2=a2, anonymous=anonymous, ano01=ano01, c=c, d=d, e=e, f=f, g=g, h=h) + assert s.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024', 'c', 'd', 'e', 'f', 'g', 'h'] s = Session(inputpath('test_session.h5')) assert s.names == ['e', 'f', 'g'] @@ -63,24 +66,31 @@ def test_init_session(meta): # assertEqual(s.names, ['e', 'f', 'g']) # metadata - s = Session(b, b12, a, a01, c=c, d=d, e=e, f=f, g=g, meta=meta) + s = Session(b, b024, a, a01, a2=a2, anonymous=anonymous, ano01=ano01, c=c, d=d, e=e, f=f, g=g, h=h, meta=meta) assert s.meta == meta def test_getitem(session): assert session['a'] is a + assert session['a2'] is a2 + assert session['anonymous'] is anonymous assert session['b'] is b assert session['a01'] is a01 - assert session['b12'] is b12 + assert session['ano01'] is ano01 + assert session['b024'] is b024 assert session['c'] == 'c' assert session['d'] == {} + assert equal(session['e'], e) + assert equal(session['h'], h) def test_getitem_list(session): assert list(session[[]]) == [] assert list(session[['b', 'a']]) == [b, a] assert list(session[['a', 'b']]) == [a, b] - assert list(session[['b12', 'a']]) == [b12, a] + assert list(session[['a', 'a2']]) == [a, a2] + assert list(session[['anonymous', 'ano01']]) == [anonymous, ano01] + assert list(session[['b024', 'a']]) == [b024, a] assert list(session[['e', 'a01']]) == [e, a01] assert list(session[['a', 'e', 'g']]) == [a, e, g] assert list(session[['g', 'a', 'e']]) == [g, a, e] @@ -92,7 +102,7 @@ def test_getitem_larray(session): res_eq = s1[s1.element_equals(s2)] res_neq = s1[~(s1.element_equals(s2))] assert 
list(res_eq) == [f] - assert list(res_neq) == [e, g] + assert list(res_neq) == [e, g, h] def test_setitem(session): @@ -103,173 +113,139 @@ def test_setitem(session): def test_getattr(session): assert session.a is a + assert session.a2 is a2 + assert session.anonymous is anonymous assert session.b is b assert session.a01 is a01 - assert session.b12 is b12 + assert session.ano01 is ano01 + assert session.b024 is b024 assert session.c == 'c' assert session.d == {} def test_setattr(session): s = session.copy() - s.h = 'h' - assert s.h == 'h' + s.i = 'i' + assert s.i == 'i' def test_add(session): - h = Axis('h=h0..h2') - h01 = h['h0,h1'] >> 'h01' - session.add(h, h01, i='i') - assert h.equals(session.h) - assert h01 == session.h01 - assert session.i == 'i' + i = Axis('i=i0..i2') + i01 = i['i0,i1'] >> 'i01' + session.add(i, i01, j='j') + assert i.equals(session.i) + assert i01 == session.i01 + assert session.j == 'j' def test_iter(session): - expected = [b, b12, a, a01, c, d, e, g, f] + expected = [b, b024, a, a2, anonymous, a01, ano01, c, d, e, g, f, h] assertObjListEqual(session, expected) def test_filter(session): session.ax = 'ax' - assertObjListEqual(session.filter(), [b, b12, a, a01, 'c', {}, e, g, f, 'ax']) - assertObjListEqual(session.filter('a*'), [a, a01, 'ax']) + assertObjListEqual(session.filter(), [b, b024, a, a2, anonymous, a01, ano01, 'c', {}, e, g, f, h, 'ax']) + assertObjListEqual(session.filter('a*'), [a, a2, anonymous, a01, ano01, 'ax']) assert list(session.filter('a*', dict)) == [] assert list(session.filter('a*', str)) == ['ax'] - assert list(session.filter('a*', Axis)) == [a] - assert list(session.filter(kind=Axis)) == [b, a] + assert list(session.filter('a*', Axis)) == [a, a2, anonymous] + assert list(session.filter(kind=Axis)) == [b, a, a2, anonymous] assert list(session.filter('a01', Group)) == [a01] - assert list(session.filter(kind=Group)) == [b12, a01] - assertObjListEqual(session.filter(kind=Array), [e, g, f]) + assert list(session.filter(kind=Group)) == [b024, a01, ano01] + assertObjListEqual(session.filter(kind=Array), [e, g, f, h]) assert list(session.filter(kind=dict)) == [{}] - assert list(session.filter(kind=(Axis, Group))) == [b, b12, a, a01] + assert list(session.filter(kind=(Axis, Group))) == [b, b024, a, a2, anonymous, a01, ano01] def test_names(session): - assert session.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g'] + assert session.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024', + 'c', 'd', 'e', 'f', 'g', 'h'] # add them in the "wrong" order session.add(i='i') - session.add(h='h') - assert session.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g', 'h', 'i'] + session.add(j='j') + assert session.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024', + 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] -def test_h5_io(tmpdir, session, meta): - fpath = tmp_path(tmpdir, 'test_session.h5') +def _test_io(fpath, session, meta, engine): + is_excel_or_csv = 'excel' in engine or 'csv' in engine + + kind = Array if is_excel_or_csv else (Axis, Group, Array) + session = session.filter(kind=kind) + session.meta = meta - session.save(fpath) + # save and load + session.save(fpath, engine=engine) s = Session() - s.load(fpath) - # HDF does *not* keep ordering (ie, keys are always sorted + - # read Axis objects, then Groups objects and finally Array objects) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] - assert s.meta == meta + s.load(fpath, engine=engine) + # use Session.names instead of Session.keys because CSV, 
Excel and HDF do *not* keep ordering + assert s.names == session.names + assert s.equals(session) + if not PY2 and not is_excel_or_csv: + for key in s.filter(kind=Axis).keys(): + assert s[key].dtype == session[key].dtype + if engine != 'pandas_excel': + assert s.meta == meta # update a Group + an Axis + an array (overwrite=False) - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False) + a3 = Axis('a=0..3') + a3_01 = a3['0,1'] >> 'a01' + e2 = ndtest((a3, 'b=b0..b2')) + Session(a=a3, a01=a3_01, e=e2).save(fpath, overwrite=False, engine=engine) s = Session() - s.load(fpath) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] - assert s['a'].equals(a2) - assert all(s['a01'] == a2_01) + s.load(fpath, engine=engine) + if engine == 'pandas_excel': + # Session.save() via engine='pandas_excel' always overwrites the output Excel files + assert s.names == ['e'] + elif is_excel_or_csv: + assert s.names == ['e', 'f', 'g', 'h'] + else: + assert s.names == session.names + assert s['a'].equals(a3) + assert s['a01'].equals(a3_01) assert_array_nan_equal(s['e'], e2) - assert s.meta == meta + if engine != 'pandas_excel': + assert s.meta == meta # load only some objects + session.save(fpath, engine=engine) s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f']) - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta - + names_to_load = ['e', 'f'] if is_excel_or_csv else ['a', 'a01', 'a2', 'anonymous', 'e', 'f'] + s.load(fpath, names=names_to_load, engine=engine) + assert s.names == names_to_load + if engine != 'pandas_excel': + assert s.meta == meta -def test_xlsx_pandas_io(tmpdir, session, meta): - fpath = tmp_path(tmpdir, 'test_session.xlsx') - session.meta = meta - session.save(fpath, engine='pandas_excel') - s = Session() - s.load(fpath, engine='pandas_excel') - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f'] - assert s.meta == meta +def test_h5_io(tmpdir, session, meta): + fpath = tmp_path(tmpdir, 'test_session.h5') + _test_io(fpath, session, meta, engine='pandas_hdf') - # update a Group + an Axis + an array - # XXX: overwrite is not taken into account by the pandas_excel engine - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2, meta=meta).save(fpath, engine='pandas_excel') - s = Session() - s.load(fpath, engine='pandas_excel') - assert list(s.keys()) == ['a', 'a01', 'e'] - assert s['a'].equals(a2) - assert all(s['a01'] == a2_01) - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - # load only some objects - session.save(fpath, engine='pandas_excel') - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='pandas_excel') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta +def test_xlsx_pandas_io(tmpdir, session, meta): + fpath = tmp_path(tmpdir, 'test_session.xlsx') + _test_io(fpath, session, meta, engine='pandas_excel') @needs_xlwings def test_xlsx_xlwings_io(tmpdir, session, meta): - fpath = tmp_path(tmpdir, 'test_session_xw.xlsx') - session.meta = meta - # test save when Excel file does not exist - session.save(fpath, engine='xlwings_excel') - - s = Session() - s.load(fpath, engine='xlwings_excel') - # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f'] - assert s.meta == meta - - # update a Group + an Axis + an array (overwrite=False) - a2 =
Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2).save(fpath, engine='xlwings_excel', overwrite=False) - s = Session() - s.load(fpath, engine='xlwings_excel') - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f'] - assert s['a'].equals(a2) - assert all(s['a01'] == a2_01) - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='xlwings_excel') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta + fpath = tmp_path(tmpdir, 'test_session.xlsx') + _test_io(fpath, session, meta, engine='xlwings_excel') def test_csv_io(tmpdir, session, meta): + fpath = tmp_path(tmpdir, 'test_session_csv') try: - fpath = tmp_path(tmpdir, 'test_session_csv') - session.meta = meta - session.to_csv(fpath) + _test_io(fpath, session, meta, engine='pandas_csv') - # test loading a directory - s = Session() - s.load(fpath, engine='pandas_csv') - # CSV cannot keep ordering (so we always sort keys) - # Also, Axis objects are read first, then Groups objects and finally Array objects - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] - assert s.meta == meta + names = session.filter(kind=(Axis, Group, Array)).names # test loading with a pattern pattern = os.path.join(fpath, '*.csv') s = Session(pattern) - # s = Session() - # s.load(pattern) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] + assert s.names == names assert s.meta == meta # create an invalid .csv file @@ -284,13 +260,7 @@ def test_csv_io(tmpdir, session, meta): # test loading a pattern, ignoring invalid/unsupported files s = Session() s.load(pattern, ignore_exceptions=True) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f']) - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] + assert s.names == names assert s.meta == meta finally: shutil.rmtree(fpath) @@ -298,34 +268,7 @@ def test_csv_io(tmpdir, session, meta): def test_pickle_io(tmpdir, session, meta): fpath = tmp_path(tmpdir, 'test_session.pkl') - session.meta = meta - session.save(fpath) - - s = Session() - s.load(fpath, engine='pickle') - assert list(s.keys()) == ['b', 'a', 'b12', 'a01', 'e', 'g', 'f'] - assert s.meta == meta - - # update a Group + an Axis + an array (overwrite=False) - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False) - s = Session() - s.load(fpath, engine='pickle') - assert list(s.keys()) == ['b', 'a', 'b12', 'a01', 'e', 'g', 'f'] - assert s['a'].equals(a2) - assert isinstance(a2_01, Group) - assert isinstance(s['a01'], Group) - assert s['a01'].eval() == a2_01.eval() - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='pickle') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta + _test_io(fpath, session, meta, engine='pickle') def test_to_globals(session): @@ -362,66 +305,76 @@ def test_to_globals(session): def test_element_equals(session): sess = session.filter(kind=(Axis, Group, Array)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), 
('e', e), ('g', g), ('f', f), ('h', h)]) assert all(sess.element_equals(expected)) - other = Session({'a': a, 'a01': a01, 'e': e, 'f': f}) + other = Session([('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess.element_equals(other) assert res.ndim == 1 assert res.axes.names == ['name'] - assert np.array_equal(res.axes.labels[0], ['b', 'b12', 'a', 'a01', 'e', 'g', 'f']) - assert list(res) == [False, False, True, True, True, False, True] + assert np.array_equal(res.axes.labels[0], ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h']) + assert list(res) == [False, False, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session({'a': a, 'a01': a01, 'e': e2, 'f': f}) + other = Session([('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess.element_equals(other) assert res.axes.names == ['name'] - assert np.array_equal(res.axes.labels[0], ['b', 'b12', 'a', 'a01', 'e', 'g', 'f']) - assert list(res) == [False, False, True, True, False, False, True] + assert np.array_equal(res.axes.labels[0], ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h']) + assert list(res) == [False, False, True, True, True, True, True, False, False, True, True] def test_eq(session): sess = session.filter(kind=(Axis, Group, Array)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('g', g), ('f', f), ('h', h)]) assert all([item.all() if isinstance(item, Array) else item for item in (sess == expected).values()]) - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess == other - assert list(res.keys()) == ['b', 'b12', 'a', 'a01', 'e', 'g', 'f'] + assert list(res.keys()) == ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h'] assert [item.all() if isinstance(item, Array) else item - for item in res.values()] == [True, True, True, True, True, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e2), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess == other assert [item.all() if isinstance(item, Array) else item - for item in res.values()] == [True, True, True, True, False, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, False, False, True, True] def test_ne(session): sess = session.filter(kind=(Axis, Group, Array)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('g', g), ('f', f), ('h', h)]) assert ([(~item).all() if isinstance(item, Array) else not item for item in (sess != expected).values()]) - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), 
('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess != other - assert list(res.keys()) == ['b', 'b12', 'a', 'a01', 'e', 'g', 'f'] + assert list(res.keys()) == ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h'] assert [(~item).all() if isinstance(item, Array) else not item - for item in res.values()] == [True, True, True, True, True, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e2), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess != other assert [(~item).all() if isinstance(item, Array) else not item - for item in res.values()] == [True, True, True, True, False, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, False, False, True, True] def test_sub(session): @@ -560,27 +513,27 @@ def test_local_arrays(): def test_global_arrays(): # exclude private global arrays s = global_arrays() - s_expected = Session([('e', e), ('e2', e2), ('f', f), ('g', g)]) + s_expected = Session([('e', e), ('f', f), ('g', g), ('h', h)]) assert s.equals(s_expected) # all global arrays s = global_arrays(include_private=True) - s_expected = Session([('e', e), ('_e', _e), ('e2', e2), ('f', f), ('g', g)]) + s_expected = Session([('e', e), ('_e', _e), ('f', f), ('g', g), ('h', h)]) assert s.equals(s_expected) def test_arrays(): - h = ndtest(2) - _h = ndtest(3) + i = ndtest(2) + _i = ndtest(3) # exclude private arrays s = arrays() - s_expected = Session([('e', e), ('e2', e2), ('f', f), ('g', g), ('h', h)]) + s_expected = Session([('e', e), ('f', f), ('g', g), ('h', h), ('i', i)]) assert s.equals(s_expected) # all arrays s = arrays(include_private=True) - s_expected = Session([('_e', _e), ('_h', _h), ('e', e), ('e2', e2), ('f', f), ('g', g), ('h', h)]) + s_expected = Session([('_e', _e), ('_i', _i), ('e', e), ('f', f), ('g', g), ('h', h), ('i', i)]) assert s.equals(s_expected) From 17c9540cb32cb69c0436fc40189a648f9394c746 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Wed, 16 Oct 2019 10:54:02 +0200 Subject: [PATCH 2/5] fix #803 : made it possible to export and read anonymous Axis objects, or two or more Axis objects sharing the same name, using the HDF format --- doc/source/changes/version_0_32.rst.inc | 3 +++ larray/inout/hdf.py | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index b8d87b480..59b0ab69a 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -61,3 +61,6 @@ Fixes * fixed :py:obj:`zip_array_values()` and :py:obj:`zip_array_items()` functions not available when importing the entire larray library as ``from larray import *`` (closes :issue:`816`). + +* fixed wrong axis and group names when loading a session from an HDF file + (closes :issue:`803`).
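To make the fix concrete, here is a minimal sketch of the round-trip it enables (the file name and session contents are illustrative, and the `.name is None` assertions assume anonymous objects keep a `None` name after reloading, which is what the updated `_test_io` test implies via `s.equals(session)`):

```python
# Sketch of the round-trip enabled by removing the forced kwargs['name'] = key
# (see the hdf.py diff below): anonymous axes/groups, and several axes sharing
# the same name, now survive HDF I/O instead of being renamed after their keys.
from larray import Session, Axis, ndtest

a = Axis('a=a0..a2')              # named axis
anonymous = Axis(4)               # anonymous wildcard axis
ano01 = a['a0,a1']                # anonymous group on axis 'a'

ses = Session([('a', a), ('anonymous', anonymous), ('ano01', ano01),
               ('e', ndtest((2, 3)))])
ses.save('test_session.h5')       # the .h5 extension selects the pandas_hdf engine

ses2 = Session('test_session.h5')
# previously, each loaded object was renamed after its HDF key ('anonymous',
# 'ano01'); now the stored (possibly None) name is kept
assert ses2['anonymous'].name is None
assert ses2['ano01'].name is None
```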
diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 3407471d1..db68e10fd 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -132,10 +132,8 @@ def _read_item(self, key, type, *args, **kwargs): hdf_key = '/' + key elif type == 'Axis': hdf_key = '__axes__/' + key - kwargs['name'] = key elif type == 'Group': hdf_key = '__groups__/' + key - kwargs['name'] = key else: raise TypeError() return read_hdf(self.handle, hdf_key, *args, **kwargs) From 8daa25356c8c0f69a5f05a157e21d73ced749432 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Fri, 18 Oct 2019 11:58:59 +0200 Subject: [PATCH 3/5] ugly hack to avoid anonymous axes being converted by pandas to axes named 'Unnamed: x' --- larray/inout/pandas.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index d24a83478..b37527091 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -325,13 +325,18 @@ def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header= series.name = df.index.name if sort_rows: raise ValueError('sort_rows=True is not valid for 1D arrays. Please use sort_columns instead.') - return from_series(series, sort_rows=sort_columns) + res = from_series(series, sort_rows=sort_columns) else: axes_names = [decode(name, 'utf8') if isinstance(name, basestring) else name for name in df.index.names] unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1] - return from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header, - unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs) + res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header, + unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs) + + # ugly hack to avoid anonymous axes being converted by pandas to axes named 'Unnamed: x' + # TODO: find a more robust and elegant solution + res = res.rename({axis: None for axis in res.axes if isinstance(axis.name, basestring) and 'Unnamed' in axis.name}) + return res # #################################### # From 640274086d79755f396b7adb9a4422582d0f07c8 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Fri, 18 Oct 2019 15:07:33 +0200 Subject: [PATCH 4/5] fix #815 : Dropped possibility to read and export Axis and Group objects from/to CSV and Excel files --- doc/source/changes/version_0_32.rst.inc | 7 +- doc/source/tutorial/tutorial_IO.ipyml | 8 +- doc/source/tutorial/tutorial_IO.ipynb | 8 +- larray/core/session.py | 26 ++--- larray/inout/csv.py | 46 +-------- larray/inout/excel.py | 94 +----------------- larray/inout/pandas.py | 54 ---------- larray/tests/data/demography_eurostat.xlsx | Bin 15834 -> 14349 bytes .../data/demography_eurostat/__axes__.csv | 6 -- .../data/demography_eurostat/__groups__.csv | 4 - larray/tests/test_session.py | 2 +- 11 files changed, 23 insertions(+), 232 deletions(-) delete mode 100644 larray/tests/data/demography_eurostat/__axes__.csv delete mode 100644 larray/tests/data/demography_eurostat/__groups__.csv diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index 59b0ab69a..e8ce40b60 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -10,7 +10,12 @@ Syntax changes Backward incompatible changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* other backward incompatible changes +* Because it was broken, the possibility to dump and load Axis and Group objects +
contained in a session has been removed for the CSV and Excel formats. Fixing it would have taken too much time given how rarely the feature is used (no one complained that it was broken), so it was removed instead. However, this is still possible using the HDF format. Closes :issue:`815`. New features diff --git a/doc/source/tutorial/tutorial_IO.ipyml b/doc/source/tutorial/tutorial_IO.ipyml index 27012c187..ff325d696 100644 --- a/doc/source/tutorial/tutorial_IO.ipyml +++ b/doc/source/tutorial/tutorial_IO.ipyml @@ -660,13 +660,7 @@ cells: - markdown: |
- Note: Concerning the CSV and Excel formats: - - - all Axis objects are saved together in the same Excel sheet (CSV file) named `__axes__(.csv)` - - all Group objects are saved together in the same Excel sheet (CSV file) named `__groups__(.csv)` - - metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)` - - These sheet (CSV file) names cannot be changed. + Note: Concerning the CSV and Excel formats, the metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`. This sheet (CSV file) name cannot be changed.
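The resulting division of labour between formats can be sketched as follows (file names are illustrative; the session is filtered explicitly before the Excel dump, as the updated `_test_io` test does, since only Array objects can go through CSV/Excel):

```python
# After this change, CSV/Excel round-trip Array objects (plus the __metadata__
# sheet/file) only; Axis and Group objects still round-trip via HDF or pickle.
from larray import Session, Axis, Array, ndtest

a = Axis('a=a0..a2')
a01 = a['a0,a1'] >> 'a01'
ses = Session([('a', a), ('a01', a01), ('e', ndtest((2, 3)))])

ses.filter(kind=Array).save('dump.xlsx')   # dumps 'e' (and metadata) only
ses.save('dump.h5')                        # dumps 'a', 'a01' and 'e'
```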
diff --git a/doc/source/tutorial/tutorial_IO.ipynb b/doc/source/tutorial/tutorial_IO.ipynb index 6bf0e7b41..be0067840 100644 --- a/doc/source/tutorial/tutorial_IO.ipynb +++ b/doc/source/tutorial/tutorial_IO.ipynb @@ -961,13 +961,7 @@ "metadata": {}, "source": [ "
\n", - " Note: Concerning the CSV and Excel formats: \n", - " \n", - " - all Axis objects are saved together in the same Excel sheet (CSV file) named `__axes__(.csv)` \n", - " - all Group objects are saved together in the same Excel sheet (CSV file) named `__groups__(.csv)` \n", - " - metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)` \n", - " \n", - " These sheet (CSV file) names cannot be changed. \n", + " Note: Concerning the CSV and Excel formats, the metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`. This sheet (CSV file) name cannot be changed. \n", "
" ] }, diff --git a/larray/core/session.py b/larray/core/session.py index 748cccd98..4ebb03322 100644 --- a/larray/core/session.py +++ b/larray/core/session.py @@ -344,7 +344,8 @@ def __setstate__(self, d): def load(self, fname, names=None, engine='auto', display=False, **kwargs): r""" - Load Array, Axis and Group objects from a file, or several .csv files. + Load Array objects from a file, or several .csv files (all formats). + Load also Axis and Group objects from a file (HDF and pickle formats). WARNING: never load a file using the pickle engine (.pkl or .pickle) from an untrusted source, as it can lead to arbitrary code execution. @@ -431,7 +432,8 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs): def save(self, fname, names=None, engine='auto', overwrite=True, display=False, **kwargs): r""" - Dumps Array, Axis and Group objects from the current session to a file. + Dumps Array objects from the current session to a file (all formats). + Dumps also Axis and Group objects from the current session to a file (HDF and pickle format). Parameters ---------- @@ -450,10 +452,6 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False, display : bool, optional Whether or not to display which file is being worked on. Defaults to False. - Notes - ----- - See Notes section from :py:meth:`~Session.to_csv` and :py:meth:`~Session.to_excel`. - Examples -------- >>> # axes @@ -652,15 +650,15 @@ def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs): def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs): r""" - Dumps Array, Axis and Group objects from the current session to an Excel file. + Dumps Array objects from the current session to an Excel file. Parameters ---------- fname : str Path of the file for the dump. names : list of str or None, optional - Names of Array/Axis/Group objects to dump. - Defaults to all objects present in the Session. + Names of Array objects to dump. + Defaults to all Array objects present in the Session. overwrite: bool, optional Whether or not to overwrite an existing file, if any. If False, file is updated. Defaults to True. display : bool, optional @@ -669,8 +667,6 @@ def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs): Notes ----- - each array is saved in a separate sheet - - all Axis objects are saved together in the same sheet named __axes__ - - all Group objects are saved together in the same sheet named __groups__ - all session metadata is saved in the same sheet named __metadata__ Examples @@ -700,23 +696,21 @@ def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs): def to_csv(self, fname, names=None, display=False, **kwargs): r""" - Dumps Array, Axis and Group objects from the current session to CSV files. + Dumps Array objects from the current session to CSV files. Parameters ---------- fname : str Path for the directory that will contain CSV files. names : list of str or None, optional - Names of Array/Axis/Group objects to dump. - Defaults to all objects present in the Session. + Names of Array objects to dump. + Defaults to all Array objects present in the Session. display : bool, optional Whether or not to display which file is being worked on. Defaults to False. 
Notes ----- - each array is saved in a separate file - - all Axis objects are saved together in the same CSV file named __axes__.csv - - all Group objects are saved together in the same CSV file named __groups__.csv - all session metadata is saved in the same CSV file named __metadata__.csv Examples diff --git a/larray/inout/csv.py b/larray/inout/csv.py index dd2512a3a..847c06f54 100644 --- a/larray/inout/csv.py +++ b/larray/inout/csv.py @@ -17,7 +17,7 @@ from larray.util.misc import skip_comment_cells, strip_rows, csv_open, deprecate_kwarg from larray.inout.session import register_file_handler from larray.inout.common import _get_index_col, FileHandler -from larray.inout.pandas import df_asarray, _axes_to_df, _df_to_axes, _groups_to_df, _df_to_groups +from larray.inout.pandas import df_asarray from larray.example import get_example_filepath @@ -284,26 +284,9 @@ def _to_filepath(self, key): else: return key - def _load_axes_and_groups(self): - # load all axes - filepath_axes = self._to_filepath('__axes__') - if os.path.isfile(filepath_axes): - df = pd.read_csv(filepath_axes, sep=self.sep) - self.axes = _df_to_axes(df) - else: - self.axes = OrderedDict() - # load all groups - filepath_groups = self._to_filepath('__groups__') - if os.path.isfile(filepath_groups): - df = pd.read_csv(filepath_groups, sep=self.sep) - self.groups = _df_to_groups(df, self.axes) - else: - self.groups = OrderedDict() - def _open_for_read(self): if self.directory and not os.path.isdir(self.directory): raise ValueError("Directory '{}' does not exist".format(self.directory)) - self._load_axes_and_groups() def _open_for_write(self): if self.directory is not None: @@ -312,8 +295,6 @@ def _open_for_write(self): except OSError: if not os.path.isdir(self.directory): raise ValueError("Path {} must represent a directory".format(self.directory)) - self.axes = OrderedDict() - self.groups = OrderedDict() def list_items(self): fnames = glob(self.pattern) if self.pattern is not None else [] @@ -327,36 +308,18 @@ def list_items(self): fnames.remove('__metadata__') except: pass - try: - fnames.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] - except: - pass - try: - fnames.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] - except: - pass items += [(name, 'Array') for name in fnames] return items def _read_item(self, key, type, *args, **kwargs): if type == 'Array': return read_csv(self._to_filepath(key), *args, **kwargs) - elif type == 'Axis': - return self.axes[key] - elif type == 'Group': - return self.groups[key] else: raise TypeError() def _dump_item(self, key, value, *args, **kwargs): if isinstance(value, Array): value.to_csv(self._to_filepath(key), *args, **kwargs) - elif isinstance(value, Axis): - self.axes[key] = value - elif isinstance(value, Group): - self.groups[key] = value else: raise TypeError() @@ -374,12 +337,7 @@ def _dump_metadata(self, metadata): meta.to_csv(self._to_filepath('__metadata__'), sep=self.sep, wide=False, value_name='') def save(self): - if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) - df.to_csv(self._to_filepath('__axes__'), sep=self.sep, index=False) - if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) - df.to_csv(self._to_filepath('__groups__'), sep=self.sep, index=False) + pass def close(self): pass diff --git a/larray/inout/excel.py b/larray/inout/excel.py index 1ceaaaf49..df134523b 100644 --- a/larray/inout/excel.py +++ b/larray/inout/excel.py @@ -19,7 +19,7 @@ from larray.util.misc import 
deprecate_kwarg from larray.inout.session import register_file_handler from larray.inout.common import _get_index_col, FileHandler -from larray.inout.pandas import df_asarray, _axes_to_df, _df_to_axes, _groups_to_df, _df_to_groups +from larray.inout.pandas import df_asarray from larray.inout.xw_excel import open_excel from larray.example import get_example_filepath @@ -231,33 +231,12 @@ class PandasExcelHandler(FileHandler): """ def __init__(self, fname, overwrite_file=False): super(PandasExcelHandler, self).__init__(fname, overwrite_file) - self.axes = None - self.groups = None - - def _load_axes_and_groups(self): - # load all axes - sheet_axes = '__axes__' - if sheet_axes in self.handle.sheet_names: - df = pd.read_excel(self.handle, sheet_axes, index_col=None) - self.axes = _df_to_axes(df) - else: - self.axes = OrderedDict() - # load all groups - sheet_groups = '__groups__' - if sheet_groups in self.handle.sheet_names: - df = pd.read_excel(self.handle, sheet_groups, index_col=None) - self.groups = _df_to_groups(df, self.axes) - else: - self.groups = OrderedDict() def _open_for_read(self): self.handle = pd.ExcelFile(self.fname) - self._load_axes_and_groups() def _open_for_write(self): self.handle = pd.ExcelWriter(self.fname) - self.axes = OrderedDict() - self.groups = OrderedDict() def list_items(self): sheet_names = self.handle.sheet_names @@ -266,16 +245,6 @@ def list_items(self): sheet_names.remove('__metadata__') except: pass - try: - sheet_names.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] - except: - pass - try: - sheet_names.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] - except: - pass items += [(name, 'Array') for name in sheet_names] return items @@ -283,10 +252,6 @@ def _read_item(self, key, type, *args, **kwargs): if type == 'Array': df = self.handle.parse(key, *args, **kwargs) return df_asarray(df, raw=True) - elif type == 'Axis': - return self.axes[key] - elif type == 'Group': - return self.groups[key] else: raise TypeError() @@ -294,10 +259,6 @@ def _dump_item(self, key, value, *args, **kwargs): kwargs['engine'] = 'xlsxwriter' if isinstance(value, Array): value.to_excel(self.handle, key, *args, **kwargs) - elif isinstance(value, Axis): - self.axes[key] = value - elif isinstance(value, Group): - self.groups[key] = value else: raise TypeError() @@ -315,12 +276,7 @@ def _dump_metadata(self, metadata): metadata.to_excel(self.handle, '__metadata__', engine='xlsxwriter', wide=False, value_name='') def save(self): - if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) - df.to_excel(self.handle, '__axes__', index=False, engine='xlsxwriter') - if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) - df.to_excel(self.handle, '__groups__', index=False, engine='xlsxwriter') + pass def close(self): self.handle.close() @@ -333,36 +289,16 @@ class XLWingsHandler(FileHandler): """ def __init__(self, fname, overwrite_file=False): super(XLWingsHandler, self).__init__(fname, overwrite_file) - self.axes = None - self.groups = None def _get_original_file_name(self): # for XLWingsHandler, no need to create a temporary file, the job is already done in the Workbook class pass - def _load_axes_and_groups(self): - # load all axes - sheet_axes = '__axes__' - if sheet_axes in self.handle: - df = self.handle[sheet_axes][:].options(pd.DataFrame, index=False).value - self.axes = _df_to_axes(df) - else: - self.axes = OrderedDict() - # load all groups - sheet_groups = '__groups__' - if sheet_groups in 
self.handle: - df = self.handle[sheet_groups][:].options(pd.DataFrame, index=False).value - self.groups = _df_to_groups(df, self.axes) - else: - self.groups = OrderedDict() - def _open_for_read(self): self.handle = open_excel(self.fname) - self._load_axes_and_groups() def _open_for_write(self): self.handle = open_excel(self.fname, overwrite_file=self.overwrite_file) - self._load_axes_and_groups() def list_items(self): sheet_names = self.handle.sheet_names() @@ -371,36 +307,18 @@ def list_items(self): sheet_names.remove('__metadata__') except: pass - try: - sheet_names.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] - except: - pass - try: - sheet_names.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] - except: - pass items += [(name, 'Array') for name in sheet_names] return items def _read_item(self, key, type, *args, **kwargs): if type == 'Array': return self.handle[key].load(*args, **kwargs) - elif type == 'Axis': - return self.axes[key] - elif type == 'Group': - return self.groups[key] else: raise TypeError() def _dump_item(self, key, value, *args, **kwargs): if isinstance(value, Array): self.handle[key] = value.dump(*args, **kwargs) - elif isinstance(value, Axis): - self.axes[key] = value - elif isinstance(value, Group): - self.groups[key] = value else: raise TypeError() @@ -418,14 +336,6 @@ def _dump_metadata(self, metadata): self.handle['__metadata__'] = metadata.dump(wide=False, value_name='') def save(self): - if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) - self.handle['__axes__'] = '' - self.handle['__axes__'][:].options(pd.DataFrame, index=False).value = df - if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) - self.handle['__groups__'] = '' - self.handle['__groups__'][:].options(pd.DataFrame, index=False).value = df self.handle.save() def close(self): diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index b37527091..edd74a327 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -337,57 +337,3 @@ def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header= # TODO : find a more robust and elegant solution res = res.rename({axis: None for axis in res.axes if isinstance(axis.name, basestring) and 'Unnamed' in axis.name}) return res - - -# #################################### # -# SERIES <--> AXIS, GROUP, META # -# #################################### # - -def _axis_to_series(axis, dtype=None): - return pd.Series(data=axis.labels, name=str(axis), dtype=dtype) - - -def _series_to_axis(series): - return Axis(labels=series.values, name=series.name) - - -def _group_to_series(group, dtype=None): - name = group.name if group.name is not None else '{?}' - if group.axis.name is None: - raise ValueError("Cannot save a group with an anonymous associated axis") - name += '@{}'.format(group.axis.name) - return pd.Series(data=group.eval(), name=name, dtype=dtype) - - -def _series_to_group(series, axis): - name = series.name.split('@')[0] - return LGroup(key=series.values, name=name, axis=axis) - - -# ######################################## # -# DATAFRAME <--> AXES, GROUPS, META # -# ######################################## # - -def _df_to_axes(df): - return OrderedDict([(col_name, _series_to_axis(df[col_name])) for col_name in df.columns.values]) - - -def _axes_to_df(axes): - # set dtype to np.object otherwise pd.concat below may convert an int row/column as float - # if trailing NaN need to be added - return pd.concat([_axis_to_series(axis, 
dtype=np.object) for axis in axes], axis=1) - - -def _df_to_groups(df, axes): - groups = OrderedDict() - for name, values in df.iteritems(): - group_name, axis_name = name.split('@') - axis = axes[axis_name] - groups[group_name] = _series_to_group(values, axis) - return groups - - -def _groups_to_df(groups): - # set dtype to np.object otherwise pd.concat below may convert an int row/column as float - # if trailing NaN need to be added - return pd.concat([_group_to_series(group, dtype=np.object) for group in groups], axis=1) diff --git a/larray/tests/data/demography_eurostat.xlsx b/larray/tests/data/demography_eurostat.xlsx index d7da56f1d7319297f7b0fb0ceba35fc8091c5acd..adcb9ce1e2230a42a43a1210028a6186370c7b09 100644 GIT binary patch delta 4616 zcmZu#cU05QvJXjUp@ouALq`O`1Vky)q$6DfL8V1ddJ&MSUkF7yNDN)+h9Zcd2q?WI z6zQVUrHV)w0)q0;^WN{>d(YkdV`pbSJ7;%hcXnsSLlV8YObsAV4iF894g>-Tfpj** zlKrVbAO+f&T*6?0(CgWTfooF;KUp5VvB2iBSRlf+1$vRCKOmEV8d7`{+gvbUCN`*~ z>W&qAT2FBlwWBi`*V2DBWVR?D>Llz)XJ|{C?z7 zwH{J96euRdO)p@pZNwFte~`XAV&X>p?D+_J`9@k9$R*y|h@Ctb1ZO)FX1Xd>X)XQu zv-iekGSEq$A6Ajk+X2k>)sv<(y>I5ylip`~Pd=q5O{w`nd&Fk9f{x5$WkZ;0qKj#i z{gt(b?)MWm1!p3NP&LIClQkQ@$6g{{s4K%E3Zby<(_Rhcx*D?)IFF1sQFCLDJYx$< z%!n8TF682pgr!3AN-%oy$ zG8}I?)vpzcno$Lob2j*Lf$5I~9}{g-AiGHDZIOrzifF5v#G#MbeL2E~_#z3tPJoxE zkaj$OfHU{E!iWXSEr`9y<8w?c+a{DJDjey{0O@oS6Os* zjIj9Wp8bc&^IyZ@u@w8w_tA@Ti$QiHI{fO;iWulmo9=N>_3|A`%LlE=-HObe2I`pe zCqjK2THHGT3`a% zHE^>ESc{+t1*PRt3_CYFE<*(z5jj;jag6)1EDw!~ONG|#JC=e2aZlz3g_)|jY5pR& zl-gg}qmow;vXaBy+O49}Sh;YanV~!E1zz~47FFLAYS{%>=R}v!IB@uB{=5uKho#U4 zk`n~!PZ0C@1^>H8ES;%0>s!HUe0BW4 zn}5)oB8T#N&PXus1DOxP$wz?Zx{z^R^jt<)I-Kr%W!6otz+EoUz$IF1XN~zR+c}dy z$5dk@HKrVl5oh290{|x@9COz=*qUD~FYngEftS=T?Y(wbZQ)adi5&mhJn8DM_}f;8 zrHq!H>uEza6j9`PxxE=H^G%zCn|As~Uk<#9oXsya((*3weX3f-P{aYJ2{x`axXSsetZpRoU}1D{T(P@x1yzcXDLIm+^rVz(}v3igjn;_A~v26loYpM zu79DlMu1Nkmz*AKJQ~)Y(j1qGq1*fZxr13{#I~+!#YmHs6AaI;Pgi09s`WTyUqH*I zJHH0JV^%sW{L*S6WgxteD0qNU9{K2pPhpn5tUCa1x>X&kX5?Oj#8b0I;zou zU!poxGn^f`Hg2azIk3&#zQEA#9*S9wau`$1AX-C;Es?Bo^3`Y2#ClneevHG5pG9<_*?PCd$)?&saz zXl{!lN;)BClD5t_4<-K8HrSom`)D^N5VUpO_H+e@eI0>!_lgCD@QlzMb=wE1@7wr6 zLzX~S*Wq$ItT4mkFH9GTf5CzKW%ye99+-wGu%EBC=g1s9HhLmZF$rWZvDGK{;~Gfg}3ngdgYFGzQtk$-_X&4161=OkKPBi|F~0#9>+YlyGHm zMCZP6Ib_Sixsu)P8L?b5W%- z0Bqg*?BKX>O{tWm+G;O&#vyCtMo2|td$}0n1d|Ll)*b^zv4)&tdXmp>4&6hAeQIM? 
z-+=prg1hn|lk40(bc3ZJz-gxrhN&~pUv}ddH?nVq@1)S_h~SpFY1s0qV?5a-BN>IB z#gm1TzFuJaIw)j2aEgvf?6?k&3A`FZFU5sbWovVf?>(PVeTeind=kSg1d(Eq4(cqkXHj`6!CNO?#lp10&$WT>($f;E0sZx$jjL!ze2Q3SWlno&h_d3v|P zI}o#SV5<&IgN8d{;TN?(ISG0QGwxktgfh0i7SH6e6AGP3mvtZp_y6K-^x3tNYLx97cS7BTV0M+(Y$vJT9QZ`4 z1Y$`q-LO8P9Xg@PsuET?DKFzPocLmAnTNCjz;PkPpv%;)@AE|NoBB-LabRJJG^&7y zD(Ujc6#L>u(q`0jCk1QV9e+)YUyBkYMfNgAUM;nFWjR11e&MgqQe3Sp(phF^W!vI2 z_so$Z@py37D2rvvCS7FJUgW1Mxr(Y-0orMTS@d7?ATNwbd5TzzAOngaMT=WquGlo9 zsCk!Svx|{hq!&zxlx~>7&^0z#_CEl0y2qC}pA#HJ4aRkdTAlExzuY zC_qIkK|iG7Jt0wVkmvqNq+RvXxK%~Lz(t9#dG#ZtoF6is$ z^jdCzF+r2vl%m~OOPw&}5tSqQo|@3)*;OtY?gsfyKMxjI6n9aZ?`qd$fO^ zDSW0fyyo<2RT#b4vMS2()vCxm_{grt!eMz&++A1qXR#@?g z(jT|nW#`sk$iF>9cW!!IzGxs@zT!pGtuVCYci-z72i<)jvFsbUlWp&sCI#%c>{~~% zQnq4=>gB_14 zL8#Bh2!YH6&nQVReBifJmf42}Xqv6g(<-ub8jo7a^d|Fn7B`9JVN1A#;rQ)=v{R!-J)4Y z^>0}xS~#C3J?vFYh8FZV7{%pu{BBS}gqXT+dT30lyHz{es8C0-WbS(AQgCVoyMF|2 zevxb2P?kTsR;iGujL5kQ>9aW1j9yf=k}b|K-g2H^uW}r#_|+iK*3tfDQY%;@#PwQ% zZUkMZ)Ofmrgje@`wN-v78$L*H4%2jo{MeY@$Wv|n%Z0YeKhd0M2da|oBC@P@9|B^?XddI$o*t=?xB!Rh^x_a#<^KPPqGm#A^m=)8J?T z65wBfm{#<6A7X3n-Vr0Q=T}rUY~zJ^QP)tvUbrk?9pDrQxRV=Jy#K9n2zJ_E0&qp# z^gHh~V+>22^)(Z1kS@D5=$BSMyF>gWiK#R0s-|H#+f0wF)BXCemI_tCfvQltx}bsF z&6TU=O_)4oAevC`Rs#2R&EoT{KO>^|nph zL-pIl=8W;J`{mQ>dGayozH{LY3n-tSYHb{huIH|?;#TnTOmBL~E9WGl{B9%j=uJ6Q zYfHKf{RWR+A;#7F=W868N)LgbHgCrwRKy+P9ylLz2psiKQGMBoL*ua%>P9V+Hl2Q8 zbB#&j8qG-dbKWZ2=TF^i8Gc<-O=>o_MROhym8Zs~#P@rTQ-ObCU@EXQ=)WZ<@P3MB zRF(KHMSkA@jEql$P# zdqR%0ose2!TjXC(3z8bYs07C&dEj`1i!ks!Lq}hAJ952>=Yh1ONc^01W5D{y-oApp02Z z!2m*}jX3x6LKH4je-R`#>00J;DUb#$JRBh_J40LcdCUM^`bmL}Sc~>8E!TpfYki0& zU2;Dj;@q|oHPwX~hV)JCrg$RpU(38uO!G^7f4S!_X-?>uSEQjRH#{5UDB0Rh@pO)i zH{U*vouj?!GRsO&hMAX-Oz_>-C*9sJ6`5`bj`uBZfybWWf!UOYvN!W{RE~lHVaA*( z3E6t}9rSLwi3$XnuN(A-!y3v*!&tRCr+-GOwtFg<@-O$~*ZU|?#EOVdvvyHC?wghg zRi{AI!cKYe8E`wbZof2NNAjI+)heSE4W06%*}Yd#pi!O>hY;Za%*vQ5RhZ!&n~%OC z9&$4hs9uiJonKe3Jh}|(V9YX^E)UW^mf_t-%BagSs zO9x9A7YA-1C&x-XCzk?I$la@{-w*n|vA4N`GAglEJW?;7>eej%CSrftjnxU-cKlLU ze$ll?0b*A1{O-wPe=wg-DJ^h1+B|25|2$8Mn47JlSiJSUN1P7(h;2jW_chuy#axHS z8?ZhEM#Z2Gg8ngz|K|M7zSO2V^`~q>UXAFoY*hoLV)m)5lxN0u89J`-pSSlDr3Ji* z$so>(&Vm?_6+0Pp8dw_cy$g>*^n;h2?cm#-PH?TEF-%sBG=SMwJy}Ns|L>?4qSKOM zVJ*E`b4xiKn3-Wgt6uTsVYMaH1>5x|Nb(TTy*?hv^WMDnCiCm=MtO2^7FJ`bzWL&T zjM-HJCvhGU0_C>)!KYe1G$f!a2GwvwAxm$2(oiAkl`LT1?4^F^%*LwmtefWtsbZF* z593koXhQ_Kwp6s<8hUeAuR>$%<^fMKybi{@b5)eXB+9KvndT$3!O_VXkLrGwRnGes z5{d6{c_YaJNLP~LIA)srdU<%uVU5^Uzs2%;4t zz}#RGPNv>s8m!ZxkdW@iIZ|TpiyKX7(HAah-S2cilX!H-Iv7Kh(XoQEr;(1x7>~F+ zi2SMJ*`fr$*ob4cFQe)F0<*d)JMxKH;wc#k^uKy%!>=!R$_3fGc*W@P1`T0ptqxV6 zbA{~*_;zQeRcKIS@8}>J-nrthE62{eJN9{Uj$=AICk9I*Xn(!_#$Q)OmLmq5_tFO} zjv3DA_4}C+MUy_5b+FwoN0mo0nrtSuMz7f@UyIF_8AuU@n~Fp&9!FJ+6f!8dQu$GS=qTVa z(kT{$#9@Sh*fqZAlPSaqU&iB&tz9!)eXQt(Nna)pjZ4V(P_I#Euo(dt39@6BVI(TTuC3W!X1~+0KK#*h!H28Myyd#0 z=Y6`3D-2zrBxE}$GMfceQ#JB_Xub5*;lc1KWlvXqi#eXTicSW0rMWiFwa*8f=(i^~ zyd2}a9OR?30;97SNS_E5W8S(KZ|5~c=+y2DMYeqZ{X zO zCq5zZY_h}IJr=yK1f$FZ&#w$gT=0I-WETikmv2$mpI`YST0~jj?EW@IX$E8`pZQhf zU2*x0*ksP_Jd7-i$?K&13~y?Uq|Fy!bQ<1p2l^{~HJ8jU{y9X2KZH^`5_a3X0gWYA zhCB~EsEqpstRWL{l(3ElkrGkztYYZl1 z2JoB>9*BP@1zsvuF<5)y5e97Bb(qj~XUhRZ6Nq^GT|-YJ>8b&3%-u}fi&Knf9?R?@ z+#omNm)&wqIilvVwO>aq?(c0tVM&A|2YD0x<-NHm_y~;aswPQI@il#SHw?GdiNtLq z2*Ec+M7?))7KXkPk74}dhCYgHi-6ABnarunFwA9$P`|;Dxn|$QF5|0?@N37PJyH77)sL=&=@ATs)p|5U| z2(~C{n#s6P0_HJ!mih#FyORt-3EjlJh%G`ET3pFqs&riIY*P|>dRSW^(jg>=uZk~aEd_tH{ z6(1F+2X*b1-wLOp)9lfX8+^g9DlEK8{6JoY8CG1+$mFbdU@_Bw_)LgTg0_wxaXEkt zS-_&Kh%)F+!u$^5^5DsB*2m7(ff(a1PCX!f6UZ_P&AaXX9N6e07)J4$tQosMa&h#Y{7OV$h2?=7SN*Zdw9&G0q>TXK(WJ(ky 
z>ei`Zg5O3|;!g_pKdAk5XQwVwKhv`9(-&t=XR5Ioyz4E&*L zt8%v8GWMt7w`%I-obzHqi4bskcKKo+jY?dE-vsaCf1Ke;%6ff@gF+%^5oV1&-~5kH z2zojYJH05G1DA@U_8Bg|3&VuKOI*pvW@~{1C-vbSlTLU3=C5$}jjG&cV?6|-nOA<+ z=r}3lRmGN^H;*M2W%xD@{BX23z&pX^Ur@HPUaj`}!Lq@daX}ID7H+qwaNac8v6SES z?_}xkJQle|3C0Ff^#n5CLsAUmFG!011<919Kah<014+)cP%&nvsM=w>nU2!>?|Z9! zEYEv;bKNjThRcqidLafRZCYBe2)9h6} zDnviG$$2V(#;K7RG2qKEd8yt+1$qiImBt!hE5E1>QlZ2yZiNb-`V6U%in7H+qsXCF zjDAI(-Yf|=MIlww#yA}hZP^u8>3rOGB*wtyb%kPoM1c-Yaba4xTkw-#SuP_v$?SQK zbStwc$fty;jRzKZi1z%D%c89Wu%2sp_;Fa=gSSp^;1>)z>p3HX)JhI68&^o5J^2&A z@0_3Z2ctI?n(-wOL}PEdT+_Be&_-SQkFP1SZ-y9pnN-`Zuj#KeQRlI^D*@j~UqpmP zBqY&{$E8TLonV;yUa+-@-7LrLR6G{r+3n;v+d3g8Uz!3~BhXch=8(7T8yP*#z)yIJ zazq^VwrMb5d2<@u?`ew!J?f6fN7hJ9l6t{V#ERFKF)r!+xo$5{OsG;e&J4L$#z^nj_>V6mul}VW zPZ^$KV+q%nWLn*;?{7#r7Y#zzWr-Iz{21-xA?CX#@x35R4rqyO)4}`m=@l?~<>)+& zX_$B~cc|te^1^zDMJnXi6|HWKnRb{IDOS3^vu}f|%z-;~vr&t*dgizpO%y{Dg}|3# z8s+8I>#IC6&05p6)P3E^rE;P*?Z+*{Xmz??ElfIXY1PSjkj5!uyBNgGxuu&nGwwn6 zPSQ-XnKLvNdKY*0qdLG{jVrILL8Wj$-_pcRTc7n%Z7w&dc*7gs!XbLr(^saVjNwQk zlNiPqqQO+NP4@E$&kTfx9`VqsZnA^`PWw8itjeO12kj@T=tFPt&*Pmi8sFMMy6|d8 zIk;+#l14y0cri6=jLle8x1`n``i(bLVk2$X49`w@Q0gOhtli=X_0FWDqXhYMh~(uZ z($QCvdTZ_qzPCWMSKO^Ml?@l1;};+mghm$;^gXva{WumlmF?qC$NAEmJz}|$ydpL5 z%eM5NWCx?HIU-s1Lq9hY5RsB>rryg-@^MY=7GHI(sai(njgvgHq>3ej{HD8|Tuw@2L1IL6-N9?i2mZWZ$o}p}T z{MU*0_KunDI;&$CszHD76sfg*wf)xw71=bZH zZP6Zy>W>s{bvLwFHFbGbrWaCE0So0At`T`4tYL{KL?MU{+DZ?EscmYoHa0>q?+g1; z;%5Wai=$BDj=JjoLfR(2$FD-x^v7$QW}cPW=;LanyQlHtOgk7o(%FD;m8R%?^Wk;z zdZxLfZ?R$oygl);>ySu8Yv7v93s$1nn@ttvaM>*R=odY#2|uz*{#+HUhH-8l#va8j zA8El8P2h_~ktV{4^->wpl8p*ZD$2yjgBr7i;`kLcZhz{})U-K@>Kk_x)0(fxI0}fp zI-(513^R=P_$_#r9_o8@{zMm#T(o^MPbk%<-t~&m;RUC-)^34$USKbbp>1n<1e^ND zYr6ehl21Z1>$?cJbIfCv0Ku1Qxh$`U2b!P+Vd4X~!|@T0uEvdLUBxX)G&zVBS|`BM zkD(0>$ujF{dpykVPKtAP_vq@#0$?#RO|mWCG4XbsS7&U(ME1rQoAp791z$@dd=z)n z8o}QJV1|Z`Y(ve17195GzvU)ihYXnAi5yufJ6knJcV1)*PSAE}`4=<$zVF&t5XSAnb}R7``xbR)$nj5R_R zTGC5-pZpM_6~`$B_FgBG{7TJCWyP9B`#G1D4rx8W-x$-J9PbvE)PFj<=Xf>n$6*BG za?n)dz`G1@$lHVxWUdGke6<~6u5Tmd<4!24H^i9wX5I~zgG?;%_)cpk!mmBA>h{hL z8Scm;v!42eYIK+RAe!Cw{ zAp5JCK!$ Date: Tue, 22 Oct 2019 09:30:19 +0200 Subject: [PATCH 5/5] fix #820 : added argument _axes_display_names to the Array.dump() function to specify if exported axes names are defined by the AxisCollection.names or AxisCollection.display_names property --- larray/core/array.py | 32 ++++++++++++++++++++++---------- larray/inout/pandas.py | 10 ++++++++-- larray/tests/test_array.py | 24 ++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 14 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index 54d50a656..1ec617f1a 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -2415,7 +2415,8 @@ def __str__(self): elif not len(self): return 'Array([])' else: - table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS]) + table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS], + _axes_display_names=True) return table2str(table, 'nan', maxwidth=_OPTIONS[DISPLAY_WIDTH], keepcols=self.ndim - 1, precision=_OPTIONS[DISPLAY_PRECISION]) __repr__ = __str__ @@ -2436,12 +2437,15 @@ def as_table(self, maxlines=-1, edgeitems=5, light=False, wide=True, value_name= """ warnings.warn("Array.as_table() is deprecated. 
Please use Array.dump() instead.", FutureWarning, stacklevel=2) - return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name) + return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name, + _axes_display_names=True) # XXX: dump as a 2D Array with row & col dims? def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is', - maxlines=-1, edgeitems=5): - r""" + maxlines=-1, edgeitems=5, _axes_display_names=False): + r"""dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is', + maxlines=-1, edgeitems=5) + Dump array as a 2D nested list. This is especially useful when writing to an Excel sheet via open_excel(). Parameters @@ -2462,7 +2466,7 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam Assuming header is True, whether or not to include axes names. If axes_names is 'except_last', all axes names will be included except the last. Defaults to True. na_repr : any scalar, optional - Replace missing values (NaN floats) by this value. Default to 'as_is' (do not do any replacement). + Replace missing values (NaN floats) by this value. Defaults to 'as_is' (do not do any replacement). maxlines : int, optional Maximum number of lines to show. Defaults to -1 (all lines are shown). edgeitems : int, optional @@ -2516,7 +2520,11 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam ['...', '...', '...', '...'], ['a1', 'b1', 6, 7]] """ - display_axes_names = axes_names + # _axes_display_names : bool, optional + # Whether or not to get axes names using AxisCollection.display_names instead of + # AxisCollection.names. Defaults to False. + + dump_axes_names = axes_names if not header: # ensure_no_numpy_type is there mostly to avoid problems with xlwings, but I am unsure where that problem @@ -2540,14 +2548,18 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam data = self.data.reshape(height, width) # get list of names of axes - axes_names = self.axes.display_names[:] + if _axes_display_names: + axes_names = self.axes.display_names[:] + else: + axes_names = [axis_name if axis_name is not None else '' for axis_name in self.axes.names] # transforms ['a', 'b', 'c', 'd'] into ['a', 'b', 'c\\d'] if wide and len(axes_names) > 1: - if display_axes_names is True: - axes_names[-2] = '\\'.join(axes_names[-2:]) + if dump_axes_names is True: + separator = '\\' if axes_names[-1] else '' + axes_names[-2] = separator.join(axes_names[-2:]) axes_names.pop() - elif display_axes_names == 'except_last': + elif dump_axes_names == 'except_last': axes_names = axes_names[:-1] else: axes_names = [''] * (len(axes_names) - 1) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index edd74a327..7a7e7a369 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -226,6 +226,7 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo if unfold_last_axis_name: if isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]: last_axes = [name.strip() for name in axes_names[-1].split('\\')] + last_axes = [name if name else None for name in last_axes] axes_names = axes_names[:-1] + last_axes else: axes_names += [None] @@ -327,8 +328,13 @@ def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header= raise ValueError('sort_rows=True is not valid for 1D arrays. 
Please use sort_columns instead.') res = from_series(series, sort_rows=sort_columns) else: - axes_names = [decode(name, 'utf8') if isinstance(name, basestring) else name - for name in df.index.names] + def parse_axis_name(name): + if isinstance(name, basestring): + name = decode(name, 'utf8') + if not name: + name = None + return name + axes_names = [parse_axis_name(name) for name in df.index.names] unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1] res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header, unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs) diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 3a5597922..7c2587bb0 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -4128,11 +4128,22 @@ def test_to_excel_xlwings(tmpdir): def test_dump(): + # narrow format res = list(ndtest(3).dump(wide=False, value_name='data')) assert res == [['a', 'data'], ['a0', 0], ['a1', 1], ['a2', 2]] + # array with an anonymous axis and a wildcard axis + arr = ndtest((Axis('a0,a1'), Axis(2, 'b'))) + res = arr.dump() + assert res == [['\\b', 0, 1], + ['a0', 0, 1], + ['a1', 2, 3]] + res = arr.dump(_axes_display_names=True) + assert res == [['{0}\\b*', 0, 1], + ['a0', 0, 1], + ['a1', 2, 3]] @needs_xlwings @@ -4293,7 +4304,7 @@ def test_open_excel(tmpdir): assert_array_equal(res, a3.data.reshape((6, 4))) # 4) Blank cells - # ======================== + # ============== # Excel sheet with blank cells on right/bottom border of the array to read fpath = inputpath('test_blank_cells.xlsx') with open_excel(fpath) as wb: @@ -4309,7 +4320,16 @@ def test_open_excel(tmpdir): assert_array_equal(bad3, good2) assert_array_equal(bad4, good2) - # 5) crash test + # 5) anonymous and wildcard axes + # ============================= + arr = ndtest((Axis('a0,a1'), Axis(2, 'b'))) + fpath = tmp_path(tmpdir, 'anonymous_and_wildcard_axes.xlsx') + with open_excel(fpath, overwrite_file=True) as wb: + wb[0] = arr.dump() + res = wb[0].load() + assert arr.equals(res) + + # 6) crash test # ============= arr = ndtest((2, 2)) fpath = tmp_path(tmpdir, 'temporary_test_file.xlsx')
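To summarize the new `dump()` behavior for anonymous and wildcard axes, here is a sketch whose expected outputs are copied from the `test_dump` expectations above (`_axes_display_names` is a private parameter, exercised here only to mirror the test):

```python
# By default, dump() now writes the real axis names: an empty string for an
# anonymous axis and no '*' suffix for a wildcard axis, so the header can be
# written to and read back from Excel without pandas inventing names.
from larray import Axis, ndtest

arr = ndtest((Axis('a0,a1'), Axis(2, 'b')))   # anonymous axis x wildcard axis

arr.dump()
# -> [['\\b', 0, 1], ['a0', 0, 1], ['a1', 2, 3]]

arr.dump(_axes_display_names=True)            # repr()-style header, as before
# -> [['{0}\\b*', 0, 1], ['a0', 0, 1], ['a1', 2, 3]]
```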