@@ -593,7 +593,7 @@ Base.transpose(ds::Union{GroupBy, GatherBy}, cols::Tuple; id = nothing, renameco
593593
594594
595595"""
596- flatten(ds::AbstractDataset, cols; mapformats = false )
596+ flatten(ds::AbstractDataset, cols)
597597
598598When columns `cols` of data set `ds` have iterable elements that define
599599`length` (for example a `Vector` of `Vector`s), return a `Dataset` where each
@@ -605,8 +605,6 @@ same for each `col` in `cols`, or else an error is raised. Note that these
605605elements are not copied, and thus if they are mutable changing them in the
606606returned `Dataset` will affect `ds`.
607607
608- When `mapformats = true`, the function uses the formatted values of `cols`.
609-
610608`cols` can be any column selector ($COLUMNINDEX_STR ; $MULTICOLUMNINDEX_STR ).
611609
612610See [`flatten!`](@ref)
@@ -676,140 +674,34 @@ julia> ds3 = Dataset(a = [1, 2], b = [[1, 2], [3, 4]], c = [[5, 6], [7]])
676674
677675julia> flatten(ds3, [:b, :c])
678676ERROR: ArgumentError: Lengths of iterables stored in columns :b and :c are not the same in row 2
679-
680- julia> ds = Dataset(x=1:3, y=["ab,cd", "e", missing], z=[[1,2], 2, 3])
681- 3×3 Dataset
682- Row │ x y z
683- │ identity identity identity
684- │ Int64? String? Any
685- ─────┼──────────────────────────────
686- 1 │ 1 ab,cd [1, 2]
687- 2 │ 2 e 2
688- 3 │ 3 missing 3
689-
690- julia> fmt(x) = split(x, ",")
691- fmt (generic function with 2 methods)
692-
693- julia> fmt(::Missing) = missing
694- fmt (generic function with 2 methods)
695-
696- julia> setformat!(ds, :y => fmt)
697- 3×3 Dataset
698- Row │ x y z
699- │ identity fmt identity
700- │ Int64? String? Any
701- ─────┼───────────────────────────────────────────────────
702- 1 │ 1 SubString{String}["ab", "cd"] [1, 2]
703- 2 │ 2 SubString{String}["e"] 2
704- 3 │ 3 missing 3
705-
706- julia> flatten(ds, :y)
707- 7×3 Dataset
708- Row │ x y z
709- │ identity identity identity
710- │ Int64? Char? Any
711- ─────┼──────────────────────────────
712- 1 │ 1 a [1, 2]
713- 2 │ 1 b [1, 2]
714- 3 │ 1 , [1, 2]
715- 4 │ 1 c [1, 2]
716- 5 │ 1 d [1, 2]
717- 6 │ 2 e 2
718- 7 │ 3 missing 3
719-
720- julia> flatten(ds, :y, mapformats = true)
721- 4×3 Dataset
722- Row │ x y z
723- │ identity identity identity
724- │ Int64? SubStrin…? Any
725- ─────┼────────────────────────────────
726- 1 │ 1 ab [1, 2]
727- 2 │ 1 cd [1, 2]
728- 3 │ 2 e 2
729- 4 │ 3 missing 3
730-
731- julia> flatten(ds, 2:3, mapformats = true)
732- 4×3 Dataset
733- Row │ x y z
734- │ identity identity identity
735- │ Int64? SubStrin…? Int64?
736- ─────┼────────────────────────────────
737- 1 │ 1 ab 1
738- 2 │ 1 cd 2
739- 3 │ 2 e 2
740- 4 │ 3 missing 3
741677```
742678"""
743679flatten (ds, cols)
744680
745681"""
746- flatten!(ds, cols; mapformats = false )
682+ flatten!(ds, cols)
747683
748684Variant of `flatten` that does flatten `ds` in-place.
749685"""
750686flatten!
751687
752- function _ELTYPE (x; fmt = identity)
753- if fmt == identity
754- eltype (x)
755- else
756- eltype (fmt (x))
757- end
758- end
759- function _ELTYPE (x:: Missing ; fmt = identity)
760- if fmt == identity
761- Missing
762- elseif ismissing (fmt (x))
763- Missing
764- else
765- eltype (fmt (x))
766- end
767- end
768-
769-
770- function _LENGTH (x; fmt = identity)
771- if fmt == identity
772- res = length (x)
773- else
774- res = length (fmt (x))
775- end
776- res
777- end
778-
779- function _LENGTH (x:: Missing ; fmt = identity)
780- if fmt == identity
781- res = 1
782- elseif ismissing (fmt (x))
783- res = 1
784- else
785- res = length (fmt (x))
786- end
787- res
788- end
789-
688+ _ELTYPE (x) = eltype (x)
689+ _ELTYPE (:: Missing ) = Missing
690+ _LENGTH (x) = length (x)
691+ _LENGTH (:: Missing ) = 1
790692
791693function flatten! (ds:: Dataset ,
792- cols:: Union{ColumnIndex, MultiColumnIndex} ; mapformats = false )
694+ cols:: Union{ColumnIndex, MultiColumnIndex} )
793695 _check_consistency (ds)
794696
795697 idxcols = index (ds)[cols]
796- isempty (idxcols) && return ds
698+ isempty (idxcols) && return copy (ds)
797699 col1 = first (idxcols)
798- if mapformats
799- f_fmt = getformat (ds, col1)
800- lengths = _LENGTH .(_columns (ds)[col1], fmt = f_fmt)
801- else
802- lengths = _LENGTH .(_columns (ds)[col1])
803- end
700+ lengths = _LENGTH .(_columns (ds)[col1])
804701 for col in idxcols
805702 v = _columns (ds)[col]
806- if mapformats
807- f_fmt = getformat (ds, col)
808- else
809- f_fmt = identity
810- end
811- if any (x -> _LENGTH (x[1 ], fmt = f_fmt) != x[2 ], zip (v, lengths))
812- r = findfirst (x -> x != 0 , _LENGTH .(v, fmt = f_fmt) .- lengths)
703+ if any (x -> _LENGTH (x[1 ]) != x[2 ], zip (v, lengths))
704+ r = findfirst (x -> x != 0 , _LENGTH .(v) .- lengths)
813705 colnames = _names (ds)
814706 throw (ArgumentError (" Lengths of iterables stored in columns :$(colnames[col1]) " *
815707 " and :$(colnames[col]) are not the same in row $r " ))
@@ -821,14 +713,9 @@ function flatten!(ds::Dataset,
821713 length (idxcols) > 1 && sort! (idxcols)
822714 for col in idxcols
823715 col_to_flatten = _columns (ds)[col]
824- if mapformats
825- f_fmt = getformat (ds, col)
826- else
827- f_fmt = identity
828- end
829- T = mapreduce (x-> _ELTYPE (x, fmt = f_fmt), promote_type, col_to_flatten)
716+ T = mapreduce (_ELTYPE, promote_type, col_to_flatten)
830717 _res = allocatecol (T, new_total)
831- _fill_flatten! (_res, col_to_flatten, lengths; fmt = f_fmt )
718+ _fill_flatten! (_res, col_to_flatten, lengths)
832719 if length (idxcols) == ncol (ds)
833720 _columns (ds)[col] = _res
834721 else
@@ -842,27 +729,17 @@ end
842729
843730
844731function flatten (ds:: AbstractDataset ,
845- cols:: Union{ColumnIndex, MultiColumnIndex} ; mapformats = false )
732+ cols:: Union{ColumnIndex, MultiColumnIndex} )
846733 _check_consistency (ds)
847734
848735 idxcols = index (ds)[cols]
849736 isempty (idxcols) && return copy (ds)
850737 col1 = first (idxcols)
851- if mapformats
852- f_fmt = getformat (ds, col1)
853- lengths = _LENGTH .(_columns (ds)[col1], fmt = f_fmt)
854- else
855- lengths = _LENGTH .(_columns (ds)[col1])
856- end
738+ lengths = _LENGTH .(_columns (ds)[col1])
857739 for col in idxcols
858740 v = _columns (ds)[col]
859- if mapformats
860- f_fmt = getformat (ds, col)
861- else
862- f_fmt = identity
863- end
864- if any (x -> _LENGTH (x[1 ], fmt = f_fmt) != x[2 ], zip (v, lengths))
865- r = findfirst (x -> x != 0 , _LENGTH .(v, fmt = f_fmt) .- lengths)
741+ if any (x -> _LENGTH (x[1 ]) != x[2 ], zip (v, lengths))
742+ r = findfirst (x -> x != 0 , _LENGTH .(v) .- lengths)
866743 colnames = _names (ds)
867744 throw (ArgumentError (" Lengths of iterables stored in columns :$(colnames[col1]) " *
868745 " and :$(colnames[col]) are not the same in row $r " ))
@@ -876,41 +753,34 @@ function flatten(ds::AbstractDataset,
876753 length (idxcols) > 1 && sort! (idxcols)
877754 for col in idxcols
878755 col_to_flatten = _columns (ds)[col]
879- if mapformats
880- f_fmt = getformat (ds, col)
881- else
882- f_fmt = identity
883- end
884- T = mapreduce (x-> _ELTYPE (x, fmt = f_fmt), promote_type, col_to_flatten)
756+ T = mapreduce (_ELTYPE, promote_type, col_to_flatten)
885757 _res = allocatecol (T, new_total)
886- _fill_flatten! (_res, col_to_flatten, lengths; fmt = f_fmt)
887- insertcols! (new_ds, col, _names (ds)[col] => _res, unsupported_copy_cols = false )
888- end
889- for j in setdiff (1 : ncol (ds), idxcols)
890- setformat! (new_ds, j=> getformat (ds, j))
758+ _fill_flatten! (_res, col_to_flatten, lengths)
759+ insertcols! (new_ds, col, _names (ds)[col] => _res)
891760 end
761+ setformat! (new_ds, copy (index (ds). format))
892762 setinfo! (new_ds, _attributes (ds). meta. info[])
893763 _reset_grouping_info! (new_ds)
894764 new_ds
895765end
896766
897767
898- function _fill_flatten!_barrier (_res, val, counter; fmt = identity )
899- for j in fmt ( val)
768+ function _fill_flatten!_barrier (_res, val, counter)
769+ for j in val
900770 _res[counter] = j
901771 counter += 1
902772 end
903773 counter
904774end
905775
906- function _fill_flatten! (_res, col_to_flatten, lengths; fmt = identity )
776+ function _fill_flatten! (_res, col_to_flatten, lengths)
907777 counter = 1
908778 for i in 1 : length (col_to_flatten)
909- if ismissing (fmt ( col_to_flatten[i]) )
779+ if ismissing (col_to_flatten[i])
910780 _res[counter] = missing
911781 counter += 1
912782 else
913- counter = _fill_flatten!_barrier (_res, col_to_flatten[i], counter; fmt = fmt )
783+ counter = _fill_flatten!_barrier (_res, col_to_flatten[i], counter)
914784 end
915785 end
916786end
0 commit comments