(* ::Package:: *)
(* ......................................................................*)
(* :Title: Unsorted Operations *)
(* :Author: Leonid B. Shifrin *)
(* :Summary: Functions to perform certain structural operations
with several lists, such as unsorted union, intersection and
complement, mapping on common or different elements of several
lists, obtaining positions of same elements in different lists,
and a few others *)
(* :Context: UnsortedOperations` *)
(* :Package version: 1.0 *)
(* :Copyright: Copyright 2008, Leonid B.Shifrin.
Permission is granted to distribute verbatim copies of this
package together with any of your packages that use it,
provided the following acknowledgement is printed in a standard
place:
"UnsortedOperations.m is distributed with permission
by Leonid B. Shifrin."
*)
(* :History: Version 1.0 April 2008 *)
(* :Keywords: unsorted, intersection, complement, union, membership *)
(* :Mathematica version: 5.0 *)
(* :Discussion:
The goal of the package is to provide fast functions for operations
such as unsorted Union, Intersection and Complement, as well as some
versions of MapAt which maps a function on those elements of one list
which are for example members of another list, and some other
operations of the similar kind.
The implementation hinges on the use of ,
and built-ins (which are very fast with a native sorting or
sameness functions) in conjunction with Dispatched lists of rules
relating list elements and their positions. Another important
ingredient is the ability of Part built-in to quickly extract many
parts of the list simultaneously. In addition, for variants of MapAt,
the ability of built-in Part to make simultaneous assignments to many
parts of the list very fast, was used. The standard MapAt was not used
because it becomes slow for large lists of positions.
As a result, we have functions with approximately linear time C*
as a function of the lists intersection size , since the most
expensive operation is application of Dispatched rules plus the time
to Dispatch the rules (built-in Sort, Ordering, Union, Intersection
are log-linear but very fast, so in practice their cost is negligible).
Dispatched rules are quite fast though, so the constant is not
too large.
The element comparison is based on SameQ, since this is the native
sameness function for Union, Intersection and Complement, and also
the sorting functions use the canonical ordering. I did not implement
an option to use a user-defined sameness function since this will both
be far less efficient and more error-prone.
This also means that one should not use the present functions directly
for numerical (non-integer) lists. For those, one will be better off by
defining some key function (in the spirit of hash-functions) and then
use the present functions on lists of integer keys generated from the
original lists.
*)
(* ....................................................................*)
BeginPackage["`UnsortedOperations`"];
(*====================================================================*)
(*====================================================================*)
(*============== Usage messages ======================*)
(*====================================================================*)
(*====================================================================*)
If[Not@ValueQ[MemberPositions::usage],
MemberPositions::usage = "MemberPositions[x,y] takes
2 lists and returns a list of positions of elements in the first
list (x) which are also present in the second list (y). The element
comparison is based on SameQ (syntactic)."
];
If[Not@ValueQ[PositionsOfSame::usage],
PositionsOfSame::usage ="PositionsOfSame[lists__List] takes any
number of lists , and returns a nested list where each
sublist gives positions of some element , present in all in
all the input lists, in first, second etc lists. Since any given
input list may contain more than one copy of the element ,
the positions are grouped in lists.
For example, PositionsOfSame[{1,2,3,4},{2,4,2,2},{4,2,4,1}] ->
{{{2},{1,3,4},{2}},{{4},{2},{1,3}}}, the first sublist corresponding
to positions of element <2> and the second - to element <4>.
One can also use PositionsOfSame[x_List] to group together
together positions of identical elements in the list .
The order of position sublists in all cases corresponds to the
canonically sorted common elements (as given by Intersection[lists]).
"
];
If[Not@ValueQ[PositionsOfDifferent::usage],
PositionsOfDifferent::usage = "PositionsOfDifferent[x_List,y_List]
takes 2 lists x and y and returns a list of positions of those
elements of which are not in . The comparison is based on
SameQ (syntactic)"
];
If[Not@ValueQ[UnsortedUnion::usage],
UnsortedUnion::usage = "UnsortedUnion[x_List] computes an unsorted
Union of the list - that is, removes repeated elements without
sorting the resulting list"
];
If[Not@ValueQ[UnsortedIntersection::usage],
UnsortedIntersection::usage = "
UnsortedIntersection[lists__List,{positions__Integer?Positive}]
takes any number of lists and a list of positions,
specifying for which of the the result should be displayed.
For the lists indicated, it keeps only those of their elements which
are present in all of . For those not indicated in the
{positions} list, the result is not computed and not returned.
UnsortedIntersection[lists__List,All] does the same as above but
for all lists. Finally, UnsortedIntersection[lists__List,n_Integer]
computes the same for a single list {lists}[[n]], and is equivalent
to UnsortedIntersection[lists,{n}], but somewhat faster.
For example:
UnsortedIntersection[{1,2,3,4},{2,4,2,2},{4,2,4,1},{1,3}]->
{{2,4},{4,2,4}}
UnsortedIntersection[{1,2,3,4},{2,4,2,2},{4,2,4,1},All]->
{{2,4},{2,4,2,2},{4,2,4}}
UnsortedIntersection[{1,2,3,4},{2,4,2,2},{4,2,4,1},2]->
{2,4,2,2}"
];
If[Not@ValueQ[UnsortedComplement::usage],
UnsortedComplement::usage = "UnsortedComplement[x_List,y_List]
returns all the elements of which are not in , without
sorting the resulting list. The comparison is based on SameQ
(syntactic)"
];
If[Not@ValueQ[MapAtIntersection::usage],
MapAtIntersection::usage = "MapAtIntersection[f_,{lists__List}]
maps a function on those elements in each of the lists
which are present in all .
MapAtIntersection[{funs__},{lists__List}] maps a different
function {funs}[[k]] on a list {lists}[[k]]. The number of
functions have to match the number of lists"
];
If[Not@ValueQ[MapAtComplement::usage],
MapAtComplement::usage = "MapAtComplement[f_,x_List,y_List]
maps a function on those elements of which are not
in "
];
If[Not@ValueQ[MapAtMembers::usage],
MapAtMembers::usage = "MapAtMembers[f_,x_List,y_List] maps
a function on those elements of which are also in
"
];
If[Not@ValueQ[Subsequences::usage],
Subsequences::usage = "Subsequences[x_List,separators_List]
extracts the subsequences of separated by any number of
elements which are members of the list. For
example,
Subsequences[{2,3,4,2,1,5,1,2,4,3,2,3},{1,2}]->
{{3,4},{5},{4,3},{3}}
"
];
If[Not@ValueQ[MemberPositionsSequences::usage],
MemberPositionsSequences::usage =
"MemberPositionsSequences[x_List,y_List] gives sequences of
positions of consequitive elements of which are also in
. The comparison is based on SameQ (syntactic)"
];
If[Not@ValueQ[MemberSequences::usage],
MemberSequences::usage =
"MemberSequences[x_List,y_List] extracts from the list
sequences of consequitive elements which are also members of
.The comparison is based on SameQ (syntactic)"
];
(*====================================================================*)
(*====================================================================*)
(*============== Error messages ======================*)
(*====================================================================*)
(*====================================================================*)
MemberPositions::"nolst"=
"Both arguments of MemberPositions must be lists";
MemberPositions::"argnum" =
"MemberPositions was called with `1` arguments. Exactly two
arguments were expected";
PositionsOfSame::"argnum" = "PositionsOfSame called with zero
arguments. One or more arguments were expected";
PositionsOfSame::"nolst" = "All arguments of PositionsOfSame must
be lists";
PositionsOfDifferent::"nolst"=
"Both arguments of PositionsOfDifferent must be lists";
PositionsOfDifferent::"argnum" = "PositionsOfDifferent was called
with `1` arguments. Exactly two arguments were expected";
UnsortedUnion::"nolst" =
"The argument of UnsortedUnion must be a list";
UnsortedUnion::"argnum" =
"UnsortedUnion was called with `1` arguments. Exactly one argument
is expected";
UnsortedIntersection::"posmax" = "All positions in the last
argument (list) of UnsortedIntersection must be smaller or equal
to the number of the input lists";
UnsortedIntersection::"badpos" = "All positions in the last
argument (list) of UnsortedIntersection must be positive integers";
UnsortedIntersection::"lstarg" =
"The last argument of UnsortedIntersection must be a list
of positions, a single integer position or the keyword";
UnsortedIntersection::"argnum" = "UnsortedIntersection was
called with `1` arguments. At least 2 arguments are expected";
UnsortedIntersection::"nolst" =
"All arguments of UnsortedIntersection except possibly the
last one, must be lists";
UnsortedComplement::"nolst" = "Both arguments of UnsortedComplement
must be lists";
UnsortedComplement::"argnum" = "UnsortedComplement was called with
`1` arguments. Exactly 2 arguments are expected";
MapAtIntersection::"difflen" = "The lengths of the function
list and the list of input lists are supposed to be the same";
MapAtIntersection::"badfrmt" =
"The input lists in MapAtIntersection have to be combined in
a list";
MapAtIntersection::"argnum" = "MapAtIntersection was called
with `1` arguments. Exactly 2 arguments are expected";
MapAtComplement::"nolst" = "The last two arguments of MapAtComplement
are supposed to be lists";
MapAtComplement::"argnum" =
"MapAtComplement was called with `1` arguments. Exactly 3 arguments
were expected";
MapAtMembers::"nolst" = "The last two arguments of MapAtMembers
are supposed to be lists";
MapAtMembers::"argnum" =
"MapAtMembers was called with `1` arguments. Exactly 3 arguments
were expected";
Subsequences::"nolst" = "Both arguments of Subsequences
must be lists";
Subsequences::"argnum" =
"Subsequences was called with `1` arguments. Exactly 2 arguments
were expected";
MemberPositionsSequences::"nolst" =
"Both arguments of MemberPositionsSequences must be lists";
MemberPositionsSequences::"argnum" =
"MemberPositionsSequences was called with `1` arguments.
Exactly 2 arguments were expected";
MemberSequences::"nolst" =
"Both arguments of MemberSequences must be lists";
MemberSequences::"argnum" =
"MemberSequences was called with `1` arguments.
Exactly 2 arguments were expected";
(* =====================================================================*)
(* =====================================================================*)
(* ========== THE CODE STARTS HERE =========*)
(* =====================================================================*)
(* =====================================================================*)
Begin["`Private`"];
(* =====================================================================*)
(* ======================= Private functions ===========================*)
(* =====================================================================*)
Options[MemberPositionGroups]={GroupedPositions->True};
SplitPositions[poslist_List]:=
Split[poslist,#2==#1+1&];
ExtractSublists[x_List,groupedpositions_List]:=
Map[x[[#]]&,groupedpositions,Heads-> False];
PositionIntervals[{lengths__Integer?NonNegative}]:=
Transpose[{Most[#],Rest[#]-1}&
[FoldList[Plus,1,{lengths}]]
];
FrequenciesInSorted[{}]={};
FrequenciesInSorted[{sorted__}]:=
Map[{#[[1]],Length[#]}&,Split[{sorted}],Heads-> False];
PositionIntsOfDistinctInSorted[{}] = {};
PositionIntsOfDistinctInSorted[sorted_List]:=
MapAt[PositionIntervals,
Transpose[FrequenciesInSorted[sorted]],
2
];
MakeDispatchedRules[lhs_List,rhs_List]:=
Dispatch[Thread[Rule[lhs,rhs]]]/;Length[lhs]==Length[rhs];
FastMapAtSimpleList[f_,x_List,positions_List]:=
Module[{copy=x},
copy[[positions]]=Map[f,x[[positions]],Heads->False];
Return[copy]
];
MemberPositionGroups[x_List,inter_List,opts___?OptionQ]:=
Module[{order,xsorted,distinct,rules,positionsInSorted,
posintervals,
groupQ = GroupedPositions/.Flatten[{opts}]/.
Options[MemberPositionGroups]},
xsorted =x[[order = Ordering[x]]];
{distinct,posintervals} =
PositionIntsOfDistinctInSorted[xsorted];
rules = MakeDispatchedRules[distinct,posintervals];
positionsInSorted =
If[groupQ,#,Flatten[Apply[Range,#,1,Heads->False]]]&
[ReplaceAll[inter,rules]];
Return[
If[groupQ,
Map[Take[order,#]&,Sort[positionsInSorted],Heads->False],
(* else *)
Sort[order[[positionsInSorted]]]
]
]
];
(*======================================================================*)
(*======================== Public functions ============================*)
(*======================================================================*)
(*=================================================*)
MemberPositions[{},_List]:={};
MemberPositions[x_List,y_List]:=
MemberPositionGroups[x,Intersection[x,y,SameTest-> Automatic],
GroupedPositions->False
];
(* Error messages *)
MemberPositions[_,_]:=
"never happens"/;Message[MemberPositions::"nolst"];
MemberPositions[x___]:=
"never happens"/;Message[MemberPositions::"argnum",Length[{x}]];
(*=================================================*)
PositionsOfSame[___List,{},___List]:={};
PositionsOfSame[lists__List]:=
With[{inter = Intersection[lists,SameTest-> Automatic]},
Transpose[
Map[MemberPositionGroups[#,inter,GroupedPositions->True]&,
{lists},
Heads-> False
]
]
];
(* Error messages *)
PositionsOfSame[]:=
"never happens"/;Message[PositionsOfSame::"argnum"];
PositionsOfSame[__]:=
"never happens"/;Message[PositionsOfSame::"nolst"];
(*=================================================*)
PositionsOfDifferent[x_List,y_List]:=
Complement[Range[Length[x]],MemberPositions[x,y],SameTest-> Automatic];
(* Error messages *)
PositionsOfDifferent[_,_]:=
"never happens"/;Message[PositionsOfDifferent::"nolst"];
PositionsOfDifferent[x___]:=
"never happens"/;Message[PositionsOfDifferent::"argnum",Length[{x}]];
(*=================================================*)
UnsortedUnion[x_List]:=
Part[x,
Sort[Union[x,SameTest-> Automatic]/.
MakeDispatchedRules[x,Range[Length[x]]]
]
]
(* Error messages *)
UnsortedUnion[_]:=
"never happens"/;Message[UnsortedUnion::"nolst"];
UnsortedUnion[x___]:=
"never happens"/;Message[UnsortedUnion::"argnum",Length[{x}]];
(*=================================================*)
UnsortedIntersection[lists__List,{positions__Integer?Positive}]/;
Max[{positions}]<= Length[{lists}]:=
With[{sameposlist = PositionsOfSame[lists]},
MapThread[#1[[#2]]&,
{
{lists}[[{positions}]],
Map[Sort[Flatten[#]]&,
Transpose[sameposlist][[{positions}]],
Heads->False
]
}
]
];
UnsortedIntersection[lists__List,All]:=
UnsortedIntersection[lists,Range[Length[{lists}]]];
UnsortedIntersection[lists__List,n_Integer?Positive]/;n<=Length[{lists}]:=
With[{ourlist ={lists}[[n]]},
Part[ourlist,
MemberPositionGroups[ourlist,
Intersection[lists,SameTest-> Automatic],
GroupedPositions->False
]
]
];
(* Error messages *)
UnsortedIntersection[lists__List,{positions__Integer?Positive}]:=
"never happens"/;Message[UnsortedIntersection::"posmax"];
UnsortedIntersection[lists__List,{positions__}]:=
"never happens"/;Message[UnsortedIntersection::"badpos"];
UnsortedIntersection[lists__List,_]:=
"never happens"/;Message[UnsortedIntersection::"lstarg"];
UnsortedIntersection[x___]:=
(Length[{x}]<=1)&&
("never happens"/;Message[UnsortedIntersection::"argnum",Length[{x}]]);
UnsortedIntersection[x___]:=
"never happens"/;Message[UnsortedIntersection::"nolst"];
(*=================================================*)
UnsortedComplement[x_List,y_List]:=x[[PositionsOfDifferent[x,y]]];
(* Error messages *)
UnsortedComplement[_,_]:=
"never happens"/;Message[UnsortedComplement::"nolst"];
UnsortedComplement[x___]:=
"never happens"/;Message[UnsortedComplement::"argnum",Length[{x}]];
(*=================================================*)
MapAtIntersection[{funs__},{lists__List}]/;Length[{funs}]==Length[{lists}]:=
MapThread[FastMapAtSimpleList,{{funs},{lists},
Map[Flatten,Transpose[PositionsOfSame[lists]],Heads->False]}
];
MapAtIntersection[f_,{lists__List}]/;Head[f]=!=List:=
MapAtIntersection[Table[f,{Length[{lists}]}],{lists}];
(* Error messages *)
MapAtIntersection[{funs__},{lists__List}]:=
"never happens"/;Message[MapAtIntersection::"difflen"];
MapAtIntersection[_,lists__List]:=
"never happens"/;Message[MapAtIntersection::"badfrmt"];
MapAtIntersection[x___]:=
"never happens"/;Message[MapAtIntersection::"argnum",Length[{x}]];
(*=================================================*)
MapAtComplement[f_,x_List,y_List]:=
FastMapAtSimpleList[f,x,PositionsOfDifferent[x,y]];
(* Error messages *)
MapAtComplement[_,_,_]:=
"never happens"/;Message[MapAtComplement::"nolst"];
MapAtComplement[x___]:=
"never happens"/;Message[MapAtComplement::"argnum",Length[{x}]];
(*=================================================*)
MapAtMembers[f_,x_List,y_List]:=
FastMapAtSimpleList[f,x,MemberPositions[x,y]];
(* Error messages *)
MapAtMembers[_,_,_]:=
"never happens"/;Message[MapAtMembers::"nolst"];
MapAtMembers[x___]:=
"never happens"/;Message[MapAtMembers::"argnum",Length[{x}]];
(*=================================================*)
Subsequences[x_List,separators_List]:=
ExtractSublists[x,
SplitPositions[
PositionsOfDifferent[x,separators]
]
];
(* Error messages *)
Subsequences[_,_]:=
"never happens"/;Message[Subsequences::"nolst"];
Subsequences[x___]:=
"never happens"/;Message[Subsequences::"argnum",Length[{x}]];
(*=================================================*)
MemberPositionsSequences[x_List,y_List]:=
SplitPositions[MemberPositions[x,y]];
(* Error messages *)
MemberPositionsSequences[_,_]:=
"never happens"/;Message[MemberPositionsSequences::"nolst"];
MemberPositionsSequences[x___]:=
"never happens"/;Message[MemberPositionsSequences::"argnum",Length[{x}]];
(*=================================================*)
MemberSequences[x_List,y_List]:=
ExtractSublists[x,MemberPositionsSequences[x,y]];
(* Error messages *)
MemberSequences[_,_]:=
"never happens"/;Message[MemberSequences::"nolst"];
MemberSequences[x___]:=
"never happens"/;Message[MemberSequences::"argnum",Length[{x}]];
(* =====================================================================*)
(* ========== THE CODE ENDS HERE =========*)
(* =====================================================================*)
End[];
Protect[MemberPositions,PositionsOfSame,PositionsOfDifferent,
UnsortedUnion,UnsortedIntersection,UnsortedComplement,
MapAtIntersection, MapAtComplement,MapAtMembers,Subsequences,
MemberPositionsSequences,MemberSequences];
EndPackage[];