sequence utilities (#35)(an instance of Generic Utilities Package made by Hacker)     A sequence is a set of integers (*)      This package supplies the following verbs:             :add (seq,f,t) => seq with [f..t] interval added       :remove (seq,f,t) => seq with [f..t] interval removed       :range (f,t) => sequence corresponding to [f..t]       {} => empty sequence       :contains (seq,n) => n in seq       :size (seq) => number of elements in seq       :first (seq) => first integer in seq or E_NONE       :firstn (seq,n) => first n integers in seq (as a sequence)       :last (seq) => last integer in seq or E_NONE       :lastn (seq,n) => last n integers in seq (as a sequence)             :complement(seq) => [-2147483648..2147483647] - seq       :union (seq,seq,...)       :intersect(seq,seq,...)             :extract(seq,array) => array[@seq]       :for([n,]seq,obj,verb,@args) => for s in (seq) obj:verb(s,@args); endfor             :tolist(seq) => list corresponding to seq       :tostr(seq) => contents of seq as a string       :from_list(list) => sequence corresponding to list       :from_sorted_list(list) => sequence corresponding to list (assumed sorted)       :from_string(string) => sequence corresponding to string            For boolean expressions, note that       the representation of the empty sequence is {} (boolean FALSE) and       all non-empty sequences are represented as nonempty lists (boolean TRUE).            The representation used works better than the usual list implementation for sets consisting of long uninterrupted ranges of integers.      For sparse sets of integers the representation is decidedly non-optimal (though it never takes more than double the space of the usual list representation).            (*) Actually what this package implements is sets of integers-mod-2^32, but this assumes the underlying machine on which the server runs has 32-bit integers. If not, you need to change this.maxneg to be the largest negative ("smallest"?) integer available. VERB SOURCE CODE: add remove:
" add(seq,start[,end]) => seq with range added.";
"remove(seq,start[,end]) => seq with range removed.";
" both assume start<=end.";
remove = verb == "remove";
seq = args[1];
start = args[2];
s = (start == $minint) ? 1 | $list_utils:find_insert(seq, start - 1);
if (length(args) < 3)
return {@seq[1..s - 1], @((s + remove) % 2) ? {start} | {}};
else
e = $list_utils:find_insert(seq, after = args[3] + 1);
return {@seq[1..s - 1], @((s + remove) % 2) ? {start} | {}, @((e + remove) %
2) ? {after} | {}, @seq[e..length(seq)]};
endif
.
contains: ":contains(seq,elt) => true iff elt is in seq."; return ($list_utils:find_insert(@args) + 1) % 2; . complement:
":complement(seq[,lower[,upper]]) => the sequence containing all integers *not* in
seq.";
"If lower/upper are given, the resulting sequence is restricted to the specified
range.";
"Bad things happen if seq is not a subset of [lower..upper]";
seq = args[1];
lower = {@args, $minint}[2];
if (length(args) >= 3)
if (seq[l = length(seq)] >= (upper = args[3] + 1))
seq[l..l] = {};
else
seq[l + 1..l] = {upper};
endif
endif
if (seq && (seq[1] <= lower))
return listdelete(seq, 1);
else
return {lower, @seq};
endif
.
union:
":union(seq1,seq2,...) => union of all sequences...";
if ({} in args)
args = $list_utils:setremove_all(args, {});
endif
if (length(args) <= 1)
return args ? args[1] | {};
endif
return this:_union(@args);
.
tostr:
if (!(seq = args[1]))
return "empty";
endif
e = tostr((seq[1] == $minint) ? "" | seq[1]);
for i in [2..length(seq)]
e = e + ((i % 2) ? tostr(", ", seq[i]) | ((seq[i] == (seq[i - 1] + 1)) ? "" |
tostr("..", seq[i] - 1)));
endfor
return e + ((length(seq) % 2) ? ".." | "");
.
for:
":for([n,]seq,obj,verb,@args) => for s in (seq) obj:verb(s,@args); endfor";
if (typeof(n = args[1]) == NUM)
args = listdelete(args, 1);
seq = args[1];
else
seq = n;
n = 1;
endif
if (seq[1] == $minint)
return E_RANGE;
endif
object = args[2];
vname = args[3];
args = args[4..length(args)];
for r in [1..length(seq) / 2]
for i in [seq[(2 * r) - 1]..seq[2 * r] - 1]
if (typeof(object:(vname)(@listinsert(args, i, n))) == ERR)
return;
endif
endfor
endfor
if (length(seq) % 2)
i = seq[length(seq)];
while (1)
if (typeof(object:(vname)(@listinsert(args, i, n))) == ERR)
return;
endif
i = i + 1;
endwhile
endif
.
extract:
"extract(seq,array) => list of elements of array with indices in seq.";
if (alen = length(array = args[2]))
e = $list_utils:find_insert(seq = args[1], 1);
s = $list_utils:find_insert(seq, alen);
seq = {@(e % 2) ? {} | {1}, @seq[e..s - 1], @(s % 2) ? {} | {alen + 1}};
ret = {};
for i in [1..length(seq) / 2]
$command_utils:suspend_if_needed(0);
ret = {@ret, @array[seq[(2 * i) - 1]..seq[2 * i] - 1]};
endfor
return ret;
else
return {};
endif
.
tolist:
seq = args[1];
if (!seq)
return {};
else
if (length(seq) % 2)
seq = {@seq, $minint};
endif
l = {};
for i in [1..length(seq) / 2]
for j in [seq[(2 * i) - 1]..seq[2 * i] - 1]
l = {@l, j};
endfor
endfor
return l;
endif
.
from_list: ":fromlist(list) => corresponding sequence."; return this:from_sorted_list($list_utils:sort(args[1])); . from_sorted_list:
":from_sorted_list(sorted_list) => corresponding sequence.";
if (!(lst = args[1]))
return {};
else
seq = {i = lst[1]};
next = i + 1;
for i in (listdelete(lst, 1))
if (i != next)
seq = {@seq, next, i};
endif
next = i + 1;
endfor
return (next == $minint) ? seq | {@seq, next};
endif
.
first: return (seq = args[1]) ? seq[1] | E_NONE; . last: return (seq = args[1]) ? ((len = length(seq)) % 2) ? $minint - 1 | (seq[len] - 1) | E_NONE; . size:
":size(seq) => number of elements in seq";
" for sequences consisting of more than half of the 4294967298 available integers,
this returns a negative number, which can either be interpreted as (cardinality -
4294967298) or -(size of complement sequence)";
n = 0;
for i in (seq = args[1])
n = i - n;
endfor
return (length(seq) % 2) ? $minint - n | n;
.
from_string:
":from_string(string) => corresponding sequence or E_INVARG";
" string should be a comma separated list of numbers and";
" number..number ranges";
su = $string_utils;
if (!(words = su:explode(su:strip_chars(args[1], " "), ",")))
return {};
endif
parts = {};
for word in (words)
to = index(word, "..");
if ((!to) && su:is_numeric(word))
part = {tonum(word), tonum(word) + 1};
elseif (to)
if (to == 1)
start = $minint;
elseif (su:is_numeric(start = word[1..to - 1]))
start = tonum(start);
else
return E_INVARG;
endif
end = word[to + 2..length(word)];
if (!end)
part = {start};
elseif (!su:is_numeric(end))
return E_INVARG;
elseif ((end = tonum(end)) >= start)
part = {start, end + 1};
else
part = {};
endif
else
return E_INVARG;
endif
parts = {@parts, part};
endfor
return this:union(@parts);
.
firstn:
":firstn(seq,n) => first n elements of seq as a sequence.";
if ((n = args[2]) <= 0)
return {};
endif
l = length(seq = args[1]);
s = 1;
while (s <= l)
n = n + seq[s];
if ((s >= l) || (n <= seq[s + 1]))
return {@seq[1..s], n};
endif
n = n - seq[s + 1];
s = s + 2;
endwhile
return seq;
.
lastn:
":lastn(seq,n) => last n elements of seq as a sequence.";
n = args[2];
if ((l = length(seq = args[1])) % 2)
return {$minint - n};
else
s = l;
while (s)
n = seq[s] - n;
if (n >= seq[s - 1])
return {n, @seq[s..l]};
endif
n = seq[s - 1] - n;
s = s - 2;
endwhile
return seq;
endif
.
range:
":range(start,end) => sequence corresponding to [start..end] range";
return ((start = args[1]) <= (end = args[2])) ? {start, end + 1} | {};
.
expand:
":expand(seq,eseq[,include=0])";
"eseq is assumed to be a finite sequence consisting of intervals ";
"[f1..a1-1],[f2..a2-1],... We map each element i of seq to";
" i if i < f1";
" i+(a1-f1) if f1 <= i < f2-(a1-f1)";
" i+(a1-f1+a2-f2) if f2-(a1-f1) <= i < f3-(a2-f2)-(a1-f1)";
" ...";
"returning the resulting sequence if include=0,";
"returning the resulting sequence unioned with eseq if include=1;";
old = args[1];
insert = args[2];
exclude = !{@args, 0}[3];
if (!insert)
return old;
elseif (((ilen = length(insert)) % 2) || (insert[1] == $minint))
return E_TYPE;
endif
olast = length(old);
ilast = length(insert);
"... find first o for which old[o] >= insert[1]...";
ifirst = insert[i = 1];
o = $list_utils:find_insert(old, ifirst - 1);
if (o > olast)
return ((olast % 2) == exclude) ? {@old, @insert} | old;
endif
new = old[1..o - 1];
oe = old[o];
diff = 0;
while (1)
"INVARIANT: oe == old[o]+diff";
"INVARIANT: oe >= ifirst == insert[i]";
"... at this point we need to dispose of the interval ifirst..insert[i+1]";
if (oe == ifirst)
new = {@new, insert[i + ((o % 2) == exclude)]};
if (o >= olast)
return ((olast % 2) == exclude) ? {@new, @insert[i + 2..ilast]} |
new;
endif
o = o + 1;
else
if ((o % 2) != exclude)
new = {@new, @insert[i..i + 1]};
endif
endif
"... advance i...";
diff = (diff + insert[i + 1]) - ifirst;
if ((i = i + 2) > ilast)
for oe in (old[o..olast])
new = {@new, oe + diff};
endfor
return new;
endif
ifirst = insert[i];
"... find next o for which old[o]+diff >= ifirst )...";
while ((oe = old[o] + diff) < ifirst)
new = {@new, oe};
if (o >= olast)
return ((olast % 2) == exclude) ? {@new, @insert[i..ilast]} |
new;
endif
o = o + 1;
endwhile
endwhile
.
contract:
":contract(seq,cseq)";
"cseq is assumed to be a finite sequence consisting of intervals ";
"[f1..a1-1],[f2..a2-1],... From seq, we remove any elements that ";
"are in those ranges and map each remaining element i to";
" i if i < f1";
" i-(a1-f1) if a1 <= i < f2";
" i-(a1-f1+a2-f2) if a2 <= i < f3 ...";
"returning the resulting sequence.";
"";
"For any finite sequence cseq, the following always holds:";
" :contract(:expand(seq,cseq,include),cseq)==seq";
old = args[1];
removed = args[2];
if (!removed)
return old;
elseif (((rlen = length(removed)) % 2) || (removed[1] == $minint))
return E_TYPE;
endif
rfirst = removed[1];
ofirst = $list_utils:find_insert(old, rfirst - 1);
new = old[1..ofirst - 1];
diff = 0;
rafter = removed[r = 2];
for o in [ofirst..olast = length(old)]
while (old[o] > rafter)
if ((o - ofirst) % 2)
new = {@new, rfirst - diff};
ofirst = o;
endif
diff = (diff + rafter) - rfirst;
if (r >= rlen)
for oe in (old[o..olast])
new = {@new, oe - diff};
endfor
return new;
endif
rfirst = removed[r + 1];
rafter = removed[r = r + 2];
endwhile
if (old[o] < rfirst)
new = {@new, old[o] - diff};
ofirst = o + 1;
endif
endfor
return ((olast - ofirst) % 2) ? new | {@new, rfirst - diff};
.
_union:
":_union(seq,seq,...)";
"assumes all seqs are nonempty and that there are at least 2";
nargs = length(args);
"args -- list of sequences.";
"nexts -- nexts[i] is the index in args[i] of the start of the first";
" interval not yet incorporated in the return sequence.";
"heap -- a binary tree of indices into args/nexts represented as a list where";
" heap[1] is the root and the left and right children of heap[i]";
" are heap[2*i] and heap[2*i+1] respectively. ";
" Parent index h is <= both children in the sense of args[h][nexts[h]].";
" heap[i]==0 indicates a nonexistant child; we fill out the array with";
" zeros so that length(heap)>2*length(args).";
"...initialize heap...";
heap = {0, 0, 0, 0, 0};
nexts = {1, 1};
hlen2 = 2;
while (hlen2 < nargs)
nexts = {@nexts, @nexts};
heap = {@heap, @heap};
hlen2 = hlen2 * 2;
endwhile
for n in [-nargs..-1]
s1 = args[i = -n][1];
while ((hleft = heap[2 * i]) && (s1 > (m = min(la = args[hleft][1], (hright =
heap[(2 * i) + 1]) ? args[hright][1] | $maxint))))
if (m == la)
heap[i] = hleft;
i = 2 * i;
else
heap[i] = hright;
i = (2 * i) + 1;
endif
endwhile
heap[i] = -n;
endfor
"...";
"...find first interval...";
h = heap[1];
rseq = {args[h][1]};
if (length(args[h]) < 2)
return rseq;
endif
current_end = args[h][2];
nexts[h] = 3;
"...";
while (1)
if (length(args[h]) >= nexts[h])
"...this sequence has some more intervals in it...";
else
"...no more intevals left in this sequence, grab another...";
h = heap[1] = heap[nargs];
heap[nargs] = 0;
if ((nargs = nargs - 1) > 1)
elseif (args[h][nexts[h]] > current_end)
return {@rseq, current_end, @args[h][nexts[h]..length(args[h])]};
elseif ((i = $list_utils:find_insert(args[h], current_end)) % 2)
return {@rseq, current_end, @args[h][i..length(args[h])]};
else
return {@rseq, @args[h][i..length(args[h])]};
endif
endif
"...";
"...sink the top sequence...";
i = 1;
first = args[h][nexts[h]];
while ((hleft = heap[2 * i]) && (first > (m = min(la = args[hleft][nexts[hleft]],
(hright = heap[(2 * i) + 1]) ? args[hright][nexts[hright]] | $maxint))))
if (m == la)
heap[i] = hleft;
i = 2 * i;
else
heap[i] = hright;
i = (2 * i) + 1;
endif
endwhile
heap[i] = h;
"...";
"...check new top sequence ...";
if (args[h = heap[1]][nexts[h]] > current_end)
"...hey, a new interval! ...";
rseq = {@rseq, current_end, args[h][nexts[h]]};
if (length(args[h]) <= nexts[h])
return rseq;
endif
current_end = args[h][nexts[h] + 1];
nexts[h] = nexts[h] + 2;
else
"...first interval overlaps with current one ...";
i = $list_utils:find_insert(args[h], current_end);
if (i % 2)
nexts[h] = i;
elseif (i > length(args[h]))
return rseq;
else
current_end = args[h][i];
nexts[h] = i + 1;
endif
endif
endwhile
.
intersection:
":intersection(seq1,seq2,...) => intersection of all sequences...";
if ((U = {$minint}) in args)
args = $list_utils:setremove_all(args, U);
endif
if (length(args) <= 1)
return args ? args[1] | U;
endif
return this:complement(this:_union(@$list_utils:map_arg(this, "complement", args)));
.
PROPERTY DATA: |