sequence utilities (#35)

(an instance of Generic Utilities Package made by Hacker)

     A sequence is a set of integers (*)
     This package supplies the following verbs:
     
      :add (seq,f,t) => seq with [f..t] interval added
      :remove (seq,f,t) => seq with [f..t] interval removed
      :range (f,t) => sequence corresponding to [f..t]
      {} => empty sequence
      :contains (seq,n) => n in seq
      :size (seq) => number of elements in seq
      :first (seq) => first integer in seq or E_NONE
      :firstn (seq,n) => first n integers in seq (as a sequence)
      :last (seq) => last integer in seq or E_NONE
      :lastn (seq,n) => last n integers in seq (as a sequence)
     
      :complement(seq) => [-2147483648..2147483647] - seq
      :union (seq,seq,...)
      :intersect(seq,seq,...)
     
      :extract(seq,array) => array[@seq]
      :for([n,]seq,obj,verb,@args) => for s in (seq) obj:verb(s,@args); endfor
     
      :tolist(seq) => list corresponding to seq
      :tostr(seq) => contents of seq as a string
      :from_list(list) => sequence corresponding to list
      :from_sorted_list(list) => sequence corresponding to list (assumed sorted)
      :from_string(string) => sequence corresponding to string
     
     For boolean expressions, note that
      the representation of the empty sequence is {} (boolean FALSE) and
      all non-empty sequences are represented as nonempty lists (boolean TRUE).
     
     The representation used works better than the usual list implementation for sets consisting of long uninterrupted ranges of integers.
     For sparse sets of integers the representation is decidedly non-optimal (though it never takes more than double the space of the usual list representation).
     
     (*) Actually what this package implements is sets of integers-mod-2^32, but this assumes the underlying machine on which the server runs has 32-bit integers. If not, you need to change this.maxneg to be the largest negative ("smallest"?) integer available.



VERB SOURCE CODE:

add remove:
"   add(seq,start[,end]) => seq with range added.";
"remove(seq,start[,end]) => seq with range removed.";
"  both assume start<=end.";
remove = verb == "remove";
seq = args[1];
start = args[2];
s = (start == $minint) ? 1 | $list_utils:find_insert(seq, start - 1);
if (length(args) < 3)
    return {@seq[1..s - 1], @((s + remove) % 2) ? {start} | {}};
else
    e = $list_utils:find_insert(seq, after = args[3] + 1);
    return {@seq[1..s - 1], @((s + remove) % 2) ? {start} | {}, @((e + remove) % 
2) ? {after} | {}, @seq[e..length(seq)]};
endif
.


contains:
":contains(seq,elt) => true iff elt is in seq.";
return ($list_utils:find_insert(@args) + 1) % 2;
.


complement:
":complement(seq[,lower[,upper]]) => the sequence containing all integers *not* in 
seq.";
"If lower/upper are given, the resulting sequence is restricted to the specified 
range.";
"Bad things happen if seq is not a subset of [lower..upper]";
seq = args[1];
lower = {@args, $minint}[2];
if (length(args) >= 3)
    if (seq[l = length(seq)] >= (upper = args[3] + 1))
        seq[l..l] = {};
    else
        seq[l + 1..l] = {upper};
    endif
endif
if (seq && (seq[1] <= lower))
    return listdelete(seq, 1);
else
    return {lower, @seq};
endif
.


union:
":union(seq1,seq2,...)        => union of all sequences...";
if ({} in args)
    args = $list_utils:setremove_all(args, {});
endif
if (length(args) <= 1)
    return args ? args[1] | {};
endif
return this:_union(@args);
.


tostr:
if (!(seq = args[1]))
    return "empty";
endif
e = tostr((seq[1] == $minint) ? "" | seq[1]);
for i in [2..length(seq)]
    e = e + ((i % 2) ? tostr(", ", seq[i]) | ((seq[i] == (seq[i - 1] + 1)) ? "" | 
tostr("..", seq[i] - 1)));
endfor
return e + ((length(seq) % 2) ? ".." | "");
.


for:
":for([n,]seq,obj,verb,@args) => for s in (seq) obj:verb(s,@args); endfor";
if (typeof(n = args[1]) == NUM)
    args = listdelete(args, 1);
    seq = args[1];
else
    seq = n;
    n = 1;
endif
if (seq[1] == $minint)
    return E_RANGE;
endif
object = args[2];
vname = args[3];
args = args[4..length(args)];
for r in [1..length(seq) / 2]
    for i in [seq[(2 * r) - 1]..seq[2 * r] - 1]
        if (typeof(object:(vname)(@listinsert(args, i, n))) == ERR)
            return;
        endif
    endfor
endfor
if (length(seq) % 2)
    i = seq[length(seq)];
    while (1)
        if (typeof(object:(vname)(@listinsert(args, i, n))) == ERR)
            return;
        endif
        i = i + 1;
    endwhile
endif
.


extract:
"extract(seq,array) => list of elements of array with indices in seq.";
if (alen = length(array = args[2]))
    e = $list_utils:find_insert(seq = args[1], 1);
    s = $list_utils:find_insert(seq, alen);
    seq = {@(e % 2) ? {} | {1}, @seq[e..s - 1], @(s % 2) ? {} | {alen + 1}};
    ret = {};
    for i in [1..length(seq) / 2]
        $command_utils:suspend_if_needed(0);
        ret = {@ret, @array[seq[(2 * i) - 1]..seq[2 * i] - 1]};
    endfor
    return ret;
else
    return {};
endif
.


tolist:
seq = args[1];
if (!seq)
    return {};
else
    if (length(seq) % 2)
        seq = {@seq, $minint};
    endif
    l = {};
    for i in [1..length(seq) / 2]
        for j in [seq[(2 * i) - 1]..seq[2 * i] - 1]
            l = {@l, j};
        endfor
    endfor
    return l;
endif
.


from_list:
":fromlist(list) => corresponding sequence.";
return this:from_sorted_list($list_utils:sort(args[1]));
.


from_sorted_list:
":from_sorted_list(sorted_list) => corresponding sequence.";
if (!(lst = args[1]))
    return {};
else
    seq = {i = lst[1]};
    next = i + 1;
    for i in (listdelete(lst, 1))
        if (i != next)
            seq = {@seq, next, i};
        endif
        next = i + 1;
    endfor
    return (next == $minint) ? seq | {@seq, next};
endif
.


first:
return (seq = args[1]) ? seq[1] | E_NONE;
.


last:
return (seq = args[1]) ? ((len = length(seq)) % 2) ? $minint - 1 | (seq[len] - 1) 
| E_NONE;
.


size:
":size(seq) => number of elements in seq";
"  for sequences consisting of more than half of the 4294967298 available integers, 
this returns a negative number, which can either be interpreted as (cardinality - 
4294967298) or -(size of complement sequence)";
n = 0;
for i in (seq = args[1])
    n = i - n;
endfor
return (length(seq) % 2) ? $minint - n | n;
.


from_string:
":from_string(string) => corresponding sequence or E_INVARG";
"  string should be a comma separated list of numbers and";
"  number..number ranges";
su = $string_utils;
if (!(words = su:explode(su:strip_chars(args[1], " "), ",")))
    return {};
endif
parts = {};
for word in (words)
    to = index(word, "..");
    if ((!to) && su:is_numeric(word))
        part = {tonum(word), tonum(word) + 1};
    elseif (to)
        if (to == 1)
            start = $minint;
        elseif (su:is_numeric(start = word[1..to - 1]))
            start = tonum(start);
        else
            return E_INVARG;
        endif
    end = word[to + 2..length(word)];
    if (!end)
        part = {start};
    elseif (!su:is_numeric(end))
        return E_INVARG;
    elseif ((end = tonum(end)) >= start)
        part = {start, end + 1};
    else
        part = {};
    endif
else
    return E_INVARG;
endif
parts = {@parts, part};
endfor
return this:union(@parts);
.


firstn:
":firstn(seq,n) => first n elements of seq as a sequence.";
if ((n = args[2]) <= 0)
    return {};
endif
l = length(seq = args[1]);
s = 1;
while (s <= l)
    n = n + seq[s];
    if ((s >= l) || (n <= seq[s + 1]))
        return {@seq[1..s], n};
    endif
    n = n - seq[s + 1];
    s = s + 2;
endwhile
return seq;
.


lastn:
":lastn(seq,n) => last n elements of seq as a sequence.";
n = args[2];
if ((l = length(seq = args[1])) % 2)
    return {$minint - n};
else
    s = l;
    while (s)
        n = seq[s] - n;
        if (n >= seq[s - 1])
            return {n, @seq[s..l]};
        endif
        n = seq[s - 1] - n;
        s = s - 2;
    endwhile
    return seq;
endif
.


range:
":range(start,end) => sequence corresponding to [start..end] range";
return ((start = args[1]) <= (end = args[2])) ? {start, end + 1} | {};
.


expand:
":expand(seq,eseq[,include=0])";
"eseq is assumed to be a finite sequence consisting of intervals ";
"[f1..a1-1],[f2..a2-1],...  We map each element i of seq to";
"  i               if               i < f1";
"  i+(a1-f1)       if         f1 <= i < f2-(a1-f1)";
"  i+(a1-f1+a2-f2) if f2-(a1-f1) <= i < f3-(a2-f2)-(a1-f1)";
"  ...";
"returning the resulting sequence if include=0,";
"returning the resulting sequence unioned with eseq if include=1;";
old = args[1];
insert = args[2];
exclude = !{@args, 0}[3];
if (!insert)
    return old;
elseif (((ilen = length(insert)) % 2) || (insert[1] == $minint))
    return E_TYPE;
endif
olast = length(old);
ilast = length(insert);
"... find first o for which old[o] >= insert[1]...";
ifirst = insert[i = 1];
    o = $list_utils:find_insert(old, ifirst - 1);
    if (o > olast)
        return ((olast % 2) == exclude) ? {@old, @insert} | old;
    endif
    new = old[1..o - 1];
    oe = old[o];
    diff = 0;
    while (1)
        "INVARIANT: oe == old[o]+diff";
        "INVARIANT: oe >= ifirst == insert[i]";
        "... at this point we need to dispose of the interval ifirst..insert[i+1]";
        if (oe == ifirst)
            new = {@new, insert[i + ((o % 2) == exclude)]};
            if (o >= olast)
                return ((olast % 2) == exclude) ? {@new, @insert[i + 2..ilast]} | 
new;
            endif
            o = o + 1;
        else
            if ((o % 2) != exclude)
                new = {@new, @insert[i..i + 1]};
            endif
        endif
        "... advance i...";
        diff = (diff + insert[i + 1]) - ifirst;
        if ((i = i + 2) > ilast)
            for oe in (old[o..olast])
                new = {@new, oe + diff};
            endfor
            return new;
        endif
        ifirst = insert[i];
            "... find next o for which old[o]+diff >= ifirst )...";
            while ((oe = old[o] + diff) < ifirst)
                new = {@new, oe};
                if (o >= olast)
                    return ((olast % 2) == exclude) ? {@new, @insert[i..ilast]} | 
new;
                endif
                o = o + 1;
            endwhile
        endwhile
.


contract:
":contract(seq,cseq)";
"cseq is assumed to be a finite sequence consisting of intervals ";
"[f1..a1-1],[f2..a2-1],...  From seq, we remove any elements that ";
"are in those ranges and map each remaining element i to";
"  i               if       i < f1";
"  i-(a1-f1)       if a1 <= i < f2";
"  i-(a1-f1+a2-f2) if a2 <= i < f3 ...";
"returning the resulting sequence.";
"";
"For any finite sequence cseq, the following always holds:";
"  :contract(:expand(seq,cseq,include),cseq)==seq";
old = args[1];
removed = args[2];
if (!removed)
    return old;
elseif (((rlen = length(removed)) % 2) || (removed[1] == $minint))
    return E_TYPE;
endif
rfirst = removed[1];
ofirst = $list_utils:find_insert(old, rfirst - 1);
new = old[1..ofirst - 1];
diff = 0;
rafter = removed[r = 2];
for o in [ofirst..olast = length(old)]
    while (old[o] > rafter)
        if ((o - ofirst) % 2)
            new = {@new, rfirst - diff};
            ofirst = o;
        endif
        diff = (diff + rafter) - rfirst;
        if (r >= rlen)
            for oe in (old[o..olast])
                new = {@new, oe - diff};
            endfor
            return new;
        endif
        rfirst = removed[r + 1];
        rafter = removed[r = r + 2];
    endwhile
    if (old[o] < rfirst)
        new = {@new, old[o] - diff};
        ofirst = o + 1;
    endif
endfor
return ((olast - ofirst) % 2) ? new | {@new, rfirst - diff};
.


_union:
":_union(seq,seq,...)";
"assumes all seqs are nonempty and that there are at least 2";
nargs = length(args);
"args  -- list of sequences.";
"nexts -- nexts[i] is the index in args[i] of the start of the first";
"         interval not yet incorporated in the return sequence.";
"heap  -- a binary tree of indices into args/nexts represented as a list where";
"         heap[1] is the root and the left and right children of heap[i]";
"         are heap[2*i] and heap[2*i+1] respectively.  ";
"         Parent index h is <= both children in the sense of args[h][nexts[h]].";
"         heap[i]==0 indicates a nonexistant child; we fill out the array with";
"         zeros so that length(heap)>2*length(args).";
"...initialize heap...";
heap = {0, 0, 0, 0, 0};
nexts = {1, 1};
hlen2 = 2;
while (hlen2 < nargs)
    nexts = {@nexts, @nexts};
    heap = {@heap, @heap};
    hlen2 = hlen2 * 2;
endwhile
for n in [-nargs..-1]
    s1 = args[i = -n][1];
    while ((hleft = heap[2 * i]) && (s1 > (m = min(la = args[hleft][1], (hright = 
heap[(2 * i) + 1]) ? args[hright][1] | $maxint))))
        if (m == la)
            heap[i] = hleft;
            i = 2 * i;
        else
            heap[i] = hright;
            i = (2 * i) + 1;
        endif
    endwhile
    heap[i] = -n;
endfor
"...";
"...find first interval...";
h = heap[1];
rseq = {args[h][1]};
if (length(args[h]) < 2)
    return rseq;
endif
current_end = args[h][2];
nexts[h] = 3;
"...";
while (1)
    if (length(args[h]) >= nexts[h])
        "...this sequence has some more intervals in it...";
    else
        "...no more intevals left in this sequence, grab another...";
        h = heap[1] = heap[nargs];
        heap[nargs] = 0;
        if ((nargs = nargs - 1) > 1)
        elseif (args[h][nexts[h]] > current_end)
            return {@rseq, current_end, @args[h][nexts[h]..length(args[h])]};
        elseif ((i = $list_utils:find_insert(args[h], current_end)) % 2)
            return {@rseq, current_end, @args[h][i..length(args[h])]};
        else
            return {@rseq, @args[h][i..length(args[h])]};
        endif
    endif
    "...";
    "...sink the top sequence...";
    i = 1;
    first = args[h][nexts[h]];
    while ((hleft = heap[2 * i]) && (first > (m = min(la = args[hleft][nexts[hleft]], 
(hright = heap[(2 * i) + 1]) ? args[hright][nexts[hright]] | $maxint))))
        if (m == la)
            heap[i] = hleft;
            i = 2 * i;
        else
            heap[i] = hright;
            i = (2 * i) + 1;
        endif
    endwhile
    heap[i] = h;
    "...";
    "...check new top sequence ...";
    if (args[h = heap[1]][nexts[h]] > current_end)
        "...hey, a new interval! ...";
        rseq = {@rseq, current_end, args[h][nexts[h]]};
        if (length(args[h]) <= nexts[h])
            return rseq;
        endif
        current_end = args[h][nexts[h] + 1];
        nexts[h] = nexts[h] + 2;
    else
        "...first interval overlaps with current one ...";
        i = $list_utils:find_insert(args[h], current_end);
        if (i % 2)
            nexts[h] = i;
        elseif (i > length(args[h]))
            return rseq;
        else
            current_end = args[h][i];
            nexts[h] = i + 1;
        endif
    endif
endwhile
.


intersection:
":intersection(seq1,seq2,...) => intersection of all sequences...";
if ((U = {$minint}) in args)
    args = $list_utils:setremove_all(args, U);
endif
if (length(args) <= 1)
    return args ? args[1] | U;
endif
return this:complement(this:_union(@$list_utils:map_arg(this, "complement", args)));
.



PROPERTY DATA: