/* PLMAP.C : Palette map and unmap functions.  The primary functions
** here are makepal() which performs color space reduction and map()
** which performs the mapping.
*/

#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include <float.h>
#include <time.h>

#include "piclab.h"
#include "sierra"

/* LOCAL places an object in the segment named "_DATA" using the compiler's
** _based/_segname extension (NOTE(review): Microsoft C specific -- confirm
** the toolchain before porting).
*/
#define LOCAL _based(_segname("_DATA"))

/* qr[0..2] hold the colour currently being looked up; qr[3] is used by the
** 1.91 hash code to carry the matched index.  pr[] holds the clamped,
** error-adjusted pixel in the dithering paths.
*/
static U8 LOCAL qr[4], LOCAL pr[4];
/* Palette index of the most recent match. */
static int LOCAL match;

/* NOTE: both macros evaluate their arguments more than once, so do not
** pass expressions with side effects.
*/
#ifndef MIN
#define MIN(a,b)        ((a)<(b)?(a):(b))
#endif
#ifndef MAX
#define MAX(a,b)        ((a)>(b)?(a):(b))
#endif

/* Selects the map() implementation compiled below: 191 builds the kd-tree
** version, any other value builds the 1.71/1.92 version.
*/
#define MAPVER 192
#if (MAPVER == 191)

/*
   The following MAP code came from Lee's "1.92" source, and is
   essentially identical to 1.91's code. It shows promise but has
   some problems.
*/

/* Interior node of the colour kd-tree built by buildtree(). */
struct kdnonterm
{
   int lower, higher;           /* kdtree[] indices of the two children */
   U8 d, pv;                    /* split component (0..2) and pivot value;
                                 * search() descends into 'lower' when
                                 * qr[d] <= pv */
};

/* Leaf node: up to four palette entries. */
struct kdterm
{
   int size;                    /* number of entries used in v[] */
   U8 v[4][4];                  /* per entry: [0..2] the three gcmap
                                 * components, [3] the palette index */
};

/* Squared distance from qr[] to the best match found so far. */
static long LOCAL matchdist;
/* Colour -> index cache, allocated and cleared in map(). */
static long *LOCAL hashtable;
/* Probe increments for the hash table, selected by low-order colour bits. */
static int primes[16] =
{29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97};

/* Index of the next unused slot in kdtree[]; reset by map(). */
static int nextnode;

/* Node pool for the kd-tree; node 0 is the root that find_nearest() starts
** from.  NOTE(review): buildtree() does not check this bound -- 128 looks
** sized for a palette of at most 256 entries, confirm.
*/
static struct kdnode
{
   int isterm;                  /* nonzero: u.t is valid, else u.n */
   union
   {
      struct kdnonterm n;
      struct kdterm t;
   } u;
} LOCAL kdtree[128];

/* Bounds of the region currently being searched, one pair per component. */
static int LOCAL upper[3], LOCAL lower[3];

/*
** buildtree : recursively build the kd-tree over a set of palette entries.
**
**   size     number of palette indices in subfile
**   subfile  the indices themselves; reordered in place (sorted along the
**            chosen split component at every level)
**
** Returns the kdtree[] slot used for the root of this subtree.  Sets of
** four or fewer entries become leaves; larger sets are split at the
** median of the component with the largest spread.
*/
int
buildtree(int size, U8 * subfile)
{
   struct kdnode LOCAL *slot;
   U8 lo[3], hi[3], val, *chan;
   int self, axis, half, rest, n, k, key, moved, range[3];

   /* Claim the next free node. */
   self = nextnode++;
   slot = &kdtree[self];

   if (size <= 4)
   {
      /* Leaf: copy each entry's components and remember its index. */
      slot->isterm = 1;
      slot->u.t.size = size;
      for (n = 0; n < size; ++n, ++subfile)
      {
         slot->u.t.v[n][3] = *subfile;
         for (axis = 0; axis < 3; ++axis)
            slot->u.t.v[n][axis] = gcmap[axis][*subfile];
      }
      return self;
   }

   slot->isterm = 0;

   /* Extent of the set along each component. */
   for (axis = 0; axis < 3; ++axis)
   {
      lo[axis] = 0xFF;
      hi[axis] = 0x00;
      for (n = 0; n < size; ++n)
      {
         val = gcmap[axis][subfile[n]];
         if (val < lo[axis])
            lo[axis] = val;
         if (val > hi[axis])
            hi[axis] = val;
      }
      range[axis] = hi[axis] - lo[axis];
   }

   /* Split along the widest component; ties fall to the later one. */
   axis = (range[0] > range[1]) ? 0 : 1;
   if (range[axis] <= range[2])
      axis = 2;
   slot->u.n.d = axis;

   half = (size >> 1);
   rest = size - half;
   chan = gcmap[axis];

   /* Stable insertion sort of the indices by the split component. */
   for (n = 1; n < size; ++n)
   {
      key = chan[subfile[n]];
      k = 0;
      while (k < n && chan[subfile[k]] <= key)
         ++k;
      if (k < n)
      {
         moved = subfile[n];
         memmove(subfile + k + 1, subfile + k, n - k);
         subfile[k] = moved;
      }
   }
   slot->u.n.pv = gcmap[axis][subfile[half - 1]];

   /* Build the smaller half first (this fixes the node numbering). */
   if (half < rest)
   {
      if (half > 0)
         slot->u.n.lower = buildtree(half, subfile);
      if (rest > 0)
         slot->u.n.higher = buildtree(rest, subfile + half);
   }
   else
   {
      if (rest > 0)
         slot->u.n.higher = buildtree(rest, subfile + half);
      if (half > 0)
         slot->u.n.lower = buildtree(half, subfile);
   }
   return self;
}

/*
** ball_within_bounds : test whether the sphere of squared radius matchdist
** around qr[] lies strictly inside the current lower[]/upper[] bounds.
**
** Returns -1 when it does (no bound is within reach, so the search can
** stop) and 0 when the sphere touches or crosses any bound.
*/
static int
ball_within_bounds(void)
{
   long dlo, dhi;
   int axis = 0;

   while (axis < 3)
   {
      dlo = (long) (qr[axis] - lower[axis]);
      dhi = (long) (qr[axis] - upper[axis]);
      if (dlo * dlo <= matchdist || dhi * dhi <= matchdist)
         return 0;
      ++axis;
   }
   return -1;
}

/*
** bounds_overlap_ball : test whether the region described by lower[] and
** upper[] can still contain a colour closer to qr[] than the best match
** found so far.
**
** Accumulates the squared distance from qr[] to the region, component by
** component.  Returns 0 as soon as that distance exceeds matchdist (the
** region is out of reach and need not be searched) and 1 otherwise.
**
** The previous version returned these two values the other way round,
** so search() descended into the far subtree only when it could not hold
** a better match and skipped it when it could.
*/
static int
bounds_overlap_ball(void)
{
   long sum, gap;
   int p;

   sum = 0L;
   for (p = 0; p < 3; ++p)
   {
      if (qr[p] < lower[p])
         gap = (long) lower[p] - (long) qr[p];
      else if (qr[p] > upper[p])
         gap = (long) qr[p] - (long) upper[p];
      else
         continue;              /* inside the bounds on this component */

      sum += gap * gap;
      if (sum > matchdist)
         return 0;              /* region lies entirely outside the ball */
   }
   return 1;
}


/*
** search : nearest-neighbour search of the kd-tree rooted at 'node' for the
** colour in qr[0..2].
**
** Updates match/matchdist whenever a closer palette entry is found and
** narrows lower[]/upper[] while descending.  Returns -1 when the best match
** is known to be final (the ball around qr[] lies inside the current
** bounds) so that callers can stop immediately, and 0 otherwise.
**
** Fixes relative to the earlier code:
**   - kn used to be static.  The recursive call overwrote it, so the
**     subsequent kn->higher / kn->lower referred to a descendant's node
**     instead of this one.  It is now an automatic variable.
**   - the first squared term was computed in int, which overflows on a
**     16-bit int (255 * 255); it is now computed in long like the others.
*/
static int
search(int node)
{
   int p, d, temp;              /* Must be auto for recursion */
   struct kdnonterm LOCAL *kn;  /* auto as well: used after recursing */

   /* These are only used before any recursive call, so they may stay
    * static. */
   static struct kdnode LOCAL *LOCAL kp;
   static struct kdterm LOCAL *LOCAL kt;
   static long LOCAL dist;
   static int LOCAL b;

   kp = kdtree + node;

   if (kp->isterm)
   {
      kt = &kp->u.t;

      /* Leaf: compare against every entry, abandoning an entry as soon
       * as its partial distance reaches the current best. */
      for (b = 0; b < kt->size; ++b)
      {
         dist = (long) (qr[0] - kt->v[b][0]) * (long) (qr[0] - kt->v[b][0]);
         if (dist < matchdist)
         {
            dist += (long) (qr[1] - kt->v[b][1]) * (long) (qr[1] - kt->v[b][1]);
            if (dist < matchdist)
            {
               dist += (long) (qr[2] - kt->v[b][2]) * (long) (qr[2] - kt->v[b][2]);

               if (dist < matchdist)
               {
                  match = kt->v[b][3];
                  matchdist = dist;
               }
            }
         }
      }
      return ball_within_bounds();
   }

   kn = &kp->u.n;
   d = kn->d;
   p = kn->pv;

   /* First descend into the side of the split that contains qr[]. */
   if (qr[d] <= p)
   {
      temp = upper[d];
      upper[d] = p;
      if (search(kn->lower) == -1)
         return -1;
      upper[d] = temp;
   }
   else
   {
      temp = lower[d];
      lower[d] = p;
      if (search(kn->higher) == -1)
         return -1;
      lower[d] = temp;
   }

   /* Then visit the other side, but only if it is still within reach. */
   if (qr[d] <= p)
   {
      temp = lower[d];
      lower[d] = p;
      if (bounds_overlap_ball())
      {
         if (search(kn->higher) == -1)
            return -1;
      }
      lower[d] = temp;
   }
   else
   {
      temp = upper[d];
      upper[d] = p;
      if (bounds_overlap_ball())
      {
         if (search(kn->lower) == -1)
            return -1;
      }
      upper[d] = temp;
   }

   return ball_within_bounds();
}

/*
** find_nearest : set 'match' to the palette index found by a kd-tree
** search for the colour in qr[0..2].
**
** Starts from bounds that lie outside the 0..255 component range and a
** best distance of 3 * 65535, then searches from the root (node 0).
*/
static void
find_nearest()
{
   int axis;

   for (axis = 2; axis >= 0; --axis)
   {
      lower[axis] = -1000;
      upper[axis] = 1000;
   }
   matchdist = 3L * 65535L;
   match = 0;

   search(0);
}

void
doline(U16 pwidth, U8 * rgb[3], U8 * indices)
{
   static S16 *this[3], *next[3], *next2[3], *temp[3];
   static int p, x, e[3], e1[3], e2[3];
   int i, d, w2, t3, t4, t5, h1, h2;
   long l;
   S8 *sp;

   if (dither == 0)
   {
      for (x = 0; x < pwidth; ++x)
      {
         qr[0] = *rgb[0]++;
         qr[1] = *rgb[1]++;
         qr[2] = *rgb[2]++;

         h1 = ((((qr[0] & 0xF0) << 4) | (qr[1] & 0xF8)) << 2) | (qr[2] >> 3);
         h2 = primes[(qr[0] & 0x08) | (qr[1] & 0x04) | (qr[2] & 3)];
         match = -1;

         for (i = 0; i < 20; ++i)
         {
            l = (hashtable[h1] & 0xFFFFFFL);
            if (l == 0L)
            {
               find_nearest();
               qr[3] = (U8) match;
               hashtable[h1] = *(long *) qr;
               break;
            }
            else if (l == ((*(long *) qr) & 0xFFFFFFL))
            {
               match = (int) (*((U8 *) (hashtable + h1) + 3));
               break;
            }
            h1 = (h1 + h2) & 0x3FFF;
         }
         if (match == -1)
         {
            find_nearest();
         }
         *indices++ = (U8) match;
      }
   }
   else
   {
      if (startdither)
      {                         /* Here for the first time, intialize   */
         startdither = 0;
         w2 = 2 * pwidth + 8;
         for (p = 0; p < 3; ++p)
         {
            this[p] = (S16 *) talloc(w2) + 2;
            memset(this[p] - 2, 0, w2);
            next[p] = (S16 *) talloc(w2) + 2;
            memset(next[p] - 2, 0, w2);
            next2[p] = (S16 *) talloc(w2) + 2;
            memset(next2[p] - 2, 0, w2);
         }
         memset(indices, 0, pwidth);
      }

      e2[0] = e2[1] = e2[2] = 0;
      for (p = 0; p < 3; ++p)
      {
         next[p][0] = next[p][1] = next2[p][0] = next2[p][1] = 0;
      }
      for (x = 0; x < pwidth; ++x)
      {
         for (p = 0; p < 3; ++p)
         {
            i = (int) (rgb[p][x]) + this[p][x] + e1[p];

            if (i <= 0)
            {
               pr[p] = 0;
            }
            else if (i >= 255)
            {
               pr[p] = 255;
            }
            else
               pr[p] = (U8) i;

            qr[p] = ((pr[p] & 0xFC) | (qr[p] >> 6));
         }
         h1 = ((((qr[0] & 0xF0) << 4) | (qr[1] & 0xF8)) << 2) | (qr[2] >> 3);
         h2 = primes[(qr[0] & 0x0C) | ((qr[1] & 0x04) >> 1) | ((qr[2] & 0x04) >> 2)];
         match = -1;

         for (i = 0; i < 20; ++i)
         {
            l = (hashtable[h1] & 0xFFFFFFL);
            if (l == 0L)
            {
               find_nearest();
               qr[3] = (U8) match;
               hashtable[h1] = *(long *) qr;
               break;
            }
            else if (l == ((*(long *) qr) & 0xFFFFFFL))
            {
               match = (int) (*((U8 *) (hashtable + h1) + 3));
               break;
            }
            h1 = (h1 + h2) & 0x3FFF;
         }
         if (match == -1)
         {
            find_nearest();
         }
         *indices++ = (U8) match;

         e[0] = (int) (pr[0]) - (int) (gcmap[0][match]);
         e[1] = (int) (pr[1]) - (int) (gcmap[1][match]);
         e[2] = (int) (pr[2]) - (int) (gcmap[2][match]);

         for (p = 0; p < 3; ++p)
            if (e[p] != 0)
            {
               d = e[p] + 255;
               sp = sierra[d];
               t3 = *sp++;
               t4 = *sp++;
               t5 = *sp++;
               e1[p] = e2[p] + *sp++;
               e2[p] = t4;
               next[p][x - 2] += t5;
               next[p][x - 1] += t3;
               next2[p][x - 1] += t5;
               next[p][x] += *sp;
               next2[p][x] += t4;
               next[p][x + 1] += t3;
               next2[p][x + 1] = t5;
               next[p][x + 2] = t5;
            }
      }
      for (p = 0; p < 3; ++p)
      {
         temp[p] = this[p];
         this[p] = next[p];
         next[p] = next2[p];
         next2[p] = temp[p];
      }
   }
}

/*
** map : convert the three-plane image to a single plane of palette
** indices (1.91 kd-tree implementation).
**
** ac and av are not used.  Returns 0 on success, the begintransform()
** status if that fails, 2 when memory is short, 3 when a plane cannot be
** opened or written and 4 when a line cannot be read.
*/
int
map(int ac, argument * av)
{
   struct _plane *src[3], *dst;
   U8 *rows[3], *out, order[256];
   U32 heapmark, wanted;
   double started;
   int status, chan, k, row;

   status = begintransform();
   if (status != 0)
      return status;
   heapmark = mark();

   wanted = 65536L + 4L * BUFSIZE + 6L * (long) new->width;
   if (wanted > freemem())
      return 2;

   /* Colour cache used by doline(); cleared including its final byte. */
   hashtable = (long *) talloc((U16) 65535);
   if (hashtable == NULL)
      return 2;
   memset(hashtable, 0, (U16) 65535);
   *((char *) hashtable + 65535) = 0;

   pl_printf("Mapping image...\r\n");

   /* Build the kd-tree over every palette index. */
   for (k = 0; k < palette; ++k)
      order[k] = (U8) k;
   nextnode = 0;
   buildtree(palette, order);

   new->planes = 1;
   new->flags |= 2;

   for (chan = 0; chan < 3; ++chan)
   {
      src[chan] = openplane(chan, old, READ);
      if (src[chan] == NULL)
         return 3;
   }
   dst = openplane(0, new, WRITE);
   if (dst == NULL)
      return 3;

   startdither = 1;
   started = clock();
   for (row = 0; row < new->height; ++row)
   {
      for (chan = 0; chan < 3; ++chan)
      {
         if (getline(src[chan]) == 0)
            return 4;
         rows[chan] = src[chan]->linebuf;
      }
      out = dst->linebuf;
      doline(new->width, rows, out);
      if (putline(dst) == 0)
         return 3;
      pl_trace(row);
   }

   for (chan = 0; chan < 3; ++chan)
      closeplane(src[chan]);
   closeplane(dst);

   pl_printf(done);
   if ((debug & 1) != 0)
      pl_printf("elapsed = %f\n\r", (clock() - started) / CLK_TCK);
   release(heapmark);
   return 0;
}

#else

/*
   The following came from Lee's "1.71" source, and seems to be what was
   used in Lee's unpublished version 1.90. Works real well but has 15
   bit accuracy.
*/

/* rgbmap[i] caches the palette index for the 15-bit colour i (five bits per
** component); allocated in map().
*/
static U8 *rgbmap;
/* precalc is a bitmap with one bit per 15-bit colour: bit (i & 15) of word
** (i >> 4), selected through masks[], is set once rgbmap[i] has been
** filled in.
*/
static U16 *precalc, masks[] =
{
   0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100,
   0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001
};

void
doline171(U16 pwidth, U8 * rgb[3], U8 * indices)
{
   static S16 *this[3], *next[3], *next2[3], *temp[3];
   static U8 *up, c;
   static int p, x, c1[3], e[3], e1[3], e2[3], r, g, b;
   int w2, t3, t4, t5;
   register int i, d;
   S8 *sp;

   if (dither == 0)
   {
      for (x = 0; x < new->width; ++x)
      {
         c1[0] = ((int) *rgb[0]++);
         r = c1[0] >> 3;
         c1[1] = ((int) *rgb[1]++);
         g = c1[1] >> 3;
         c1[2] = ((int) *rgb[2]++);
         b = c1[2] >> 3;
         i = (((r << 5) + g) << 5) + b;

         if ((precalc[i >> 4] & masks[i & 15]) == 0)
         {
            precalc[i >> 4] |= masks[i & 15];
            rgbmap[i] = _nearest(c1);
         }
         *indices++ = rgbmap[i];
      }
   }
   else
   {
      if (startdither)
      {                         /* Here for the first time, intialize   */
         startdither = 0;
         w2 = 2 * pwidth + 8;
         for (p = 0; p < 3; ++p)
         {
            this[p] = (S16 *) talloc(w2) + 2;
            memset(this[p] - 2, 0, w2);
            next[p] = (S16 *) talloc(w2) + 2;
            memset(next[p] - 2, 0, w2);
            next2[p] = (S16 *) talloc(w2) + 2;
            memset(next2[p] - 2, 0, w2);
         }
         memset(indices, 0, pwidth);
      }

      e2[0] = e2[1] = e2[2] = 0;
      up = indices;
      for (p = 0; p < 3; ++p)
      {
         next[p][0] = next[p][1] = next2[p][0] = next2[p][1] = 0;
      }
      for (x = 0; x < pwidth; ++x)
      {
         for (p = 0; p < 3; ++p)
         {
            c1[p] = rgb[p][x] + this[p][x] + e1[p];
            if (c1[p] < 0)
               c1[p] = 0;
            if (c1[p] > 255)
               c1[p] = 255;
         }
         r = c1[0] >> 3;
         g = c1[1] >> 3;
         b = c1[2] >> 3;
         i = (((r << 5) + g) << 5) + b;

         if ((precalc[i >> 4] & masks[i & 15]) == 0)
         {
            precalc[i >> 4] |= masks[i & 15];
            rgbmap[i] = _nearest(c1);
         }
         *up++ = c = rgbmap[i];

         e[0] = c1[0] - gcmap[0][c];
         e[1] = c1[1] - gcmap[1][c];
         e[2] = c1[2] - gcmap[2][c];

         for (p = 0; p < 3; ++p)
            if (e[p] != 0)
            {
               d = e[p] + 255;
               sp = sierra[d];
               t3 = *sp++;
               t4 = *sp++;
               t5 = *sp++;
               e1[p] = e2[p] + *sp++;
               e2[p] = t4;
               next[p][x - 2] += t5;
               next[p][x - 1] += t3;
               next2[p][x - 1] += t5;
               next[p][x] += *sp;
               next2[p][x] += t4;
               next[p][x + 1] += t3;
               next2[p][x + 1] = t5;
               next[p][x + 2] = t5;
            }
      }
      for (p = 0; p < 3; ++p)
      {
         temp[p] = this[p];
         this[p] = next[p];
         next[p] = next2[p];
         next2[p] = temp[p];
      }
   }
}

/*
   This version of MAP was written by Tim Wegner. The code is based on
   Stephen Coy's TGA2GIF as well as Lee Crocker's earlier versions of map().
   It features variable resolution from 15 to 24 bits (TGA2GIF was 18 bits).

   Note: for the time being, this code supports both the new algorithm
   (as of version 1.92) and an earlier algorithm used in Piclab 1.71.
*/

/* causes C rather than ASM _nearest2() to be used */
/*#define _nearest2(x) _nearest2_c(x) */

/* uncomment next to compare C and asm nearest code */
/* #define TESTNEAREST */

#ifdef TESTNEAREST
/*
** err : squared distance between the colour p[0..2] and palette entry n.
*/
long
err(U8 * p, int n)
{
   long total, diff;
   int chan;

   total = 0L;
   for (chan = 0; chan < 3; ++chan)
   {
      diff = (long) gcmap[chan][n] - (long) p[chan];
      total += diff * diff;
   }
   return total;
}

extern U8 _nearest2_c(U8 * p);

/*
** _nearest3 : debugging wrapper that runs both _nearest2() and
** _nearest2_c() and reports any disagreement.
**
** NOTE: cv[] is modified -- each component is replaced by the centre of
** its 8-wide cell before the lookups.  When the two results differ the
** details are printed and a key is awaited ('q' exits the program).
** Always returns the C routine's answer.
*/
U8
_nearest3(U8 * cv)
{
   static long good = 0, bad = 0;
   long asm_err, c_err;
   int asm_idx, c_idx, k;

   for (k = 0; k < 3; ++k)
      cv[k] = (cv[k] & 0xf8) + 4;

   asm_idx = _nearest2(cv);
   c_idx = _nearest2_c(cv);

   if (asm_idx == c_idx)
   {
      good++;
      return ((U8) c_idx);
   }

   asm_err = err(cv, asm_idx);
   c_err = err(cv, c_idx);
   printf("ASM index %3d dist %6ld\n", asm_idx, asm_err);
   printf("C   index %3d dist %6ld\n", c_idx, c_err);
   printf("C better? %1d rgb %3d %3d %3d good %ld bad %ld\n\n",
          asm_err >= c_err, (int) cv[0], (int) cv[1], (int) cv[2], good, ++bad);
   if (getch() == 'q')
      exit(0);

   return ((U8) c_idx);
}

#define _nearest2(x) _nearest3(x)
#endif

extern U8 get_index(U8 *);
/* Nearest-palette-entry search; defined outside this file (the comments
** above refer to it as the ASM version).
*/
extern U8 _nearest2(U8 *);

/* One cached lookup result.  Nodes are chained per red/green cell and
** distinguished by their quantized blue value.
*/
typedef struct node
{
   U8 color;                    /* quantized blue component */
   U8 index;                    /* palette index for this colour */
   struct node *next;
} Node;

Node **grid = NULL;             /* linked list headers for each red/green
                                 * pair */

/* Right-shift applied to each component before it is used as a cache key;
** set by map().  get_index() bypasses the cache when rshft is 0.
*/
U8 rshft, gshft, bshft;

/*
        get_index -- get the palette index of an rgb color
*/

/*
** Look up the palette index for cv[0..2] through the grid[] cache.
**
** The red and green components (reduced by rshft/gshft) select a list in
** grid[]; the reduced blue component identifies the node within it.  A
** colour seen for the first time is resolved with _nearest2() and, if a
** node can be allocated, remembered.  A new node goes to the head of the
** list when the list is empty or its first node has a larger blue value,
** and to the tail otherwise.  When rshft is 0 the cache is bypassed.
*/
U8
get_index(U8 * cv)
{
   Node *head, *cur, *fresh;
   U16 rq, gq, bq, slot;
   U8 found;

   if (rshft == 0)
      return ((U8) _nearest2(cv));

   rq = (U16) (cv[0] >> rshft);
   gq = (U16) (cv[1] >> gshft);
   bq = (U16) (cv[2] >> bshft);

   slot = (rq << (8 - gshft)) + gq;
   head = grid[slot];

   if (head == NULL || head->color > bq)
   {
      /* Empty list, or the new colour sorts before the first node. */
      fresh = (Node *) talloc(sizeof(Node));
      if (fresh)
         memset(fresh, 0, sizeof(Node));
      found = _nearest2(cv);
      if (fresh)
      {
         fresh->index = found;
         fresh->color = bq;
         fresh->next = head;
         grid[slot] = fresh;
      }
      return found;
   }

   /* Walk the list until the colour or the last node is reached. */
   for (cur = head; cur->color != bq; cur = cur->next)
   {
      if (cur->next == (Node *) NULL)
      {
         fresh = (Node *) talloc(sizeof(Node));
         cur->next = fresh;
         if (fresh)
            memset(fresh, 0, sizeof(Node));
         found = _nearest2(cv);
         if (fresh)
         {
            fresh->color = bq;
            fresh->index = (U8) found;
            fresh->next = NULL;
         }
         return found;
      }
   }
   return cur->index;
}

/*
        _nearest2_c -- C version of _nearest2(): find the palette entry
        nearest to an rgb value
*/

/*
** Exhaustive search of the first 'palette' entries of gcmap[] for the one
** with the smallest squared distance to p[0..2].  On a tie the lowest
** index wins.  A candidate is dropped as soon as its partial sum reaches
** the best distance so far.
*/
U8
_nearest2_c(U8 * p)
{
   long best_dist, d2;
   int want[3], delta, chan, idx;
   U8 winner;

   for (chan = 0; chan < 3; ++chan)
      want[chan] = p[chan];

   /* Entry 0 is the initial candidate. */
   winner = 0;
   best_dist = 0L;
   for (chan = 0; chan < 3; ++chan)
   {
      delta = gcmap[chan][winner] - want[chan];
      best_dist += (long) delta * (long) delta;
   }

   for (idx = 1; idx < palette; idx++)
   {
      d2 = 0L;
      for (chan = 0; chan < 3; ++chan)
      {
         delta = gcmap[chan][idx] - want[chan];
         d2 += (long) delta * (long) delta;
         if (d2 >= best_dist)
            break;
      }
      if (chan == 3)
      {
         best_dist = d2;
         winner = (U8) idx;
      }
   }
   return winner;
}

/*
** doline192 : map one scan line of RGB data to palette indices using
** get_index() (1.92 algorithm).
**
**   pwidth   pixels in the line
**   rgb      the three component buffers for this line
**   indices  receives one palette index per pixel
**
** With dither == 0 each pixel is looked up directly.  Otherwise the error
** between the adjusted pixel and the chosen palette colour is diffused to
** the two pixels to the right and to the next two rows using the weights
** in sierra[] (five values per error amount; assumed to be the three
** shared row weights followed by the right-neighbour weight and the
** weight for the pixel directly below -- confirm against "sierra").
**
** Fixes relative to the earlier code:
**   - qr[] was built from its own previous value; it now replicates the
**     top bits of the current pixel pr[].
**   - e1[] was not reset at the start of a line.
**   - a pixel with zero error left the carried errors untouched, so the
**     previous pixel's error was applied a second time.
**   - the row buffers are rotated between lines, but next[] was being
**     re-initialised while the line was processed, discarding the error
**     it had collected as next2[].  Only next2[] is cleared now and
**     every contribution is accumulated.
*/
void
doline192(U16 pwidth, U8 * rgb[3], U8 * indices)
{
   static S16 *this[3], *next[3], *next2[3], *temp[3];
   static int p, x, e[3], e1[3], e2[3];
   int i, w2, t3, t4, t5;
   S8 *sp;

   if (dither == 0)
   {
      for (x = 0; x < pwidth; ++x)
      {
         qr[0] = *rgb[0]++;
         qr[1] = *rgb[1]++;
         qr[2] = *rgb[2]++;
         *indices++ = (U8) get_index(qr);
      }
      return;
   }

   /* Each row buffer has two guard entries on either side. */
   w2 = 2 * pwidth + 8;

   if (startdither)
   {                            /* Here for the first time, initialize  */
      startdither = 0;
      for (p = 0; p < 3; ++p)
      {
         this[p] = (S16 *) talloc(w2) + 2;
         memset(this[p] - 2, 0, w2);
         next[p] = (S16 *) talloc(w2) + 2;
         memset(next[p] - 2, 0, w2);
         next2[p] = (S16 *) talloc(w2) + 2;
         memset(next2[p] - 2, 0, w2);
      }
      memset(indices, 0, pwidth);
   }

   /* No error is carried across lines horizontally, and next2[] is the
    * buffer consumed two lines ago, so it starts empty. */
   for (p = 0; p < 3; ++p)
   {
      e1[p] = e2[p] = 0;
      memset(next2[p] - 2, 0, w2);
   }

   for (x = 0; x < pwidth; ++x)
   {
      for (p = 0; p < 3; ++p)
      {
         i = (int) (rgb[p][x]) + this[p][x] + e1[p];

         if (i <= 0)
            pr[p] = 0;
         else if (i >= 255)
            pr[p] = 255;
         else
            pr[p] = (U8) i;

         /* Keep six bits and replicate the top two into the low bits. */
         qr[p] = (U8) ((pr[p] & 0xFC) | (pr[p] >> 6));
      }
      *indices++ = match = (U8) get_index(qr);

      e[0] = (int) (pr[0]) - (int) (gcmap[0][match]);
      e[1] = (int) (pr[1]) - (int) (gcmap[1][match]);
      e[2] = (int) (pr[2]) - (int) (gcmap[2][match]);

      for (p = 0; p < 3; ++p)
      {
         if (e[p] == 0)
         {
            /* Nothing new to spread; just advance the carried errors. */
            e1[p] = e2[p];
            e2[p] = 0;
            continue;
         }
         sp = sierra[e[p] + 255];
         t3 = sp[0];
         t4 = sp[1];
         t5 = sp[2];
         e1[p] = e2[p] + sp[3];
         e2[p] = t4;
         next[p][x - 2] += t5;
         next[p][x - 1] += t3;
         next[p][x] += sp[4];
         next[p][x + 1] += t3;
         next[p][x + 2] += t5;
         next2[p][x - 1] += t5;
         next2[p][x] += t4;
         next2[p][x + 1] += t5;
      }
   }

   /* Rotate the row buffers for the next line. */
   for (p = 0; p < 3; ++p)
   {
      temp[p] = this[p];
      this[p] = next[p];
      next[p] = next2[p];
      next2[p] = temp[p];
   }
}

/*
** map : convert the three-plane image to a single plane of palette
** indices.
**
** ac and av are not used.  The global mapbits selects the method: 15 or
** more uses doline192() with a cache keyed on (8 - shift) bits per
** component, anything less uses the 1.71 table code.  mapbits is
** rewritten with the resolution actually used.
**
** Returns 0 on success (and when the image is already mapped or is mono),
** the begintransform() status if that fails, 2 when memory is short, 3
** when a plane cannot be opened or written and 4 when a line cannot be
** read.
**
** Fixes relative to the earlier code:
**   - the shift was computed directly in the unsigned rshft, so the
**     "< 0" clamp never fired and mapbits of 27 or more wrapped round and
**     was clamped to 3 (15-bit mapping) instead of 0 (24-bit).
**   - the talloc() result for grid was not checked before being cleared.
*/
int
map(int ac, argument * av)
{
   void (*doline) (U16, U8 **, U8 *);
   double elapsed;
   int old_mapbits;
   U8 *sp[3], *dp;
   int p, r, lines;
   U32 mem, needed;
   struct _plane *ip[3], *op;

   if (new->flags & 2)
   {
      pl_printf("Image already mapped.\r\n");
      return 0;
   }
   else if (new->planes == 1)
   {
      pl_printf("Cannot map mono image.\r\n");
      return 0;
   }

   if ((r = begintransform()) != 0)
      return r;
   mem = mark();

   bshft = gshft = rshft = 0;
   if (mapbits >= 15)           /* use new map code */
   {
      int extrabits, shift;

      doline = doline192;

      /* Work in a signed int so that out-of-range values can be seen
       * and clamped before they are stored in the U8 shift counts. */
      shift = 8 - mapbits / 3;
      if (shift < 0)
         shift = 0;
      else if (shift > 3)
         shift = 3;
      rshft = (U8) shift;
      bshft = gshft = rshft;

      /* Bits left over after dividing by three go to blue, then green. */
      extrabits = MIN(mapbits % 3, shift);
      if (rshft <= 3 && extrabits > 0)
      {
         bshft--;
         if (extrabits == 2)
            gshft--;
      }
      old_mapbits = mapbits;
      mapbits = 24 - rshft - gshft - bshft;
      if (debug && (old_mapbits != mapbits))
      {
         pl_printf("old mapbits %d new mapbits %d\r\n", old_mapbits, mapbits);
         pl_printf("extrabits %d\r\n", extrabits);
      }
      if (rshft > 0)            /* use linked list */
      {
         U32 gridbytes;

         /* One list header per red/green cell. */
         gridbytes = ((1L << (16 - rshft - gshft)) * sizeof(Node *));
         needed = gridbytes + 4L * BUFSIZE + 6L * (long) new->width;
         if (needed > freemem())
            return 2;
         if (gridbytes > (U16) 65535)
            gridbytes = (U16) 65535;
         if (debug)
            printf("map array memory %lu\r\n", gridbytes);
         grid = (Node **) talloc((U16) gridbytes);
         if (grid == NULL)
            return 2;
         memset(grid, 0, (U16) gridbytes);
         if (gridbytes == (U16) 65535)
         {
            /* NOTE(review): this writes the byte at offset 65535, one
             * past the size requested above; presumably talloc() hands
             * out a full 64K block for this size -- confirm. */
            if (debug)
               printf("clearing last byte of map array\r\n");
            *((char *) grid + (U16) 65535) = 0;
         }
      }
      else
         /* brute force pixel-by-pixel method, no linked list */
      {
         needed = 4L * BUFSIZE + 6L * (long) new->width;
         if (needed > freemem())
            return 2;
      }
   }
   else
      /* use old 171 code */
   {
      doline = doline171;
      needed = 32768L + 4L * BUFSIZE + 6L * (long) new->width + 4120L;
      if (needed > freemem())
         return 2;

      if ((rgbmap = (U8 *) talloc((U16) 32768)) == NULL)
         return 2;
      if ((precalc = (U16 *) talloc(4096)) == NULL)
         return 2;
      memset(precalc, 0, 4096);
   }
   pl_printf("Mapping image...");
   if (mapbits >= 15)
      pl_printf("(using mapbits %d rgb %1d %1d %1d)", mapbits, 8 - rshft, 8 - gshft, 8 - bshft);
   else
      pl_printf("(using version 1.71 map algorithm)");
   pl_printf("\r\n");
   new->planes = 1;
   new->flags |= 2;

   for (p = 0; p < 3; ++p)
      if ((ip[p] = openplane(p, old, READ)) == NULL)
         return 3;
   if ((op = openplane(0, new, WRITE)) == NULL)
      return 3;

   startdither = 1;
   elapsed = clock();
   for (lines = 0; lines < new->height; ++lines)
   {
      for (p = 0; p < 3; ++p)
      {
         if (getline(ip[p]) == 0)
            return 4;
         sp[p] = ip[p]->linebuf;
      }
      dp = op->linebuf;
      (*doline) (new->width, sp, dp);
      if (putline(op) == 0)
         return 3;
      pl_trace(lines);
   }
   for (p = 0; p < 3; ++p)
      closeplane(ip[p]);
   closeplane(op);

   pl_printf(done);
   if ((debug & 1) != 0)
      printf("elapsed = %f\n\r", (clock() - elapsed) / CLK_TCK);
   release(mem);
   return 0;
   /* end map */
}


#endif

/*
** unmap : expand a palette-mapped image back to three planes by looking
** every index up in gcmap[].
**
** Extra arguments produce a warning.  Returns 0 on success (and when the
** image is not mapped), the begintransform() status if that fails, 3 when
** a plane cannot be opened or written and 4 when a line cannot be read.
*/
int
unmap(int ac, argument * av)
{
   struct _plane *src, *dst[3];
   U8 *out[3], *idx;
   U16 row, col;
   int status, chan;

   if ((new->flags & 2) == 0)
   {
      pl_printf("Image is not mapped.\r\n");
      return 0;
   }
   if (ac > 1)
      pl_warn(1);

   status = begintransform();
   if (status != 0)
      return status;

   new->planes = 3;
   new->flags &= ~2;

   src = openplane(0, old, READ);
   if (src == NULL)
      return 3;
   for (chan = 0; chan < 3; ++chan)
   {
      dst[chan] = openplane(chan, new, WRITE);
      if (dst[chan] == NULL)
         return 3;
   }

   pl_printf(working);
   for (row = 0; row < new->height; ++row)
   {
      if (getline(src) == 0)
         return 4;

      for (chan = 0; chan < 3; ++chan)
         out[chan] = dst[chan]->linebuf;
      idx = src->linebuf;

      /* One index in, one value out on each of the three planes. */
      for (col = 0; col < new->width; ++col, ++idx)
         for (chan = 0; chan < 3; ++chan)
            *out[chan]++ = gcmap[chan][*idx];

      for (chan = 0; chan < 3; ++chan)
         if (putline(dst[chan]) == 0)
            return 3;
      pl_trace(row);
   }

   closeplane(src);
   for (chan = 0; chan < 3; ++chan)
      closeplane(dst[chan]);
   pl_printf(done);
   return 0;
}

/*
** loadpal : read palette entries from a text .MAP file into gcmap[].
**
**   av[1]  file name (resolved against mapdir with extension "MAP")
**   av[2]  optional first palette entry to fill (default 0)
**   av[3]  optional number of entries (default: palette)
**
** Each line supplies three decimal numbers; anything after the third up
** to the end of the line is ignored.  Reading stops quietly at end of
** file.  Returns 0, 2 when the buffer cannot be allocated or 3 when the
** file cannot be opened.
**
** Fixes relative to the earlier code:
**   - a number terminated by end of file (no trailing newline) was
**     discarded instead of stored.
**   - an offset outside 0..255 or a negative count is now rejected with
**     the syntax message; a negative offset used to index before the
**     start of gcmap[].
*/
int
loadpal(int ac, argument * av)
{
   char *path, *buf;
   U32 mem;
   int h, i, j, c, v, offset, count;

   if (ac < 2)
   {
      pl_printf("Syntax is PLOAD <filename> <offset> <count>\r\n");
      return 0;
   }

   path = makepath(mapdir, av[1].cval, "MAP");
   offset = (ac > 2) ? (int) av[2].fval : 0;
   count = (ac > 3) ? (int) av[3].fval : palette;

   if (offset < 0 || offset > 255 || count < 0)
   {
      pl_printf("Syntax is PLOAD <filename> <offset> <count>\r\n");
      return 0;
   }
   if (count > 256 - offset)
      count = 256 - offset;
   if (ac > 4)
      pl_warn(1);

   mem = mark();
   if ((buf = talloc(BUFSIZE + 80)) == NULL)
      return 2;
   if ((h = p_open(path, buf, BUFSIZE, 80, READ)) < 0)
      return 3;

   pl_printf(reading, path);

   for (i = offset; i < offset + count; ++i)
   {
      for (j = 0; j < 3; ++j)
      {
         /* Skip everything up to the first digit. */
         do
         {
            if ((c = p_getc(h)) == -1)
               goto lpexit;
         } while (c < '0' || c > '9');

         /* Accumulate the digits; end of file also ends the number. */
         v = 0;
         do
         {
            v *= 10;
            v += (c - '0');
            c = p_getc(h);
         } while (c >= '0' && c <= '9');
         gcmap[j][i] = (U8) v;

         if (c == -1)
            goto lpexit;
      }
      /* Discard the remainder of the line. */
      while (c != '\n')
         if ((c = p_getc(h)) == -1)
            goto lpexit;
   }
 lpexit:
   p_close(h);
   release(mem);
   return 0;
}

/*
** savepal : write palette entries from gcmap[] to a text .MAP file, one
** "r g b" line per entry.
**
**   av[1]  file name (resolved against mapdir with extension "MAP")
**   av[2]  optional first palette entry to write (default 0)
**   av[3]  optional number of entries (default: palette)
**
** Returns 0, 2 when the buffer cannot be allocated or 3 when the file
** cannot be opened.
**
** Fix relative to the earlier code: an offset outside 0..255 or a
** negative count is now rejected with the syntax message; a negative
** offset used to read before the start of gcmap[].
*/
int
savepal(int ac, argument * av)
{
   char *path, *buf, line[80];
   U32 mem;
   int h, i, offset, count;

   if (ac < 2)
   {
      pl_printf("Syntax is PSAVE <filename> <offset> <count>\r\n");
      return 0;
   }

   path = makepath(mapdir, av[1].cval, "MAP");
   offset = (ac > 2) ? (int) av[2].fval : 0;
   count = (ac > 3) ? (int) av[3].fval : palette;

   if (offset < 0 || offset > 255 || count < 0)
   {
      pl_printf("Syntax is PSAVE <filename> <offset> <count>\r\n");
      return 0;
   }
   if (count > 256 - offset)
      count = 256 - offset;
   if (ac > 4)
      pl_warn(1);

   mem = mark();
   if ((buf = talloc(BUFSIZE + 80)) == NULL)
      return 2;
   if ((h = p_open(path, buf, BUFSIZE, 80, WRITE)) < 0)
      return 3;

   pl_printf(writing, path);
   for (i = offset; i < offset + count; ++i)
   {
      /* Format into a local buffer, then hand the text to the file's
       * line buffer. */
      pl_sprintf(line, "%d %d %d\r\n", gcmap[0][i], gcmap[1][i], gcmap[2][i]);
      strcpy(linebuf(h), line);
      p_putline(h, strlen(line));
   }
   p_close(h);
   release(mem);
   return 0;
}

/*
** graypal : read entries from a text .MAP file into graymap[], storing
** each value reduced to its low eight bits and shifted right by two.
**
**   av[1]  file name (resolved against mapdir with extension "MAP")
**   av[2]  optional first entry to fill (default 0)
**   av[3]  optional number of entries (default 256)
**
** Each line supplies three decimal numbers; anything after the third up
** to the end of the line is ignored.  Reading stops quietly at end of
** file.  Returns 0, 2 when the buffer cannot be allocated or 3 when the
** file cannot be opened.
**
** Fixes relative to the earlier code:
**   - a number terminated by end of file (no trailing newline) was
**     discarded instead of stored.
**   - an offset outside 0..255 or a negative count is now rejected with
**     the syntax message; a negative offset used to index before the
**     start of graymap[].
*/
int
graypal(int ac, argument * av)
{
   char *path, *buf;
   U32 mem;
   int h, i, j, c, v, offset, count;

   if (ac < 2)
   {
      pl_printf("Syntax is TGAPAL <filename> <offset> <count>\r\n");
      return 0;
   }

   path = makepath(mapdir, av[1].cval, "MAP");
   offset = (ac > 2) ? (int) av[2].fval : 0;
   count = (ac > 3) ? (int) av[3].fval : 256;

   if (offset < 0 || offset > 255 || count < 0)
   {
      pl_printf("Syntax is TGAPAL <filename> <offset> <count>\r\n");
      return 0;
   }
   if (count > 256 - offset)
      count = 256 - offset;
   if (ac > 4)
      pl_warn(1);

   mem = mark();
   if ((buf = talloc(BUFSIZE + 80)) == NULL)
      return 2;
   if ((h = p_open(path, buf, BUFSIZE, 80, READ)) < 0)
      return 3;

   pl_printf(reading, path);

   for (i = offset; i < offset + count; ++i)
   {
      for (j = 0; j < 3; ++j)
      {
         /* Skip everything up to the first digit. */
         do
         {
            if ((c = p_getc(h)) == -1)
               goto gpexit;
         } while (c < '0' || c > '9');

         /* Accumulate the digits; end of file also ends the number. */
         v = 0;
         do
         {
            v *= 10;
            v += (c - '0');
            c = p_getc(h);
         } while (c >= '0' && c <= '9');
         graymap[i][j] = (U8) ((v & 0xFF) >> 2);

         if (c == -1)
            goto gpexit;
      }
      /* Discard the remainder of the line. */
      while (c != '\n')
         if ((c = p_getc(h)) == -1)
            goto gpexit;
   }
 gpexit:
   p_close(h);
   release(mem);
   return 0;
}

/*
** egapal : build a 16-colour palette whose components are limited to the
** four levels 0x00, 0x55, 0xAA and 0xFF.
**
** Every pixel is reduced to two bits per component and counted in a
** 64-bin histogram.  Entries 0 and 1 are fixed at black and white; the
** remaining fourteen receive the most frequent other colours, in
** descending order of frequency.  Sets palette to 16 and crez to 2.
**
** ac and av are not used.  Returns 0 on success (and for a single-plane
** image), 2 when the histogram cannot be allocated, 3 when a plane cannot
** be opened and 4 when a line cannot be read.
**
** Fixes relative to the earlier code:
**   - components were reduced with (v + 32) >> 6, which gives 4 for
**     v >= 224 and so produced histogram indices up to 84 in a 64-entry
**     table.  They are now rounded to the nearest of the four levels with
**     (v + 42) / 85, which always yields 0..3.
**   - the talloc() result was used without being checked.
*/
int
egapal(int ac, argument * av)
{
   U32 mem, freq, *hist;
   int i, j, c, p, r, g, b;
   U8 *lp[3];
   U16 lines, x;
   struct _plane *ip[3];

   if (new->planes == 1)
   {
      pl_printf("Can only make palette for full-color image.\r\n");
      return 0;
   }
   mem = mark();
   if ((hist = (U32 *) talloc(64 * sizeof(U32))) == NULL)
      return 2;
   memset(hist, 0, 64 * sizeof(U32));

   for (p = 0; p < 3; ++p)
      if ((ip[p] = openplane(p, new, READ)) == NULL)
         return 3;
   pl_printf("Building histogram...\r\n");

   for (lines = 0; lines < new->height; ++lines)
   {
      for (p = 0; p < 3; ++p)
      {
         if (getline(ip[p]) == 0)
            return 4;
         else
            lp[p] = ip[p]->linebuf;
      }
      for (x = 0; x < new->width; ++x)
      {
         /* Nearest of the levels 0, 85, 170, 255 -> 0..3. */
         r = ((int) *lp[0]++ + 42) / 85;
         g = ((int) *lp[1]++ + 42) / 85;
         b = ((int) *lp[2]++ + 42) / 85;
         ++hist[(((r << 2) + g) << 2) + b];
      }
      pl_trace(lines);
   }
   for (p = 0; p < 3; ++p)
      closeplane(ip[p]);

   palette = 16;
   crez = 2;
   gcmap[0][0] = gcmap[1][0] = gcmap[2][0] = 0;
   gcmap[0][1] = gcmap[1][1] = gcmap[2][1] = 0xFF;

   for (i = 2; i < 16; ++i)
   {
      /* Most frequent remaining bin; bins 0 (black) and 63 (white) are
       * skipped because those colours are already in the palette. */
      freq = 0L;
      c = 0;
      for (j = 1; j < 63; ++j)
         if (freq < hist[j])
         {
            freq = hist[j];
            c = j;
         }
      hist[c] = 0L;
      gcmap[0][i] = (U8) (((c & 0x30) >> 4) * 0x55);
      gcmap[1][i] = (U8) (((c & 0x0C) >> 2) * 0x55);
      gcmap[2][i] = (U8) ((c & 0x03) * 0x55);
   }

   pl_printf(done);
   release(mem);
   return 0;
}

/**************************************
**
** Here begins the Wan, Wong, and Prusinkiewicz color reduction.
*/

/* One box of the colour space, with 32 levels per component. */
typedef struct
{
   double variance, mean[3];    /* filled in by boxstats() */
   U32 weight;                  /* divisor used for the means; presumably
                                 * the number of pixels in the box --
                                 * confirm where it is set */
   U16 freq[3][32];             /* per-component histogram of the box's
                                 * contents */
   U8 low[3], high[3];          /* extent per component; the loops treat
                                 * low as inclusive and high as exclusive */
} BOX;

static BOX *box;
/* Histogram indexed by (((r << 5) | g) << 5) | b, see updatefreq(). */
static U16 *hist1;
static U32 npixels;

/* boxstats: recompute the per-channel means and the total variance
** figure of a box from its freq[][] tables and weight.
**
** The variance field is first cleared; a box of zero weight is left
** that way (its means are not touched).  Otherwise, for every channel,
** the mean level is stored in mean[] and the quantity
** (sum of level^2 * count) - mean^2 * weight is added to the total
** that finally lands in the variance field.
*/
void
boxstats(BOX * thisbox)
{
   double sum, sumsq, avg, spread, count;
   int axis, level;

   spread = 0.0;
   thisbox->variance = 0.0;
   if (thisbox->weight == 0L)
      return;

   for (axis = 0; axis < 3; ++axis)
   {
      sum = 0.0;
      sumsq = 0.0;
      for (level = thisbox->low[axis]; level < thisbox->high[axis]; ++level)
      {
         count = (double) thisbox->freq[axis][level];
         sum += level * count;
         sumsq += level * level * count;
      }
      avg = sum / (double) thisbox->weight;
      thisbox->mean[axis] = avg;
      spread += sumsq - avg * avg * (double) thisbox->weight;
   }
   thisbox->variance = spread;
}

/* updatefreq: rebuild the per-channel frequency tables of box1 from
** the global histogram hist1, and take the same counts away from the
** tables of box2.
**
** box1's freq[][] is cleared first.  Every non-empty histogram cell
** inside box1's low/high bounds is then added to box1's three tables
** and subtracted from box2's.
*/
void
updatefreq(BOX * box1, BOX * box2)
{
   U16 count, *cell, rbase;
   int axis, red, green, blue;

   for (axis = 0; axis < 3; ++axis)
      memset(box1->freq[axis], 0, sizeof box1->freq[axis]);

   for (red = box1->low[0]; red < box1->high[0]; ++red)
   {
      rbase = red << 5;
      for (green = box1->low[1]; green < box1->high[1]; ++green)
      {
         /* Cells for consecutive blue levels are adjacent in hist1. */
         cell = hist1 + (((rbase | green) << 5) | box1->low[2]);
         for (blue = box1->low[2]; blue < box1->high[2]; ++blue, ++cell)
         {
            count = *cell;
            if (count != 0)
            {
               box1->freq[0][red] += count;
               box1->freq[1][green] += count;
               box1->freq[2][blue] += count;
               box2->freq[0][red] -= count;
               box2->freq[1][green] -= count;
               box2->freq[2][blue] -= count;
            }
         }
      }
   }
}

/* findcutpoint: try to split thisbox in two along channel p.
**
** If the box spans at most one level on that channel it cannot be
** cut: new1->variance is set to FLT_MAX and nothing else is written.
**
** Otherwise candidate cut levels are scanned from the midpoint of
** (low, mean) to the midpoint of (high, mean).  For each candidate a
** score is computed from the weight below the cut and the distance
** between the box mean and the accumulated partial mean; the highest
** score wins.  new1 receives the part below the cut and new2 the part
** from the cut upward, with weights, bounds, frequency tables and
** statistics brought up to date.
*/
void
findcutpoint(BOX * thisbox, int p, BOX * new1, BOX * new2)
{
   double partial, score, best, invweight, delta;
   int level, first, last, cut;
   U32 bestweight, running, count;

   if ((thisbox->high[p] - thisbox->low[p]) <= 1)
   {
      new1->variance = FLT_MAX;
      return;
   }

   first = (U16) ((thisbox->low[p] + thisbox->mean[p]) * 0.5);
   last = (U16) ((thisbox->high[p] + thisbox->mean[p]) * 0.5);

   /* Weight of everything below the first candidate level. */
   running = 0L;
   for (level = thisbox->low[p]; level < first; ++level)
      running += thisbox->freq[p][level];

   cut = first;
   bestweight = thisbox->weight;
   invweight = 1.0 / (double) thisbox->weight;
   partial = 0.0;
   best = -1.0;

   for (level = first; level <= last; ++level)
   {
      count = thisbox->freq[p][level];
      running += count;
      /* Nothing would be left for the upper half: stop scanning. */
      if (running == thisbox->weight)
         break;

      partial += (double) (level * count) * invweight;
      delta = thisbox->mean[p] - partial;
      score = ((double) running / (double) (thisbox->weight - running)) * delta * delta;
      if (score > best)
      {
         best = score;
         cut = level;
         bestweight = running;
      }
   }

   /* The cut falls just above the chosen level. */
   ++cut;

   *new2 = *thisbox;
   *new1 = *new2;
   new1->weight = bestweight;
   new2->weight -= bestweight;
   new1->high[p] = (U8) cut;
   new2->low[p] = (U8) cut;

   updatefreq(new1, new2);
   boxstats(new1);
   boxstats(new2);
}

/* cutbox: split thisbox along whichever channel gives the smallest
** combined variance of the two halves.
**
** A trial split is made on each channel using box[256..261] as scratch
** space (two scratch boxes per channel).  A channel on which the box
** cannot be cut is marked with FLT_MAX by findcutpoint().
**
** Returns 0 when thisbox has zero variance or weight, or cannot be
** cut on any channel; thisbox and newbox are then left untouched.
** Returns 1 after replacing *thisbox with the lower half and storing
** the upper half in *newbox.
*/
int
cutbox(BOX * thisbox, BOX * newbox)
{
   BOX *lower, *upper;
   double splitvar[3];
   int axis, best, uncuttable;

   if (thisbox->weight == 0L || thisbox->variance == 0.0)
      return 0;

   uncuttable = 0;
   for (axis = 0; axis < 3; ++axis)
   {
      lower = &box[256 + 2 * axis];
      upper = lower + 1;
      findcutpoint(thisbox, axis, lower, upper);

      splitvar[axis] = lower->variance;
      if (splitvar[axis] == FLT_MAX)
         ++uncuttable;
      else
         splitvar[axis] += upper->variance;
   }
   if (uncuttable == 3)
      return 0;

   /* Pick the channel with the least total variance; ties go to the
   ** lower-numbered channel. */
   if (splitvar[0] <= splitvar[1])
      best = (splitvar[0] <= splitvar[2]) ? 0 : 2;
   else
      best = (splitvar[1] <= splitvar[2]) ? 1 : 2;

   lower = &box[256 + 2 * best];
   *thisbox = lower[0];
   *newbox = lower[1];
   return 1;
}

/* greatestvar: find the box with the largest variance among
** box[0] .. box[nboxes - 1].
**
** When several boxes share the largest value the last of them is
** chosen.  Returns the index of that box, or -1 when the largest
** variance found is zero (which includes nboxes <= 0).
*/
int
greatestvar(int nboxes)
{
   double top = 0.0;
   int idx, best = -1;

   for (idx = 0; idx < nboxes; ++idx)
   {
      if (box[idx].variance >= top)
      {
         best = idx;
         top = box[idx].variance;
      }
   }
   return (top == 0.0) ? -1 : best;
}

/* makepal: build a color palette for the full-color image `new` by
** variance-based subdivision of a 32 x 32 x 32 color histogram.
**
** ac, av : command arguments.  With ac == 2 the second argument's
**          fval gives the requested palette size; with more than two
**          arguments a warning is issued.  The resulting value of the
**          global `palette` is limited to 1..256, because the box
**          array holds 256 boxes followed by six scratch boxes.
**
** If the image contains no more distinct 15-bit colors than the
** palette size, those colors are stored directly in gcmap[][].
** Otherwise the color cube is split repeatedly (always cutting the box
** of greatest variance) until the requested number of boxes exists or
** no box can be cut further, and each box's mean color is stored.
**
** Returns 0 on success (or when the image is not full-color),
**         2 if there is not enough free memory,
**         3 if a plane cannot be opened,
**         4 if a line cannot be read.
** On the error paths 3 and 4 any planes already opened are closed and
** the memory taken since mark() is released before returning.
*/
int
makepal(int ac, argument * av)
{
   register U16 *hp, *rf, *gf, *bf;
   U32 mem, needed, total;
   int i, p, r, g, b, curbox;
   int opened, rc;
   U8 *lp[3];
   U16 lines, x;
   struct _plane *ip[3];

   if (ac == 2)
   {
      palette = (int) ((*++av).fval);
   }
   if (ac > 2)
      pl_warn(1);

   /* Keep the palette size within what box[] can hold.  A negative
   ** value would also defeat the unsigned comparison against `total`
   ** further down. */
   if (palette < 1)
      palette = 1;
   else if (palette > 256)
      palette = 256;

   if (new->planes == 1)
   {
      pl_printf("Can only make palette for full-color image.\r\n");
      return 0;
   }
   mem = mark();
   needed = (3L * BUFSIZE) + 65536L + (262L * sizeof(BOX));
   if (needed > freemem())
      return 2;

   rc = 0;
   opened = 0;

   /* 256 result boxes plus 6 scratch boxes used by cutbox(). */
   box = (BOX *) talloc(262 * sizeof(BOX));
   memset(box, 0, 262 * sizeof(BOX));

   /* The histogram has 32768 U16 cells (65536 bytes).  The size passed
   ** here is 65535 and the last cell is cleared separately.
   ** NOTE(review): presumably the size argument is only 16 bits wide;
   ** confirm that talloc() really provides the final byte. */
   hist1 = (U16 *) talloc((U16) 65535);
   memset(hist1, 0, (U16) 65535);
   hist1[(U16) 32767] = 0;

   for (p = 0; p < 3; ++p)
   {
      if ((ip[p] = openplane(p, new, READ)) == NULL)
      {
         rc = 3;
         goto mpexit;
      }
      ++opened;
   }

   pl_printf("Building histogram...\r\n");
   rf = box[0].freq[0];
   gf = box[0].freq[1];
   bf = box[0].freq[2];
   total = npixels = 0L;

   for (lines = 0; lines < new->height; ++lines)
   {
      for (p = 0; p < 3; ++p)
      {
         if (getline(ip[p]) == 0)
         {
            rc = 4;
            goto mpexit;
         }
         lp[p] = ip[p]->linebuf;
      }
      for (x = 0; x < new->width; ++x)
      {
         /* Keep the top 5 bits of every channel. */
         r = (((int) *lp[0]++) >> 3);
         g = (((int) *lp[1]++) >> 3);
         b = (((int) *lp[2]++) >> 3);
         hp = hist1 + ((((U16) (r << 5) | g) << 5) | b);

         /* A pixel is counted only while none of the 16-bit counters
         ** it touches has saturated. */
         if ((*hp < 65535) && (rf[r] < 65535) &&
             (gf[g] < 65535) && (bf[b] < 65535))
         {

            ++npixels;
            if (++*hp == 1)
               ++total;         /* first pixel of a new color */
            ++rf[r];
            ++gf[g];
            ++bf[b];
         }
      }
      pl_trace(lines);
   }
   for (p = 0; p < opened; ++p)
      closeplane(ip[p]);
   opened = 0;

   /* total is a U32, so print it with a matching conversion. */
   pl_printf("\r%lu Total colors.\r\n", (unsigned long) total);
   if (total <= (U32) palette)
   {
      /* Few enough colors: store each one directly.  The cube is
      ** walked from the top down while total counts down to zero. */
      for (r = 31; r >= 0; --r)
      {
         for (g = 31; g >= 0; --g)
         {
            for (b = 31; b >= 0; --b)
            {
               if (hist1[(((r << 5) + g) << 5) + b])
               {
                  gcmap[0][--total] = (U8) ((255L * r) / 31L);
                  gcmap[1][total] = (U8) ((255L * g) / 31L);
                  gcmap[2][total] = (U8) ((255L * b) / 31L);
               }
            }
         }
      }
   }
   else
   {
      pl_printf("Dividing color space...\r\n");

      /* Box 0 starts out covering the whole cube. */
      box[0].low[0] = box[0].low[1] = box[0].low[2] = 0;
      box[0].high[0] = box[0].high[1] = box[0].high[2] = 32;
      box[0].weight = npixels;
      boxstats(&box[0]);

      for (curbox = 1; curbox < palette;)
      {
         if ((g = greatestvar(curbox)) == -1)
            break;
         if (cutbox(&box[g], &box[curbox]) == 0)
            box[g].variance = 0.0;      /* uncuttable: never pick it again */
         else
         {
            ++curbox;
            pl_printf("\r%d", curbox);
         }
      }
      pl_printf("\n");

      /* Each palette entry is its box's mean, scaled from 0..31 to 0..255. */
      for (i = 0; i < curbox; ++i)
      {
         for (p = 0; p < 3; ++p)
         {
            gcmap[p][i] = (U8) (box[i].mean[p] * (255.0 / 31.0));
         }
      }
   }
   pl_printf(done);

 mpexit:
   /* Only planes still open (error paths) are closed here. */
   for (p = 0; p < opened; ++p)
      closeplane(ip[p]);
   release(mem);
   return rc;
}
