Fix 'all at one page bug' in picksplit method of R-tree emulation. Add defense
authorTeodor Sigaev <teodor@sigaev.ru>
Tue, 7 Apr 2009 17:49:40 +0000 (17:49 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Tue, 7 Apr 2009 17:49:40 +0000 (17:49 +0000)
from buggy user-defined picksplit to GiST.

contrib/rtree_gist/rtree_gist.c
src/backend/access/gist/gist.c

index 6836cd251796a33b97e1e1c8743cb55813e8e398..1af6e0f63dfb2f5bb2273e61ed587d50c5fabfec 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- * $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/rtree_gist.c,v 1.7 2003/07/27 17:10:06 tgl Exp $
+ * $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/rtree_gist.c,v 1.7.4.1 2009/04/07 17:49:40 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -178,6 +178,72 @@ compare_KB(const void *a, const void *b)
    return (sa > sb) ? 1 : -1;
 }
 
+static void
+adjustBox(BOX *b, BOX *addon)
+{
+   if (b->high.x < addon->high.x)
+       b->high.x = addon->high.x;
+   if (b->low.x > addon->low.x)
+       b->low.x = addon->low.x;
+   if (b->high.y < addon->high.y)
+       b->high.y = addon->high.y;
+   if (b->low.y > addon->low.y)
+       b->low.y = addon->low.y;
+}
+
+/*
+ * Trivial split: half of entries will be placed on one page
+ * and another half - to another
+ */
+static void
+fallbackSplit(bytea *entryvec, GIST_SPLITVEC *v)
+{
+   OffsetNumber    i, 
+                   maxoff;
+   BOX            *unionL = NULL,
+                  *unionR = NULL;
+   int             nbytes;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 1;
+
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+   v->spl_nleft = v->spl_nright = 0;
+
+   for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+   {
+       BOX * cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key);
+
+       if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
+       {
+           v->spl_left[v->spl_nleft] = i;
+           if (unionL == NULL)
+           {
+               unionL = (BOX *) palloc(sizeof(BOX));
+               *unionL = *cur;
+           }
+           else
+               adjustBox(unionL, cur);
+
+           v->spl_nleft++;
+       }
+       else
+       {
+           v->spl_right[v->spl_nright] = i;
+           if (unionR == NULL)
+           {
+               unionR = (BOX *) palloc(sizeof(BOX));
+               *unionR = *cur;
+           }
+           else
+               adjustBox(unionR, cur);
+
+           v->spl_nright++;
+       }
+   }
+}
+
 /*
 ** The GiST PickSplit method
 ** New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree',
@@ -226,54 +292,25 @@ gbox_picksplit(PG_FUNCTION_ARGS)
                                   ))
            allisequal = false;
 
-       if (pageunion.high.x < cur->high.x)
-           pageunion.high.x = cur->high.x;
-       if (pageunion.low.x > cur->low.x)
-           pageunion.low.x = cur->low.x;
-       if (pageunion.high.y < cur->high.y)
-           pageunion.high.y = cur->high.y;
-       if (pageunion.low.y > cur->low.y)
-           pageunion.low.y = cur->low.y;
+       adjustBox(&pageunion, cur);
    }
 
-   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
-   listL = (OffsetNumber *) palloc(nbytes);
-   listR = (OffsetNumber *) palloc(nbytes);
-   unionL = (BOX *) palloc(sizeof(BOX));
-   unionR = (BOX *) palloc(sizeof(BOX));
    if (allisequal)
    {
-       cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[OffsetNumberNext(FirstOffsetNumber)].key);
-       if (memcmp((void *) cur, (void *) &pageunion, sizeof(BOX)) == 0)
-       {
-           v->spl_left = listL;
-           v->spl_right = listR;
-           v->spl_nleft = v->spl_nright = 0;
-           memcpy((void *) unionL, (void *) &pageunion, sizeof(BOX));
-           memcpy((void *) unionR, (void *) &pageunion, sizeof(BOX));
-
-           for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
-           {
-               if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
-               {
-                   v->spl_left[v->spl_nleft] = i;
-                   v->spl_nleft++;
-               }
-               else
-               {
-                   v->spl_right[v->spl_nright] = i;
-                   v->spl_nright++;
-               }
-           }
-           v->spl_ldatum = BoxPGetDatum(unionL);
-           v->spl_rdatum = BoxPGetDatum(unionR);
-
-           PG_RETURN_POINTER(v);
-       }
+       /*
+        * All entries are the same
+        */
+       fallbackSplit(entryvec, v);
+       PG_RETURN_POINTER(v);
    }
 
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   listL = (OffsetNumber *) palloc(nbytes);
+   listR = (OffsetNumber *) palloc(nbytes);
    listB = (OffsetNumber *) palloc(nbytes);
    listT = (OffsetNumber *) palloc(nbytes);
+   unionL = (BOX *) palloc(sizeof(BOX));
+   unionR = (BOX *) palloc(sizeof(BOX));
    unionB = (BOX *) palloc(sizeof(BOX));
    unionT = (BOX *) palloc(sizeof(BOX));
 
@@ -343,6 +380,12 @@ gbox_picksplit(PG_FUNCTION_ARGS)
                ADDLIST(listT, unionT, posT, arr[i - 1].pos);
        }
        pfree(arr);
+
+       if ((posR == 0 || posL == 0) && (posT == 0 || posB == 0))
+       {
+           fallbackSplit(entryvec, v);
+           PG_RETURN_POINTER(v);
+       }
    }
 
    /* which split more optimal? */
index 4d595ca9877f629ec8bf22958bf5dd554cbe401a..0fc21e36c90c276868b63f93e7d785b07e173b9c 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.105.4.1 2005/08/30 08:36:52 teodor Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.105.4.2 2009/04/07 17:49:40 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1211,6 +1211,66 @@ gistadjsubkey(Relation r,
    pfree(storage);             /* pfree(evec); */
 }
 
+/*
+ * Trivial picksplit implementaion. Function called only 
+ * if user-defined picksplit puts all keys to the one page.
+ * That is a bug of user-defined picksplit but we'd like
+ * to "fix" that.
+ */
+static void
+genericPickSplit(GISTSTATE *giststate, bytea *entryvec, GIST_SPLITVEC *v)
+{
+   OffsetNumber    i,
+                   maxoff;
+   int             nbytes;
+   bytea          *evec;
+   char           *storage;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 1;
+
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+   v->spl_nleft = v->spl_nright = 0;
+
+   for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+   {
+       if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
+       {
+           v->spl_left[v->spl_nleft] = i;
+           v->spl_nleft++;
+       }
+       else
+       {
+           v->spl_right[v->spl_nright] = i;
+           v->spl_nright++;
+       }
+   }
+
+   /*
+    * Form unions of each page
+    */
+
+   /* workaround for 64-bit: ensure GISTENTRY array is maxaligned */
+   storage = (char *) palloc(VARSIZE(entryvec) + MAXALIGN(VARHDRSZ));
+   evec = (bytea *) (storage + MAXALIGN(VARHDRSZ) - VARHDRSZ);
+
+   VARATT_SIZEP(evec) = v->spl_nleft * sizeof(GISTENTRY) + VARHDRSZ;
+   memcpy(VARDATA(evec), ((GISTENTRY *) VARDATA(entryvec)) + FirstOffsetNumber,
+                               sizeof(GISTENTRY) * v->spl_nleft);
+   v->spl_ldatum = FunctionCall2(&giststate->unionFn[0],
+                                   PointerGetDatum(evec),
+                                   PointerGetDatum(&nbytes));
+
+   VARATT_SIZEP(evec) = v->spl_nright * sizeof(GISTENTRY) + VARHDRSZ;
+   memcpy(VARDATA(evec), ((GISTENTRY *) VARDATA(entryvec)) + FirstOffsetNumber + v->spl_nleft, 
+                               sizeof(GISTENTRY) * v->spl_nright);
+   v->spl_rdatum = FunctionCall2(&giststate->unionFn[0],
+                                   PointerGetDatum(evec),
+                                   PointerGetDatum(&nbytes));
+}
+
 /*
  * gistSplit -- split a page in the tree.
  */
@@ -1299,11 +1359,24 @@ gistSplit(Relation r,
                  PointerGetDatum(entryvec),
                  PointerGetDatum(&v));
 
-   /* compatibility with old code */
-   if (v.spl_left[v.spl_nleft - 1] == InvalidOffsetNumber)
-       v.spl_left[v.spl_nleft - 1] = (OffsetNumber) *len;
-   if (v.spl_right[v.spl_nright - 1] == InvalidOffsetNumber)
-       v.spl_right[v.spl_nright - 1] = (OffsetNumber) *len;
+   if ( v.spl_nleft == 0 || v.spl_nright == 0 )
+   {
+       ereport(DEBUG1,
+               (errcode(ERRCODE_INTERNAL_ERROR),
+                errmsg("Picksplit method for first column of index \"%s\" failed",
+                                                   RelationGetRelationName(r)),
+                errhint("Index is not optimal, to optimize it contact developer or try to use the column as a second one in create index command")));
+
+       genericPickSplit(giststate, entryvec, &v);
+   }
+   else
+   {
+       /* compatibility with old code */
+       if (v.spl_left[v.spl_nleft - 1] == InvalidOffsetNumber)
+           v.spl_left[v.spl_nleft - 1] = (OffsetNumber) *len;
+       if (v.spl_right[v.spl_nright - 1] == InvalidOffsetNumber)
+           v.spl_right[v.spl_nright - 1] = (OffsetNumber) *len;
+   }
 
    v.spl_lattr[0] = v.spl_ldatum;
    v.spl_rattr[0] = v.spl_rdatum;