Subversion Repositories shark

Rev

Rev 2 | Rev 107 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed


/*
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */


/*
 * putils.c -- plan utilities shared by planner.c and rplanner.c
 */


/* $Id: putils.c,v 1.1.1.1 2002-03-29 14:12:57 pj Exp $ */
#ifdef FFTW_USING_CILK
#include <ports/cilk.h>
#include <ports/cilk-compat.h>
#endif

#include <ports/fftw-int.h>
#include <stdlib.h>
//#include <ports/stdio.h>

int fftw_node_cnt = 0;
int fftw_plan_cnt = 0;

/*
 * These two constants are used for the FFTW_ESTIMATE flag to help
 * create a heuristic plan.  They don't affect FFTW_MEASURE.
 */

#define NOTW_OPTIMAL_SIZE 32
#define TWIDDLE_OPTIMAL_SIZE 12

#define IS_POWER_OF_TWO(n) ((n & (n - 1)) == 0)

/* constructors --- I wish I had ML */
fftw_plan_node *fftw_make_node(void)
{
     fftw_plan_node *p = (fftw_plan_node *)
     fftw_malloc(sizeof(fftw_plan_node));
     p->refcnt = 0;
     fftw_node_cnt++;
     return p;
}

void fftw_use_node(fftw_plan_node *p)
{
     ++p->refcnt;
}

fftw_plan_node *fftw_make_node_notw(int size, const fftw_codelet_desc *config)
{
     fftw_plan_node *p = fftw_make_node();

     p->type = config->type;
     p->nodeu.notw.size = size;
     p->nodeu.notw.codelet = (fftw_notw_codelet *) config->codelet;
     p->nodeu.notw.codelet_desc = config;
     return p;
}

fftw_plan_node *fftw_make_node_real2hc(int size,
                                       const fftw_codelet_desc *config)
{
     fftw_plan_node *p = fftw_make_node();

     p->type = config->type;
     p->nodeu.real2hc.size = size;
     p->nodeu.real2hc.codelet = (fftw_real2hc_codelet *) config->codelet;
     p->nodeu.real2hc.codelet_desc = config;
     return p;
}

fftw_plan_node *fftw_make_node_hc2real(int size,
                                       const fftw_codelet_desc *config)
{
     fftw_plan_node *p = fftw_make_node();

     p->type = config->type;
     p->nodeu.hc2real.size = size;
     p->nodeu.hc2real.codelet = (fftw_hc2real_codelet *) config->codelet;
     p->nodeu.hc2real.codelet_desc = config;
     return p;
}

fftw_plan_node *fftw_make_node_twiddle(int n,
                                       const fftw_codelet_desc *config,
                                       fftw_plan_node *recurse,
                                       int flags)
{
     fftw_plan_node *p = fftw_make_node();

     p->type = config->type;
     p->nodeu.twiddle.size = config->size;
     p->nodeu.twiddle.codelet = (fftw_twiddle_codelet *) config->codelet;
     p->nodeu.twiddle.recurse = recurse;
     p->nodeu.twiddle.codelet_desc = config;
     fftw_use_node(recurse);
     if (flags & FFTW_MEASURE)
          p->nodeu.twiddle.tw = fftw_create_twiddle(n, config);
     else
          p->nodeu.twiddle.tw = 0;
     return p;
}

fftw_plan_node *fftw_make_node_hc2hc(int n, fftw_direction dir,
                                     const fftw_codelet_desc *config,
                                     fftw_plan_node *recurse,
                                     int flags)
{
     fftw_plan_node *p = fftw_make_node();

     p->type = config->type;
     p->nodeu.hc2hc.size = config->size;
     p->nodeu.hc2hc.dir = dir;
     p->nodeu.hc2hc.codelet = (fftw_hc2hc_codelet *) config->codelet;
     p->nodeu.hc2hc.recurse = recurse;
     p->nodeu.hc2hc.codelet_desc = config;
     fftw_use_node(recurse);
     if (flags & FFTW_MEASURE)
          p->nodeu.hc2hc.tw = fftw_create_twiddle(n, config);
     else
          p->nodeu.hc2hc.tw = 0;
     return p;
}

fftw_plan_node *fftw_make_node_generic(int n, int size,
                                       fftw_generic_codelet *codelet,
                                       fftw_plan_node *recurse,
                                       int flags)
{
     fftw_plan_node *p = fftw_make_node();

     p->type = FFTW_GENERIC;
     p->nodeu.generic.size = size;
     p->nodeu.generic.codelet = codelet;
     p->nodeu.generic.recurse = recurse;
     fftw_use_node(recurse);

     if (flags & FFTW_MEASURE)
          p->nodeu.generic.tw = fftw_create_twiddle(n,
                                          (const fftw_codelet_desc *) 0);
     else
          p->nodeu.generic.tw = 0;
     return p;
}

fftw_plan_node *fftw_make_node_rgeneric(int n, int size,
                                        fftw_direction dir,
                                        fftw_rgeneric_codelet *codelet,
                                        fftw_plan_node *recurse,
                                        int flags)
{
     fftw_plan_node *p = fftw_make_node();

     if (size % 2 == 0 || (n / size) % 2 == 0)
          fftw_die("invalid size for rgeneric codelet\n");

     p->type = FFTW_RGENERIC;
     p->nodeu.rgeneric.size = size;
     p->nodeu.rgeneric.dir = dir;
     p->nodeu.rgeneric.codelet = codelet;
     p->nodeu.rgeneric.recurse = recurse;
     fftw_use_node(recurse);

     if (flags & FFTW_MEASURE)
          p->nodeu.rgeneric.tw = fftw_create_twiddle(n,
                                          (const fftw_codelet_desc *) 0);
     else
          p->nodeu.rgeneric.tw = 0;
     return p;
}

/*
 * Note that these two Rader-related things must go here, rather than
 * in rader.c, in order that putils.c (and rplanner.c) won't depend
 * upon rader.c.
 */


fftw_rader_data *fftw_rader_top = NULL;

static void fftw_destroy_rader(fftw_rader_data * d)
{
     if (d) {
          d->refcount--;
          if (d->refcount <= 0) {
               fftw_rader_data *cur = fftw_rader_top, *prev = NULL;

               while (cur && cur != d) {
                    prev = cur;
                    cur = cur->next;
               }
               if (!cur)
                    fftw_die("invalid Rader data pointer\n");

               if (prev)
                    prev->next = d->next;
               else
                    fftw_rader_top = d->next;

               fftw_destroy_plan_internal(d->plan);
               fftw_free(d->omega);
               fftw_free(d->cdesc);
               fftw_free(d);
          }
     }
}

static void destroy_tree(fftw_plan_node *p)
{
     if (p) {
          --p->refcnt;
          if (p->refcnt == 0) {
               switch (p->type) {
                   case FFTW_NOTW:
                   case FFTW_REAL2HC:
                   case FFTW_HC2REAL:
                        break;

                   case FFTW_TWIDDLE:
                        if (p->nodeu.twiddle.tw)
                             fftw_destroy_twiddle(p->nodeu.twiddle.tw);
                        destroy_tree(p->nodeu.twiddle.recurse);
                        break;

                   case FFTW_HC2HC:
                        if (p->nodeu.hc2hc.tw)
                             fftw_destroy_twiddle(p->nodeu.hc2hc.tw);
                        destroy_tree(p->nodeu.hc2hc.recurse);
                        break;

                   case FFTW_GENERIC:
                        if (p->nodeu.generic.tw)
                             fftw_destroy_twiddle(p->nodeu.generic.tw);
                        destroy_tree(p->nodeu.generic.recurse);
                        break;

                   case FFTW_RADER:
                        if (p->nodeu.rader.tw)
                             fftw_destroy_twiddle(p->nodeu.rader.tw);
                        if (p->nodeu.rader.rader_data)
                             fftw_destroy_rader(p->nodeu.rader.rader_data);
                        destroy_tree(p->nodeu.rader.recurse);
                        break;

                   case FFTW_RGENERIC:
                        if (p->nodeu.rgeneric.tw)
                             fftw_destroy_twiddle(p->nodeu.rgeneric.tw);
                        destroy_tree(p->nodeu.rgeneric.recurse);
                        break;
               }

               fftw_free(p);
               fftw_node_cnt--;
          }
     }
}

/* create a plan with twiddle factors, and other bells and whistles */
fftw_plan fftw_make_plan(int n, fftw_direction dir,
                         fftw_plan_node *root, int flags,
                         enum fftw_node_type wisdom_type,
                         int wisdom_signature)
{
     fftw_plan p = (fftw_plan) fftw_malloc(sizeof(struct fftw_plan_struct));

     p->n = n;
     p->dir = dir;
     p->flags = flags;
     fftw_use_node(root);
     p->root = root;
     p->cost = 0.0;
     p->wisdom_type = wisdom_type;
     p->wisdom_signature = wisdom_signature;
     p->next = (fftw_plan) 0;
     p->refcnt = 0;
     fftw_plan_cnt++;
     return p;
}

/*
 * complete with twiddle factors (because nodes don't have
 * them when FFTW_ESTIMATE is set)
 */

void fftw_complete_twiddle(fftw_plan_node *p, int n)
{
     int r;
     switch (p->type) {
         case FFTW_NOTW:
         case FFTW_REAL2HC:
         case FFTW_HC2REAL:
              break;

         case FFTW_TWIDDLE:
              r = p->nodeu.twiddle.size;
              if (!p->nodeu.twiddle.tw)
                   p->nodeu.twiddle.tw =
                       fftw_create_twiddle(n, p->nodeu.twiddle.codelet_desc);
              fftw_complete_twiddle(p->nodeu.twiddle.recurse, n / r);
              break;

         case FFTW_HC2HC:
              r = p->nodeu.hc2hc.size;
              if (!p->nodeu.hc2hc.tw)
                   p->nodeu.hc2hc.tw =
                       fftw_create_twiddle(n, p->nodeu.hc2hc.codelet_desc);
              fftw_complete_twiddle(p->nodeu.hc2hc.recurse, n / r);
              break;

         case FFTW_GENERIC:
              r = p->nodeu.generic.size;
              if (!p->nodeu.generic.tw)
                   p->nodeu.generic.tw =
                       fftw_create_twiddle(n, (const fftw_codelet_desc *) 0);
              fftw_complete_twiddle(p->nodeu.generic.recurse, n / r);
              break;

         case FFTW_RADER:
              r = p->nodeu.rader.size;
              if (!p->nodeu.rader.tw)
                   p->nodeu.rader.tw =
                       fftw_create_twiddle(n, p->nodeu.rader.rader_data->cdesc);
              fftw_complete_twiddle(p->nodeu.rader.recurse, n / r);
              break;

         case FFTW_RGENERIC:
              r = p->nodeu.rgeneric.size;
              if (!p->nodeu.rgeneric.tw)
                   p->nodeu.rgeneric.tw =
                       fftw_create_twiddle(n, (const fftw_codelet_desc *) 0);
              fftw_complete_twiddle(p->nodeu.rgeneric.recurse, n / r);
              break;

     }
}

void fftw_use_plan(fftw_plan p)
{
     ++p->refcnt;
}

void fftw_destroy_plan_internal(fftw_plan p)
{
     --p->refcnt;

     if (p->refcnt == 0) {
          destroy_tree(p->root);
          fftw_plan_cnt--;
          fftw_free(p);
     }
}

/* end of constructors */

/* management of plan tables */
void fftw_make_empty_table(fftw_plan *table)
{
     *table = (fftw_plan) 0;
}

void fftw_insert(fftw_plan *table, fftw_plan this_plan, int n)
{
     fftw_use_plan(this_plan);
     this_plan->n = n;
     this_plan->next = *table;
     *table = this_plan;
}

fftw_plan fftw_lookup(fftw_plan *table, int n, int flags)
{
     fftw_plan p;

     for (p = *table; p &&
          ((p->n != n) || (p->flags != flags)); p = p->next);

     return p;
}

void fftw_destroy_table(fftw_plan *table)
{
     fftw_plan p, q;

     for (p = *table; p; p = q) {
          q = p->next;
          fftw_destroy_plan_internal(p);
     }
}

double fftw_estimate_node(fftw_plan_node *p)
{
     int k;

     switch (p->type) {
         case FFTW_NOTW:
              k = p->nodeu.notw.size;
              goto common1;

         case FFTW_REAL2HC:
              k = p->nodeu.real2hc.size;
              goto common1;

         case FFTW_HC2REAL:
              k = p->nodeu.hc2real.size;
            common1:
              return 1.0 + 0.1 * (k - NOTW_OPTIMAL_SIZE) *
                  (k - NOTW_OPTIMAL_SIZE);

         case FFTW_TWIDDLE:
              k = p->nodeu.twiddle.size;
              return 1.0 + 0.1 * (k - TWIDDLE_OPTIMAL_SIZE) *
                  (k - TWIDDLE_OPTIMAL_SIZE)
                  + fftw_estimate_node(p->nodeu.twiddle.recurse);

         case FFTW_HC2HC:
              k = p->nodeu.hc2hc.size;
              return 1.0 + 0.1 * (k - TWIDDLE_OPTIMAL_SIZE) *
                  (k - TWIDDLE_OPTIMAL_SIZE)
                  + fftw_estimate_node(p->nodeu.hc2hc.recurse);

         case FFTW_GENERIC:
              k = p->nodeu.generic.size;
              return 10.0 + k * k
                  + fftw_estimate_node(p->nodeu.generic.recurse);

         case FFTW_RADER:
              k = p->nodeu.rader.size;
              return 10.0 + 10 * k
                  + fftw_estimate_node(p->nodeu.rader.recurse);

         case FFTW_RGENERIC:
              k = p->nodeu.rgeneric.size;
              return 10.0 + k * k
                  + fftw_estimate_node(p->nodeu.rgeneric.recurse);
     }
     return 1.0E20;
}

/* pick the better of two plans and destroy the other one. */
fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2)
{
     if (!p1)
          return p2;

     if (!p2)
          return p1;

     if (p1->cost > p2->cost) {
          fftw_destroy_plan_internal(p1);
          return p2;
     } else {
          fftw_destroy_plan_internal(p2);
          return p1;
     }
}

/* find the smallest prime factor of n */
int fftw_factor(int n)
{
     int r;

     /* try 2 */
     if ((n & 1) == 0)
          return 2;

     /* try odd numbers up to sqrt(n) */
     for (r = 3; r * r <= n; r += 2)
          if (n % r == 0)
               return r;

     /* n is prime */
     return n;
}
/*
static void print_node(FILE *f, fftw_plan_node *p, int indent)
{
     if (p) {
          switch (p->type) {
              case FFTW_NOTW:
                   fprintf(f, "%*sFFTW_NOTW %d\n", indent, "",
                           p->nodeu.notw.size);
                   break;
              case FFTW_REAL2HC:
                   fprintf(f, "%*sFFTW_REAL2HC %d\n", indent, "",
                           p->nodeu.real2hc.size);
                   break;
              case FFTW_HC2REAL:
                   fprintf(f, "%*sFFTW_HC2REAL %d\n", indent, "",
                           p->nodeu.hc2real.size);
                   break;
              case FFTW_TWIDDLE:
                   fprintf(f, "%*sFFTW_TWIDDLE %d\n", indent, "",
                           p->nodeu.twiddle.size);
                   print_node(f, p->nodeu.twiddle.recurse, indent);
                   break;
              case FFTW_HC2HC:
                   fprintf(f, "%*sFFTW_HC2HC %d\n", indent, "",
                           p->nodeu.hc2hc.size);
                   print_node(f, p->nodeu.hc2hc.recurse, indent);
                   break;
              case FFTW_GENERIC:
                   fprintf(f, "%*sFFTW_GENERIC %d\n", indent, "",
                           p->nodeu.generic.size);
                   print_node(f, p->nodeu.generic.recurse, indent);
                   break;
              case FFTW_RADER:
                   fprintf(f, "%*sFFTW_RADER %d\n", indent, "",
                           p->nodeu.rader.size);

                   fprintf(f, "%*splan for size %d convolution:\n",
                           indent + 4, "", p->nodeu.rader.size - 1);
                   print_node(f, p->nodeu.rader.rader_data->plan->root,
                              indent + 6);

                   print_node(f, p->nodeu.rader.recurse, indent);
                   break;
              case FFTW_RGENERIC:
                   fprintf(f, "%*sFFTW_RGENERIC %d\n", indent, "",
                           p->nodeu.rgeneric.size);
                   print_node(f, p->nodeu.rgeneric.recurse, indent);
                   break;
          }
     }
}

void fftw_fprint_plan(FILE *f, fftw_plan p)
{

     fprintf(f, "plan: (cost = %e)\n", p->cost);
     print_node(f, p->root, 0);
}

void fftw_print_plan(fftw_plan p)
{
     fftw_fprint_plan(stdout, p);
}
*/

size_t fftw_sizeof_fftw_real(void)
{
     return(sizeof(fftw_real));
}