Logo Search packages:      
Sourcecode: ragel version File versions

parsedata.cpp

/*
 *  Copyright 2001-2004 Adrian Thurston <adriant@ragel.ca>
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */

#include <iostream>
#include <iomanip>
#include <errno.h>
#include <stdlib.h>

/* Parsing. */
#include "ragel.h"
#include "ptreetypes.h"
#include "rlparse.h"
#include "parsedata.h"
#include "parsetree.h"

/* Machine building. */
#include "redfsm.h"

/* Code generators. */
#include "tabcodegen.h"
#include "ftabcodegen.h"
#include "flatcodegen.h"
#include "fflatcodegen.h"
#include "gotocodegen.h"
#include "fgotocodegen.h"
#include "ipgotocodegen.h"

/* Dumping the fsm. */
#include "fsmdump.h"
#include "gvdotgen.h"
#include "mergesort.h"

using namespace std;

char machineMain[] = "main";

InputLoc::InputLoc( const BISON_YYLTYPE &loc )
:
      line(loc.first_line), 
      col(loc.first_column)
{
}

Action::Action( const InputLoc &loc, char *name, 
            InlineList *inlineList, const NameRefList &nameRefs )
:
      loc(loc),
      name(name),
      inlineList(inlineList), 
      actionId(0),
      numTransRefs(0),
      numEofRefs(0),
      nameRefs(nameRefs)
{
}

Action::~Action()
{
      delete[] name;
}

/* Perform minimization after an operation according 
 * to the command line args. */
void afterOpMinimize( FsmAp *fsm )
{
      /* Switch on the prefered minimization algorithm. */
      if ( minimizeEveryOp ) {
            if ( minimizeLevel != MinimizeNone ) {
                  /* First clean up the graph. FsmAp operations may leave these lying
                   * around.  There should be no dead end states however. The
                   * subtract intersection operators are the only places where they
                   * may be created and those operators clean them up. */
                  fsm->removeUnreachableStates();
            }

            switch ( minimizeLevel ) {
                  case MinimizeNone:
                        break;
                  case MinimizeApprox:
                        fsm->minimizeApproximate();
                        break;
                  case MinimizePartition1:
                        fsm->minimizePartition1();
                        break;
                  case MinimizePartition2:
                        fsm->minimizePartition2();
                        break;
                  case MinimizeStable:
                        fsm->minimizeStable();
                        break;
            }
      }
}

/* Count the transitions in the fsm by walking the state list. */
int countTransitions( FsmAp *fsm )
{
      int numTrans = 0;
      StateAp *state = fsm->stateList.head;
      while ( state != 0 ) {
            numTrans += state->outList.length();
            if ( state->outDefault != 0 )
                  numTrans += 1;
            state = state->next;
      }
      return numTrans;
}

long makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
{
      /* Convert the number to a decimal. Reset errno so we can check for
       * overflow or underflow. In the event of an error, sets the return val to
       * the upper or lower bound being tested against. */
      errno = 0;
      unsigned long ul = strtoul( str, 0, 16 );

      switch ( pd->alphType ) {
      case AT_Char: {
            /* Check against upper bound. */
            if ( ul > RL_UCHAR_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows char alphabet" << endl;
                  ul = RL_UCHAR_MAX;
            }

            /* Cast to a char to get the proper sign, then extend to an integer. */
            return (char)ul;
      }

      case AT_UnsignedChar: {
            /* Check against upper bound. */
            if ( ul > RL_UCHAR_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows unsigned char alphabet" << endl;
                  ul = RL_UCHAR_MAX;
            }

            /* Store cast to unsigned char, then extend to an integer. */
            return (unsigned char)ul;
      }

      case AT_Short: {
            /* Check against upper bound. */
            if ( ul > RL_USHORT_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows short alphabet" << endl;
                  ul = RL_USHORT_MAX;
            }

            /* Cast to a short to get the proper sign, then extend to an integer. */
            return (short)ul;
      }

      case AT_UnsignedShort: {
            /* Check against upper bound. */
            if ( ul > RL_USHORT_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows unsigned short alphabet" << endl;
                  ul = RL_USHORT_MAX;
            }

            /* Store cast to unsigned short, then extend to an integer. */
            return (unsigned short)ul;
      }

      case AT_Int: {
            /* Check against upper bound. */
            if ( ul > RL_UINT_MAX || errno == ERANGE && ul == ULONG_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows int alphabet" << endl;
                  ul = ULONG_MAX;
            }

            /* Store in integer format. */
            return (int)ul;
      }

      case AT_UnsignedInt: {
            /* Check against upper bound. */
            if ( ul > RL_UINT_MAX || errno == ERANGE && ul == ULONG_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows unsigned int alphabet" << endl;
                  ul = ULONG_MAX;
            }

            /* Store in integer format. */
            return (unsigned int)ul;
      }}
      /* Not reached. */
      return 0;
}

long makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
{
      switch ( pd->alphType ) {
      case AT_Char: {
            /* Convert the number to a decimal. If there is overflow or underflow
             * the num will be set to LONG_MIN/LONG_MAX which are way too low and
             * high for char anyways so no need to check. */
            long l = strtol( str, 0, 10 );

            /* Check against lower bound. */
            if ( l < RL_CHAR_MIN ) {
                  error(loc) << "literal " << str << 
                              " underflows char alphabet" << endl;
                  l = RL_CHAR_MIN;
            }

            /* Check against upper bound. */
            if ( l > RL_CHAR_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows char alphabet" << endl;
                  l = RL_CHAR_MAX;
            }
            return l;
      }

      case AT_UnsignedChar: {
            /* Can't allow a negative decimal number for unsigned char type. */
            if ( str[0] == '-' ) {
                  /* Recover by advancing over the dash. */
                  error(loc) << "literal " << str << 
                              " underflows unsigned char alphabet" << endl;
                  str += 1;
            }

            /* Convert to an unsigned number and bounds check it. */
            unsigned long ul = strtoul( str, 0, 10 );
            if ( ul > RL_UCHAR_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows unsigned char alphabet" << endl;
                  ul = RL_UCHAR_MAX;
            }
            return ul;
      }

      case AT_Short: {
            /* Convert the number to a decimal. If there is overflow or underflow
             * the num will be set to the high or low limits which are exceed the
             * bounds for short so no need to check. */
            long l = strtol( str, 0, 10 );

            /* Check against lower bound. */
            if ( l < RL_SHORT_MIN ) {
                  error(loc) << "literal " << str << 
                              " underflows short alphabet" << endl;
                  l = RL_SHORT_MIN;
            }

            /* Check against upper bound. */
            if ( l > RL_SHORT_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows short alphabet" << endl;
                  l = RL_SHORT_MAX;
            }
            return l;
      }

      case AT_UnsignedShort: {
            /* Can't allow a negative decimal number for unsigned short type. */
            if ( str[0] == '-' ) {
                  /* Recover by skippping the dash. */
                  error(loc) << "literal " << str << 
                              " underflows unsigned short alphabet" << endl;
                  str += 1;
            }

            /* Convert to an unsigned number and bounds check it. */
            unsigned long ul = strtoul( str, 0, 10 );
            if ( ul > RL_USHORT_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows unsigned short alphabet" << endl;
                  ul = RL_USHORT_MAX;
            }
            return ul;
      }

      case AT_Int: {
            /* Convert the number to a decimal. First reset errno so we can check
             * for overflow or underflow. */
            errno = 0;
            long l = strtol( str, 0, 10 );

            /* Check lower bound. */
            if ( l < RL_INT_MIN || errno == ERANGE && l == LONG_MIN ) {
                  error(loc) << "literal " << str << 
                              " underflows int alphabet" << endl;
                  l = LONG_MIN;
            }

            /* Check upper bound. */
            if ( l > RL_INT_MAX || errno == ERANGE && l == LONG_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows int alphabet" << endl;
                  l = LONG_MAX;
            }
            return l;
      }

      case AT_UnsignedInt: {
            /* Cannot allow negative numbers for an unsigned type. */
            if ( str[0] == '-' ) {
                  /* Recover by skipping the dash. */
                  error(loc) << "literal " << str << 
                              " underflows unsigned int alphabet" << endl;
                  str += 1;
            }

            /* Convert to an unsigned number and bounds check it. */
            errno = 0;
            unsigned long ul = strtoul( str, 0, 10 );
            if ( ul > RL_UINT_MAX || errno == ERANGE && ul == ULONG_MAX ) {
                  error(loc) << "literal " << str << 
                              " overflows unsigned int alphabet" << endl;
                  ul = ULONG_MAX;
            }

            /* Store in integer format. */
            return ul;
      }}
      /* Not reached. */
      return 0;
}

/* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 * number returned by the parser. Validates that the number doesn't overflow
 * the alphabet type. */
long makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
{
      /* Switch on hex/decimal format. */
      if ( str[0] == '0' && str[1] == 'x' )
            return makeFsmKeyHex( str, loc, pd );
      else
            return makeFsmKeyDec( str, loc, pd );
}

/* Make an fsm int format (what the fsm graph uses) from a single character.
 * Performs proper conversion depending on signed/unsigned property of the
 * alphabet. */
long makeFsmKeyChar( char c, ParseData *pd )
{
      long retVal;
      if ( pd->isAlphSigned() ) {
            /* Copy from a char type. */
            retVal = c;
      }
      else {
            /* Copy from an unsigned byte type. */
            retVal = (unsigned char)c;
      }
      return retVal;
}

/* Make an fsm key array in int format (what the fsm graph uses) from a string
 * of characters. Performs proper conversion depending on signed/unsigned
 * property of the alphabet. */
void makeFsmKeyArray( long *result, char *data, int len, ParseData *pd )
{
      if ( pd->isAlphSigned() ) {
            /* Copy from a char star type. */
            char *src = data;
            for ( int i = 0; i < len; i++ )
                  result[i] = src[i];
      }
      else {
            /* Copy from an unsigned byte ptr type. */
            unsigned char *src = (unsigned char*) data;
            for ( int i = 0; i < len; i++ )
                  result[i] = src[i];
      }
}

/* Like makeFsmKeyArray except the result has only unique keys. They ordering
 * will be changed. */
int makeFsmUniqueKeyArray( long *result, char *data, int len, ParseData *pd )
{
      /* Set up a key ops for the transitions list that we will use in the
       * insert for the transition list that we will use to make unique keys. */
      KeyOps keyOps( pd->isAlphSigned() );

      /* Use a transitions list for getting unique keys. */
      KeySet keySet;
      if ( pd->isAlphSigned() ) {
            /* Copy from a char star type. */
            char *src = data;
            for ( int si = 0; si < len; si++ ) {
                  /* If the key is not already in the trans list it is unique. */
                  keySet.insert( src[si], &keyOps );
            }
      }
      else {
            /* Copy from an unsigned byte ptr type. */
            unsigned char *src = (unsigned char*) data;
            for ( int si = 0; si < len; si++ ) {
                  /* If the key is not already in the trans list it is unique. */
                  keySet.insert( src[si], &keyOps );
            }
      }

      /* Copy the data back in and return the number of unique keys written. */
      memcpy( result, keySet.data, sizeof(long)*keySet.length() );
      return keySet.length();
}

/* Make a builtin type. Depends on the signed nature of the alphabet type. */
FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
{
      /* FsmAp created to return. */
      FsmAp *retFsm = 0;
      bool isSigned = pd->isAlphSigned();

      switch ( builtin ) {
      case BT_Any: {
            /* All characters. */
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->dotFsm( );
            break;
      }
      case BT_Ascii: {
            /* Ascii characters 0 to 127. */
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->rangeFsm( 0, 127 );
            break;
      }
      case BT_Extend: {
            /* Ascii extended characters. This is the full byte range. Dependent
             * on signed, vs no signed. If the alphabet is one byte then just use
             * dot fsm. */
            retFsm = new FsmAp( &pd->keyOps );
            if ( pd->alphType == AT_Char || pd->alphType == AT_UnsignedChar )
                  retFsm->dotFsm( );
            else if ( isSigned )
                  retFsm->rangeFsm( -128, 127 );
            else
                  retFsm->rangeFsm( 0, 255 );
            break;
      }
      case BT_Alpha: {
            /* Alpha [A-Za-z]. */
            FsmAp *upper = new FsmAp( &pd->keyOps ), *lower = new FsmAp( &pd->keyOps );
            upper->rangeFsm( 'A', 'Z' );
            lower->rangeFsm( 'a', 'z' );
            upper->unionOp( lower );
            upper->minimizePartition2();
            retFsm = upper;
            break;
      }
      case BT_Digit: {
            /* Digits [0-9]. */
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->rangeFsm( '0', '9' );
            break;
      }
      case BT_Alnum: {
            /* Alpha numerics [0-9A-Za-z]. */
            FsmAp *digit = new FsmAp( &pd->keyOps ), *lower = new FsmAp( &pd->keyOps );
            FsmAp *upper = new FsmAp( &pd->keyOps );
            digit->rangeFsm( '0', '9' );
            upper->rangeFsm( 'A', 'Z' );
            lower->rangeFsm( 'a', 'z' );
            digit->unionOp( upper );
            digit->unionOp( lower );
            digit->minimizePartition2();
            retFsm = digit;
            break;
      }
      case BT_Lower: {
            /* Lower case characters. */
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->rangeFsm( 'a', 'z' );
            break;
      }
      case BT_Upper: {
            /* Upper case characters. */
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->rangeFsm( 'A', 'Z' );
            break;
      }
      case BT_Cntrl: {
            /* Control characters. */
            FsmAp *cntrl = new FsmAp( &pd->keyOps );
            FsmAp *highChar = new FsmAp( &pd->keyOps );
            cntrl->rangeFsm( 0, 31 );
            highChar->concatFsm( 127 );
            cntrl->unionOp( highChar );
            cntrl->minimizePartition2();
            retFsm = cntrl;
            break;
      }
      case BT_Graph: {
            /* Graphical ascii characters [!-~]. */
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->rangeFsm( '!', '~' );
            break;
      }
      case BT_Print: {
            /* Printable characters. Same as graph except includes space. */
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->rangeFsm( ' ', '~' );
            break;
      }
      case BT_Punct: {
            /* Punctuation. */
            FsmAp *range1 = new FsmAp( &pd->keyOps );
            FsmAp *range2 = new FsmAp( &pd->keyOps );
            FsmAp *range3 = new FsmAp( &pd->keyOps ); 
            FsmAp *range4 = new FsmAp( &pd->keyOps );
            range1->rangeFsm( '!', '/' );
            range2->rangeFsm( ':', '@' );
            range3->rangeFsm( '[', '`' );
            range4->rangeFsm( '{', '~' );
            range1->unionOp( range2 );
            range1->unionOp( range3 );
            range1->unionOp( range4 );
            range1->minimizePartition2();
            retFsm = range1;
            break;
      }
      case BT_Space: {
            /* Whitespace: [\t\v\f\n\r ]. */
            FsmAp *cntrl = new FsmAp( &pd->keyOps );
            FsmAp *space = new FsmAp( &pd->keyOps );
            cntrl->rangeFsm( '\t', '\r' );
            space->concatFsm( ' ' );
            cntrl->unionOp( space );
            cntrl->minimizePartition2();
            retFsm = cntrl;
            break;
      }
      case BT_Xdigit: {
            /* Hex digits [0-9A-Fa-f]. */
            FsmAp *digit = new FsmAp( &pd->keyOps );
            FsmAp *upper = new FsmAp( &pd->keyOps );
            FsmAp *lower = new FsmAp( &pd->keyOps );
            digit->rangeFsm( '0', '9' );
            upper->rangeFsm( 'A', 'F' );
            lower->rangeFsm( 'a', 'f' );
            digit->unionOp( upper );
            digit->unionOp( lower );
            digit->minimizePartition2();
            retFsm = digit;
            break;
      }
      case BT_Null: {
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->nullFsm();
            break;
      }
      case BT_Empty: {
            retFsm = new FsmAp( &pd->keyOps );
            retFsm->emptyFsm();
            break;
      }}

      return retFsm;
}

/* Check if this name inst or any name inst below is referenced. */
bool NameInst::anyRefsRec()
{
      if ( numRefs > 0 )
            return true;

      /* Recurse on children until true. */
      for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
            if ( (*ch)->anyRefsRec() )
                  return true;
      }

      return false;
}

/*
 * ParseData
 */

/* Initialize the structure that will collect info during the parse of a
 * machine. */
ParseData::ParseData( const String &fileName )
:     
      actionIndex(0),
      baseClause(0),
      nextPriorKey(0),
      /* 0 is reserved for global error actions. */
      nextLocalErrKey(1),
      nextNameId(0),

      machineGiven(false), 
      alphType(AT_Char),
      elementType(0),
      getKeyExpr(0),
      nextContextId(0),
      fileName(fileName),
      errorCount(0),

      curActionOrd(0),
      curPriorOrd(0),

      rootName(0)
{
      /* Initialize the dictionary of graphs. This is our symbol table. The
       * initialization needs to be done on construction which happens at the
       * beginning of a machine spec so any assignment operators can reference
       * the builtins. */
      initGraphDict();
}

/* Clean up the data collected during a parse. */
ParseData::~ParseData()
{
      if ( actionIndex != 0 )
            delete[] actionIndex;

      /* Delete all the nodes in the action list. Will cause all the
       * string data that represents the actions to be deallocated. */
      actionList.empty();
      dataList.empty();
      initCodeList.empty();
}

/* Make a name id in the current name instantiation scope if it is not
 * already there. */
NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
{
      /* Create the name instantitaion object and insert it. */
      NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
      curNameInst->childVect.append( newNameInst );
      if ( data != 0 )
            curNameInst->children.insertMulti( data, newNameInst );
      return newNameInst;
}

void ParseData::initNameWalk()
{
      curNameInst = rootName;
      curNameChild = 0;
}

/* Goes into the next child scope. The number of the child is already set up.
 * We need this for the syncronous name tree and parse tree walk to work
 * properly. It is reset on entry into a scope and advanced on poping of a
 * scope. A call to enterNameScope should be accompanied by a corresponding
 * popNameScope. */
NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
{
      /* Save off the current data. */
      NameFrame retFrame;
      retFrame.prevNameInst = curNameInst;
      retFrame.prevNameChild = curNameChild;
      retFrame.prevLocalScope = localNameScope;

      /* Enter into the new name scope. */
      for ( int i = 0; i < numScopes; i++ ) {
            curNameInst = curNameInst->childVect[curNameChild];
            curNameChild = 0;
      }

      if ( isLocal )
            localNameScope = curNameInst;

      return retFrame;
}

/* Return from a child scope to a parent. The parent info must be specified as
 * an argument and is obtained from the corresponding call to enterNameScope.
 * */
void ParseData::popNameScope( const NameFrame &frame )
{
      /* Pop the name scope. */
      curNameInst = frame.prevNameInst;
      curNameChild = frame.prevNameChild+1;
      localNameScope = frame.prevLocalScope;
}

void ParseData::resetNameScope( const NameFrame &frame )
{
      /* Pop the name scope. */
      curNameInst = frame.prevNameInst;
      curNameChild = frame.prevNameChild;
      localNameScope = frame.prevLocalScope;
}


void ParseData::unsetObsoleteEntries( FsmAp *graph )
{
      /* Loop the reference names and increment the usage. Names that are no
       * longer needed will be unset in graph. */
      for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
            /* Get the name. */
            NameInst *name = *ref;
            name->numUses += 1;

            /* If the name is no longer needed unset its corresponding entry. */
            if ( name->numUses == name->numRefs )
                  graph->unsetEntry( name->id );
      }
}

NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
{
      /* Queue needed for breadth-first search, load it with the start node. */
      NameInstList nameQueue;
      nameQueue.append( refFrom );

      NameSet result;
      while ( nameQueue.length() > 0 ) {
            /* Pull the next from location off the queue. */
            NameInst *from = nameQueue.detachFirst();

            /* Look for the name. */
            NameMapEl *low, *high;
            if ( from->children.findMulti( data, low, high ) ) {
                  /* Record all instances of the name. */
                  for ( ; low <= high; low++ )
                        result.insert( low->value );
            }

            /* Name not there, do breadth-first operation of appending all
             * childrent to the processing queue. */
            for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
                  if ( !recLabelsOnly || (*name)->isLabel )
                        nameQueue.append( *name );
            }
      }

      /* Queue exhausted and name never found. */
      return result;
}

void ParseData::resolveFrom( NameSet &result, NameInst *refFrom, 
            const NameRef &nameRef, int namePos )
{
      /* Look for the name in the owning scope of the factor with aug. */
      NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
      
      /* If there are more parts to the name then continue on. */
      if ( ++namePos < nameRef.length() ) {
            /* There are more components to the name, search using all the part
             * results as the base. */
            for ( NameSet::Iter name = partResult; name.lte(); name++ )
                  resolveFrom( result, *name, nameRef, namePos );
      }
      else {
            /* This is the last component, append the part results to the final
             * results. */
            result.insert( partResult );
      }
}

/* Write out a name reference. */
ostream &operator<<( ostream &out, const NameRef &nameRef )
{
      int pos = 0;
      if ( nameRef[pos] == 0 ) {
            out << "::";
            pos += 1;
      }
      out << nameRef[pos++];
      for ( ; pos < nameRef.length(); pos++ )
            out << "::" << nameRef[pos];
      return out;
}

ostream &operator<<( ostream &out, const NameInst &nameInst )
{
      /* Count the number fully qualified name parts. */
      int numParents = 0;
      NameInst *curParent = nameInst.parent;
      while ( curParent != 0 ) {
            numParents += 1;
            curParent = curParent->parent;
      }

      /* Make an array and fill it in. */
      curParent = nameInst.parent;
      NameInst **parents = new NameInst*[numParents];
      for ( int p = numParents-1; p >= 0; p-- ) {
            parents[p] = curParent;
            curParent = curParent->parent;
      }
            
      /* Write the parents out, skip the root. */
      for ( int p = 1; p < numParents; p++ )
            out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );

      /* Write the name and cleanup. */
      out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
      delete[] parents;
      return out;
}

struct CmpNameInstLoc
{
      static int compare( const NameInst *ni1, const NameInst *ni2 )
      {
            if ( ni1->loc.line < ni2->loc.line )
                  return -1;
            else if ( ni1->loc.line > ni2->loc.line )
                  return 1;
            else if ( ni1->loc.col < ni2->loc.col )
                  return -1;
            else if ( ni1->loc.col > ni2->loc.col )
                  return 1;
            return 0;
      }
};

void errorStateLabels( const NameSet &resolved )
{
      MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
      mergeSort.sort( resolved.data, resolved.length() );
      for ( NameSet::Iter res = resolved; res.lte(); res++ )
            error((*res)->loc) << "  -> " << **res << endl;
}


NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
{
      NameInst *nameInst = 0;

      /* Do the local search if the name is not strictly a root level name
       * search. */
      if ( nameRef[0] != 0 ) {
            /* If the action is referenced, resolve all of them. */
            if ( action != 0 && action->actionRefs.length() > 0 ) {
                  /* Look for the name in all referencing scopes. */
                  NameSet resolved;
                  for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
                        resolveFrom( resolved, *actRef, nameRef, 0 );

                  if ( resolved.length() > 0 ) {
                        /* Take the first one. */
                        nameInst = resolved[0];
                        if ( resolved.length() > 1 ) {
                              /* Complain about the multiple references. */
                              error(loc) << "state reference " << nameRef << 
                                          " resolves to multiple entry points" << endl;
                              errorStateLabels( resolved );
                        }
                  }
            }
      }

      /* If not found in the local scope, look in global. */
      if ( nameInst == 0 ) {
            NameSet resolved;
            int fromPos = nameRef[0] != 0 ? 0 : 1;
            resolveFrom( resolved, rootName, nameRef, fromPos );

            if ( resolved.length() > 0 ) {
                  /* Take the first. */
                  nameInst = resolved[0];
                  if ( resolved.length() > 1 ) {
                        /* Complain about the multiple references. */
                        error(loc) << "state reference " << nameRef << 
                                    " resolves to multiple entry points" << endl;
                        errorStateLabels( resolved );
                  }
            }
      }

      if ( nameInst == 0 ) {
            /* If not found then complain. */
            error(loc) << "could not resolve state reference " << nameRef << endl;
      }
      return nameInst;
}

void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
{
      for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
            switch ( item->type ) {
                  case InlineItem::Entry: case InlineItem::Goto:
                  case InlineItem::Call: case InlineItem::Next: {
                        /* Resolve, pass action for local search. */
                        NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );

                        /* Note the reference in the name. This will cause the entry
                         * point to survive to the end of the graph generating walk. */
                        if ( target != 0 )
                              target->numRefs += 1;
                        item->nameTarg = target;
                        break;
                  }
                  default:
                        break;
            }
      }
}

void ParseData::resolveInitNameRefs()
{
      /* Resolve references in all init blocks. */
      for ( InlineBlockList::Iter init = initCodeList; init.lte(); init++ )
            resolveNameRefs( init->inlineList, 0 );
}

/* Resolve references to labels in actions. */
void ParseData::resolveActionNameRefs()
{
      for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
            /* Only care about the actions that are referenced. */
            if ( act->actionRefs.length() > 0 )
                  resolveNameRefs( act->inlineList, act );
      }
}

/* Walk a name tree starting at from and fill the name index. */
void ParseData::fillNameIndex( NameInst *from )
{
      /* Fill the value for from in the name index. */
      nameIndex[from->id] = from;

      /* Recurse on the implicit final state and then all children. */
      if ( from->final != 0 )
            fillNameIndex( from->final );
      for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
            fillNameIndex( *name );
}

/* Build the name tree and supporting data structures. */
void ParseData::makeNameTree( GraphDictEl *dictEl )
{
      /* Create the root name and set up curNameInst for the walk. */
      rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
      curNameInst = rootName;
      curNameChild = 0;

      if ( dictEl != 0 ) {
            /* A start location has been specified. */
            dictEl->value->makeNameTree( dictEl->loc, this );
      }
      else {
            /* First make the name tree. */
            for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
                  /* Recurse on the instance. */
                  glel->value->makeNameTree( glel->loc, this );
            }
      }
      
      /* The number of nodes in the tree can now be given by nextNameId */
      nameIndex = new NameInst*[nextNameId];
      memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
      fillNameIndex( rootName );
}


/* Is the alphabet type a signed type? */
bool ParseData::isAlphSigned()
{
      return alphType == AT_Char || alphType == AT_Short || alphType == AT_Int;
}

void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
{
      Expression *expression = new Expression( builtin );
      Join *join = new Join( expression );
      VarDef *varDef = new VarDef( name, join );
      GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
      graphDict.insert( graphDictEl );
}

/* Initialize the graph dict with builtin types. */
void ParseData::initGraphDict( )
{
      createBuiltin( "any", BT_Any );
      createBuiltin( "ascii", BT_Ascii );
      createBuiltin( "extend", BT_Extend );
      createBuiltin( "alpha", BT_Alpha );
      createBuiltin( "digit", BT_Digit );
      createBuiltin( "alnum", BT_Alnum );
      createBuiltin( "lower", BT_Lower );
      createBuiltin( "upper", BT_Upper );
      createBuiltin( "cntrl", BT_Cntrl );
      createBuiltin( "graph", BT_Graph );
      createBuiltin( "print", BT_Print );
      createBuiltin( "punct", BT_Punct );
      createBuiltin( "space", BT_Space );
      createBuiltin( "xdigit", BT_Xdigit );
      createBuiltin( "null", BT_Null );
      createBuiltin( "empty", BT_Empty );
}

/* Set the alphabet type. If type types are not valid returns false. */
bool ParseData::setAlphType( String s1, String s2 )
{
      bool valid = false;
      if ( strcmp( s1, "unsigned" ) == 0 ) {
            if ( strcmp( s2, "char" ) == 0 ) {
                  alphType = AT_UnsignedChar;
                  valid = true;
            }
            else if ( strcmp( s2, "short" ) == 0 ) {
                  alphType = AT_UnsignedShort;
                  valid = true;
            }
            else if ( strcmp( s2, "int" ) == 0 ) {
                  alphType = AT_UnsignedInt;
                  valid = true;
            }
      }
      return valid;
}

/* Set the alphabet type. If type types are not valid returns false. */
bool ParseData::setAlphType( String s1 )
{
      bool valid = false;
      if ( strcmp( s1, "char" ) == 0 ) {
            alphType = AT_Char;
            valid = true;
      }
      else if ( strcmp( s1, "short" ) == 0 ) {
            alphType = AT_Short;
            valid = true;
      }
      else if ( strcmp( s1, "int" ) == 0 ) {
            alphType = AT_Int;
            valid = true;
      }
      return valid;
}

void ParseData::setLowerUpperRange( )
{
      if ( lowerNum.length() != 0 ) {
            /* If ranges are given then interpret the alphabet type. */
            lowKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
            highKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
      }
      else {
            /* Use default ranges for alphabet type. */
            switch ( alphType ) {
            case AT_Char:
                  lowKey = RL_CHAR_MIN;
                  highKey = RL_CHAR_MAX;
                  break;
            case AT_UnsignedChar:
                  lowKey = RL_UCHAR_MIN;
                  highKey = RL_UCHAR_MAX;
                  break;
            case AT_Short:
                  lowKey = RL_SHORT_MIN;
                  highKey = RL_SHORT_MAX;
                  break;
            case AT_UnsignedShort:
                  lowKey = RL_USHORT_MIN;
                  highKey = RL_USHORT_MAX;
                  break;
            case AT_Int:
                  lowKey = RL_INT_MIN;
                  highKey = RL_INT_MAX;
                  break;
            case AT_UnsignedInt:
                  lowKey = RL_UINT_MIN;
                  highKey = RL_UINT_MAX;
                  break;
            }
      }
}

/* Initialize the key operators object that will be referenced by all fsms
 * created. */
void ParseData::initKeyOps( )
{
      /* Signedness and bounds. */
      keyOps.isAlphSigned = isAlphSigned();
      keyOps.lowKey = lowKey;
      keyOps.highKey = highKey;
}

/* Fill the action index for easy access to the actions. */
void ParseData::fillActionIndex()
{
      /* Make the array of pointers to action list elements. */
      actionIndex = new Action*[actionList.size()];

      int nextActionId = 0;
      for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
            /* Only ever interested in referenced actions. */
            if ( act->actionRefs.length() > 0 ) {
                  act->actionId = nextActionId;
                  actionIndex[nextActionId] = act;
                  nextActionId += 1;
            }
      }
}

void ParseData::printNameInst( NameInst *nameInst, int level )
{
      for ( int i = 0; i < level; i++ )
            cerr << "  ";
      cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") << 
                  "  id: " << nameInst->id << 
                  "  refs: " << nameInst->numRefs << endl;
      for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
            printNameInst( *name, level+1 );
}


/* Make the graph from a graph dict node. Does minimization and state sorting. */
FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
{
      /* Build the graph from a walk of the parse tree. */
      FsmAp *graph = gdNode->value->walk( this );

      /* Resolve any labels that point to multiple states. Any labels that are
       * still around are referenced only by gotos and calls and they need to be
       * made into deterministic entry points. */
      graph->deterministicEntry();

      /*
       * All state construction is now complete.
       */

      /* Transfer actions from out action tables to transitions or to the eof
       * action table. */
      for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
            graph->transferOutActions( *state );

      /* Transfer global error actions. */
      for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
            graph->transferErrorActions( state, 0 );

      /* Remove unreachable states. There should be no dead end states however.
       * The subtract and intersection operators are the only places where they
       * may be created and those operators clean them up. */
      graph->removeUnreachableStates();

      /* No more fsm operations are to be done. Action ordering numbers are
       * no longer of use and will just hinder minimization. Clear them. */
      graph->nullActionKeys();

      /* Transition priorities are no longer of use. We can clear them
       * because they will just hinder minimization as well. Clear them. */
      graph->clearAllPriorities();

      /* Minimize here even if we minimized at every op. Now that function
       * keys have been cleared we may get a more minimal fsm. */
      switch ( minimizeLevel ) {
            case MinimizeNone:
                  break;
            case MinimizeApprox:
                  graph->minimizeApproximate();
                  break;
            case MinimizeStable:
                  graph->minimizeStable();
                  break;
            case MinimizePartition1:
                  graph->minimizePartition1();
                  break;
            case MinimizePartition2:
                  graph->minimizePartition2();
                  break;
      }

      return graph;
}

void ParseData::printNameTree()
{
      /* Print the name instance map. */
      for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
            printNameInst( *name, 0 );
      
      cerr << "name index:" << endl;
      /* Show that the name index is correct. */
      for ( int ni = 0; ni < nextNameId; ni++ ) {
            cerr << ni << ": ";
            char *name = nameIndex[ni]->name;
            cerr << ( name != 0 ? name : "<ANON>" ) << endl;
      }
}

FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
{
      /* Build the name tree and supporting data structures. */
      makeNameTree( gdNode );

      /* Resove name references from gdNode. */
      initNameWalk();
      gdNode->value->resolveNameRefs( this );

      /* Assign action ids and fill a list of pointers indexed by ids. */
      fillActionIndex();

      /* Just building the specified graph. */
      initNameWalk();
      FsmAp *mainGraph = makeInstance( gdNode );

      return mainGraph;
}

FsmAp *ParseData::makeAll()
{
      /* Build the name tree and supporting data structures. */
      makeNameTree( 0 );

      /* Resove name references in the tree. */
      initNameWalk();
      for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
            glel->value->resolveNameRefs( this );

      /* Resolve action code name references. */
      resolveInitNameRefs();
      resolveActionNameRefs();

      /* Assign action ids and fill a list of pointers indexed by ids. */
      fillActionIndex();

      FsmAp *mainGraph = 0;
      FsmAp **graphs = new FsmAp*[instanceList.length()];
      int numOthers = 0;

      /* Make all the instantiations, we know that main exists in this list. */
      initNameWalk();
      for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
            if ( strcmp( glel->key, machineMain ) == 0 ) {
                  /* Main graph is always instantiated. */
                  mainGraph = makeInstance( glel );
            }
            else {
                  /* Check to see if the instance is ever referenced. */
                  NameInst *nameInst = nextNameScope();
                  if ( nameInst->anyRefsRec() )
                        graphs[numOthers++] = makeInstance( glel );
                  else {
                        /* Need to walk over the name tree item. */
                        NameFrame nameFrame = enterNameScope( true, 1 );
                        popNameScope( nameFrame );
                  }
            }
      }

      if ( numOthers > 0 ) {
            /* Add all the other graphs into main. */
            mainGraph->globOp( graphs, numOthers );
      }

      delete[] graphs;
      return mainGraph;
}

FsmAp *ParseData::makeGraph( GraphDictEl *gdNode )
{
      if ( gdNode != 0 )
            return makeSpecific( gdNode );
      else
            return makeAll();
}

/* Dump either a specific machine or main machine and all instantiations (in
 * the case gdNode == 0). */
void ParseData::dumpFsm( GraphDictEl *dictEl )
{
      /* Interpret the alphabet type. */
      setLowerUpperRange();
      initKeyOps();

      /* Make the graph, do minimization. */
      FsmAp *graph = makeGraph( dictEl );

      /* If any errors have occured in the input file then don't write anything. */
      if ( gblErrorCount > 0 )
            return;

      /* Create a dump object. */
      FsmDump dump( fsmName, this, graph, *outStream );

      /* Dump the fsm. */
      dump.dumpGraph( );
}

/* Four different things can happen here, there can be an error, the machine
 * can be ignore, the main machine can be output by default and a specific
 * machine can be output. */
void ParseData::dumpFsm()
{
      /* Bail if there was no machine given. Nothing to output. */
      if ( ! machineGiven )
            return;

      /* A machine was given, proceed. */
      GraphDictEl *dictEl = 0;
      if ( shouldGenerate( dictEl ) )
            dumpFsm( dictEl );
}

void ParseData::printFsm()
{
      /* FIXME: Not yet implemented. */
      assert( false );
}

void ParseData::generateGraphviz( GraphDictEl *dictEl )
{
      /* Interpret the alphabet type. */
      setLowerUpperRange();
      initKeyOps();

      /* Make the graph, do minimization. */
      FsmAp *graph = makeGraph( dictEl );

      /* If any errors have occured in the input file then don't write anything. */
      if ( gblErrorCount > 0 )
            return;

      /* Make the machine. */
      RedFsmAp machine( graph, false );

      /* Do ordering and choose state ids. */
      machine.depthFirstOrdering();
      machine.sequentialStateIds();

      /* For dot file generation we want to pick default transitions. */
      machine.chooseDefaultSpan();

      /* Make the generator. */
      GraphvizDotGen dotGen( fsmName, this, &machine, *outStream );

      /* Write out with it. */
      dotGen.writeDotFile();
}

bool ParseData::shouldGenerate( GraphDictEl *&genDictEl )
{
      bool shouldGenerate = false;

      if ( machineSpec == 0 || strcmp(machineSpec, fsmName) == 0 ) {
            /* Remember that we found a machine spec to generate in. */
            machineSpecFound = true;

            if ( machineName != 0 ) {
                  /* In the right spec, look for the machine. */
                  GraphDictEl *dictEl = graphDict.find( machineName );
                  if ( dictEl != 0 ) {
                        shouldGenerate = true;
                        genDictEl = dictEl;
                  }
                  else {
                        /* No recovery action. Fsm is skipped. */
                        error(fsmStartSecLoc) << "machine " << machineName << 
                                          " not found in " << fsmName << endl;
                  }
            }
            else {
                  /* No machine name. Need to have a main. Make sure it was given. */
                  GraphDictEl *mainEl = graphDict.find( machineMain );
                  if ( mainEl != 0 ) {
                        shouldGenerate = true;
                        genDictEl = 0;
                  }
                  else {
                        /* No recovery action, fsm is skipped. */
                        error(fsmStartSecLoc) << "main graph not defined in \"" 
                                    << fsmName << "\"" << endl;
                  } 
            }
      }
      return shouldGenerate;
}

void ParseData::generateGraphviz( )
{
      /* If there was no machine given, we cannot generate graphviz output. */
      if ( ! machineGiven )
            return;

      /* A machine was given, proceed. In when generatign g*/
      GraphDictEl *dictEl = 0;
      if ( !graphvizDone ) {
            /* For graphviz generate only the first graph. */
            if ( shouldGenerate( dictEl ) ) {
                  generateGraphviz( dictEl );
                  graphvizDone = true;
            }
      }
}


/* Generate the codegen depending on the command line options given. */
FsmCodeGen *ParseData::makeCodeGen( RedFsmAp *redFsm )
{
      FsmCodeGen *codeGen = 0;
      switch ( outputFormat ) {
      case OutCCode:
            switch ( codeStyle ) {
            case GenTables:
                  codeGen = new CTabCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFTables:
                  codeGen = new CFTabCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFlat:
                  codeGen = new CFlatCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFFlat:
                  codeGen = new CFFlatCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenGoto:
                  codeGen = new CGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFGoto:
                  codeGen = new CFGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenIpGoto:
                  codeGen = new CIpGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            }
            break;
      case OutCppCode:
            switch ( codeStyle ) {
            case GenTables:
                  codeGen = new CppTabCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFTables:
                  codeGen = new CppFTabCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFlat:
                  codeGen = new CppFlatCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFFlat:
                  codeGen = new CppFFlatCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenGoto:
                  codeGen = new CppGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFGoto:
                  codeGen = new CppFGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenIpGoto:
                  codeGen = new CppIpGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            }
            break;
      case OutObjCCode:
            switch ( codeStyle ) {
            case GenTables:
                  codeGen = new ObjCTabCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFTables:
                  codeGen = new ObjCFTabCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFlat:
                  codeGen = new ObjCFlatCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFFlat:
                  codeGen = new ObjCFFlatCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenGoto:
                  codeGen = new ObjCGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenFGoto:
                  codeGen = new ObjCFGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            case GenIpGoto:
                  codeGen = new ObjCIpGotoCodeGen( fsmName, this, redFsm, *outStream );
                  break;
            }
            break;


      /* Called codegen when generating something else. */
      default:
            assert( false );
      }
      return codeGen;
}

/* Generate the code for an fsm. Assumes parseData is set up properly. Called
 * by parser code. */
void ParseData::generateCode( )
{
      /* The machine that will take the compressed fsm and the code generator. */
      RedFsmAp *redFsm = 0;
      FsmCodeGen *codeGen = 0;

      /* Nothing to do if there is no datalist or machine given. */
      if ( dataList.length() == 0 && ! machineGiven )
            return;
      
      /* If the graph dict is given, find the main machine. */
      if ( machineGiven ) {
            /* Look for the main instance. The parser prevents main from going
             * into the graph dictionary as a variable def and not an instance. */
            GraphDictEl *mainEl = graphDict.find( machineMain );
            if ( mainEl == 0 ) {
                  /* No recovery action, fsm is skipped. */
                  error(fsmStartSecLoc) << "main graph not defined in \"" 
                              << fsmName << "\"" << endl;
                  return;
            }

            /* Interpret the alphabet type. */
            setLowerUpperRange();
            initKeyOps();

            /* Make the main graph. */
            FsmAp *mainGraph = makeGraph( 0 );

            /* Make the machine. */
            redFsm = new RedFsmAp( mainGraph, true );

            /* Order the states. */
            redFsm->depthFirstOrdering();

            if ( codeStyle == GenGoto || codeStyle == GenFGoto || codeStyle == GenIpGoto ) {
                  /* For goto driven machines we can keep the original depth
                   * first ordering because it's ok if the state ids are not
                   * sequential. Split the the ids by final state status. */
                  redFsm->sortStateIdsByFinal();
            }
            else {
                  /* For table driven machines the location of the state is used to
                   * identify it so the states must be sorted by their final ids.
                   * Though having a deterministic ordering is important,
                   * specifically preserving the depth first ordering is not because
                   * states are stored in tables. */
                  redFsm->sortStatesByFinal();
                  redFsm->sequentialStateIds();
            }

            /* Find the first final state. This is the final state with the lowest
             * id.  */
            redFsm->findFirstFinState();

            /* Choose default transitions and the single transition. */
            redFsm->chooseDefaultSpan();
            
            /* Maybe do flat expand, otherwise choose single. */
            if ( codeStyle == GenFlat || codeStyle == GenFFlat )
                  redFsm->makeFlat();
            else
                  redFsm->chooseSingle();
      }

      /* If any errors have occured in the input file then don't write anything. */
      if ( gblErrorCount > 0 )
            return;

      /* Make a code generator that will output the header/code. */
      codeGen = makeCodeGen( redFsm );

      /* Write line directive. */
      codeGen->startCodeGen();

      /* If the struct element was used then output the header portion. */
      if ( dataList.length() > 0 )
            codeGen->writeOutHeader( );
      
      /* If there was a machine given, gather info on it and write it out. */
      if ( machineGiven ) {
            codeGen->analyzeMachine();
            codeGen->writeOutCode( );
      }

      /* Write line directive. */
      codeGen->endCodeGen();

      /* Done with the code generator. */
      delete codeGen;
      if ( redFsm != 0 )
            delete redFsm;
}


Generated by  Doxygen 1.6.0   Back to index