Logo Search packages:      
Sourcecode: ragel version File versions

parsedata.h

/*
 *  Copyright 2001-2004 Adrian Thurston <adriant@ragel.ca>
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */

#ifndef _PARSEDATA_H
#define _PARSEDATA_H

#include <limits.h>
#include "ptreetypes.h"
#include "astring.h"
#include "avlmap.h"
#include "bstmap.h"
#include "astring.h"
#include "vectsimp.h"
#include "dlist.h"
#include "fsmgraph.h"
#include "compare.h"

/* Forwards. */
struct RedFsmAp;
struct FsmCodeGen;
struct NameInst;

/* The minimum chars are the absolute value of the real minimum because the
 * sign is stored separately in integers read in so they are compared in the
 * positive. Each is casted to an unsigned because the data part of the number
 * is in unsigned int size. 
 */
#define RL_CHAR_MIN    ((long)((char)CHAR_MIN))
#define RL_CHAR_MAX    ((long)((char)CHAR_MAX))
#define RL_UCHAR_MIN   ((unsigned long)((unsigned char)0))
#define RL_UCHAR_MAX   ((unsigned long)((unsigned char)UCHAR_MAX))
#define RL_SHORT_MIN   ((long)((short)SHRT_MIN))
#define RL_SHORT_MAX   ((long)((short)SHRT_MAX))
#define RL_USHORT_MIN  ((unsigned long)((unsigned short)0))
#define RL_USHORT_MAX  ((unsigned long)((unsigned short)USHRT_MAX))
#define RL_INT_MIN     ((long)((int)INT_MIN))
#define RL_INT_MAX     ((long)((int)INT_MAX))
#define RL_UINT_MIN    ((unsigned long)((unsigned int)0))
#define RL_UINT_MAX    ((unsigned long)((unsigned int)UINT_MAX))
#define RL_LONG_MIN    ((long)LONG_MIN)
#define RL_LONG_MAX    ((long)LONG_MAX)
#define RL_ULONG_MIN   ((unsigned long)0)
#define RL_ULONG_MAX   ((unsigned long)LONG_MAX)


/* Types of builtin machines. */
enum BuiltinMachine
{
      BT_Any,
      BT_Ascii,
      BT_Extend,
      BT_Alpha,
      BT_Digit,
      BT_Alnum,
      BT_Lower,
      BT_Upper,
      BT_Cntrl,
      BT_Graph,
      BT_Print,
      BT_Punct,
      BT_Space,
      BT_Xdigit,
      BT_Null,
      BT_Empty
};

/* Location in an input file. */
struct InputLoc
{
      InputLoc( ) 
            : line(0), col(0) { }
      InputLoc( const BISON_YYLTYPE &loc );

      int line;
      int col;
};

/* Reference to a named state. */
typedef Vector<char*> NameRef;
typedef Vector<NameRef*> NameRefList;
typedef Vector<NameInst*> NameTargList;

/* Element in a list of strings used for storing inline code blocks. */
struct InlineBlock
{
      InlineBlock( const InputLoc &loc, InlineList *inlineList, const NameRefList &nameRefs ) 
                  : loc(loc), inlineList(inlineList), nameRefs(nameRefs) { }

      InputLoc loc;
      InlineList *inlineList;
      NameRefList nameRefs;
      NameTargList nameTargs;

      InlineBlock *prev, *next;
};

/* List of inline code blocks. */
typedef DList<InlineBlock> InlineBlockList;

/* Nodes in the tree that use this action. */
typedef Vector<NameInst*> ActionRefs;

/* Element in list of actions. Contains the string for the code to exectute. */
struct Action 
:
      public DListEl<Action>,
      public AvlTreeEl<Action>
{
public:
      Action( const InputLoc &loc, char *name, InlineList *inlineList, 
                  const NameRefList &nameRefs );
      ~Action();

      /* Key for action dictionary. */
      char *getKey() const { return name; }

      /* Data collected during parse. */
      InputLoc loc;
      char *name;
      InlineList *inlineList;
      int actionId;

      /* Places in the input text that reference the action. */
      ActionRefs actionRefs;

      /* Number of references in the final machine. */
      int numTransRefs;
      int numEofRefs;

      /* List of qualified name references and associated targets. */
      NameRefList nameRefs;
      NameTargList nameTargs;
};

/* A list of actions. */
typedef DList<Action> ActionList;
typedef AvlTree<Action, char *, CmpStr> ActionDict;

/* Structure for reverse action mapping. */
struct RevActionMapEl
{
      char *name;
      InputLoc location;
};

/* Store the value and type of a priority augmentation. */
struct PriorityAug
{
      PriorityAug( AugType type, int priorKey, int priorValue ) :
            type(type), priorKey(priorKey), priorValue(priorValue) { }

      AugType type;
      int priorKey;
      int priorValue;
};

/* Structrue represents an action assigned to some FactorWithAug node. The
 * factor with aug will keep an array of these. */
struct ParserAction
{
      ParserAction( AugType type, int localErrKey, Action *action )
            : type(type), localErrKey(localErrKey), action(action) { }

      AugType type;
      int localErrKey;
      Action *action;
};

/* Context statement, kept unique by name. */
struct Context
{
      Context( char *data, int id )
            : data(data), id(id), declared(false) {}

      char *data;
      int id;
      bool declared;

      Context *prev, *next;
};
typedef AvlMap<char *, Context*, CmpStr> ContextMap;
typedef AvlMapEl<char *, Context*> ContextMapEl;
typedef DList<Context> ContextList;

struct ContextEmbed
{
      ContextEmbed( AugType type, int contextId )
            : type(type), contextId(contextId) { }
      
      AugType type;
      int contextId;
};

struct VarDef;
struct Join;
struct Expression;
struct Term;
struct FactorWithAug;
struct FactorWithLabel;
struct FactorWithRep;
struct FactorWithNeg;
struct Factor;
struct Literal;
struct Range;
struct RegExpr;
struct ReItem;
struct ReOrBlock;
struct ReOrItem;

/* Graph dictionary. */
struct GraphDictEl 
:
      public AvlTreeEl<GraphDictEl>,
      public DListEl<GraphDictEl>
{
      GraphDictEl( char *k ) 
            : key(k), value(0), isInstance(false) { }
      GraphDictEl( char *k, VarDef *value ) 
            : key(k), value(value), isInstance(false) { }

      const char *getKey() { return key; }

      char *key;
      VarDef *value;
      bool isInstance;

      /* Location info of graph definition. Points to variable name of assignment. */
      InputLoc loc;
};

typedef AvlTree<GraphDictEl, char*, CmpStr> GraphDict;
typedef DList<GraphDictEl> GraphList;

/* Priority name dictionary. */
typedef AvlMapEl<char*, int> PriorDictEl;
typedef AvlMap<char*, int, CmpStr> PriorDict;

/* Local error name dictionary. */
typedef AvlMapEl<char*, int> LocalErrDictEl;
typedef AvlMap<char*, int, CmpStr> LocalErrDict;

/* Types of alphabet supported by Ragel. */
enum AlphType
{
      AT_Char,
      AT_UnsignedChar,
      AT_Short,
      AT_UnsignedShort,
      AT_Int,
      AT_UnsignedInt
};

/* Tree of instantiated names. */
typedef BstMapEl<char*, NameInst*> NameMapEl;
typedef BstMap<char*, NameInst*, CmpStr> NameMap;
typedef Vector<NameInst*> NameVect;
typedef BstSet<NameInst*> NameSet;

/* Node in the tree of instantiated names. */
struct NameInst
{
      NameInst( const InputLoc &loc, NameInst *parent, char *name, int id, bool isLabel ) 
            : loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), 
            numRefs(0), numUses(0), start(0), final(0) {}

      InputLoc loc;

      /* Keep parent pointers in the name tree to retrieve 
       * fully qulified names. */
      NameInst *parent;

      char *name;
      int id;
      bool isLabel;

      int numRefs;
      int numUses;

      /* Names underneath us, excludes anonymous names. */
      NameMap children;

      /* All names underneath us in order of appearance. */
      NameVect childVect;

      /* Join scopes need an implicit "final" target. */
      NameInst *start, *final;

      /* During a fsm generation walk, lists the names that are referenced by
       * epsilon operations in the current scope. After the link is made by the
       * epsilon reference and the join operation is complete, the label can
       * have its refcount decremented if there are no more references then the
       * entry point can be removed from the fsm returned. */
      NameVect referencedNames;

      /* Pointers for the name search queue. */
      NameInst *prev, *next;

      /* Check if this name inst or any name inst below is referenced. */
      bool anyRefsRec();
};

typedef DList<NameInst> NameInstList;

/* Stack frame used in walking the name tree. */
struct NameFrame 
{
      NameInst *prevNameInst;
      int prevNameChild;
      NameInst *prevLocalScope;
};

/* Class to collect information about the machine during the 
 * parse of input. */
struct ParseData
{
      /* Create a new parse data object. This is done at the beginning of every
       * fsm specification. */
      ParseData( const String &fileName );
      ~ParseData();

      /*
       * Setting up the graph dict.
       */

      /* Initialize a graph dict with the basic fsms. */
      void initGraphDict();
      void createBuiltin( char *name, BuiltinMachine builtin );

      /* Make a name id in the current name instantiation scope if it is not
       * already there. */
      NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel );
      void makeNameTree( GraphDictEl *gdNode );
      void fillNameIndex( NameInst *from );
      void printNameTree();

      /* Increments the usage count on entry names. Names that are no longer
       * needed will have their entry points unset. */
      void unsetObsoleteEntries( FsmAp *graph );

      /* Resove name references in action code and epsilon transitions. */
      NameSet resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly );
      void resolveFrom( NameSet &result, NameInst *refFrom, 
                  const NameRef &nameRef, int namePos );
      NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action );
      void resolveNameRefs( InlineList *inlineList, Action *action );
      void resolveActionNameRefs();
      void resolveInitNameRefs();

      /* Make an index of pointers to the action blocks. */
      void fillActionIndex();

      /* Set the alphabet type. If type types are not valid returns false. */
      bool setAlphType( String s1, String s2 );
      bool setAlphType( String s1 );

      /* Dumping the name instantiation tree. */
      void printNameInst( NameInst *nameInst, int level );

      /* Make the graph from a graph dict node. Does minimization. */
      FsmAp *makeInstance( GraphDictEl *gdNode );
      FsmAp *makeSpecific( GraphDictEl *gdNode );
      FsmAp *makeAll();
      FsmAp *makeGraph( GraphDictEl *gdNode );

      /* Resolving machine paths. */
      bool shouldGenerate( GraphDictEl *&genDictEl );

      /* Dumping and printing. */
      void dumpFsm( GraphDictEl *gdNode );
      void dumpFsm();
      void printFsm();

      /* Generate graphviz code. */
      void generateGraphviz( GraphDictEl *gdNode );
      void generateGraphviz();

      /* Generate and write out the fsm. */
      void generateCode( );
      void doGenerateCode( GraphDictEl *gdNode );
      FsmCodeGen *makeCodeGen( RedFsmAp *machine2 );

      /* Set the lower and upper range from the lower and upper number keys. */
      void setLowerUpperRange( );
      void initKeyOps();

      /*
       * Querying the parse data
       */

      /* Is the alphabet a signed type? */
      bool isAlphSigned();
      
      /*
       * Data collected during the parse.
       */

      /* Dictionary of graphs. Both instances and non-instances go here. */
      GraphDict graphDict;

      /* The list of instances. */
      GraphList instanceList;

      /* Dictionary of actions. Lets actions be defined and then referenced. */
      ActionDict actionDict;

      /* Dictionary of named priorities. */
      PriorDict priorDict;

      /* Dictionary of named local errors. */
      LocalErrDict localErrDict;

      /* List of actions. Will be pasted into a switch statement. */
      ActionList actionList;
      Action **actionIndex;

      /* List of sections of data to add to the fsm struct. */
      InlineBlockList dataList;
      char *baseClause;

      /* The id of the next priority name and label. */
      int nextPriorKey, nextLocalErrKey, nextNameId;
      
      /* The default priority number key for a machine. This is active during
       * the parse of the rhs of a machine assignment. */
      int curDefPriorKey;

      int curDefLocalErrKey;

      /* If a machine is given this is set. */
      bool machineGiven;

      /* Code sections put in the fsms init routine. */
      InlineBlockList initCodeList;

      /* Alphabet type. */
      AlphType alphType;

      /* Element type and get key expression. */
      InlineList *elementType;
      InlineList *getKeyExpr;

      /* The set of contexts. A context goes in the context list if it is
       * declared. */
      ContextMap contextMap;
      ContextList contextList;
      int nextContextId;

      /* The alphabet range. */
      String lowerNum, upperNum;
      long lowKey, highKey;
      InputLoc rangeLowLoc, rangeHighLoc;

      /* Key operators. */
      KeyOps keyOps;

      /* The name and location of the fsm. */
      String fsmName;
      InputLoc fsmStartSecLoc;
      InputLoc fsmEndSecLoc;

      /* The name of the file the fsm is from. */
      String fileName;

      /* Number of errors encountered parsing the fsm spec. */
      int errorCount;

      /* Counting the action and priority ordering. */
      int curActionOrd;
      int curPriorOrd;

      /* Root of the name tree. */
      NameInst *rootName;
      NameInst *curNameInst;
      int curNameChild;

      /* Root of the name tree used for doing local name searches. */
      NameInst *localNameScope;

      void initNameWalk();
      NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
      NameFrame enterNameScope( bool isLocal, int numScopes );
      void popNameScope( const NameFrame &frame );
      void resetNameScope( const NameFrame &frame );

      /* Make name ids to name inst pointers. */
      NameInst **nameIndex;
};

void afterOpMinimize( FsmAp *fsm );
long makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
long makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
long makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
long makeFsmKeyChar( char c, ParseData *pd );
void makeFsmKeyArray( long *result, char *data, int len, ParseData *pd );
int makeFsmUniqueKeyArray( long *result, char *data, int len, ParseData *pd );
FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );

void errorStateLabels( const NameSet &locations );

#endif /* _PARSEDATA_H */

Generated by  Doxygen 1.6.0   Back to index