Showing posts with label interpreter. Show all posts
Showing posts with label interpreter. Show all posts

Thursday, November 12, 2015

Day 2: A forth interpreter in C, tokenizing


On our second day, we analyze each token, determine if it is a number or an operator.


/

#include 
#include 
#include 
#include 

/*
 day 2 - tokenizing.
*/

#define MAXBUFFSIZE 128
char    BUFF[MAXBUFFSIZE];   // input string buffer.

char delims[] = " \t\n";
double fval   = 0;

enum  Dtypes{ERROR, FNUM, FADD,FSUB, FMUL, FDIV, FPRINT, QUIT};

int  lexer(char* tok)
{
    int  n = strlen(tok);
    char c = *tok;
    if (n == 1){
       if (c == '+') return FADD;
       if (c == '-') return FSUB;
       if (c == '*') return FMUL;
       if (c == '/') return FDIV;
       if (c == '.') return FPRINT;
    }
    if (getnumber(tok) == 0) {
       return FNUM;
    };
    if (strcmp(tok, "quit")== 0) {
       return QUIT;
    }
    return ERROR;
}


int checknumber(char *s)
{
  /* Description
     Returns 0 if s is a double real number, otherwise nonzero value.
     It is assumed that leading and trailing white spaces are removed from s.
     1 illegal character after '.'
     2 illegal starting character.
     3 illegal character after 'e'
     4 expected a '\0'
     5 empty string.
   */
  char* t = s;
  /* empty string */
  if (*t == '\0') return 5; 
  /* starts with an optional sign? */
  if ((*t == '-') || (*t == '+'))t++;
  // block of digits starting with a digit 
  if (isdigit(*t)) { /* starts with a digit? */
  while (isdigit(*t)) t++;
  if (*t == '.'){ /* a block starting with a .? */
    t++;
    if (*t == '\0') return 0;  // a number ending with a '.'
    while (isdigit(*t)) t++;
  }
  }else if (*t == '.'){ /* a number starting with a .? */
 t++;
 if (! isdigit(*t)) return 1;
 while (isdigit(*t)) t++;
  } else return 2;  /* illegal starting value! */
    
  /* optional exponent */
  if (tolower(*t) == 'e') {
 t++;
 if ((*t == '-') || (*t == '+')) {
    t++;
 }
 if (!isdigit(*t))return 3;
 while isdigit(*t) t++;
  }
  /* illegal character */
  if (*t != '\0') return 4;
  return 0;
};


int getnumber(char *s){
   int i = checknumber(s);
   if (i==0) {
      fval = atof(s);
   }
   return i;
};

int main() {
  int i =0;
  int exitnow = 0;
  while (1) {
    fputs(">>", stdout);
    fgets(BUFF, MAXBUFFSIZE-1, stdin);
    char *tok = strtok(BUFF, delims);
    while (tok)  {
      printf("%d %s", ++i, tok);
      if (tok == NULL) break;
     
      // process token.
      int dtype = lexer(tok);
      switch(dtype) {
      case  FNUM:
            printf("FNUM %f\n",fval);
            break;
      case  FADD:
      case  FSUB:
      case  FMUL:
      case  FDIV:
      case  FPRINT:
            printf("[%s]\n", tok);  
     break;
      case  QUIT:
            printf("quitting...\n");
     exitnow = 1;
     break;
      case ERROR:
     printf("ERROR\n");
     break;
      };
      tok = strtok(NULL, delims);
    } 
    if (exitnow) {
       puts("Thanks for using easyforth!\n");
       break;
    }
  };
  return 0;
};



Here is a compilation-run of our recent program version.

toto@toto-VirtualBox:~/Projects/forth$ gcc day2-easyforth.c
toto@toto-VirtualBox:~/Projects/forth$ ./a.out
>>-1.2345 320 7 + * - 
1 -1.2345FNUM -1.234500
2 320FNUM 320.000000
3 7FNUM 7.000000
4 +[+]
5 *[*]
6 -[-]
>>quit
7 quitquitting...
Thanks for using easyforth!

In the next installment we add a simple working calculator.


Wednesday, January 25, 2012

Simple floating point arithmetic for simple-forth

Here is our latest incarnation of simple-forth, a forth interpreter written in C.
This time we add floating point arithmetic capabilities. There are two basic ways to incorporate floating point capabilities. One is using the same integer stack and the other is using a separate foating point stack.



We use a separate floating point stack with maximum 32 elements. in our newest version shown below for simplicity. The built-in integer stack is 32 bits wide. Floating point numbers in our interpreter are 64 bit wide. The current stack element index is indicated by FP. The basic floating point operations are f+, f-, f*, f/ these Forth words are the equivalent floating point words corresponding to the built-in basic + - * / operator words



:


/*
file     simple-forth-0.0.3.c
author   Dr. Ernesto P. Adorio
         UPDEPP (University of the Philippines,
         Extension Program in Pampanga
         Clarkfield, Pampanga
email    ernesto.adorio@gmail.com
version  0.0.1 January 14, 2012 basic interpreter.
         0.0.2 January 16, 2012 interactivity added.
         0.0.3 January 25, 2012 basic floating point.
*/

#include 
#include 
#include 
#include 



enum {E_STACKUNDERFLOW, E_FSTACKUNDERFLOW} ERRCODES;


enum {L_EMIT, L_DOT,L_DOTT, L_SPACE, L_ADD, L_SUB, L_MUL, L_DIV, L_DROP, L_INT32, L_CR,
      L_FNUM, L_FPLUS, L_FSUB, L_FMUL, L_FDIV, L_FDOT, L_FDOTT, L_FDROP,
      L_BYE, L_EOS,L_ERROR} OPCODES;

const char *stdwords[] = { "emit", ".",  ".t", "space", "+", "-", "*", "/", "drop", "int32", "cr",
                           "fnum", "f+", "f-", "f*" ,"f/", "f.","f.t","fdrop",
                           "bye",
                           "\n",
                         };
// "int32" and "fnum" are dummies. 


#define MAXSTKLEN 32
#define MAXFSTKLEN 32
#define MAXTOKENLEN 128

int  LENSTDWORDS = sizeof(stdwords)/sizeof(stdwords[0]);
char LINEBUFFER[256];
char *goodbye= "bye";
int  ERRCODE = 0;
int SP = -1;   /* stack pointer index */
int stack[MAXSTKLEN];

int  opcode;
char *tokstart;
char *tokend;

int FP = -1;   /* floating point stack index */
double fstack[MAXFSTKLEN];


int numbertype(char *s) 
{
   /*
   Returns 
     0 - not a number!
     1 - an integer
     2 - a floating point number
   */
   
   char *t = s;
   while (isspace(*t)) t++;
   if (*t == '\0') return 0;
    
   if (*t == '+' || *t == '-') t++;
   if (*t == '\0') return 0;
   while (isdigit(*t)) t++;
   if (*t == '\0') {
     return 1; // an integer!
   }
  
   if (*t == '.') t++;
   while (isdigit(*t)) t++;
   if (*t == '\0') return 2;
   
   if (*t == 'e') t++;
   if (*t == '\0') return 0; // error!
   if (*t == '+' || *t == '-') t++;
   if (*t == '\0') return 0; // error!
   while (isdigit(*t)) t++;
   if (*t == '\0') return 2; // a floating point number!
   return 0; // not an integer or floating point.
}





int eval(int opcode)
{
    /* Evaluate opcode */
    switch (opcode){
    case L_EMIT: 
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
      } else {
        printf ("%c", stack[SP--]);
      }
      break;

    case L_DOT:
      printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%d", stack[SP--]);
      }
      break;

    case L_DOTT:
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%d", stack[SP]);
      }
      break;
      
    case L_SPACE:
      printf ("%c", 32);
      break;
      
    case L_ADD:
      //printf("opcode L_ADD %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] += stack[SP];
      SP--;
      break;
      
    case L_SUB:
      //printf("opcode L_SUB %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] -= stack[SP];
      SP--;
      break;
      
    case L_MUL:
      //printf("opcode L_MUL %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] *= stack[SP];
      SP--;
      break;
      
    case L_DIV:
      //printf("opcode L_DIV %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] /= stack[SP];
      SP--;
      break;
  
   case L_INT32:
      //printf("opcode L_INT32 %s\n",tokstart);
      stack[++SP]= atoi(tokstart);
      // printf("@@@ pushing an integer!");
      break;
      
    case L_DROP:
      //printf("opcode L_DROP %s \n", tokstart);
      SP=SP -1;
      break;
      
    case L_CR:
      // printf("opcode L_CR %s \n", tokstart);
      printf("\n");
      break;


/* simple floating point features */
    case L_FNUM:
      //printf("floating point number %s\n",tokstart);
      fstack[++FP]= atof(tokstart);
      break;
    
    case L_FPLUS:
      if (FP <= 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "fstack underflow!");
     break;
      } 
      fstack[FP-1] += fstack[FP];
      FP--;
      break;

    case L_FSUB:
      if (FP <= 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "fstack underflow!");
     break;
      } 
      fstack[FP-1] -= fstack[FP];
      FP--;
      break;

    case L_FMUL:
      if (FP <= 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "fstack underflow!");
     break;
      } 
      fstack[FP-1] *= fstack[FP];
      FP--;
      break;

    case L_FDIV:
      if (FP <= 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "fstack underflow!");
     break;
      } 
      fstack[FP-1] /= fstack[FP];
      FP--;
      break;

    case L_FDOT:
      printf("opcode FDOT %s\n", tokstart);
      if (FP < 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%f", fstack[FP--]);
      }
      break;

    case L_FDOTT:
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%f", fstack[FP]);
      }
      break;
      
    case L_BYE:
      printf("Terminating... ");
      break;
    default:
      ;
      break;
    }  
   
}

void nexttoken()
{   
    tokstart = tokend;
    //@ printf("inside nexttoken [%s]", tokstart);
    
    /* ignore leading white spaces */
    while (isspace(*tokstart)) tokstart++;

    /* find terminating space of end of string */
    tokend = tokstart;
    while (!isspace(*tokend) && *tokend != '\0') tokend++;
    
    /* string terminator */
    if (*tokend != '\0') {
      *tokend = '\0';
      tokend ++;
    } 

    /* get opcode */
    opcode = -1; 
    if (tokend == tokstart) {
       opcode = L_EOS;
       return;
    }

    //@@@ printf("token [%s]\n", tokstart); 
    for (int i =0; i < LENSTDWORDS; i++) {
      if (strcmp(stdwords[i], tokstart) == 0){
 opcode = i;
        break;
      }
    }
    if (opcode != -1) {
       return;
    }

    int ntype = numbertype(tokstart); 
    if (ntype == 1){
      /* is this a number ?*/
      opcode = L_INT32;
    } else if (ntype == 2) {
      opcode = L_FNUM;
    } else if (opcode == -1){
      opcode = L_ERROR;
    };
}



int main(){
  while (1) {
    printf("> ");
    if (fgets(LINEBUFFER, 250, stdin)!= NULL) {
      tokend=LINEBUFFER;
    } else {
      tokend= goodbye;
    }
    while (1) {
      nexttoken();
      eval(opcode);
      if (opcode == L_BYE) {
 return 0;
      }
      if (opcode ==L_EOS) {
 break;
      }
      if (ERRCODE != 0) {
 printf("ERRORCODE [%d]", ERRCODE);
 break;
      };
    }
  }
  printf("\n");
  return 0;
}



Here is compilation and a simple execution run.

gcc simple-forth-0.0.3.c -g -std=c99 -o simple-forth
toto@toto-Aspire-4520:~/Blogs/my-other-life-as-programmer/forth$ ./simple-forth
> 23.34 -56.12 f* f. 
-1309.840800> bye
Terminating... 

Dont forget: a floating point number has a comma or an exponent in our simple-forth implementation.

Tuesday, January 24, 2012

Terminal interactivity added to simple-forth.

Our previous Forth interpreter processed only fixed strings. To add interactivity from a console or terminal, we have to read an input string to a buffer. There is more than one way, we may use scanf. Another one, much better is fgets which avoids buffer overruns. Here is our version 0.0.2 with terminal interactivity.

/*
file     simple-forth-0.0.2.c
author   Dr. Ernesto P. Adorio
         UPDEPP (University of the Philippines,
         Extension Program in Pampanga
         Clarkfield, Pampanga
email    ernesto.adorio@gmail.com
version  0.0.1 January 14, 2012 basic interpreter.
         0.0.2 January 16, 2012 interactivity added.
*/

#include 
#include 
#include 
#include 



enum {E_STACKUNDERFLOW} ERRCODES;


enum {L_EMIT, L_DOT,L_DOTT, L_SPACE, L_ADD, L_SUB, L_MUL, L_DIV, L_DROP, L_INT32, L_CR,  L_BYE, L_EOS,L_ERROR} OPCODES;
const char *stdwords[] = { 
"emit",
".", 
".t",
"space",
"+",
"-",
"*",
"/",
"drop",
"int32",
"cr",
"bye",
"\n",
};

#define MAXSTKLEN 32
#define MAXFSTKLEN 32
#define MAXTOKENLEN 128

int  LENSTDWORDS = sizeof(stdwords)/sizeof(stdwords[0]);
char LINEBUFFER[256];
char *goodbye= "bye";
int  ERRCODE = 0;
int SP = -1;   /* stack pointer index */

int    stack[MAXSTKLEN];
double fstack[MAXFSTKLEN];

int  opcode;
char *tokstart;
char *tokend;






int eval(int opcode)
{
    /* Evaluate opcode */
    switch (opcode){
    case L_EMIT: 
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
      } else {
        printf ("%c", stack[SP--]);
      }
      break;

    case L_DOT:
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%d", stack[SP--]);
      }
      break;

    case L_DOTT:
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%d", stack[SP]);
      }
      break;
      
    case L_SPACE:
      printf ("%c", 32);
      break;
      
    case L_ADD:
      //printf("opcode L_ADD %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] += stack[SP];
      SP--;
      break;
      
    case L_SUB:
      //printf("opcode L_SUB %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] -= stack[SP];
      SP--;
      break;
      
    case L_MUL:
      //printf("opcode L_MUL %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] *= stack[SP];
      SP--;
      break;
      
    case L_DIV:
      //printf("opcode L_DIV %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] /= stack[SP];
      SP--;
      break;
    case L_INT32:
      //printf("opcode L_INT32 %s\n",tokstart);
      stack[++SP]= atoi(tokstart);
      break;
      
    case L_DROP:
      //printf("opcode L_DROP %s \n", tokstart);
      SP=SP -1;
      break;
      
    case L_CR:
      // printf("opcode L_CR %s \n", tokstart);
      printf("\n");
      break;
    default:
      ;
      break;
    }  
   
}

void nexttoken()
{   
    tokstart = tokend;
    //@ printf("inside nexttoken [%s]", tokstart);
    
    /* ignore leading white spaces */
    while (isspace(*tokstart)) tokstart++;

    /* find terminating space of end of string */
    tokend = tokstart;
    while (!isspace(*tokend) && *tokend != '\0') tokend++;
    
    /* string terminator */
    if (*tokend != '\0') {
      *tokend = '\0';
      tokend ++;
    } 

    /* get opcode */
    opcode = -1; 
    if (tokend == tokstart) {
       opcode = L_EOS;
       return;
    }

    /*@@@ printf("token [%s]\n", tokstart); */
    for (int i =0; i < LENSTDWORDS; i++) {
      if (strcmp(stdwords[i], tokstart) == 0){
 opcode = i;
 //@ printf("opcde [%d]", opcode);
        break;
      }
    }
    if (opcode == -1){
      opcode = L_INT32;
    }
}



int main(){
  while (1) {
    // read string.
    printf("> ");
    if (fgets(LINEBUFFER, 250, stdin)!= NULL) {
      tokend=LINEBUFFER;
      // @@@ printf("input string [%s]", LINEBUFFER);
    } else {
      tokend= goodbye;
    }
    // eval
    // printf ("Entering evaluation loop.\n");
    while (1) {
      nexttoken();
      //@ printf ("[%d] %s", opcode, tokstart);
      eval(opcode);
      if (opcode== L_BYE) {
 //printf ("%% opcode is L_BYE");
 return 0;
      }
      if (opcode ==L_EOS) {
 break;
      }
      if (ERRCODE != 0) {
 printf("ERRORCODE [%d]", ERRCODE);
 break;
      };
    }
    // loop back unless BYE!
  }
  printf("\n");
  return 0;
}

Save file to simple-forth-0.0.2.c, then compile it using the following command:

: gcc simple-forth.c -std=c99 -o simple-forth-0.0.2

Then run it by issuing ./simple-forth00.0.2 Here is an uninspired terminal run:

> 2 3 - 1 + .
0> 56 emit
8> 97 emit
a> 64 emit
@> 
> 
> bye

Next, we will add add a floating point computation capabilities using a separate stack.



Saturday, January 14, 2012

A basic Forth language interpreter (non-interactive)

Last time we showed a simple tokenizer for a Forth interpreter. We find it easier to combine the ideas in that post to create a simple Forth engine which process a simple string.

We will expound further on this until we successfully build a compiler for a non-standard Forth language.

Have fun working on this. The current version works on simple strings but specified on the source code itself. We will introduce interactivity and expand the capabilities of this simple Forth interpreter.



/*
file     simple-forth.c
author   Dr. Ernesto P. Adorio
         UPDEPP (University of the Philippines,
         Extension Program in Pampanga
         Clarkfield, Pampanga
email    ernesto.adorio@gmail.com
version  0.0.1 January 14, 2012
*/

#include 
#include 
#include 
#include 


#define MAXSTKLEN 32
#define MAXTOKENLEN 128

enum {L_EMIT, L_ADD, L_SUB, L_MUL, L_DIV, L_POP, L_INT32, L_CR, L_ERROR} OPCODES;
const char *stdwords[] = { 
".", 
"+",
"-",
"*",
"/",
"pop",
"int32",
"cr",
};


int LENSTDWORDS = 8;

int stack[MAXSTKLEN];
int SP = 0;   /* stack pointer index */

int main(){
  char tokens[] = "   123 34 + . cr 567 -3456 * . cr";
  char *tokstart = tokens;
  char *tokend = tokstart;
  int  opcode;

  /* @@@ printf("tokstart: %s\n", tokstart);*/

  tokend = tokens;

  while (1) {
    tokstart = tokend;
    /* ignore leading white spaces */
    while (isspace(*tokstart)) tokstart++;

    /* find terminating space of end of string */
    tokend = tokstart;
    while (!isspace(*tokend) && *tokend != '\0') tokend++;
    
    /* string terminator */
    if (*tokend != '\0') {
      *tokend = '\0';
      tokend ++;
    } else {
      break;
    } 

    /* get opcode */
    opcode = -1; 
    /*@@@ printf("token [%s]\n", tokstart); */
    for (int i =0; i < LENSTDWORDS; i++) {
      if (strcmp(stdwords[i], tokstart) == 0){
 opcode = i;
        break;
      }
    }
    if (opcode == -1){
      opcode = L_INT32;
    }


    /* Evaluate opcode */
    switch (opcode){
    case L_EMIT: 
      //printf("opcode L_EMIT %s \n", tokstart);
      printf ("%d", stack[SP--] );
      break;
    case L_ADD:
      //printf("opcode L_ADD %s\n", tokstart);
      stack[SP-1] += stack[SP];
      SP--;
      break;
    case L_SUB:
      //printf("opcode L_SUB %s\n", tokstart);
      stack[SP-1] += stack[SP];
      SP--;
      break;
    case L_MUL:
      //printf("opcode L_MUL %s\n", tokstart);
      stack[SP-1] *= stack[SP];
      SP--;
      break;
    case L_DIV:
      //printf("opcode L_DIV %s\n", tokstart);
      stack[SP-1] /= stack[SP];
      SP--;
      break;
    case L_INT32:
      //printf("opcode L_INT32 %s\n",tokstart);
      stack[++SP]= atoi(tokstart);
      break;
    case L_POP:
      //printf("opcode L_POP %s \n", tokstart);
      SP--;
      break;
    case L_CR:
      // printf("opcode L_CR %s \n", tokstart);
      printf("\n");
    }  
  }
  printf("\n");
  return 0;
}
Save the code to a file simple-forth.c, then compile using the following command line on the directory where the source file is stored. gcc simple-forth.c -std=c99 -o forth It should compile cleanly, then execute the executible by issuing ./forth When the program runs, it prints out
toto@toto-Aspire-4520:~/Blogs/my-other-life-as-programmer/forth$ ./forth
157
-1959552