Wednesday, January 25, 2012

Simple floating point arithmetic for simple-forth

Here is our latest incarnation of simple-forth, a forth interpreter written in C.
This time we add floating point arithmetic capabilities. There are two basic ways to incorporate floating point capabilities. One is using the same integer stack and the other is using a separate foating point stack.



We use a separate floating point stack with maximum 32 elements. in our newest version shown below for simplicity. The built-in integer stack is 32 bits wide. Floating point numbers in our interpreter are 64 bit wide. The current stack element index is indicated by FP. The basic floating point operations are f+, f-, f*, f/ these Forth words are the equivalent floating point words corresponding to the built-in basic + - * / operator words



:


/*
file     simple-forth-0.0.3.c
author   Dr. Ernesto P. Adorio
         UPDEPP (University of the Philippines,
         Extension Program in Pampanga
         Clarkfield, Pampanga
email    ernesto.adorio@gmail.com
version  0.0.1 January 14, 2012 basic interpreter.
         0.0.2 January 16, 2012 interactivity added.
         0.0.3 January 25, 2012 basic floating point.
*/

#include 
#include 
#include 
#include 



enum {E_STACKUNDERFLOW, E_FSTACKUNDERFLOW} ERRCODES;


enum {L_EMIT, L_DOT,L_DOTT, L_SPACE, L_ADD, L_SUB, L_MUL, L_DIV, L_DROP, L_INT32, L_CR,
      L_FNUM, L_FPLUS, L_FSUB, L_FMUL, L_FDIV, L_FDOT, L_FDOTT, L_FDROP,
      L_BYE, L_EOS,L_ERROR} OPCODES;

const char *stdwords[] = { "emit", ".",  ".t", "space", "+", "-", "*", "/", "drop", "int32", "cr",
                           "fnum", "f+", "f-", "f*" ,"f/", "f.","f.t","fdrop",
                           "bye",
                           "\n",
                         };
// "int32" and "fnum" are dummies. 


#define MAXSTKLEN 32
#define MAXFSTKLEN 32
#define MAXTOKENLEN 128

int  LENSTDWORDS = sizeof(stdwords)/sizeof(stdwords[0]);
char LINEBUFFER[256];
char *goodbye= "bye";
int  ERRCODE = 0;
int SP = -1;   /* stack pointer index */
int stack[MAXSTKLEN];

int  opcode;
char *tokstart;
char *tokend;

int FP = -1;   /* floating point stack index */
double fstack[MAXFSTKLEN];


int numbertype(char *s) 
{
   /*
   Returns 
     0 - not a number!
     1 - an integer
     2 - a floating point number
   */
   
   char *t = s;
   while (isspace(*t)) t++;
   if (*t == '\0') return 0;
    
   if (*t == '+' || *t == '-') t++;
   if (*t == '\0') return 0;
   while (isdigit(*t)) t++;
   if (*t == '\0') {
     return 1; // an integer!
   }
  
   if (*t == '.') t++;
   while (isdigit(*t)) t++;
   if (*t == '\0') return 2;
   
   if (*t == 'e') t++;
   if (*t == '\0') return 0; // error!
   if (*t == '+' || *t == '-') t++;
   if (*t == '\0') return 0; // error!
   while (isdigit(*t)) t++;
   if (*t == '\0') return 2; // a floating point number!
   return 0; // not an integer or floating point.
}





int eval(int opcode)
{
    /* Evaluate opcode */
    switch (opcode){
    case L_EMIT: 
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
      } else {
        printf ("%c", stack[SP--]);
      }
      break;

    case L_DOT:
      printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%d", stack[SP--]);
      }
      break;

    case L_DOTT:
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%d", stack[SP]);
      }
      break;
      
    case L_SPACE:
      printf ("%c", 32);
      break;
      
    case L_ADD:
      //printf("opcode L_ADD %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] += stack[SP];
      SP--;
      break;
      
    case L_SUB:
      //printf("opcode L_SUB %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] -= stack[SP];
      SP--;
      break;
      
    case L_MUL:
      //printf("opcode L_MUL %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] *= stack[SP];
      SP--;
      break;
      
    case L_DIV:
      //printf("opcode L_DIV %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] /= stack[SP];
      SP--;
      break;
  
   case L_INT32:
      //printf("opcode L_INT32 %s\n",tokstart);
      stack[++SP]= atoi(tokstart);
      // printf("@@@ pushing an integer!");
      break;
      
    case L_DROP:
      //printf("opcode L_DROP %s \n", tokstart);
      SP=SP -1;
      break;
      
    case L_CR:
      // printf("opcode L_CR %s \n", tokstart);
      printf("\n");
      break;


/* simple floating point features */
    case L_FNUM:
      //printf("floating point number %s\n",tokstart);
      fstack[++FP]= atof(tokstart);
      break;
    
    case L_FPLUS:
      if (FP <= 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "fstack underflow!");
     break;
      } 
      fstack[FP-1] += fstack[FP];
      FP--;
      break;

    case L_FSUB:
      if (FP <= 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "fstack underflow!");
     break;
      } 
      fstack[FP-1] -= fstack[FP];
      FP--;
      break;

    case L_FMUL:
      if (FP <= 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "fstack underflow!");
     break;
      } 
      fstack[FP-1] *= fstack[FP];
      FP--;
      break;

    case L_FDIV:
      if (FP <= 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "fstack underflow!");
     break;
      } 
      fstack[FP-1] /= fstack[FP];
      FP--;
      break;

    case L_FDOT:
      printf("opcode FDOT %s\n", tokstart);
      if (FP < 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%f", fstack[FP--]);
      }
      break;

    case L_FDOTT:
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_FSTACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%f", fstack[FP]);
      }
      break;
      
    case L_BYE:
      printf("Terminating... ");
      break;
    default:
      ;
      break;
    }  
   
}

void nexttoken()
{   
    tokstart = tokend;
    //@ printf("inside nexttoken [%s]", tokstart);
    
    /* ignore leading white spaces */
    while (isspace(*tokstart)) tokstart++;

    /* find terminating space of end of string */
    tokend = tokstart;
    while (!isspace(*tokend) && *tokend != '\0') tokend++;
    
    /* string terminator */
    if (*tokend != '\0') {
      *tokend = '\0';
      tokend ++;
    } 

    /* get opcode */
    opcode = -1; 
    if (tokend == tokstart) {
       opcode = L_EOS;
       return;
    }

    //@@@ printf("token [%s]\n", tokstart); 
    for (int i =0; i < LENSTDWORDS; i++) {
      if (strcmp(stdwords[i], tokstart) == 0){
 opcode = i;
        break;
      }
    }
    if (opcode != -1) {
       return;
    }

    int ntype = numbertype(tokstart); 
    if (ntype == 1){
      /* is this a number ?*/
      opcode = L_INT32;
    } else if (ntype == 2) {
      opcode = L_FNUM;
    } else if (opcode == -1){
      opcode = L_ERROR;
    };
}



int main(){
  while (1) {
    printf("> ");
    if (fgets(LINEBUFFER, 250, stdin)!= NULL) {
      tokend=LINEBUFFER;
    } else {
      tokend= goodbye;
    }
    while (1) {
      nexttoken();
      eval(opcode);
      if (opcode == L_BYE) {
 return 0;
      }
      if (opcode ==L_EOS) {
 break;
      }
      if (ERRCODE != 0) {
 printf("ERRORCODE [%d]", ERRCODE);
 break;
      };
    }
  }
  printf("\n");
  return 0;
}



Here is compilation and a simple execution run.

gcc simple-forth-0.0.3.c -g -std=c99 -o simple-forth
toto@toto-Aspire-4520:~/Blogs/my-other-life-as-programmer/forth$ ./simple-forth
> 23.34 -56.12 f* f. 
-1309.840800> bye
Terminating... 

Dont forget: a floating point number has a comma or an exponent in our simple-forth implementation.

Tuesday, January 24, 2012

Terminal interactivity added to simple-forth.

Our previous Forth interpreter processed only fixed strings. To add interactivity from a console or terminal, we have to read an input string to a buffer. There is more than one way, we may use scanf. Another one, much better is fgets which avoids buffer overruns. Here is our version 0.0.2 with terminal interactivity.

/*
file     simple-forth-0.0.2.c
author   Dr. Ernesto P. Adorio
         UPDEPP (University of the Philippines,
         Extension Program in Pampanga
         Clarkfield, Pampanga
email    ernesto.adorio@gmail.com
version  0.0.1 January 14, 2012 basic interpreter.
         0.0.2 January 16, 2012 interactivity added.
*/

#include 
#include 
#include 
#include 



enum {E_STACKUNDERFLOW} ERRCODES;


enum {L_EMIT, L_DOT,L_DOTT, L_SPACE, L_ADD, L_SUB, L_MUL, L_DIV, L_DROP, L_INT32, L_CR,  L_BYE, L_EOS,L_ERROR} OPCODES;
const char *stdwords[] = { 
"emit",
".", 
".t",
"space",
"+",
"-",
"*",
"/",
"drop",
"int32",
"cr",
"bye",
"\n",
};

#define MAXSTKLEN 32
#define MAXFSTKLEN 32
#define MAXTOKENLEN 128

int  LENSTDWORDS = sizeof(stdwords)/sizeof(stdwords[0]);
char LINEBUFFER[256];
char *goodbye= "bye";
int  ERRCODE = 0;
int SP = -1;   /* stack pointer index */

int    stack[MAXSTKLEN];
double fstack[MAXFSTKLEN];

int  opcode;
char *tokstart;
char *tokend;






int eval(int opcode)
{
    /* Evaluate opcode */
    switch (opcode){
    case L_EMIT: 
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
      } else {
        printf ("%c", stack[SP--]);
      }
      break;

    case L_DOT:
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%d", stack[SP--]);
      }
      break;

    case L_DOTT:
      //@@printf("opcode L_EMIT %s %d\n", tokstart, SP);
      if (SP < 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
      } else {
        printf ("%d", stack[SP]);
      }
      break;
      
    case L_SPACE:
      printf ("%c", 32);
      break;
      
    case L_ADD:
      //printf("opcode L_ADD %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] += stack[SP];
      SP--;
      break;
      
    case L_SUB:
      //printf("opcode L_SUB %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] -= stack[SP];
      SP--;
      break;
      
    case L_MUL:
      //printf("opcode L_MUL %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] *= stack[SP];
      SP--;
      break;
      
    case L_DIV:
      //printf("opcode L_DIV %s\n", tokstart);
      if (SP <= 0) {
     ERRCODE = E_STACKUNDERFLOW;
     printf("%s", "underflow!");
     break;
      } 
      stack[SP-1] /= stack[SP];
      SP--;
      break;
    case L_INT32:
      //printf("opcode L_INT32 %s\n",tokstart);
      stack[++SP]= atoi(tokstart);
      break;
      
    case L_DROP:
      //printf("opcode L_DROP %s \n", tokstart);
      SP=SP -1;
      break;
      
    case L_CR:
      // printf("opcode L_CR %s \n", tokstart);
      printf("\n");
      break;
    default:
      ;
      break;
    }  
   
}

void nexttoken()
{   
    tokstart = tokend;
    //@ printf("inside nexttoken [%s]", tokstart);
    
    /* ignore leading white spaces */
    while (isspace(*tokstart)) tokstart++;

    /* find terminating space of end of string */
    tokend = tokstart;
    while (!isspace(*tokend) && *tokend != '\0') tokend++;
    
    /* string terminator */
    if (*tokend != '\0') {
      *tokend = '\0';
      tokend ++;
    } 

    /* get opcode */
    opcode = -1; 
    if (tokend == tokstart) {
       opcode = L_EOS;
       return;
    }

    /*@@@ printf("token [%s]\n", tokstart); */
    for (int i =0; i < LENSTDWORDS; i++) {
      if (strcmp(stdwords[i], tokstart) == 0){
 opcode = i;
 //@ printf("opcde [%d]", opcode);
        break;
      }
    }
    if (opcode == -1){
      opcode = L_INT32;
    }
}



int main(){
  while (1) {
    // read string.
    printf("> ");
    if (fgets(LINEBUFFER, 250, stdin)!= NULL) {
      tokend=LINEBUFFER;
      // @@@ printf("input string [%s]", LINEBUFFER);
    } else {
      tokend= goodbye;
    }
    // eval
    // printf ("Entering evaluation loop.\n");
    while (1) {
      nexttoken();
      //@ printf ("[%d] %s", opcode, tokstart);
      eval(opcode);
      if (opcode== L_BYE) {
 //printf ("%% opcode is L_BYE");
 return 0;
      }
      if (opcode ==L_EOS) {
 break;
      }
      if (ERRCODE != 0) {
 printf("ERRORCODE [%d]", ERRCODE);
 break;
      };
    }
    // loop back unless BYE!
  }
  printf("\n");
  return 0;
}

Save file to simple-forth-0.0.2.c, then compile it using the following command:

: gcc simple-forth.c -std=c99 -o simple-forth-0.0.2

Then run it by issuing ./simple-forth00.0.2 Here is an uninspired terminal run:

> 2 3 - 1 + .
0> 56 emit
8> 97 emit
a> 64 emit
@> 
> 
> bye

Next, we will add add a floating point computation capabilities using a separate stack.



Tuesday, January 17, 2012

Tiobe rankings of programming languages

Click on for the latest popularity rankings of programming languages. It is not based on absolute value but on the relative change of market share.

For January 2012, Objective C showed the fastest growth. Click on the link for the the definition of the Tiobe index.


The next popular is C# followed by C and Javascript. On the other hand, based on absolute market share, the top languages are Java, C, C#, C++, Objective-C, PHP and Visual Basic. Our favorite language Python is at #8. Interesting to note that the statistical software R is at #19.








Saturday, January 14, 2012

Does a number string represents an integer or a floating value in C?

I got the shock of my blogging life when I cannot access an old post about the evils of the C function atof. Here is a code which return 1 for integers and 2 for floating point and 0 if not a valid integer or a valid float.

/*
author  Ernesto P. Adorio
        UPDEPP (University of the 
        Philippines, Extension Program
        in Pampanga, Clarkfield, Pampanga
email   ernesto. adorio@ gmail.com [remove spaces]
version 0.0.1 january 14, 2012
*/

#include 
#include 


int numbertype(char *s) 
{
   char *t = s;
   while (isspace(*t)) t++;
   if (*t == '\0') return 0;
    
   // number sign
   printf("ltrimmed [%s]", t);
   if (*t == '+' || *t == '-') t++;
   if (*t == '\0') return 0;
   printf("after sign [%s]", t);
   while (isdigit(*t)) t++;
   printf("after digits [%s]", t);
   if (*t == '\0') return 1; // an integer!
   
   // floating point??
   printf ("decimal point? [%s] ", t);  
   if (*t == '.') t++;
   printf ("fractional digits?");
   while (isdigit(*t)) t++;
   if (*t == '\0') return 2;
   
   // exponent part.
   printf ("testing exponent part %s", t); 
   if (*t == 'e') t++;
   printf ("after 'e' ");  
   if (*t == '\0') return 0; // error!
   printf ("sign after e?", t);  
   if (*t == '+' || *t == '-') t++;
   printf("%s", t);  
   if (*t == '\0') return 0; // error!
   while (isdigit(*t)) t++;
   if (*t == '\0') return 2; // a floating point number!
   return 0; // not an integer or floating point.
}

int main() 
{
  printf("numtype =[%d]", numbertype("-123.34")); 
};

Remove the deubgging printf statements when you use it for applications.

When the program is run, it returns a code of 2 to denote a floating point number, a 1 for an integer, and a 0 if not a number.


A basic Forth language interpreter (non-interactive)

Last time we showed a simple tokenizer for a Forth interpreter. We find it easier to combine the ideas in that post to create a simple Forth engine which process a simple string.

We will expound further on this until we successfully build a compiler for a non-standard Forth language.

Have fun working on this. The current version works on simple strings but specified on the source code itself. We will introduce interactivity and expand the capabilities of this simple Forth interpreter.



/*
file     simple-forth.c
author   Dr. Ernesto P. Adorio
         UPDEPP (University of the Philippines,
         Extension Program in Pampanga
         Clarkfield, Pampanga
email    ernesto.adorio@gmail.com
version  0.0.1 January 14, 2012
*/

#include 
#include 
#include 
#include 


#define MAXSTKLEN 32
#define MAXTOKENLEN 128

enum {L_EMIT, L_ADD, L_SUB, L_MUL, L_DIV, L_POP, L_INT32, L_CR, L_ERROR} OPCODES;
const char *stdwords[] = { 
".", 
"+",
"-",
"*",
"/",
"pop",
"int32",
"cr",
};


int LENSTDWORDS = 8;

int stack[MAXSTKLEN];
int SP = 0;   /* stack pointer index */

int main(){
  char tokens[] = "   123 34 + . cr 567 -3456 * . cr";
  char *tokstart = tokens;
  char *tokend = tokstart;
  int  opcode;

  /* @@@ printf("tokstart: %s\n", tokstart);*/

  tokend = tokens;

  while (1) {
    tokstart = tokend;
    /* ignore leading white spaces */
    while (isspace(*tokstart)) tokstart++;

    /* find terminating space of end of string */
    tokend = tokstart;
    while (!isspace(*tokend) && *tokend != '\0') tokend++;
    
    /* string terminator */
    if (*tokend != '\0') {
      *tokend = '\0';
      tokend ++;
    } else {
      break;
    } 

    /* get opcode */
    opcode = -1; 
    /*@@@ printf("token [%s]\n", tokstart); */
    for (int i =0; i < LENSTDWORDS; i++) {
      if (strcmp(stdwords[i], tokstart) == 0){
 opcode = i;
        break;
      }
    }
    if (opcode == -1){
      opcode = L_INT32;
    }


    /* Evaluate opcode */
    switch (opcode){
    case L_EMIT: 
      //printf("opcode L_EMIT %s \n", tokstart);
      printf ("%d", stack[SP--] );
      break;
    case L_ADD:
      //printf("opcode L_ADD %s\n", tokstart);
      stack[SP-1] += stack[SP];
      SP--;
      break;
    case L_SUB:
      //printf("opcode L_SUB %s\n", tokstart);
      stack[SP-1] += stack[SP];
      SP--;
      break;
    case L_MUL:
      //printf("opcode L_MUL %s\n", tokstart);
      stack[SP-1] *= stack[SP];
      SP--;
      break;
    case L_DIV:
      //printf("opcode L_DIV %s\n", tokstart);
      stack[SP-1] /= stack[SP];
      SP--;
      break;
    case L_INT32:
      //printf("opcode L_INT32 %s\n",tokstart);
      stack[++SP]= atoi(tokstart);
      break;
    case L_POP:
      //printf("opcode L_POP %s \n", tokstart);
      SP--;
      break;
    case L_CR:
      // printf("opcode L_CR %s \n", tokstart);
      printf("\n");
    }  
  }
  printf("\n");
  return 0;
}
Save the code to a file simple-forth.c, then compile using the following command line on the directory where the source file is stored. gcc simple-forth.c -std=c99 -o forth It should compile cleanly, then execute the executible by issuing ./forth When the program runs, it prints out
toto@toto-Aspire-4520:~/Blogs/my-other-life-as-programmer/forth$ ./forth
157
-1959552