/** % LIBFORTH(3) % Richard Howe % November 2017 @file libforth.h @brief A FORTH library @author Richard James Howe. @copyright Copyright 2015,2016,2017 Richard James Howe. @license MIT @email howe.r.j.89@gmail.com **/ #ifndef F_H #define F_H #ifdef __cplusplus extern "C" { #endif #include /* #include */ /* #include */ /** @brief This is the absolute minimum size the Forth virtual machine can be in Forth cells, not bytes. **/ #define MINIMUM_CORE_SIZE (2048) /** @brief Default VM size which should be large enough for any Forth application, in Forth cells, not bytes. **/ #ifndef DEFAULT_CORE_SIZE #define DEFAULT_CORE_SIZE (64 * 1024) #endif /** @brief When designing a binary format, which this interpreter uses and saves to disk, it is imperative that certain information is saved to disk - one of those pieces of information is the version of the interpreter. This value is used for compatibility checking. Each version is incompatible with previous or later versions, which is a deficiency of the program. A way to migrate core files would be useful, but the task is too difficult. **/ #define F_CORE_VERSION (0x04u) typedef unsigned long int uintptr_t; typedef unsigned char uint8_t; typedef unsigned long int uint64_t; typedef unsigned short uint16_t; struct forth; /**< An opaque object that holds a running FORTH environment**/ typedef struct forth f_t; /**< Typedef of opaque object for general use */ typedef uintptr_t f_cell_t; /**< FORTH cell large enough for a pointer*/ #define PRIdPTR "ld" #define PRIxPTR "lx" #define PRId64 "lld" #define PRIdCell PRIdPTR /**< Decimal format specifier for a Forth cell */ #define PRIxCell PRIxPTR /**< Hex format specifier for a Forth word */ /** @brief The **IS_BIG_ENDIAN** macro looks complicated, however all it does is determine the endianess of the machine using trickery. See: * * For more information and alternatives. **/ #define IS_BIG_ENDIAN (!(union { uint16_t u16; uint8_t c; }){ .u16 = 1 }.c) /** @brief Functions matching this typedef can be called via the CALL instruction. **/ typedef int (*f_function_t)(f_t *o); /** @brief struct f_functions allows arbitrary C functions to be passed to the forth interpreter which can be used from within the Forth interpreter. This structure can be used to extend the forth interpreter with functions defined elsewhere, which is particularly useful for allowing the interpreter to use non-portable functions. **/ struct f_functions { f_cell_t count; /**< number of functions */ /**@brief The only information needed to perform a CALL is the function * that needs calling and the depth expected on the call stack. This * interface is minimal, but works. */ struct f_function { unsigned depth; /**< depth expected on stack before call */ f_function_t function; /**< function to execute */ } *functions; /**< list of possible functions for CALL */ }; /** @brief The logging function is used to print error messages, warnings and notes within this program. @param prefix prefix to add to any logged messages @param func function in which logging function is called @param line line number logging function was called at @param fmt logging format string @param ... arguments for format string @return int < 0 is failure **/ int f_logger(const char *prefix, const char *func, unsigned line, const char *fmt, ...); /** Some macros are also needed for logging. As an aside, **__VA_ARGS__** should be prepended with '##' in case zero extra arguments are passed into the variadic macro, to swallow the extra comma, but it is not *standard* C, even if most compilers support the extension. **/ /** @brief Variadic macro for handling fatal error printing information @note This function does not terminate the process, it is up to the user to this after fatal() is called. @param FMT printf style format string **/ #define fatal(FMT,...) f_logger("fatal", __func__, __LINE__, FMT, __VA_ARGS__) /** @brief Variadic macro for handling error printing information @note Use this for recoverable errors @param FMT printf style format string **/ #define error(FMT,...) f_logger("error", __func__, __LINE__, FMT, __VA_ARGS__) /** @brief Variadic macro for handling warnings @note Use this for minor problems, for example, some optional component failed. @param FMT printf style format string **/ #define warning(FMT,...) f_logger("warning",__func__, __LINE__, FMT, __VA_ARGS__) /** @brief Variadic macro for notes @note Use this printing high-level information about the interpreter, for example opening up a new file. It should be used sparingly. @param FMT printf style format string **/ #define note(FMT,...) f_logger("note", __func__, __LINE__, FMT, __VA_ARGS__) /** @brief Variadic macro for debugging information. @note Use this for debugging, debug messages can be subject to change and may be present or removed arbitrarily between releases. This macro can be used liberally. @warning May produce copious amounts of output. @param FMT printf style format string **/ #define debug(FMT,...) f_logger("debug", __func__, __LINE__, FMT, __VA_ARGS__) /** @brief These are the possible options for the debug registers. Higher levels mean more verbose error messages are generated. **/ enum f_debug_level { F_DEBUG_OFF, /**< tracing is off */ F_DEBUG_F_CODE, /**< used within the forth interpreter */ F_DEBUG_NOTE, /**< print notes */ F_DEBUG_INSTRUCTION, /**< instructions and stack are traced */ F_DEBUG_CHECKS, /**< bounds checks are printed out */ F_DEBUG_ALL, /**< trace everything that can be traced */ }; /** @brief Compute the binary logarithm of an integer value @param x number to act on @return log2 of x **/ f_cell_t f_blog2(f_cell_t x); /** @brief Round up a number to the nearest power of 2 @param r number to round up @return rounded up number **/ f_cell_t f_round_up_pow2(f_cell_t r); /** @brief Given an input and an output this will initialize forth, allocating memory for it and setting it up so it has a few FORTH words predefined. The returned object must be freed by the caller and can be done with f_free(). It will return NULL on failure. @param size Size of interpreter environment, must be greater or equal to MINIMUM_CORE_SIZE @param in Read from this input file. Caller closes. @param out Output to this file. Caller closes. @param calls Used to specify arbitrary functions that the interpreter can call Can be NULL, caller frees if allocated. @return forth A fully initialized forth environment or NULL. **/ f_t *f_init(size_t size, FILE *in, FILE *out, const struct f_functions *calls); /** @brief Given a FORTH object it will free any memory and perform any internal cleanup needed. This will not free any evaluated strings nor will it close any files passed in via the C-API. @param o An object to free, Asserted **/ void f_free(f_t *o); /** @brief Allocate space for a function list. @param count the number of functions that can be held. @return a function list that can be passed to f_init, or NULL on failure. **/ struct f_functions *f_new_function_list(f_cell_t count); /** @brief Free a function list. @param calls function list to free. **/ void f_delete_function_list(struct f_functions *calls); /** @brief find a forth word in its dictionary if it exists, there must be no extra characters (apart from a terminating NUL) in the word name, the entire string will be searched for. @param o initialized forth environment @param s a string, representing a words name, to find @return non zero if the word has been found, zero if it has not been **/ f_cell_t f_find(f_t *o, const char *s); /** @brief Convert a string, representing a numeric value, into a forth cell. @param base base to convert string from, valid values are 0, and 2-26 @param[out] n the result of the conversion is stored here @param s string to convert @return int return code indicating failure (non zero) or success (zero) **/ int f_string_to_cell(int base, f_cell_t *n, const char *s); /** @brief push a value onto the variable stack @param o initialized forth environment @param f value to push **/ void f_push(f_t *o, f_cell_t f); /** @brief pop a value from the variable stack @param o initialized forth environment @return popped value **/ f_cell_t f_pop(f_t *o); /** @brief get the current stack position @param o initialized forth environment @return stack position, number of items on the stack **/ f_cell_t f_stack_position(f_t *o); /** @brief Alert a Forth environment to a signal, this function should be called from a signal handler to let the Forth environment know a signal has been caught. It will then set a register that can (but not necessarily will be) checked when the Forth environment runs again. @param o initialized forth environment @param sig caught signal value **/ void f_signal(f_t *o, int sig); /** @brief Duplicate a string, not all C libraries have a strdup function, although they should! @param s String to duplicate @return Duplicated string, caller frees. **/ char *f_strdup(const char *s); /** @brief Free the list of words returned by f_words @param s word list to free @param length length of word list to free **/ void f_free_words(char **s, size_t length); /** @brief This function returns a list of strings containing all of the names of words defined in a forth environment. This function returns NULL on failure and sets length to zero. @param o initialized forth environment @param length length of returned array @return list of pointers to strings containing Forth word names **/ char **f_words(f_t *o, size_t *length); /** @brief Check whether a forth environment is still valid, that is if the environment makes sense and is still runnable, an invalid forth environment cannot be saved to disk or run. Once a core is invalidated it cannot be made valid again. @param o initialized forth environment @return zero if environment is still valid, non zero if it is not **/ int f_is_invalid(f_t *o); /** @brief Invalidate a Forth environment. @param o initialized forth environment to invalidate. **/ void f_invalidate(f_t *o); /** @brief Set the verbosity/log/debug level of the interpreter, higher values mean more verbose output. @param o initialized forth environment. @param level to set environment to. **/ void f_set_debug_level(f_t *o, enum f_debug_level level); /** @brief Execute an initialized forth environment, this will read from input until there is no more or an error occurs. If an error occurs a negative number will be returned and the forth object passed to f_run will be invalidated, all subsequent calls to f_run() or f_eval() will return errors. @param o An initialized forth environment. Caller frees. @return int This is an error code, less than one is an error. **/ int f_run(f_t *o); /** @brief This function behaves like f_run() but instead will read from a string until there is no more. It will like- wise invalidate objects if there is an error evaluating the string. Do not forget to call either f_s_f_input() or f_set_string_input() or to close any previous files passed to f_eval() after you have called f_eval(). Multiple calls to f_eval() will work however. @param o An initialized forth environment. Caller frees. @param s A NUL terminated string to read from. Asserted. @return int This is an error code, less than one is an error. **/ int f_eval(f_t *o, const char *s); /** @brief This is the same as f_eval, except the string to read from does not have to be NUL terminated. @param o An initialized forth environment. Caller frees. @param s A block of memory to evaluate. Asserted. @param length Size of block to read from. @return int This is an error code, less than one is an error. **/ int f_eval_block(f_t *o, const char *s, size_t length); /** @brief Dump a raw forth object to disk, for debugging purposes, this cannot be loaded with "flc_file". @param o forth object to dump, caller frees, asserted. @param dump file to dump to (opened as "wb"), caller frees, asserted. @return int 0 if successful, non zero otherwise **/ int f_dump_core(f_t *o, FILE *dump); /** @brief Save the opaque FORTH object to file, this file may be loaded again with flc_file. The file passed in should be have been opened up in binary mode ("wb"). These files are not portable, files generated on machines with different machine word sizes or endianess will not work with each other. @warning Note that this function will not save out the contents or in anyway remember the f_functions structure passed in to f_init, it is up to the user to correctly pass in the right value after loading a previously saved core. Therefore portable core files must be generated from a f_init that was passed NULL. @param o The FORTH environment to dump. Caller frees. Asserted. @param dump Core dump file handle ("wb"). Caller closes. Asserted. @return int An error code, negative on error. **/ int f_s_c_file(f_t *o, FILE *dump); /** @brief Load a Forth file from disk, returning a forth object that can be passed to f_run. The loaded core file will have it's input and output file-handles defaulted so it reads from standard in and writes to standard error. @param dump a file handle opened on a Forth core dump, previously saved with f_save_core, this must be opened in binary mode ("rb"). @return f_t a reinitialized forth object, or NULL on failure **/ f_t *flc_file(FILE *dump); /** @brief Load a core file from memory, much like flc_file. The size parameter must be greater or equal to the MINIMUM_CORE_SIZE, this is asserted. @param m memory containing a Forth core file @param size size of core file in memory in bytes @return f_t a reinitialized forth object, or NULL on failure **/ f_t *flc_memory(char *m, size_t size); /** @brief Save a Forth object to memory, this function will allocate enough memory to store the core file. @param o forth object to save to memory, Asserted. @param[out] size of returned object, in bytes @return pointer to saved memory on success. **/ char *f_s_c_memory(f_t *o, size_t *size); /** @brief Define a new constant in an Forth environment. @param o Forth environment to define new constant in @param name Name of constant, should be less than 31 characters in length as only they will be used in defining the new name @param c Value of constant @return Same return status as f_eval **/ int f_define_constant(f_t *o, const char *name, f_cell_t c); /** @brief Set the input of an environment 'o' to read from a file 'in'. @param o An initialized FORTH environment. Caller frees. @param in Open handle for reading; "r"/"rb". Caller closes. **/ void f_s_f_input(f_t *o, FILE *in); /** @brief Set the output file of an environment 'o'. @param o An initialized FORTH environment. Caller frees. Asserted. @param out Open handle for writing; "w"/"wb". Caller closes. Asserted. **/ void f_s_f_output(f_t *o, FILE *out); /** @brief Set the input of an environment 'o' to read from a block of memory. @param o An initialized FORTH environment. Caller frees. Asserted. @param s A block of memory to act as input. Asserted. @param length Length of block **/ void f_set_block_input(f_t *o, const char *s, size_t length); /** @brief Set the input of an environment 'o' to read from a string 's'. @param o An initialized FORTH environment. Caller frees. Asserted. @param s A NUL terminated string to act as input. Asserted. **/ void f_set_string_input(f_t *o, const char *s); /** @brief Set the register elements in the Forth virtual machine for "argc" and "argv" to argc and argv, allowing them to be accessible within the interpreter @param o An initialized FORTH environment. Caller frees. Asserted. @param argc argc, as is passed into main() @param argv argv, as is passed into main() **/ void f_set_args(f_t *o, int argc, char **argv); /** @brief A wrapper around fopen, exposed as a utility function, this function either succeeds or calls "exit(EXIT_FAILURE)" after printing an error message. @param name of file to open @param mode to open file in @return always returns a file handle **/ FILE *f_fopen_or_die(const char *name, char *mode); /** @brief This is a simple wrapper around strerror, if the errno is zero it returns "unknown error", or if strerror returns NULL. This function inherits the problems of strerror (it is not threadsafe). @return error string. **/ const char *f_strerror(void); /** @brief This implements a limited FORTH REPL. Currently there is no mechanism for passing a struct f_functions to this call, this is deliberate. A saved Forth file will not make sense without the correct f_functions structure and associated functions. @param argc An argument count, like in main(). @param argv argc strings, like in main(). Not checked for NULL. @return int A error code. Anything non zero is an error. **/ int mainforth(int argc, char **argv); #ifdef __cplusplus } #endif #endif /** # libforth.c.md @file libforth.c @author Richard James Howe. @copyright Copyright 2015,2016,2017 Richard James Howe. @license MIT @email howe.r.j.89@gmail.com @brief A FORTH library, written in a literate style. ## License The MIT License (MIT) Copyright (c) 2016 Richard James Howe Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ## Introduction This file implements the core Forth interpreter, it is written in portable C99. The file contains a virtual machine that can interpret threaded Forth code and a simple compiler for the virtual machine, which is one of its instructions. The interpreter can be embedded in another application and there should be no problem instantiating multiple instances of the interpreter. For more information about Forth see: * * Thinking Forth by Leo Brodie * Starting Forth by Leo Brodie A glossary of words for FIG FORTH 79: * And the more recent and widespread standard for ANS Forth: * The antecedent of this interpreter: * cxxforth, a literate Forth written in C++ * Jones Forth, a literate Forth written in x86 assembly: * * (backup) Another portable Forth interpreter of written in C: * http://www.softsynth.com/pforth/ * https://github.com/philburk/pforth A Forth processor: * And my Forth processor based on this one: * The repository should also contain: * "readme.md" : a Forth manual, and generic project information * "forth.fth" : basic Forth routines and startup code * "libforth.h" : The header contains the API documentation The structure of this file is as follows: 1) Headers and configuration macros 2) Enumerations and constants 3) Helping functions for the compiler 4) API related functions and Initialization code 5) The Forth virtual machine itself 6) An example main function called **mainforth** and support functions Each section will be explained in detail as it is encountered. An attempt has been made to make this document flow, as both a source code document and as a description of how the Forth kernel works. This is helped by the fact that the program is small and compact without being written in obfuscated C. It is, as mentioned, compact, and can be difficult to understand regardless of code quality. Some of the semantics of Forth will not be familiar to C programmers. A basic understanding of how to use Forth would help as this document is meant to describe how a Forth implementation works and not as an introduction to the language. A quote about the language from Wikipedia best sums the language up: "Forth is an imperative stack-based computer programming language and programming environment. Language features include structured programming, reflection (the ability to modify the program structure during program execution), concatenative programming (functions are composed with juxtaposition) and extensibility (the programmer can create new commands). ... A procedural programming language without type checking, Forth features both interactive execution of commands (making it suitable as a shell for systems that lack a more formal operating system) and the ability to compile sequences of commands for later execution." Forth has a philosophy like most languages, one of simplicity, compactness and of trying only to solve the problem at hand, even going as far as to try to simplify the problem or replace the problem (which may span multiple domains, not just software) with a simpler one. This is often not a realistic way of tackling things and Forth has fallen out of favor, it is nonetheless an interesting language which can be implemented and understood by a single programmer (another integral part of the Forth philosophy). The core of the concept of the language - simplicity I would say - is achieved by the following: 1) The language uses Reverse Polish Notation to enter expressions and parsing is simplified to the extreme with space delimited words and numbers being the most complex terms. This means a abstract syntax tree does not need to be constructed and terms can be executed as soon as they are parsed. The *parser* can described in only a handful of lines of C. 2) The language uses concatenation of Forth words (called functions in other language) to create new words, this allows for small programs to be created and encourages *factoring* definitions into smaller words. 3) The language is untyped. 4) Forth functions, or words, take their arguments implicitly and return variables implicitly via a variable stack which the programmer explicitly interacts with. A comparison of two languages behavior best illustrates the point, we will define a function in C and in Forth that simply doubles a number. In C this would be: int double_number(int x) { return x << 1; } And in Forth it would be: : 2* 1 lshift ; No types are needed, and the arguments and the return values are not stated, unlike in C. Although this has the advantage of brevity, it is now up to the programmer to manages those variables. 5) The input and output facilities are set up and used implicitly as well. Input is taken from **stdin** and output goes to **stdout**, by default. Words that deal with I/O uses these file steams internally. 6) Error handling is traditionally non existent or limited. 7) This point is not a property of the language, but part of the way the Forth programmer must program. The programmer must make their factored word definitions *flow*. Instead of reordering the contents of the stack for each word, words should be made so that the reordering does not have to take place (ie. Manually performing the job of a optimizing compile another common theme in Forth, this time with memory reordering). The implicit behavior relating to argument passing and I/O really reduce program size, the type of implicit behavior built into a language can really define what that language is good for. For example AWK is naturally good for processing text, thanks in large part to sensible defaults for how text is split up into lines and records, and how input and output is already set up for the programmer. An example of this succinctness in AWK is the following program, which can be typed in at the command line. It will read from the standard input if no files are given, and print any lines longer than eighty characters along with the line number of that line: awk '{line++}length > 80 {printf "%04u: %s\n", line, $0}' file.txt ... For more information about AWK see: * * * Forth likewise can achieve succinctness and brevity because of its implicit behavior. Naturally we try to adhere to Forth philosophy, but also to Unix philosophy (which most Forths do not do), this is described later on. Glossary of Terms: VM - Virtual Machine Cell - The Virtual Machines natural Word Size, on a 32 bit machine the Cell will be 32 bits wide Word - In Forth a Word refers to a function, and not the usual meaning of an integer that is the same size as the machines underlying word size, this can cause confusion API - Application Program Interface interpreter - as in byte code interpreter, synonymous with virtual machine. REPL - Read-Evaluate-Print-Loop, this Forth actually provides something more like a "REL", or Read-Evaluate-Loop (as printing has to be done explicitly), but the interpreter is interactive which is the important point RPN - Reverse Polish Notation (see ). The Forth interpreter uses RPN to enter expressions. The stack - Forth implementations have at least two stacks, one for storing variables and another for control flow and temporary variables, when the term *stack* is used on its own and with no other context it refers to the *variable stack* and not the *return stack*. This *variable stack* is used for passing parameters into and return values to functions. Return stack - Most programming languages have a call stack, C has one but not one that the programmer can directly access, in Forth manipulating the return stack is often used. factor - factoring is splitting words into smaller words that perform a specific function. To say a word is a natural factor of another word is to say that it makes sense to take some functionality of the word to be factored and to create a new word that encapsulates that functionality. Forth encourages heavy factoring of definitions. Command mode - This mode executes both compiling words and immediate words as they are encountered Compile mode - This mode executes immediate words as they are encountered, but compiling words are compiled into the dictionary. Primitive - A word whose instruction is built into the VM. **/ /** ## Headers and configurations macros **/ /** This file implements a Forth library, so a Forth interpreter can be embedded in another application, as such a subset of the functions in this file are exported, and are documented in the *libforth.h* header **/ /* #include "libforth.h" */ /** We try to make good use of the C library as even microcontrollers have enough space for a reasonable implementation of it, although it might require some setup. The only time allocations are explicitly done is when the virtual machine image is initialized, after this the VM does not allocate any more memory. **/ #include /* #include */ #define true (0 == 0) #define false (0 == 1) typedef unsigned int bool; #include #include #include #include #include #include #include #include #include /** Traditionally Forth implementations were the only program running on the (micro)computer, running on processors orders of magnitude slower than this one, as such checks to make sure memory access was in bounds did not make sense and the implementation had to have access to the entire machines limited memory. To aide debugging and to help ensure correctness the **ck** macro, a wrapper around the function **check_bounds**, is called for most memory accesses that the virtual machine makes. **/ #ifndef NDEBUG /** @brief This is a wrapper around **check_bounds**, so we do not have to keep typing in the line number, as so the name is shorter (and hence the checks are out of the way visually when reading the code). @param C expression to bounds check @return check index **/ #define ck(C) check_bounds(o, &on_error, (C), __LINE__, o->core_size) /** @brief This is a wrapper around **check_bounds**, so we do not have to keep typing in the line number, as so the name is shorter (and hence the checks are out of the way visually when reading the code). This will check character pointers instead of cell pointers, like **ck** does. @param C expression to bounds check @return checked character index **/ #define ckchar(C) check_bounds(o, &on_error, (C), __LINE__, \ o->core_size * sizeof(f_cell_t)) /** @brief This is a wrapper around **check_depth**, to make checking the depth short and simple. @param DEPTH current depth of the stack **/ #define cd(DEPTH) check_depth(o, &on_error, S, (DEPTH), __LINE__) /** @brief This macro makes sure any dictionary pointers never cross into the stack area. @param DPTR a index into the dictionary @return checked index **/ #define dic(DPTR) check_dictionary(o, &on_error, (DPTR)) /** @brief This macro wraps up the tracing function, which we may want to remove. @param ENV forth environment @param INSTRUCTION instruction being executed @param STK stack pointer @param TOP current top of stack to print out **/ #define TRACE(ENV,INSTRUCTION,STK,TOP) trace(ENV,INSTRUCTION,STK,TOP) #else /** The following are defined only if we remove the checking and the debug code. **/ #define ck(C) (C) #define ckchar(C) (C) #define cd(DEPTH) ((void)DEPTH) #define dic(DPTR) check_dictionary(o, &on_error, (DPTR)) #define TRACE(ENV, INSTRUCTION, STK, TOP) #endif /** @brief When we are reading input to be parsed we need a space to hold that input, the offset to this area is into a field called **m** in **struct forth**, defined later, the offset is a multiple of cells and not chars. **/ #define STRING_OFFSET (32u) /** @brief This defines the maximum length of a Forth words name, that is the string that represents a Forth word. **/ #define MAXIMUM_WORD_LENGTH (32u) /** @brief The minimum stack size of both the variable and return stack, the stack size should not be made smaller than this otherwise the built in code and code in *forth.fth* will not work. **/ #define MINIMUM_STACK_SIZE (64u) /** @brief The start of the dictionary is after the registers and the **STRING_OFFSET**, this is the area where Forth definitions are placed. **/ #define DICTIONARY_START (STRING_OFFSET+MAXIMUM_WORD_LENGTH) /** Later we will encounter a field called **CODE**, a field in every Word definition and is always present in the Words header. This field contains multiple values at different bit offsets, only the lower 16 bits of this cell are ever used. The next macros are helper to extract information from the **CODE** field. **/ /** @brief The bit offset for word length start. **/ #define WORD_LENGTH_OFFSET (8) /** @brief The bit offset for the bit that determines whether a word is a compiling, or an immediate word. **/ #define COMPILING_BIT_OFFSET (15) /** @brief This is the bit that determines whether a word is a compiling word (the bit is set) or an immediate word (the bit is cleared). **/ #define COMPILING_BIT (1u << COMPILING_BIT_OFFSET) /** @brief The lower 5-bits of the upper word are used for the word length **/ #define WORD_MASK (0x1f) /** @brief **WORD_LENGTH** extracts the length of a Forth words name so we know where it is relative to the **PWD** field of a word. @param CODE This should be the **CODE** field of a word **/ #define WORD_LENGTH(CODE) (((CODE) >> WORD_LENGTH_OFFSET) & WORD_MASK) /** @brief Offset for the word hidden bit **/ #define WORD_HIDDEN_BIT_OFFSET (7) /** @brief Test if a word is a **hidden** word, one that is not in the search order for the dictionary. @param CODE field to test **/ #define WORD_HIDDEN(CODE) ((CODE) & 0x80) /** @brief The lower 7 bits of the CODE field are used for the VM instruction, limiting the number of instructions the virtual machine can have in it, the higher bits are used for other purposes. **/ #define INSTRUCTION_MASK (0x7f) /** @brief A mask that the VM uses to extract the instruction. @param k This **CODE**, or a **CODE** Field of a Forth word **/ #define instruction(k) ((k) & INSTRUCTION_MASK) /** @brief **VERIFY** is our assert macro that will always been defined regardless of whether **NDEBUG** is defined. @param X expression to verify **/ /* #define VERIFY(X) do { if (!(X)) { abort(); } } while (0) */ #define VERIFY(X) do { if (!(X)) { exit(1); } } while (0) /** @brief Errno are biased to fall in the range of -256...-511 when the get to the Forth interpreter. **/ #define BIAS_ERRNO (-256) /** @brief Signals numbers are biased to fall in the range of -512...-1024 when they get to the Forth interpreter. **/ #define BIAS_SIGNAL (-512) /** @brief This is a useful function for performing what is in effect a static assert by abusing the language. @param conditions, condition to fail on if true */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) /** ## Enumerations and Constants **/ /** This following string is a forth program that gets called when creating a new Forth environment, it is run before the user gets a chance to do anything. The program is kept as small as possible, but is dependent on the virtual machine image being set up correctly with other, basic, constants being defined first, they will be described as they are encountered. Suffice to say, before this program is executed the following happens: 1) The virtual machine image is initialized 2) All the virtual machine primitives are defined 3) All registers are named and some constants defined 4) **;** is defined Of note, words such as **if**, **else**, **then**, and even comments - **(** -, are not actually Forth primitives, there are defined in terms of other Forth words. The Forth interpreter is a simple loop that does the following: Start the interpreter loop <-----------<-----------------<---. Get a space delimited word \ Attempt to look up that word in the dictionary \ Was the word found? ^ |-Yes: | | Are we in compile mode? | | |-Yes: ^ | | \-Is the Word an Immediate word? | | | |-Yes: | | | | \-Execute the word >--------->----------------->----->. | | \-No: | | | \-Compile the word into the dictionary >------->----->. | \-No: | | \-Execute the word >------------->----------------->----->. \-No: ^ \-Can the word be treated as a number? | |-Yes: | | \-Are we in compile mode? | | |-Yes: | | | \-Compile a literal into the dictionary >------>----->. | \-No: | | \-Push the number to the variable stack >------>----->. \-No: | \-An Error has occurred, print out an error message >---->. As you can see, there is not too much too it, however there are still a lot of details left out, such as how exactly the virtual machine executes words and how this loop is formed. A short description of the words defined in **initial_f_program** follows, bear in mind that they depend on the built in primitives, the named registers being defined, as well as **state** and **;**. here - push the current dictionary pointer [ - immediately enter command mode ] - enter compile mode >mark - make a hole in the dictionary and push a pointer to it :noname - make an anonymous word definition, push token to it, the definition is terminated by ';' like normal word definitions. if - immediate word, begin if...else...then clause else - immediate word, optional else clause then - immediate word, end if...else...then clause begin - immediate word, start a begin...until loop until - immediate word, end begin...until loop, jump to matching begin at run time if top of stack is zero. ')' - push a ")" character to the stack ( - begin a Forth comment, terminated by a ) rot - perform stack manipulation: x y z => y z x -rot - perform stack manipulation: x y z => z x y tuck - perform stack manipulation: x y => y x y nip - perform stack manipulation: x y => y allot - allocate space in the dictionary bl - push the space character to the stack space - print a space . - print out current top of stack, followed by a space **/ static const char *initial_f_program = ": smudge pwd @ 1 + dup @ hidden-mask xor swap ! _exit\n" ": (;) ' _exit , 0 state ! _exit\n" ": ; immediate (;) smudge _exit\n" ": : immediate :: smudge _exit\n" ": here h @ ; \n" ": [ immediate 0 state ! ; \n" ": ] 1 state ! ; \n" ": >mark here 0 , ; \n" ": :noname immediate -1 , here dolist , ] ; \n" ": if immediate ' ?branch , >mark ; \n" ": else immediate ' branch , >mark swap dup here swap - swap ! ; \n" ": then immediate dup here swap - swap ! ; \n" ": begin immediate here ; \n" ": until immediate ' ?branch , here - , ; \n" ": ( immediate begin key ')' = until ; \n" ": rot >r swap r> swap ; \n" ": -rot rot rot ; \n" ": tuck swap over ; \n" ": nip swap drop ; \n" ": 2drop drop drop ; \n" ": allot here + h ! ; \n" ": emit _emit drop ; \n" ": space bl emit ; \n" ": evaluate 0 evaluator ; \n" ": . (.) drop space ; \n" ": ? @ . ;\n" ; /** @brief This is a string used in number to string conversion in **number_printer**, which is dependent on the current base. **/ static const char conv[] = "0123456789abcdefghijklmnopqrstuvwxzy"; /** @brief int to **char\*** map for file access methods. **/ enum fams { FAM_WO, /**< write only */ FAM_RO, /**< read only */ FAM_RW, /**< read write */ LAST_FAM /**< marks last file access method */ }; /** @brief These are the file access methods available for use when the virtual machine is up and running, they are passed to the built in primitives that deal with file input and output (such as open-file). @note It might be worth adding more *fams*, which **fopen** can accept. **/ static const char *fams[] = { [FAM_WO] = "wb", [FAM_RO] = "rb", [FAM_RW] = "w+b", NULL }; /** @brief The following are different reactions errors can take when using **longjmp** to a previous **setjump**. **/ enum errors { INITIALIZED, /**< setjmp returns zero if returning directly */ OK, /**< no error, do nothing */ FATAL, /**< fatal error, this invalidates the Forth image */ RECOVERABLE, /**< recoverable error, this will reset the interpreter */ }; /** We can serialize the Forth virtual machine image, saving it to disk so we can load it again later. When saving the image to disk it is important to be able to identify the file somehow, and to identify properties of the image. Unfortunately each image is not portable to machines with different cell sizes (determined by "sizeof(f_cell_t)") and different endianess, and it is not trivial to convert them due to implementation details. **enum header** names all of the different fields in the header. The first four fields (**MAGIC0**...**MAGIC3**) are magic numbers which identify the file format, so utilities like *file* on Unix systems can differentiate binary formats from each other. **CELL_SIZE** is the size of the virtual machine cell used to create the image. **VERSION** is used to both represent the version of the Forth interpreter and the version of the file format. **ENDIAN** is the endianess of the VM **MAGIC7** is the last magic number. When loading the image the magic numbers are checked as well as compatibility between the saved image and the compiled Forth interpreter. **/ enum header { /**< Forth header description enum */ MAGIC0, /**< Magic number used to identify file type */ MAGIC1, /**< Magic number ... */ MAGIC2, /**< Magic number ... */ MAGIC3, /**< Magic number ... */ CELL_SIZE, /**< Size of a Forth cell, or virtual machine word */ VERSION, /**< Version of the image */ ENDIAN, /**< Endianess of the interpreter */ LOG2_SIZE, /**< Log-2 of the size */ MAX_HEADER_FIELD }; /** The header itself, this will be copied into the **f_t** structure on initialization, the **ENDIAN** field is filled in then as it seems impossible to determine the endianess of the target at compile time. **/ static const uint8_t header[MAX_HEADER_FIELD] = { [MAGIC0] = 0xFF, [MAGIC1] = '4', [MAGIC2] = 'T', [MAGIC3] = 'H', [CELL_SIZE] = sizeof(f_cell_t), [VERSION] = F_CORE_VERSION, [ENDIAN] = -1, [LOG2_SIZE] = -1 }; /** @brief The main structure used by the virtual machine is **f_t**. The structure is defined here and not in the header to hide the implementation details it, all API functions are passed an opaque pointer to the structure (see ). Only three fields are serialized to the file saved to disk: 1) **header** 2) **core_size** 3) **m** And they are done so in that order, **core_size** and **m** are save in whatever endianess the machine doing the saving is done in, however **core_size** is converted to a **uint64_t** before being save to disk so it is not of a variable size. **m** is a flexible array member **core_size** number of members. The **m** field is the virtual machines working memory, it has its own internal structure which includes registers, stacks and a dictionary of defined words. The **m** field is laid out as follows, assuming the size of the virtual machine is 32768 cells big: .-----------------------------------------------. | 0-3F | 40-7BFF |7C00-7DFF|7E00-7FFF| .-----------------------------------------------. | Registers | Dictionary... | V stack | R stack | .-----------------------------------------------. V stack = The Variable Stack R stack = The Return Stack The dictionary has its own complex structure, and it always starts just after the registers. It includes scratch areas for parsing words, start up code and empty space yet to be consumed before the variable stack. The sizes of the variable and returns stack change depending on the virtual machine size. The structures within the dictionary will be described later on. In the following structure, **struct forth**, values marked with a '~~' are serialized, the serialization takes place in order. Values are written out as they are. The size of the Forth memory core gets stored in the header, the size must be a power of two, so its binary logarithm can be stored in a single byte. **/ struct forth { /**< FORTH environment */ uint8_t header[sizeof(header)]; /**< ~~ header for core file */ f_cell_t core_size; /**< size of VM */ uint8_t *s; /**< convenience pointer for string input buffer */ f_cell_t *S; /**< stack pointer */ f_cell_t *vstart;/**< index into m[] where variable stack starts*/ f_cell_t *vend; /**< index into m[] where variable stack ends*/ const struct f_functions *calls; /**< functions for CALL instruction */ int unget; /**< single character of push back */ bool unget_set; /**< character is in the push back buffer? */ size_t line; /**< count of new lines read in */ f_cell_t m[]; /**< ~~ Forth Virtual Machine memory */ }; /** @brief This enumeration describes the possible actions that can be taken when an error occurs, by setting the right register value it is possible to make errors halt the interpreter straight away, or even to make it invalidate the core. This does not override the behavior of the virtual machine when it detects an error that cannot be recovered from, only when it encounters an error such as a divide by zero or a word not being found, not when the virtual machine executes and invalid instruction (which should never normally happen unless something has been corrupted). **/ enum actions_on_error { ERROR_RECOVER, /**< recover when an error happens, like a call to ABORT */ ERROR_HALT, /**< halt on error */ ERROR_INVALIDATE, /**< halt on error and invalid the Forth interpreter */ }; /** @brief There are a small number of registers available to the virtual machine, they are actually indexes into the virtual machines main memory, this is so that the programs running on the virtual machine can access them. There are other registers that are in use that the virtual machine cannot access directly (such as the program counter or instruction pointer). Some of these registers correspond directly to well known Forth concepts, such as the dictionary and return stack pointers, others are just implementation details. X-Macros are an unusual but useful method of making tables of data. We use this to store the registers name, it's address within the virtual machine and the enumeration for it. More information about X-Macros can be found here: * * * **/ #define XMACRO_REGISTERS \ X("h", DIC, 6, "dictionary pointer")\ X("r", RSTK, 7, "return stack pointer")\ X("state", STATE, 8, "interpreter state")\ X("base", BASE, 9, "base conversion variable")\ X("pwd", PWD, 10, "pointer to previous word")\ X("`source-id", SOURCE_ID, 11, "input source selector")\ X("`sin", SIN, 12, "string input pointer")\ X("`sidx", SIDX, 13, "string input index")\ X("`slen", SLEN, 14, "string input length")\ X("`start-address", START_ADDR, 15, "pointer to start of VM")\ X("`fin", FIN, 16, "file input pointer")\ X("`fout", FOUT, 17, "file output pointer")\ X("`stdin", STDIN, 18, "file pointer to stdin")\ X("`stdout", STDOUT, 19, "file pointer to stdout")\ X("`stderr", STDERR, 20, "file pointer to stderr")\ X("`argc", ARGC, 21, "argument count")\ X("`argv", ARGV, 22, "arguments")\ X("`debug", DEBUG, 23, "turn debugging on/off if enabled")\ X("`invalid", INVALID, 24, "non-zero on serious error")\ X("`top", TOP, 25, "*stored* version of top of stack")\ X("`instruction", INSTRUCTION, 26, "start up instruction")\ X("`stack-size", STACK_SIZE, 27, "size of the stacks")\ X("`error-handler", ERROR_HANDLER, 28, "actions to take on error")\ X("`handler", THROW_HANDLER, 29, "exception handler is stored here")\ X("`signal", SIGNAL_HANDLER, 30, "signal handler")\ X("`x", SCRATCH_X, 31, "scratch variable x") /** @brief The virtual machine registers used by the Forth virtual machine. **/ enum registers { #define X(NAME, ENUM, VALUE, HELP) ENUM = VALUE, XMACRO_REGISTERS #undef X }; static const char *register_names[] = { /**< names of VM registers */ #define X(NAME, ENUM, VALUE, HELP) NAME, XMACRO_REGISTERS #undef X NULL }; /** @brief The enum **input_stream** lists values of the **SOURCE_ID** register. Input in Forth systems traditionally (tradition is a word we will keep using here, generally in the context of programming it means justification for cruft) came from either one of two places, the keyboard that the programmer was typing at, interactively, or from some kind of non volatile store, such as a floppy disk. Our C program has no portable way of interacting directly with the keyboard, instead it could interact with a file handle such as **stdin**, or read from a string. This is what we do in this interpreter. A word in Forth called **SOURCE-ID** can be used to query what the input device currently is, the values expected are zero for interactive interpretation, or minus one (minus one, or all bits set, is used to represent truth conditions in most Forths, we are a bit more liberal in our definition of true) for string input. These are the possible values that the **SOURCE_ID** register can take. The **SOURCE-ID** word, defined in *forth.fth*, then does more processing of this word. Note that the meaning is slightly different in our Forth to what is meant traditionally, just because this program is taking input from **stdin** (or possibly another file handle), does not mean that this program is being run interactively, it could possibly be part of a Unix pipe, which is the reason the interpreter defaults to being as silent as possible. **/ enum input_stream { FILE_IN, /**< file input; this could be interactive input */ STRING_IN = -1 /**< string input */ }; /** @brief **enum instructions** contains each virtual machine instruction, a valid instruction is less than LAST. One of the core ideas of Forth is that given a small set of primitives it is possible to build up a high level language, given only these primitives it is possible to add conditional statements, case statements, arrays and strings, even though they do not exist as instructions here. Most of these instructions are simple (such as; pop two items off the variable stack, add them and push the result for **ADD**) however others are a great deal more complex and will require paragraphs to explain fully (such as **READ**, or how **IMMEDIATE** interacts with the virtual machines execution). The instruction name, enumeration and a help string, are all stored with an X-Macro. Some of these words are not necessary, that is they can be implemented in Forth, but they are useful to have around when the interpreter starts up for debugging purposes (like **pnum**). **/ #define XMACRO_INSTRUCTIONS\ X(0, PUSH, "push", " -- u : push a literal")\ X(0, CONST, "const", " -- u : push a literal")\ X(0, RUN, "run", " -- : run a Forth word")\ X(0, DEFINE, "define", " -- : make new Forth word, set compile mode")\ X(0, IMMEDIATE, "immediate", " -- : make a Forth word immediate")\ X(0, READ, "read", " c\" xxx\" -- : read Forth word, execute it")\ X(1, LOAD, "@", "addr -- u : load a value")\ X(2, STORE, "!", "u addr -- : store a value")\ X(1, CLOAD, "c@", "c-addr -- u : load character value")\ X(2, CSTORE, "c!", "u c-addr -- : store character value")\ X(2, SUB, "-", "u1 u2 -- u3 : subtract u2 from u1 yielding u3")\ X(2, ADD, "+", "u u -- u : add two values")\ X(2, AND, "and", "u u -- u : bitwise and of two values")\ X(2, OR, "or", "u u -- u : bitwise or of two values")\ X(2, XOR, "xor", "u u -- u : bitwise exclusive or of two values")\ X(1, INV, "invert", "u -- u : invert bits of value")\ X(2, SHL, "lshift", "u1 u2 -- u3 : left shift u1 by u2")\ X(2, SHR, "rshift", "u1 u2 -- u3 : right shift u1 by u2")\ X(2, MUL, "*", "u u -- u : multiply to values")\ X(2, DIV, "/", "u1 u2 -- u3 : divide u1 by u2 yielding u3")\ X(2, ULESS, "u<", "u u -- bool : unsigned less than")\ X(2, UMORE, "u>", "u u -- bool : unsigned greater than")\ X(0, EXIT, "exit", " -- : return from a word definition")\ X(0, KEY, "key", " -- char : get one character of input")\ X(1, EMIT, "_emit", " char -- status : get one character of input")\ X(0, FROMR, "r>", " -- u, R: u -- : move from return stack")\ X(1, TOR, ">r", "u --, R: -- u : move to return stack")\ X(0, BRANCH, "branch", " -- : unconditional branch")\ X(1, QBRANCH, "?branch", "u -- : branch if u is zero")\ X(1, PNUM, "(.)", "u -- n : print a number returning an error on failure")\ X(1, COMMA, ",", "u -- : write a value into the dictionary")\ X(2, EQUAL, "=", "u u -- bool : compare two values for equality")\ X(2, SWAP, "swap", "x1 x2 -- x2 x1 : swap two values")\ X(1, DUP, "dup", "u -- u u : duplicate a value")\ X(1, DROP, "drop", "u -- : drop a value")\ X(2, OVER, "over", "x1 x2 -- x1 x2 x1 : copy over a value")\ X(0, TAIL, "tail", " -- : tail recursion")\ X(0, FIND, "find", "c\" xxx\" -- addr | 0 : find a Forth word")\ X(0, DEPTH, "depth", " -- u : get current stack depth")\ X(0, SPLOAD, "sp@", " -- addr : load current stack pointer ")\ X(0, SPSTORE, "sp!", " addr -- : modify the stack pointer")\ X(0, CLOCK, "clock", " -- u : push a time value")\ X(3, EVALUATOR, "evaluator", "c-addr u 0 | file-id 0 1 -- u : evaluate file/str")\ X(0, PSTK, ".s", " -- : print out values on the stack")\ X(1, RESTART, "restart", " error -- : restart system, cause error")\ X(0, CALL, "call", "n1...nn c -- n1...nn c : call a function")\ X(2, SYSTEM, "system", "c-addr u -- bool : execute system command")\ X(1, FCLOSE, "close-file", "file-id -- ior : close a file")\ X(3, FOPEN, "open-file", "c-addr u fam -- open a file")\ X(2, FDELETE, "delete-file", "c-addr u -- ior : delete a file")\ X(3, FREAD, "read-file", "c-addr u file-id -- u ior : write block")\ X(3, FWRITE, "write-file", "c-addr u file-id -- u ior : read block")\ X(1, FPOS, "file-position", "file-id -- u : get the file position")\ X(2, FSEEK, "reposition-file", "file-id u -- ior : reposition file")\ X(1, FFLUSH, "flush-file", "file-id -- ior : flush a file")\ X(4, FRENAME, "rename-file", "c-addr1 u1 c-addr2 u2 -- ior : rename file")\ X(0, TMPFILE, "temporary-file", "-- file-id ior : open a temporary file")\ X(1, RAISE, "raise", "signal -- bool : raise a signal")\ X(0, DATE, "date", " -- date : push the time")\ X(3, MEMMOVE, "memory-copy", " r-addr1 r-addr2 u -- : move a block of memory from r-addr2 to r-addr1")\ X(3, MEMCHR, "memory-locate", " r-addr char u -- r-addr | 0 : locate a character memory")\ X(3, MEMSET, "memory-set", " r-addr char u -- : set a block of memory")\ X(3, MEMCMP, "memory-compare", " r-addr1 r-addr2 u -- u : compare two blocks of memory")\ X(1, ALLOCATE, "allocate", " u -- r-addr ior : allocate a block of memory")\ X(1, FREE, "free", " r-addr1 -- ior : free a block of memory")\ X(2, RESIZE, "resize", " r-addr u -- r-addr ior : resize a block of memory")\ X(2, GETENV, "getenv", " c-addr u -- r-addr u : return an environment variable")\ X(1, BYE, "(bye)", " u -- : bye, bye!")\ X(0, LAST_INSTRUCTION, NULL, "") /** @brief All of the instructions that can be used by the Forth virtual machine. **/ enum instructions { #define X(STACK, ENUM, STRING, HELP) ENUM, XMACRO_INSTRUCTIONS #undef X }; /** So that we can compile programs we need ways of referring to the basic programming constructs provided by the virtual machine, theses words are fed into the C function **compile** in a process described later. **LAST_INSTRUCTION** is not an instruction, but only a marker of the last enumeration used in **enum instructions**, so it does not get a name. **/ static const char *instruction_names[] = { /**< instructions with names */ #define X(STACK, ENUM, STRING, HELP) STRING, XMACRO_INSTRUCTIONS #undef X }; /** This contains an array of values that are the minimum number of values needed on the stack before a word can execute. **/ static const int stack_bounds[] = { /**< number stack variables needed*/ #define X(STACK, ENUM, STRING, HELP) STACK, XMACRO_INSTRUCTIONS #undef X }; /** This X-Macro contains a list of constants that will be available to the Forth interpreter. **/ #define X_MACRO_CONSTANTS\ X("dictionary-start", DICTIONARY_START, "start of dictionary")\ X("r/o", FAM_RO, "read only file access method")\ X("r/w", FAM_RW, "read/write file access method")\ X("w/o", FAM_WO, "write only file access method")\ X("size", sizeof(f_cell_t), "size of forth cell in bytes")\ X("#tib", MAXIMUM_WORD_LENGTH * sizeof(f_cell_t), "")\ X("tib", STRING_OFFSET * sizeof(f_cell_t), "")\ X("SIGABRT", -SIGABRT+BIAS_SIGNAL, "SIGABRT value")\ X("SIGFPE", -SIGFPE +BIAS_SIGNAL, "SIGFPE value")\ X("SIGILL", -SIGILL +BIAS_SIGNAL, "SIGILL value")\ X("SIGINT", -SIGINT +BIAS_SIGNAL, "SIGINT value")\ X("SIGSEGV", -SIGSEGV+BIAS_SIGNAL, "SIGSEGV value")\ X("SIGTERM", -SIGTERM+BIAS_SIGNAL, "SIGTERM value")\ X("bias-signal", BIAS_SIGNAL, "bias added to signals")\ X("bias-errno", BIAS_ERRNO, "bias added to errnos")\ X("instruction-mask", INSTRUCTION_MASK, "instruction mask for CODE field")\ X("word-mask", WORD_MASK, "word length mask for CODE field")\ X("hidden-bit", WORD_HIDDEN_BIT_OFFSET, "hide bit in CODE field")\ X("hidden-mask", 1u << WORD_HIDDEN_BIT_OFFSET, "hide mask for CODE ")\ X("compile-bit", COMPILING_BIT_OFFSET, "compile/immediate bit in CODE field")\ X("dolist", RUN, "instruction for executing a words body")\ X("dolit", 2, "location of fake word for pushing numbers")\ X("doconst", CONST, "instruction for pushing a constant")\ X("bl", ' ', "space character")\ X("')'", ')', "')' character")\ X("lf", '\n', "newline (also causes stdout flush on CMS")\ X("cell", 1, "space a single cell takes up") /** @brief A structure that contains a constant to be added to the Forth environment by **f_init**. A constants name, like any other Forth word, should be shorter than MAXIMUM_WORD_LENGTH. **/ static struct constants { const char *name; /**< constants name */ f_cell_t value; /**< value of the named constant */ } constants[] = { #define X(NAME, VALUE, DESCRIPTION) { NAME, (VALUE) }, X_MACRO_CONSTANTS #undef X { NULL, 0 } }; /** ## Helping Functions For The Compiler **/ static int ferrno(void) { /**@note The VM should only see biased error numbers */ return errno ? (-errno) + BIAS_ERRNO : 0; } const char *f_strerror(void) { static const char *unknown = "unknown reason"; const char *r = errno ? strerror(errno) : unknown; if (!r) r = unknown; return r; } int f_logger(const char *prefix, const char *func, unsigned line, const char *fmt, ...) { int r; va_list ap; assert(prefix); assert(func); assert(fmt); fprintf(stderr, "[%s %u] %s: ", func, line, prefix); va_start(ap, fmt); r = vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); return r; } /** @brief Get a char from string input or a file @param o forth image containing information about current input stream @return int same value as fgetc or getchar This Forth interpreter only has a limited number of mechanisms for I/O, one of these is to fetch an individual character of input from either a string or a file which can be set either with knowledge of the implementation from within the virtual machine, or via the API presented to the programmer. The C functions **f_init**, **f_s_f_input** and **f_set_string_input** set up and manipulate the input of the interpreter. These functions act on the following registers: SOURCE_ID - The current input source (SIN or FIN) SIN - String INput SIDX - String InDeX SLEN - String LENgth FIN - File INput Note that either SIN or FIN might not both be valid, one will be but the other might not, this makes manipulating these values hazardous. The input functions **f_get_char** and **f_ge\t_word** both take their input streams implicitly via the registers contained within the Forth execution environment passed in to those functions. @note If the Forth interpreter is blocking, waiting for input, and a signal occurs an EOF might be returned. This should be translated into a 'throw', but it is not handled yet. **/ static int f_get_char(f_t *o) { assert(o); int r = 0; if (o->unget_set) { o->unget_set = false; return o->unget; } /* BWT note("get_char on source: %d FILE %d, STRING %d\n", (int)o->m[SOURCE_ID], FILE_IN, STRING_IN); */ switch ((int)o->m[SOURCE_ID]) { case FILE_IN: r = fgetc((FILE*)(o->m[FIN])); break; case STRING_IN: r = o->m[SIDX] >= o->m[SLEN] ? EOF : ((char*)(o->m[SIN]))[(int)o->m[SIDX]++]; break; default: r = EOF; } if (r == '\n') o->line++; return r; } /** @brief Push back a single character into the input buffer. @param o initialized Forth environment @param ch character to push back @return ch on success, negative on failure, of it EOF was pushed back **/ static int f_unget_char(f_t *o, int ch) { assert(o); if (o->unget_set) return -1; o->unget_set = true; o->unget = ch; return o->unget; } /** @brief get a word (space delimited, up to 31 chars) from a FILE\* or string-in @param o initialized Forth environment. @param p pointer to string to write into @param length maximum length of string to get @return int 0 on success, -1 on failure (EOF) This function reads in a space delimited word, limited to **MAXIMUM_WORD_LENGTH**, the word is put into the pointer **\*p**, due to the simple nature of Forth this is as complex as parsing and lexing gets. It can either read from a file handle or a string, like f_get_char() **/ static int pages = 1; static int f_get_word(f_t *o, uint8_t *s, f_cell_t length) { int ch; /* BWT int every = 80; */ /* BWT */ FILE *p = (FILE*)o->m[FIN]; memset(s, 0, length); for (;;) { ch = f_get_char(o); /* BWT printf("f_get_char: %d, ", ch); */ /* BWT if (ch == EOF || !ch) printf("f_get_char EOF: %d err: %d\n", ch, ferror((FILE*)o->m[FIN])); */ /* BWT if (ch == '\n' || !ch) printf("f_get_char: %d err: %d\n", ch, ferror((FILE*)o->m[FIN])); */ /* BWT */ if ((o->m[SOURCE_ID] == FILE_IN) && ((FILE*)o->m[FIN] == stdin) && (ch == '\n' || !ch)) fputc('\n', stdout); if (ch == EOF || !ch) return -1; if (!isspace(ch)) break; } s[0] = ch; /* BWT printf("%c", ch); if (!every--) { every = 80; fputc('\n', stdout); } */ size_t i; for (i = 1; i < (length - 1); i++) { ch = f_get_char(o); /* BWT if (ch == '\n' || !ch) printf("f_get_char: %d err: %d\n", ch, ferror((FILE*)o->m[FIN])); */ if (ch == EOF || isspace(ch) || !ch) goto unget; s[i] = ch; /* BWT printf("%c", ch); if (!every--) { every = 80; fputc('\n', stdout); } */ } /* BWT fputc('\n', stdout); */ return 0; unget: /* BWT */ /* if (ftell(p) > pages * 2000) { pages++; note(" [ungetc] %d '%c' eof: %d err: %d pos: %d", ch, ch, feof(p), ferror(p), ftell(p)); } */ /* BWT */ f_unget_char(o, ch); return 0; } /** @brief Compile a Forth word header into the dictionary @param o Forth environment to do the compilation in @param code virtual machine instruction for that word @param str name of Forth word @return returns a pointer to the code field of the word just defined The function **compile** is not that complicated in itself, however it requires an understanding of the structure of a Forth word definition and the behavior of the Forth run time. In all Forth implementations there exists a concept of *the dictionary*, although they may be implemented in different ways the usual way is as a linked list of words, starting with the latest defined word and ending with a special terminating value. Words cannot be arbitrarily deleted, deletions have to occur in the reverse order that they are defined. Each word or Forth function that has been defined can be looked up in this dictionary, and dependent on whether it is an immediate word or a compiling word, and whether we are in command or compile mode different actions are taken when we have found the word we are looking for in our Read-Evaluate-Loop. | <-- Start of VM memory | | <-- Start of dictionary | | .------------. | .------. .------. .-------------. | Terminator | <---- | Word | <--- | Word | < -- ... -- | Latest Word | .------------. | .------. .------. .-------------. | ^ | | | PWD Register The **PWD** registers points to the latest defined word, a search starts from here and works it way backwards (allowing us replace old definitions by appending new ones with the same name), the terminator 'value' is actually any value that points before the beginning of the dictionary. Our word header looks like this: .-----------.-----.------.------------. | Word Name | PWD | CODE | Data Field | .-----------.-----.------.------------. * The **Data Field** is optional and is of variable length. * **Word Name** is a variable length field whose size is recorded in the CODE field. And the **CODE** field is a composite field, to save space, containing a virtual machine instruction, the hidden bit, the compiling bit, and the length of the Word Name string as an offset in cells from **PWD** field. The field looks like this: .---------------.------------------.------------.-------------. | 15 | 14 ........... 8 | 9 | 7 ....... 0 | | Compiling Bit | Word Name Size | Hidden Bit | Instruction | .---------------.------------------.------------.-------------. The maximum value for the Word Name field is determined by the width of the Word Name Size field. The hidden bit is not used in the **compile** function, but is used elsewhere (in **f_find**) to hide a word definition from the word search. The hidden bit is not set within this program at all, however it can be set by a running Forth virtual machine (and it is, if desired). The compiling bit tells the text interpreter/compiler what to do with the word when it is read in from the input, if set it will be compiled into the dictionary if in compile mode and in command mode it will be executed, if it is cleared the word will always be executed. The instruction is the virtual machine instruction that is to be executed by the interpreter. **/ static f_cell_t compile(f_t *o, f_cell_t code, const char *str, f_cell_t compiling, f_cell_t hide) { assert(o && code < LAST_INSTRUCTION); f_cell_t *m = o->m, head = m[DIC], l = 0, cf = 0; /*FORTH header structure */ /*Copy the new FORTH word into the new header */ strcpy((char *)(o->m + head), str); /* align up to size of cell */ l = strlen(str) + 1; l = (l + (sizeof(f_cell_t) - 1)) & ~(sizeof(f_cell_t) - 1); l = l/sizeof(f_cell_t); m[DIC] += l; /* Add string length in words to header (STRLEN) */ m[m[DIC]++] = m[PWD]; /*0 + STRLEN: Pointer to previous words header */ m[PWD] = m[DIC] - 1; /*Update the PWD register to new word */ /*size of words name and code field*/ assert(l < WORD_MASK); cf = m[DIC]; m[m[DIC]++] = ((!!compiling) << COMPILING_BIT_OFFSET) | (l << WORD_LENGTH_OFFSET) | (hide << WORD_HIDDEN_BIT_OFFSET) | code; return cf; } /** @brief This function turns a string into a number using a base and returns an error code to indicate success or failure, the results of the conversion are stored in **n**, even if the conversion failed. **/ int f_string_to_cell(int base, f_cell_t *n, const char *s) { char *end = NULL; errno = 0; *n = strtol(s, &end, base); return errno || *s == '\0' || *end != '\0'; } /** @brief Forths are usually case insensitive and are required to be (or at least accept only uppercase characters only) by the majority of the standards for Forth. As an aside I do not believe case insensitivity is a good idea as it complicates interfaces and creates as much confusion as it tries to solve (not only that, but different case letters do convey information). However, in keeping with other implementations, this Forth is also made insensitive to case **DUP** is treated the same as **dup** and **Dup**. This comparison function, **istrcmp**, is only used in one place however, in the C function **f_find**, replacing it with **strcmp** will bring back the more logical, case sensitive, behavior. @param a first string to compare @param b second string @return int same as **strcmp**, only case insensitive **/ static int istrcmp(const char *a, const char *b) { for (; ((*a == *b) || (tolower(*a) == tolower(*b))) && *a && *b; a++, b++) ; return tolower(*a) - tolower(*b); } /** The **match** function returns true if the word is not hidden and if a case sensitive case sensitive has succeeded. **/ static int match(f_cell_t *m, f_cell_t pwd, const char *s) { f_cell_t len = WORD_LENGTH(m[pwd + 1]); return !WORD_HIDDEN(m[pwd+1]) && !istrcmp(s, (char*)(&m[pwd-len])); } /** **f_find** finds a word in the dictionary and if it exists it returns a pointer to its **PWD** field. If it is not found it will return zero, also of notes is the fact that it will skip words that are hidden, that is the hidden bit in the **CODE** field of a word is set. The structure of the dictionary has already been explained, so there should be no surprises in this word. Any improvements to the speed of this word would speed up the text interpreter a lot, but not the virtual machine in general. **/ f_cell_t f_find(f_t *o, const char *s) { f_cell_t *m = o->m, pwd = m[PWD]; #ifdef USE_FAST_FIND /* This implements a self organizing list, which speeds * up the searching of words (which has been profiled), however * it does not interact well with Forth words like "marker", so * it is optional. This method uses transposition, move to * front has not been tested. * * See: https://en.wikipedia.org/wiki/Self-organizing_list */ f_cell_t grandparent = pwd, parent = pwd; for (;pwd > DICTIONARY_START && !match(m, pwd, s);) { grandparent = parent; parent = pwd; pwd = m[pwd]; } if (pwd > DICTIONARY_START && parent != m[PWD]) { /* found - transpose it */ m[grandparent] = pwd; /* grandparent = current */ m[parent] = m[pwd]; /* parent = current next */ m[pwd] = parent; /* new next = parent */ } #else for (;pwd > DICTIONARY_START && !match(m, pwd, s);) pwd = m[pwd]; #endif return pwd > DICTIONARY_START ? pwd + 1 : 0; } /** @brief Print a number in a given base to an output stream @param o initialized forth environment @param out output file stream @param u number to print @return number of characters written, or negative on failure **/ static int print_cell(f_t *o, FILE *out, f_cell_t u) { int i = 0, r = 0; char s[64 + 1] = {0}; unsigned base = o->m[BASE]; base = base != 0 ? base : 10 ; if (base >= 37) return -1; if (base == 10) return fprintf(out, "%"PRIdCell, u); do s[i++] = conv[u % base]; while ((u /= base)); for (r = --i; i >= 0; i--) if (fputc(s[i], out) != s[i]) return -1; return r; } /** **check_bounds** is used to both check that a memory access performed by the virtual machine is within range and as a crude method of debugging the interpreter (if it is enabled). The function is not called directly but is instead wrapped in with the **ck** macro, it can be removed with compile time defines, removing the check and the debugging code. **/ static f_cell_t check_bounds(f_t *o, jmp_buf *on_error, f_cell_t f, unsigned line, f_cell_t bound) { if (o->m[DEBUG] >= F_DEBUG_CHECKS) debug("0x%"PRIxCell " %u", f, line); if (f >= bound) { fatal("bounds check failed (%"PRIdCell" >= %zu) C line %u Forth Line %zu", f, (size_t)bound, line, o->line); longjmp(*on_error, /* BWT FATAL */ RECOVERABLE); } return f; } /** **check_depth** is used to check that there are enough values on the stack before an operation takes place. It is wrapped up in the **cd** macro. **/ static void check_depth(f_t *o, jmp_buf *on_error, f_cell_t *S, f_cell_t expected, unsigned line) { if (o->m[DEBUG] >= F_DEBUG_CHECKS) debug("0x%"PRIxCell " %u", (f_cell_t)(S - o->vstart), line); if ((uintptr_t)(S - o->vstart) < expected) { error("stack underflow %p -> %u (line %zu)", S - o->vstart, line, o->line); longjmp(*on_error, RECOVERABLE); } else if (S > o->vend) { error("stack overflow %p -> %u (line %zu)", S - o->vend, line, o->line); longjmp(*on_error, RECOVERABLE); } } /** Check that the dictionary pointer does not go into the stack area: **/ static f_cell_t check_dictionary(f_t *o, jmp_buf *on_error, f_cell_t dptr) { if ((o->m + dptr) >= (o->vstart)) { fatal("dictionary pointer is in stack area %"PRIdCell, dptr); f_invalidate(o); longjmp(*on_error, /* BWT FATAL */ RECOVERABLE); } return dptr; } /** This checks that a Forth string is *NUL* terminated, as required by most C functions, which should be the last character in string (which is s+end). There is a bit of a mismatch between Forth strings (which are pointer to the string and a length) and C strings, which a pointer to the string and are *NUL* terminated. This function helps to correct that. **/ static void check_is_asciiz(jmp_buf *on_error, char *s, f_cell_t end) { if (*(s + end) != '\0') { error("not an ASCIIZ string at %p", s); longjmp(*on_error, RECOVERABLE); } } /** This function gets a string off the Forth stack, checking that the string is *NUL* terminated. It is a helper function used when a Forth string has to be converted to a C string so it can be passed to a C function. **/ static char *f_get_string(f_t *o, jmp_buf *on_error, f_cell_t **S, f_cell_t f) { f_cell_t length = f + 1; char *string = ((char*)o->m) + **S; (*S)--; check_is_asciiz(on_error, string, length); return string; } /** Forth file access methods (or *fam*s) must be held in a single cell, this requires a method of translation from this cell into a string that can be used by the C function **fopen** **/ static const char* f_get_fam(jmp_buf *on_error, f_cell_t f) { if (f >= LAST_FAM) { error("Invalid file access method %"PRIdCell, f); longjmp(*on_error, RECOVERABLE); } return fams[f]; } /** This prints out the Forth stack, which is useful for debugging. **/ static void print_stack(f_t *o, FILE *out, f_cell_t *S, f_cell_t f) { f_cell_t depth = (f_cell_t)(S - o->vstart); fprintf(out, "%"PRIdCell": ", depth); if (!depth) return; f_cell_t j; int i; for (j = (S - o->vstart), i = 1; i < j; i++) { print_cell(o, out, *(o->S + i + 1)); fputc(' ', out); } print_cell(o, out, f); fputc(' ', out); } /** This function allows for some more detailed tracing to take place, reading the logs is difficult, but it can provide *some* information about what is going on in the environment. This function will be compiled out if **NDEBUG** is defined by the C preprocessor. **/ static void trace(f_t *o, f_cell_t instruction, f_cell_t *S, f_cell_t f) { if (o->m[DEBUG] < F_DEBUG_INSTRUCTION) return; fprintf(stderr, "\t( %s\t ", instruction_names[instruction]); print_stack(o, stderr, S, f); fputs(" )\n", stderr); } /** ## API related functions and Initialization code **/ void f_s_f_input(f_t *o, FILE *in) { assert(o); assert(in); o->unget_set = false; /* discard character of push back */ o->m[SOURCE_ID] = FILE_IN; o->m[FIN] = (f_cell_t)in; /* BWT printf("o: %p, m[FIN]: %p\n", o, o->m[FIN]); */ } void f_s_f_output(f_t *o, FILE *out) { assert(o); assert(out); o->m[FOUT] = (f_cell_t)out; } void f_set_block_input(f_t *o, const char *s, size_t length) { assert(o); assert(s); o->unget_set = false; /* discard character of push back */ o->m[SIDX] = 0; /* m[SIDX] == start of string input */ o->m[SLEN] = length; /* m[SLEN] == string len */ o->m[SOURCE_ID] = STRING_IN; /* read from string, not a file handle */ o->m[SIN] = (f_cell_t)s; /* sin == pointer to string input */ } void f_set_string_input(f_t *o, const char *s) { assert(s); f_set_block_input(o, s, strlen(s) + 1); } int f_eval_block(f_t *o, const char *s, size_t length) { assert(o); assert(s); f_set_block_input(o, s, length); return f_run(o); } int f_eval(f_t *o, const char *s) { assert(o); assert(s); f_set_string_input(o, s); return f_run(o); } int f_define_constant(f_t *o, const char *name, f_cell_t c) { assert(o); assert(name); compile(o, CONST, name, true, false); if (strlen(name) >= MAXIMUM_WORD_LENGTH) return -1; if (o->m[DIC] + 1 >= o->core_size) return -1; o->m[o->m[DIC]++] = c; return 0; } void f_set_args(f_t *o, int argc, char **argv) { /* currently this is of little use to the interpreter */ assert(o); o->m[ARGC] = argc; o->m[ARGV] = (f_cell_t)argv; } int f_is_invalid(f_t *o) { assert(o); return !!(o->m[INVALID]); } void f_invalidate(f_t *o) { assert(o); o->m[INVALID] = 1; } void f_set_debug_level(f_t *o, enum f_debug_level level) { assert(o); o->m[DEBUG] = level; } FILE *f_fopen_or_die(const char *name, char *mode) { FILE *file; assert(name); assert(mode); errno = 0; file = fopen(name, mode); if (!file) { fatal("opening file \"%s\" => %s", name, f_strerror()); exit(EXIT_FAILURE); } /* BWT note("file: %s, %p", name, file); */ return file; } /** @brief This function defaults all of the registers in a Forth environment and sets up the input and output streams. @param o the forth environment to set up @param size the size of the **m** field in **o** @param in the input file @param out the output file **f_make_default** default is called by **f_init** and **flc_file**, it is a routine which deals that sets up registers for the virtual machines memory, and especially with values that may only be valid for a limited period (such as pointers to **stdin**). **/ static void f_make_default(f_t *o, size_t size, FILE *in, FILE *out) { assert(o && size >= MINIMUM_CORE_SIZE && in && out); o->core_size = size; o->m[STACK_SIZE] = size / MINIMUM_STACK_SIZE > MINIMUM_STACK_SIZE ? size / MINIMUM_STACK_SIZE : MINIMUM_STACK_SIZE; o->s = (uint8_t*)(o->m + STRING_OFFSET); /*skip registers*/ o->m[FOUT] = (f_cell_t)out; o->m[START_ADDR] = (f_cell_t)&(o->m); o->m[STDIN] = (f_cell_t)stdin; o->m[STDOUT] = (f_cell_t)stdout; o->m[STDERR] = (f_cell_t)stderr; o->m[RSTK] = size - o->m[STACK_SIZE]; /* set up return stk ptr */ o->m[ARGC] = o->m[ARGV] = 0; o->S = o->m + size - (2 * o->m[STACK_SIZE]); /* v. stk pointer */ o->vstart = o->m + size - (2 * o->m[STACK_SIZE]); o->vend = o->vstart + o->m[STACK_SIZE]; f_s_f_input(o, in); /* set up input after our eval */ } /** @brief This function simply copies the current Forth header into a byte array, filling in the endianess which can only be determined at run time. @param dst a byte array at least "sizeof header" large **/ static void make_header(uint8_t *dst, uint8_t log2size) { memcpy(dst, header, sizeof header); /*fill in endianess, needs to be done at run time */ dst[ENDIAN] = !IS_BIG_ENDIAN; dst[LOG2_SIZE] = log2size; } /** Calculates the binary logarithm of a forth cell, rounder up towards infinity. This used for storing the size field in the header. **/ f_cell_t f_blog2(f_cell_t x) { f_cell_t b = 0; while (x >>= 1) b++; return b; } /** This rounds up an integer to the nearest power of two larger than that integer. **/ f_cell_t f_round_up_pow2(f_cell_t r) { f_cell_t up = 1; while (up < r) up <<= 1; return up; } /** **f_init** is a complex function that returns a fully initialized forth environment we can start executing Forth in, it does the usual task of allocating memory for the object to be returned, but it also does has the task of getting the object into a runnable state so we can pass it to **f_run** and do useful work. **/ f_t *f_init(size_t size, FILE *in, FILE *out, const struct f_functions *calls) { f_cell_t *m, i, w, t, pow; f_t *o; assert(in); assert(out); BUILD_BUG_ON(sizeof(f_cell_t) < sizeof(uintptr_t)); size = f_round_up_pow2(size); pow = f_blog2(size); /** There is a minimum requirement on the **m** field in the **f_t** structure which is not apparent in its definition (and cannot be made apparent given how flexible array members work). We need enough memory to store the registers (32 cells), the parse area for a word (**MAXIMUM_WORD_LENGTH** cells), the initial start up program (about 6 cells), the initial built in and defined word set (about 600-700 cells) and the variable and return stacks (**MINIMUM_STACK_SIZE** cells each, as minimum). If we add these together we come up with an absolute minimum, although that would not allow us define new words or do anything useful. We use **MINIMUM_STACK_SIZE** to define a useful minimum, albeit a restricted on, it is not a minimum large enough to store all the definitions in *forth.fth* (a file within the project containing a lot of Forth code) but it is large enough for embedded systems, for testing the interpreter and for the unit tests within the *unit.c* file. We **VERIFY** that the size has been passed in is equal to or about minimum as this has been documented as being a requirement to this function in the C API, if we are passed a lower number the programmer has made a mistake somewhere and should be informed of this problem. **/ VERIFY(size >= MINIMUM_CORE_SIZE); if (!(o = calloc(1, sizeof(*o) + sizeof(f_cell_t)*size))) return NULL; /** Default the registers, and input and output streams: **/ f_make_default(o, size, in, out); /** **o->header** needs setting up, but has no effect on the run time behavior of the interpreter: **/ make_header(o->header, pow); o->calls = calls; /* pass over functions for CALL */ m = o->m; /* a local variable only for convenience */ /** The next section creates a word that calls **READ**, then **TAIL**, then itself. This is what the virtual machine will run at startup so that we can start reading in and executing Forth code. It creates a word that looks like this: | <-- start of dictionary | .------.------.-----.----.----.----. | TAIL | READ | RUN | P1 | P2 | P2 | Rest of dictionary ... .------.------.-----.----.----.----. | end of this special word --> | P1 is a pointer to READ P2 is a pointer to TAIL P2 is a pointer to RUN The effect of this can be described as "make a function which performs a **READ** then calls itself tail recursively". The first instruction run is **RUN** which we save in **o->m[INSTRUCTION]** and restore when we enter **f_run**. **/ o->m[PWD] = 0; /* special terminating pwd value */ t = m[DIC] = DICTIONARY_START; /* initial dictionary offset */ m[m[DIC]++] = TAIL; /* add a TAIL instruction that can be called */ w = m[DIC]; /* save current offset, which will contain READ */ m[m[DIC]++] = READ; /* populate the cell with READ */ m[m[DIC]++] = RUN; /* call the special word recursively */ o->m[INSTRUCTION] = m[DIC]; /* stream points to the special word */ m[m[DIC]++] = w; /* call to READ word */ m[m[DIC]++] = t; /* call to TAIL */ m[m[DIC]++] = o->m[INSTRUCTION] - 1; /* recurse */ /** **DEFINE** and **IMMEDIATE** are two immediate words, the only two immediate words that are also virtual machine instructions, we can make them immediate by passing in their code word to **compile**. The created word looks like this: .------.-----.------. | NAME | PWD | CODE | .------.-----.------. The **CODE** field here contains either **DEFINE** or **IMMEDIATE**, as well as the hidden bit field and an offset to the beginning of name. The compiling bit is cleared for these words. **/ compile(o, DEFINE, ":", false, false); compile(o, DEFINE, "::", true, false); compile(o, IMMEDIATE, "immediate", false, false); /** All of the other built in words that use a virtual machine instruction to do work are instead compiling words, and because there are lots of them we can initialize them in a loop, the created words look the same as the immediate words, except the compiling bit is set in the CODE field. The CODE field here also contains the VM instructions, the READ word will compile pointers to this CODE field into the dictionary. **/ for (i = READ, w = READ; instruction_names[i]; i++) compile(o, w++, instruction_names[i], true, false); compile(o, EXIT, "_exit", true, false); /* needed for 'see', trust me */ compile(o, PUSH, "'", true, false); /* crude starting version of ' */ /** We now name all the registers so we can refer to them by name instead of by number. **/ for (i = 0; register_names[i]; i++) VERIFY(f_define_constant(o, register_names[i], i+DIC) >= 0); /** More constants are now defined: **/ w = size - (2 * o->m[STACK_SIZE]); /* start of stack */ VERIFY(f_define_constant(o, "stack-start", w) >= 0); VERIFY(f_define_constant(o, "max-core", size) >= 0); for (i = 0; constants[i].name; i++) VERIFY(f_define_constant(o, constants[i].name, constants[i].value) >= 0); /** Now we finally are in a state to load the slightly inaccurately named **initial_f_program**, which will give us basic looping and conditional constructs **/ VERIFY(f_eval(o, initial_f_program) >= 0); /**All of the calls to **f_eval** and **f_define_constant** have set the input streams to point to a string, we need to reset them to they point to the file **in** **/ f_s_f_input(o, in); /*set up input after our eval */ o->line = 1; return o; } /** This is a crude method that should only be used for debugging purposes, it simply dumps the forth structure to disk, including any padding which the compiler might have inserted. This dump cannot be reloaded! **/ int f_dump_core(f_t *o, FILE *dump) { assert(o); assert(dump); size_t w = sizeof(*o) + sizeof(f_cell_t) * o->core_size; return w != fwrite(o, 1, w, dump) ? -1: 0; } /** We can save the virtual machines working memory in a way, called serialization, such that we can load the saved file back in and continue execution using this save environment. Only the three previously mentioned fields are serialized; **m**, **core_size** and the **header**. **/ int f_s_c_file(f_t *o, FILE *dump) { assert(o && dump); uint64_t r1, r2, core_size = o->core_size; if (f_is_invalid(o)) return -1; r1 = fwrite(o->header, 1, sizeof(o->header), dump); r2 = fwrite(o->m, 1, sizeof(f_cell_t) * core_size, dump); if (r1+r2 != (sizeof(o->header) + sizeof(f_cell_t) * core_size)) return -1; return 0; } /** Logically if we can save the core for future reuse, then we must have a function for loading the core back in, this function returns a reinitialized Forth object. Validation on the object is performed to make sure that it is a valid object and not some other random file, endianess, **core_size**, cell size and the headers magic constants field are all checked to make sure they are correct and compatible with this interpreter. **f_make_default** is called to replace any instances of pointers stored in registers which are now invalid after we have loaded the file from disk. **/ f_t *flc_file(FILE *dump) { uint8_t actual[sizeof(header)] = {0}, /* read in header */ expected[sizeof(header)] = {0}; /* what we expected */ f_t *o = NULL; uint64_t w = 0, core_size = 0; assert(dump); make_header(expected, 0); if (sizeof(actual) != fread(actual, 1, sizeof(actual), dump)) { goto fail; /* no header */ } if (memcmp(expected, actual, sizeof(header)-1)) { goto fail; /* invalid or incompatible header */ } core_size = 1 << actual[LOG2_SIZE]; if (core_size < MINIMUM_CORE_SIZE) { error("core size of %"PRIdCell" is too small", core_size); goto fail; } w = sizeof(*o) + (sizeof(f_cell_t) * core_size); errno = 0; if (!(o = calloc(w, 1))) { error("allocation of size %"PRId64" failed, %s", w, f_strerror()); goto fail; } w = sizeof(f_cell_t) * core_size; if (w != fread(o->m, 1, w, dump)) { error("file too small (expected %"PRId64")", w); goto fail; } o->core_size = core_size; memcpy(o->header, actual, sizeof(o->header)); f_make_default(o, core_size, stdin, stdout); return o; fail: free(o); return NULL; } /** The following function allows us to load a core file from memory: **/ f_t *flc_memory(char *m, size_t size) { assert(m); assert((size / sizeof(f_cell_t)) >= MINIMUM_CORE_SIZE); f_t *o; size_t offset = sizeof(o->header); size -= offset; errno = 0; o = calloc(sizeof(*o) + size, 1); if (!o) { error("allocation of size %zu failed, %s", sizeof(*o) + size, f_strerror()); return NULL; } make_header(o->header, f_blog2(size)); memcpy(o->m, m + offset, size); f_make_default(o, size / sizeof(f_cell_t), stdin, stdout); return o; } /** And likewise we will want to be able to save to memory as well, the load and save functions for memory expect headers *not* to be present. **/ char *f_s_c_memory(f_t *o, size_t *size) { assert(o && size); char *m; *size = 0; errno = 0; uint64_t w = o->core_size; m = malloc(w * sizeof(f_cell_t) + sizeof(o->header)); if (!m) { error("allocation of size %zu failed, %s", o->core_size * sizeof(f_cell_t), f_strerror()); return NULL; } memcpy(m, o->header, sizeof(o->header)); /* copy header */ memcpy(m + sizeof(o->header), o->m, w); /* core */ *size = o->core_size * sizeof(f_cell_t) + sizeof(o->header); return m; } /** Free the Forth interpreter, we make sure to invalidate the interpreter in case there is a use after free. **/ void f_free(f_t *o) { assert(o); /* invalidate the forth core, a sufficiently "smart" compiler * might optimize this out */ f_invalidate(o); free(o); } /** Unfortunately C disallows the static initialization of structures with flexible array member, GCC allows this as an extension. **/ struct f_functions *f_new_function_list(f_cell_t count) { struct f_functions *ff = NULL; errno = 0; ff = calloc(sizeof(*ff), 1); if (!ff) { warning("calloc failed: %s", f_strerror()); return NULL; } ff->functions = calloc(sizeof(ff->functions[0]) * count, 1); if (!(ff->functions)) { free(ff); warning("calloc failed: %s", f_strerror()); return NULL; } ff->count = count; return ff; } void f_delete_function_list(struct f_functions *calls) { assert(calls); free(calls->functions); free(calls); } /** **f_push**, **f_pop** and **f_stack_position** are the main ways an application programmer can interact with the Forth interpreter. Usually this tutorial talks about how the interpreter and virtual machine work, about how compilation and command modes work, and the internals of a Forth implementation. However this project does not just present an ordinary Forth interpreter, the interpreter can be embedded into other applications, and it is possible be running multiple instances Forth interpreters in the same process. The project provides an API which other programmers can use to do this, one mechanism that needs to be provided is the ability to move data into and out of the interpreter, these C level functions are how this mechanism is achieved. They move data between a C program and a paused Forth interpreters variable stack. **/ void f_push(f_t *o, f_cell_t f) { assert(o); assert(o->S < o->m + o->core_size); *++(o->S) = o->m[TOP]; o->m[TOP] = f; } f_cell_t f_pop(f_t *o) { assert(o); assert(o->S > o->m); f_cell_t f = o->m[TOP]; o->m[TOP] = *(o->S)--; return f; } f_cell_t f_stack_position(f_t *o) { assert(o); return o->S - o->vstart; } void f_signal(f_t *o, int sig) { assert(o); o->m[SIGNAL_HANDLER] = (f_cell_t)((sig * -1) + BIAS_SIGNAL); } char *f_strdup(const char *s) { assert(s); char *str; if (!(str = malloc(strlen(s) + 1))) return NULL; strcpy(str, s); return str; } void f_free_words(char **s, size_t length) { size_t i; for (i = 0; i < length; i++) free(s[i]); free(s); } char **f_words(f_t *o, size_t *length) { assert(o); assert(length); f_cell_t pwd = o->m[PWD]; f_cell_t *m = o->m; size_t i; char **n, **s = calloc(2, sizeof(*s)); if (!s) return NULL; for (i = 0 ;pwd > DICTIONARY_START; pwd = m[pwd], i++) { f_cell_t len = WORD_LENGTH(m[pwd + 1]); s[i] = f_strdup((char*)(&m[pwd-len])); if (!s[i]) { f_free_words(s, i); *length = 0; return NULL; } n = realloc(s, sizeof(*s) * (i+2)); if (!n) { f_free_words(s, i); *length = 0; return NULL; } s = n; } *length = i; return s; } /** ## The Forth Virtual Machine **/ /** The largest function in the file, which implements the forth virtual machine, everything else in this file is just fluff and support for this function. This is the Forth virtual machine, it implements a threaded code interpreter (see , and ). **/ int f_run(f_t *o) { int errorval = 0, rval = 0; assert(o); jmp_buf on_error; if (f_is_invalid(o)) { fatal("refusing to run an invalid forth, %"PRIdCell, f_is_invalid(o)); return -1; } /* The following code handles errors, if an error occurs, the * interpreter will jump back to here. * * This code needs to be rethought to be made more compliant with * how "throw" and "catch" work in Forth. */ if ((errorval = setjmp(on_error)) || f_is_invalid(o)) { /* if the interpreter is invalid we always exit*/ if (f_is_invalid(o)) return -1; switch (errorval) { default: case FATAL: f_invalidate(o); return -1; /* recoverable errors depend on o->m[ERROR_HANDLER], * a register which can be set within the running * virtual machine. */ case RECOVERABLE: switch (o->m[ERROR_HANDLER]) { case ERROR_INVALIDATE: f_invalidate(o); /* fall-through */ case ERROR_HALT: return -f_is_invalid(o); case ERROR_RECOVER: o->m[RSTK] = o->core_size - o->m[STACK_SIZE]; break; } case OK: break; } } f_cell_t *m = o->m, /* convenience variable: virtual memory */ pc, /* virtual machines program counter */ *S = o->S, /* convenience variable: stack pointer */ I = o->m[INSTRUCTION], /* instruction pointer */ f = o->m[TOP], /* top of stack */ w, /* working pointer */ clk; /* clock variable */ assert(m); assert(S); clk = (1000 * clock()) / CLOCKS_PER_SEC; /** The following section will explain how the threaded virtual machine interpreter works. Threaded code is a simple concept and Forths typically compile their code to threaded code, it suites Forth implementations as word definitions consist of juxtaposition of previously defined words until they reach a set of primitives. This means a function like **square** will be implemented like this: call dup <- duplicate the top item on the variable stack call * <- push the result of multiplying the top two items call exit <- exit the definition of square Each word definition is like this, a series of calls to other functions. We can optimize this by removing the explicit **call** and just having a series of code address to jump to, which will become: address of "dup" address of "*" address of "exit" We now have the problem that we cannot just jump to the beginning of the definition of **square** in our virtual machine, we instead use an instruction (**RUN** in our interpreter, or **DOLIST** as it is sometimes known in most other implementations) to determine what to do with the following data, if there is any. This system also allows us to encode primitives, or virtual machine instructions, in the same way as we encode words. If our word does not have the **RUN** instruction as its first instruction then the list of addresses will not be interpreted but only a simple instruction will be executed. The for loop and the switch statement here form the basis of our thread code interpreter along with the program counter register (**pc**) and the instruction pointer register (**I**). To explain how execution proceeds it will help to refer to the internal structure of a word and how words are compiled into the dictionary. Above we saw that a words layout looked like this: .-----------.-----.------.----------------. | Word Name | PWD | CODE | Data Field ... | .-----------.-----.------.----------------. And we can define words like this: : square dup * ; Which, on a 32 bit machine, produces code that looks like this: Address Contents ._____._____._____._____. X | 's' | 'q' | 'u' | 'a' | ._____._____._____._____. X+1 | 'r' | 'e' | 0 | 0 | ._____._____._____._____. X+2 | previous word pointer | ._______________________. X+3 | CODE Field | ._______________________. X+4 | Pointer to 'dup' | ._______________________. X+5 | Pointer to '*' | ._______________________. X+6 | Pointer to 'exit' | ._______________________. The **:** word creates the header (everything up to and including the CODE field), and enters compile mode, where instead of words being executed they are compiled into the dictionary. When **dup** is encountered a pointer is compiled into the next available slot at **X+4**, likewise for *****. The word **;** is an immediate word that gets executed regardless of mode, which switches back into compile mode and compiles a pointer to **exit**. This **CODE** field at **X+3** contains the following: .---------------.------------------.------------.-------------. Bit | 15 | 14 ........... 8 | 9 | 7 ....... 0 | Field | Compiling Bit | Word Name Size | Hidden Bit | Instruction | Contents | 1 | 2 | 0 | RUN (1) | .---------------.------------------.------------.-------------. The definition of words mostly consists of pointers to other words. The compiling bit, Word Name Size field and Hidden bit have no effect when the word is execution, only in finding the word and determining whether to execute it when typing the word in. The instruction tells the virtual machine what to do with this word, in this case the instruction is **RUN**, which means that the words contains a list of pointers to be executed. The virtual machine then pushes the value of the next address to execute onto the return stack and then jumps to that words CODE field, executing the instruction it finds for that word. Words like **dup** and ***** are built in words, they are slightly differently in that their **CODE** field contains contains a virtual machine instruction other than **RUN**, they contain the instructions **DUP** and **MUL** respectively. **/ /* BWT note("inner interpreter: %d", I); */ for (;(pc = m[ck(I++)]);) { INNER: w = instruction(m[ck(pc++)]); if (w < LAST_INSTRUCTION) { cd(stack_bounds[w]); TRACE(o, w, S, f); } switch (w) { /** When explaining words with example Forth code the instructions enumeration will not be used (such as **ADD** or **SUB**), but its name will be used instead (such as **+** or **-**) **/ case PUSH: *++S = f; f = m[ck(I++)]; break; case CONST: *++S = f; f = m[ck(pc)]; break; case RUN: m[ck(++m[RSTK])] = I; I = pc; break; /** **DEFINE** backs the Forth word **:**, which is an immediate word, it reads in a new word name, creates a header for that word and enters into compile mode, where all words (baring immediate words) are compiled into the dictionary instead of being executed. The created header looks like this: .------.-----.------.---- | NAME | PWD | CODE | ... .------.-----.------.---- ^ | Dictionary Pointer The CODE field contains the RUN instruction. **/ case DEFINE: m[STATE] = 1; /* compile mode */ if (f_get_word(o, o->s, MAXIMUM_WORD_LENGTH) < 0) goto end; compile(o, RUN, (char*)o->s, true, false); break; /** **IMMEDIATE** makes the current word definition execute regardless of whether we are in compile or command mode. This word simply clears the compiling bit of the most recently defined Forth word, which makes the word immediate. This Forth allows the following for making a word immediate ('immediate' is itself immediate): : xxx ... ; immediate ( Traditional way ) : xxx immediate ... ; ( New way ) **/ case IMMEDIATE: w = m[PWD] + 1; m[w] &= ~COMPILING_BIT; break; case READ: /** The **READ** instruction, an instruction that usually does not belong in a virtual machine, forms the basis of Forths interactive nature. In order to move this word outside of the virtual machine a compiler for the virtual machine would have to be made, which would complicate the implementation, but simplify the virtual machine and make it more like a 'normal' virtual machine. It attempts to do the follow: a) Lookup a space delimited string in the Forth dictionary, if it is found and we are in command mode we execute it, if we are in compile mode and the word is a compiling word we compile a pointer to it in the dictionary, if not we execute it. b) If it is not a word in the dictionary we attempt to treat it as a number, if it is numeric (using the **BASE** register to determine the base) then if we are in command mode we push the number to the variable stack, else if we are in compile mode we compile the literal into the dictionary. c) If it is neither a word nor a number, regardless of mode, we emit a diagnostic. This is the most complex word in the Forth virtual machine, there is a good case for it being moved outside of it, and perhaps this will happen. You will notice that the above description did not include any looping, as such there is a driver for the interpreter which must be made and initialized in **f_init**, a simple word that calls **READ** in a loop (actually tail recursively). **/ /* BWT note("case READ: %d", w); */ if (f_get_word(o, o->s, MAXIMUM_WORD_LENGTH) < 0) { /* BWT note("get_word < 0: %d", w); */ goto end; } if ((w = f_find(o, (char*)o->s)) > 1) { pc = w; if (m[STATE] && (m[ck(pc)] & COMPILING_BIT)) { m[dic(m[DIC]++)] = pc; /* compile word */ break; } goto INNER; /* execute word */ } else if (f_string_to_cell(o->m[BASE], &w, (char*)o->s)) { error("'%s' is not a word (line %zu)", o->s, o->line); longjmp(on_error, RECOVERABLE); } if (m[STATE]) { /* must be a number then */ m[dic(m[DIC]++)] = 2; /*fake word push at m[2] */ m[dic(m[DIC]++)] = w; } else { /* push word */ *++S = f; f = w; } break; /** Most of the following Forth instructions are simple Forth words, each one with an uncomplicated Forth word which is implemented by the corresponding instruction (such as LOAD and "@", STORE and "!", EXIT and "exit", and ADD and "+"). However, the reason for these words existing, and under what circumstances some of the can be used is a different matter, the COMMA and TAIL word will require some explaining, but ADD, SUB and DIV will not. **/ case LOAD: f = m[ck(f)]; break; case STORE: m[ck(f)] = *S--; f = *S--; break; case CLOAD: f = *(((uint8_t*)m) + ckchar(f)); break; case CSTORE: ((uint8_t*)m)[ckchar(f)] = *S--; f = *S--; break; case SUB: f = *S-- - f; break; case ADD: f = *S-- + f; break; case AND: f = *S-- & f; break; case OR: f = *S-- | f; break; case XOR: f = *S-- ??' f; break; case INV: f = ~f; break; case SHL: f = *S-- << f; break; case SHR: f = *S-- >> f; break; case MUL: f = *S-- * f; break; case DIV: if (f) { f = *S-- / f; } else { error("divide %"PRIdCell" by zero ", *S--); longjmp(on_error, RECOVERABLE); } break; case ULESS: f = *S-- < f; break; case UMORE: f = *S-- > f; break; case EXIT: I = m[ck(m[RSTK]--)]; break; case KEY: *++S = f; f = f_get_char(o); break; case EMIT: f = fputc(f, (FILE*)o->m[FOUT]); break; case FROMR: *++S = f; f = m[ck(m[RSTK]--)]; break; case TOR: m[ck(++m[RSTK])] = f; f = *S--; break; case BRANCH: I += m[ck(I)]; break; case QBRANCH: I += f == 0 ? m[I] : 1; f = *S--; break; case PNUM: f = print_cell(o, (FILE*)(o->m[FOUT]), f); break; case COMMA: m[dic(m[DIC]++)] = f; f = *S--; break; case EQUAL: f = *S-- == f; break; case SWAP: w = f; f = *S--; *++S = w; break; case DUP: *++S = f; break; case DROP: f = *S--; break; case OVER: w = *S; *++S = f; f = w; break; /** **TAIL** is a crude method of doing tail recursion, it should not be used generally but is useful at startup, there are limitations when using it in word definitions. The following tail recursive definition of the greatest common divisor, called **(gcd)** will not work correctly when interacting with other words: : (gcd) ?dup if dup rot rot mod tail (gcd) then ; If we define a word: : uses-gcd 50 20 (gcd) . ; We might expect it to print out "10", however it will not, it will calculate the GCD, but not print it out with ".", as GCD will have popped off where it should have returned. Instead we must wrap the definition up in another definition: : gcd (gcd) ; And the definition **gcd** can be used. There is a definition of **tail** within *forth.fth* that does not have this limitation, in fact the built in definition is hidden in favor of the new one. **/ case TAIL: m[RSTK]--; break; /** FIND is a natural factor of READ, we add it to the Forth interpreter as it already exits, it looks up a Forth word in the dictionary and returns a pointer to that word if it found. **/ case FIND: *++S = f; if (f_get_word(o, o->s, MAXIMUM_WORD_LENGTH) < 0) goto end; f = f_find(o, (char*)o->s); f = f < DICTIONARY_START ? 0 : f; break; /** DEPTH is added because the stack is not directly accessible by the virtual machine, normally it would have no way of knowing where the variable stack pointer is, which is needed to implement Forth words such as **.s** - which prints out all the items on the stack. **/ case DEPTH: w = S - o->vstart; *++S = f; f = w; break; /** SPLOAD (**sp@**) loads the current stack pointer, which is needed because the stack pointer does not live within any of the virtual machines registers. **/ case SPLOAD: *++S = f; f = (f_cell_t)(S - o->m); break; /** SPSTORE (**sp!**) modifies the stack, setting it to the value on the top of the stack. **/ case SPSTORE: w = *S; S = (f_cell_t*)(f + o->m - 1); f = w; break; /** CLOCK allows for a primitive and wasteful (depending on how the C library implements "clock") timing mechanism, it has the advantage of being portable: **/ case CLOCK: *++S = f; f = ((1000 * clock()) - clk) / CLOCKS_PER_SEC; break; /** EVALUATOR is another complex word which needs to be implemented in the virtual machine. It saves and restores state which we do not usually need to do when the interpreter is not running (the usual case for **f_eval** when called from C). It can read either from a string or from a file. **/ case EVALUATOR: { /* save current input */ f_cell_t sin = o->m[SIN], sidx = o->m[SIDX], slen = o->m[SLEN], fin = o->m[FIN], source = o->m[SOURCE_ID], r = m[RSTK]; char *s = NULL; FILE *file = NULL; f_cell_t length; int file_in = 0; file_in = f; /*get file/string in bool*/ f = *S--; if (file_in) { file = (FILE*)(*S--); f = *S--; } else { s = ((char*)o->m + *S--); length = f; f = *S--; } /* save the stack variables */ o->S = S; o->m[TOP] = f; /* push a fake call to f_eval */ m[RSTK]++; if (file_in) { f_s_f_input(o, file); w = f_run(o); } else { w = f_eval_block(o, s, length); } /* BWT */ printf("in evaluator\n"); /* restore stack variables */ m[RSTK] = r; S = o->S; *++S = o->m[TOP]; f = w; /* restore input stream */ o->m[SIN] = sin; o->m[SIDX] = sidx; o->m[SLEN] = slen; o->m[FIN] = fin; o->m[SOURCE_ID] = source; if (f_is_invalid(o)) return -1; break; } case PSTK: print_stack(o, (FILE*)(o->m[STDOUT]), S, f); fputc('\n', (FILE*)(o->m[STDOUT])); break; case RESTART: longjmp(on_error, f); break; /** CALL allows arbitrary C functions to be passed in and used within the interpreter, allowing it to be extended. The functions have to be passed in during initialization and then they become available to be used by CALL. The structure **f_functions** is a list of function pointers that can be populated by the user of the libforth library, CALL indexes into that structure (after performing bounds checking) and executes the function. **/ case CALL: { if (!(o->calls) || !(o->calls->count)) { /* no call structure, or count is zero */ f = -1; break; } f_cell_t i = f; if (i >= (o->calls->count)) { f = -1; break; } assert(o->calls->functions[i].function); /* check depth of function */ cd(o->calls->functions[i].depth); /* pop call number */ f = *S--; /* save stack state */ o->S = S; o->m[TOP] = f; /* call arbitrary C function */ w = o->calls->functions[i].function(o); /* restore stack state */ S = o->S; f = o->m[TOP]; /* push call success value */ *++S = f; f = w; break; } /** Whilst loathe to put these in here as virtual machine instructions (instead a better mechanism should be found), this is the simplest way of adding file access words to our Forth interpreter. The file access methods *should* all be wrapped up so it does not matter if a file or a piece of memory (a string for example) is being read or written to. This would allow the KEY to be removed as a virtual machine instruction, and would be a useful abstraction. **/ case SYSTEM: #ifdef __CMS__ f = CMScommand(f_get_string(o, &on_error, &S, f), 11 /* CMS_CONSOLE */); #else f = system(f_get_string(o, &on_error, &S, f)); #endif break; case FCLOSE: errno = 0; f = fclose((FILE*)f) ? ferrno() : 0; break; case FDELETE: errno = 0; f = remove(f_get_string(o, &on_error, &S, f)) ? ferrno() : 0; break; case FFLUSH: errno = 0; f = fflush((FILE*)f) ? ferrno() : 0; break; case FSEEK: { errno = 0; int r = fseek((FILE*)(*S--), f, SEEK_SET); f = r == -1 ? errno ? ferrno() : -1 : 0; break; } case FPOS: { errno = 0; int r = ftell((FILE*)f); *++S = r; f = r == -1 ? errno ? ferrno() : -1 : 0; break; } case FOPEN: { const char *fam = f_get_fam(&on_error, f); f = *S--; char *file = f_get_string(o, &on_error, &S, f); errno = 0; *++S = (f_cell_t)fopen(file, fam); f = ferrno(); } break; case FREAD: { FILE *file = (FILE*)f; f_cell_t count = *S--; f_cell_t offset = *S--; *++S = fread(((char*)m)+offset, 1, count, file); f = ferror(file); clearerr(file); } break; case FWRITE: { FILE *file = (FILE*)f; f_cell_t count = *S--; f_cell_t offset = *S--; *++S = fwrite(((char*)m)+offset, 1, count, file); f = ferror(file); clearerr(file); } break; case FRENAME: { const char *f1 = f_get_fam(&on_error, f); f = *S--; char *f2 = f_get_string(o, &on_error, &S, f); errno = 0; f = rename(f2, f1) ? ferrno() : 0; } break; case TMPFILE: { *++S = f; errno = 0; *++S = (f_cell_t)tmpfile(); f = errno ? ferrno() : 0; } break; case RAISE: f = raise((-f) - BIAS_SIGNAL); break; case DATE: { time_t raw; struct tm *gmt; time(&raw); gmt = gmtime(&raw); *++S = f; *++S = gmt->tm_sec; *++S = gmt->tm_min; *++S = gmt->tm_hour; *++S = gmt->tm_mday; *++S = gmt->tm_mon + 1; *++S = gmt->tm_year + 1900; *++S = gmt->tm_wday; *++S = gmt->tm_yday; f = gmt->tm_isdst; break; } /** The following memory functions can be used by the Forth interpreter for faster memory operations, but more importantly they can be used to interact with memory outside of the Forth core. **/ case MEMMOVE: w = *S--; memmove((char*)(*S--), (char*)w, f); f = *S--; break; case MEMCHR: w = *S--; f = (f_cell_t)memchr((char*)(*S--), w, f); break; case MEMSET: w = *S--; memset((char*)(*S--), w, f); f = *S--; break; case MEMCMP: w = *S--; f = memcmp((char*)(*S--), (char*)w, f); break; case ALLOCATE: errno = 0; *++S = (f_cell_t)calloc(f, 1); f = ferrno(); break; case FREE: /** It is not likely that the C library will set the errno if it detects a problem, it will most likely either abort the program or silently corrupt the heap if something goes wrong, however the Forth standard requires that an error status is returned. **/ errno = 0; free((char*)f); f = ferrno(); break; case RESIZE: errno = 0; w = (f_cell_t)realloc((char*)(*S--), f); *++S = w; f = ferrno(); break; case GETENV: { char *s = getenv(f_get_string(o, &on_error, &S, f)); f = s ? strlen(s) : 0; *++S = (f_cell_t)s; break; } case BYE: rval = f; f = *S--; goto end; /** This should never happen, and if it does it is an indication that virtual machine memory has been corrupted somehow. **/ default: fatal("illegal operation %" PRIdCell, w); longjmp(on_error, FATAL); } } /** We must save the stack pointer and the top of stack when we exit the interpreter so the C functions like "f_pop" work correctly. If the **f_t** object has been invalidated (because something went wrong), we do not have to jump to *end* as functions like **f_pop** should not be called on the invalidated object any longer. **/ end: o->S = S; o->m[TOP] = f; return rval; } /** ## An example main function called **mainforth** This is a very simple, limited, example of what can be done with the libforth. This make implementing a Forth interpreter as simple as: ==== main.c ============================= #include "libforth.h" int main(int argc, char **argv) { return mainforth(argc, argv); } ==== main.c ============================= **/ int mainforth(int argc, char **argv) { FILE *core = fopen("FORTH CORE A", "rb"); f_t *o = NULL; int r = 0; if (core) { o = flc_file(core); fclose(core); } if (!o) o = f_init(DEFAULT_CORE_SIZE, stdin, stdout, NULL); if (!o) { fatal("failed to initialize forth: %s", f_strerror()); return -1; } f_set_args(o, argc, argv); if ((r = f_run(o)) < 0) return r; errno = 0; if (!(core = fopen("FORTH CORE A", "wb"))) { fatal("failed to save core file: %s", f_strerror()); return -1; } fclose(core); r = f_s_c_file(o, core); f_free(o); return r; } /** And that completes the program, and the documentation describing it. **/ /** @file main.c @author Richard James Howe. @copyright Copyright 2015,2016,2017 Richard James Howe. @license MIT @email howe.r.j.89@gmail.com **/ /* #include "libforth.h" */ /* #include "unit.h" */ #include #include #include #include #include #include #include #include #ifdef _WIN32 #include #include extern int _fileno(FILE*); #endif #ifdef USE_BUILT_IN_CORE extern unsigned char f_core_data[]; extern f_cell_t f_core_size; #endif /** Although multiple instances of a libforth environment can be active in a single C application, this test program only has one active. This is stored in a global variable so signal handlers can access it. **/ static f_t *global_f_environment; static int enable_signal_handling; typedef void (*signal_handler)(int sig); /**< functions for handling signals*/ #ifdef USE_ABORT_HANDLER #ifdef __unix__ #include #define TRACE_SIZE (64u) /** This hander calls functions (backtrace, printf) that are not safe to call from a signal handler, however this is only going to be called in the event of an internal consistency failure, and only as a courtesy to the programmer. A windows version could be made using information from: https://msdn.microsoft.com/en-us/library/windows/desktop/bb204633%28v=vs.85%29.aspx and https://stackoverflow.com/questions/5693192/win32-backtrace-from-c-code **/ static void sig_abrt_handler(int sig) { void *trace[TRACE_SIZE]; char **messages = NULL; int i, trace_size; signal(sig, SIG_DFL); trace_size = backtrace(trace, TRACE_SIZE); messages = backtrace_symbols(trace, trace_size); if (trace_size < 0) goto fail; fprintf(stderr, "SIGABRT was raised.\nStack trace:\n"); for (i = 0; i < trace_size; i++) fprintf(stderr, "\t%s\n", messages[i]); fflush(stderr); fail: exit(1); /* abort(); */ } #endif #endif #ifdef USE_LINE_EDITOR #include "libline.h" #define LINE_EDITOR_AVAILABLE (1) /**< line editor is available */ /** The Forth history file will be stored in this file, if the **USE\_LINE\_EDITOR** option is set. **/ static const char *history_file = ".forth"; /** The line editor, if used, will print a prompt for each line: **/ static const char *prompt = "> "; /** This is the line completion callback **/ void f_line_completion_callback(const char *line, size_t pos, line_completions *lc) { size_t length = 0; char **s; assert(line); assert(lc); assert(global_f_environment); s = f_words(global_f_environment, &length); (void)pos; if (!s) return; size_t i; for (i = 0; i < length; i++) { line_add_completion(lc, s[i]); } f_free_words(s, length); } /** @brief The following function implements a line-editor loop, quiting when there is no more input to be read. @param o a fully initialized for environment @param mode set vi mode on or off @return int <0 on failure of the Forth execution or the line editor **/ static int f_line_editor(f_t *o, int mode) { int rval = 0; char *line = NULL; assert(o); line_set_vi_mode(mode); errno = 0; if (line_history_load(history_file) < 0) /* loading can fail, which is fine */ warning("failed to load history file %s, %s", history_file, f_strerror()); line_set_completion_callback(f_line_completion_callback); while ((line = line_editor(prompt))) { f_set_string_input(o, line); if ((rval = f_run(o)) < 0) goto end; if (line_history_add(line) < 0) { rval = -1; goto end; } if (line_history_save(history_file) < 0) { rval = -1; goto end; } free(line); line = NULL; } end: free(line); return rval; } #else #define LINE_EDITOR_AVAILABLE (0) /**< line editor is not available */ #endif /* USE_LINE_EDITOR */ static void register_signal_handler(int sig, signal_handler handler) { errno = 0; if (signal(SIGINT, handler) == SIG_ERR) { error("could not install %d handler: %s", sig, f_strerror()); exit(EXIT_FAILURE); } } static void sig_generic_handler(int sig) { if (enable_signal_handling) { f_signal(global_f_environment, sig); register_signal_handler(sig, sig_generic_handler); } else { exit(EXIT_FAILURE); } } /** This program can be used as a filter in a Unix pipe chain, or as a standalone interpreter for Forth. It tries to follow the Unix philosophy and way of doing things (see and ). Whether this is achieved is a matter of opinion. There are a things this interpreter does differently to most Forth interpreters that support this philosophy however, it is silent by default and does not clutter up the output window with "ok", or by printing a banner at start up (which would contain no useful information whatsoever). It is simple, and only does one thing (but does it do it well?). **/ static void fclose_input(FILE **in) { if (*in && (*in != stdin)) fclose(*in); *in = stdin; } /** It is customary for Unix programs to have a usage string, which we can print out as a quick reminder to the user as to what the command line options are. **/ static void usage(const char *name) { fprintf(stderr, "usage: %s " "[-(s|l|f) file] [-e expr] [-m size] [-LSVthvnx] [-] files\n", name); } /** We try to keep the interface to the example program as simple as possible, so there are limited, uncomplicated options. What they do should come as no surprise to an experienced Unix programmer, it is important to pick option names that they would expect (for example *-l* for loading, *-e* for evaluation, and not using *-h* for help would be a hanging offense). **/ static void help(void) { static const char help_text[] = "Forth: A small forth interpreter build around libforth\n\n" "\t-h print out this help and exit unsuccessfully\n" "\t-u run the built in unit tests, then exit\n" "\t-e string evaluate a string\n" "\t-s file save state of forth interpreter to file\n" "\t-S save state to 'forth.core'\n" "\t-n use the line editor, if available, when reading from stdin\n" "\t-f file immediately read from and execute a file\n" "\t-l file load previously saved state from file\n" "\t-L load previously saved state from 'forth.core'\n" "\t-m size specify forth memory size in KiB (cannot be used with '-l')\n" "\t-t process stdin after processing forth files\n" "\t-v turn verbose mode on\n" "\t-x enable signal handling\n" "\t-V print out version information and exit\n" "\t- stop processing options\n\n" "Options must come before files to execute.\n\n" "The following words are built into the interpreter:\n\n"; fputs(help_text, stderr); } static int eval_file(f_t *o, const char *file, enum f_debug_level verbose) { FILE *in = NULL; int c = 0, rval = 0; assert(file); if (verbose >= F_DEBUG_NOTE) note("reading from file '%s'", file); f_s_f_input(o, in = f_fopen_or_die(file, "r")); /* shebang line '#!', core files could also be detected */ if ((c = fgetc(in)) == '#') while (((c = fgetc(in)) > 0) && (c != '\n')); else if (c == EOF) goto close; else ungetc(c, in); /* BWT printf("SOURCE-ID: %lu, %p\n", o->m[SOURCE_ID], o->m[FIN]); */ rval = f_run(o); close: fclose_input(&in); return rval; } static void version(void) { fprintf(stdout, "libforth:\n" "\tversion: %u\n" "\tsize: %u\n" "\tendianess: %u\n", F_CORE_VERSION, (unsigned)sizeof(f_cell_t) * CHAR_BIT, (unsigned)IS_BIG_ENDIAN); } static f_t *f_initial_enviroment(f_t **o, f_cell_t size, FILE *input, FILE *output, enum f_debug_level verbose, int argc, char **argv) { errno = 0; assert(input && output && argv); if (*o) goto finished; #ifdef USE_BUILT_IN_CORE /* USE_BUILT_IN_CORE is an experimental feature, it should not be * relied upon to work correctly */ (void)size; *o = flc_memory((char*)f_core_data, f_core_size); f_s_f_input(*o, input); f_s_f_output(*o, output); #else *o = f_init(size, input, output, NULL); #endif if (!(*o)) { fatal("forth initialization failed, %s", f_strerror()); exit(EXIT_FAILURE); } finished: f_set_debug_level(*o, verbose); f_set_args(*o, argc, argv); global_f_environment = *o; return *o; } /** To keep things simple options are parsed first then arguments like files, although some options take arguments immediately after them. A library for parsing command line options like *getopt* should be used, this would reduce the portability of the program. It is not recommended that arguments are parsed in this manner. **/ int main(int argc, char **argv) { FILE *in = NULL, *dump = NULL; int rval = 0, i = 1; int save = 0, /* attempt to save core if true */ eval = 0, /* have we evaluated anything? */ /* BWT */ readterm = 1, /* read from standard in */ use_line_editor = 0, /* use a line editor, *if* one exists */ mset = 0; /* memory size specified */ enum f_debug_level verbose = F_DEBUG_OFF; /* verbosity level */ static const size_t kbpc = 1024 / sizeof(f_cell_t); /*kilobytes per cell*/ char *optarg = NULL; f_cell_t core_size = DEFAULT_CORE_SIZE; f_t *o = NULL; int orig_argc = argc; char **orig_argv = argv; /* BWT */ #ifdef __CMS__ char *basewords = "LIBFORTH FS A"; static const char *dump_name = "LIBFORTH CORE A"; #else char *basewords = "libforth.fs"; static const char *dump_name = "forth.core"; #endif int rc = 0; register_signal_handler(SIGINT, sig_generic_handler); #ifdef USE_ABORT_HANDLER #ifdef __unix__ register_signal_handler(SIGABRT, sig_abrt_handler); #endif #endif #ifdef _WIN32 /* unmess up Windows file descriptors: there is a warning about an * implicit declaration of _fileno when compiling under Windows in C99 * mode */ _setmode(_fileno(stdin), _O_BINARY); _setmode(_fileno(stdout), _O_BINARY); _setmode(_fileno(stderr), _O_BINARY); #endif /** This loop processes any options that may have been passed to the program, it looks for arguments beginning with '-' and attempts to process that option, if the argument does not start with '-' the option processing stops. It is a simple mechanism for processing program arguments and there are better ways of doing it (such as "getopt" and "getopts"), but by using them we sacrifice portability. **/ for (i = 1; i < argc && argv[i][0] == '-'; i++) { if (strlen(argv[i]) > 2) { fatal("Only one option allowed at a time (got %s)", argv[i]); goto fail; } switch (argv[i][1]) { case '\0': goto done; /* stop processing options */ case 'H': case 'h': usage(argv[0]); help(); return -1; case 't': case 'T': readterm = 1; if (verbose >= F_DEBUG_NOTE) note("stdin on. line editor %s", use_line_editor ? "on" : "off"); break; case 'u': case 'U': return -1; /* libf_unit_tests(0, 0, 0); */ case 'E': case 'e': if (i >= (argc - 1)) goto fail; f_initial_enviroment(&o, core_size, stdin, stdout, verbose, orig_argc, orig_argv); optarg = argv[++i]; if (verbose >= F_DEBUG_NOTE) note("evaluating '%s'", optarg); if (f_eval(o, optarg) < 0) goto end; eval = 1; break; case 'f': case 'F': if (i >= (argc - 1)) goto fail; f_initial_enviroment(&o, core_size, stdin, stdout, verbose, orig_argc, orig_argv); optarg = argv[++i]; if (eval_file(o, optarg, verbose) < 0) goto end; break; case 'a': case 'A': if (i >= (argc - 1)) goto fail; dump_name = argv[++i]; /* fall-through */ case 's': case 'S': /*use default name */ if (verbose >= F_DEBUG_NOTE) note("saving core file to '%s' (on exit)", dump_name); save = 1; break; case 'm': case 'M': if (o || (i >= argc - 1) || f_string_to_cell(10, &core_size, argv[++i])) goto fail; if ((core_size *= kbpc) < MINIMUM_CORE_SIZE) { fatal("-m too small (minimum %zu)", MINIMUM_CORE_SIZE / kbpc); return -1; } if (verbose >= F_DEBUG_NOTE) note("memory size set to %zu", core_size); mset = 1; break; case 'l': case 'L': if (o || mset || (i >= argc - 1)) goto fail; dump_name = argv[++i]; /* fall-through */ case 'd': case 'D': if (verbose >= F_DEBUG_NOTE) note("loading core file '%s'", dump_name); if (!(o = flc_file(dump = f_fopen_or_die(dump_name, "rb")))) { fatal("%s, core load failed", dump_name); return -1; } f_set_debug_level(o, verbose); fclose(dump); break; case 'v': case 'V': verbose++; break; case 'w': case 'W': version(); return EXIT_SUCCESS; break; case 'x': case 'X': enable_signal_handling = 1; break; default: fail: fatal("invalid argument '%s'", argv[i]); usage(argv[0]); return -1; } } done: /* if no files are given, read stdin */ if (verbose >= F_DEBUG_NOTE) note("initialize environment (%p)", &o); readterm = (!eval && i == argc) || readterm; f_initial_enviroment(&o, core_size, stdin, stdout, verbose, orig_argc, orig_argv); /* BWT */ rc = eval_file(o, basewords, verbose); if (rc < 0) goto end; if (verbose >= F_DEBUG_NOTE) note("environment initialized (%d) size: %d", rc, core_size); for (; i < argc; i++) /* process all files on command line */ if (eval_file(o, argv[i], verbose) < 0) goto end; if (readterm) { /* if '-t' or no files given, read from stdin */ if (verbose >= F_DEBUG_NOTE) note("reading from stdin (%p)", stdin); #ifdef USE_LINE_EDITOR if (use_line_editor) { rval = f_line_editor(o, 1); goto end; } #endif /* BWT */ rval = 0; while (!rval) { f_s_f_input(o, stdin); rval = f_run(o); if (rval) { fputc('\n', stdout); return rval; } /* BWT */ printf("ok "); clearerr(stdin); } } end: fclose_input(&in); /** If the save option has been given we only want to save valid core files, we might want to make an option to force saving of core files for debugging purposes, but in general we do not want to over write valid previously saved state with invalid data. **/ if (save) { /* save core file */ if (rval || f_is_invalid(o)) { fatal("refusing to save invalid core, %u/%d", rval, f_is_invalid(o)); return -1; } if (verbose >= F_DEBUG_NOTE) note("saving for file to '%s'", dump_name); if (f_s_c_file(o, dump = f_fopen_or_die(dump_name, "wb"))) { fatal("core file save to '%s' failed", dump_name); rval = -1; } fclose(dump); } /** Whilst the following **f_free** is not strictly necessary, there is often a debate that comes up making short lived programs or programs whose memory use stays either constant or only goes up, when these programs exit it is not necessary to clean up the environment and in some case (although not this one) it can slow down the exit of the program for no reason. However not freeing the memory after use does not play nice with programs that detect memory leaks, like Valgrind. Either way, we free the memory used here, but only if no other errors have occurred before hand. **/ f_free(o); return rval; } /** #ifndef UNIT_H #define UNIT_H #ifdef __cplusplus extern "C" { #endif int libf_unit_tests(int keep_files, int colorize, int silent); #ifdef __cplusplus } #endif #endif @file unit.c @brief unit tests for libforth interpreter public interface @author Richard Howe @license MIT (see https://opensource.org/licenses/MIT) @email howe.r.j.89@gmail.com **/ /*** module to test ***/ /* BWT #include "libforth.h" */ /**********************/ /* BWT #include #include #include #include #include #include #include */ /*** very minimal test framework ***/ /** @brief This contains the information needed to complete a series of unit tests, along with various options. **/ typedef struct { unsigned passed, /**< number of unit tests passed */ failed; /**< number of unit tests failed */ clock_t start_time; /**< when the unit tests began */ int color_on; /**< is colorized output on?*/ int jmpbuf_active; /**< have we setup the longjmp buffer or not? */ int is_silent; /**< silent mode on? The programs return code is used to determine success*/ jmp_buf current_test; /**< current unit tests jump buffer */ unsigned current_line; /**< current line number of unit test being executed */ int current_result; /**< result of latest test execution */ const char *current_expr; /**< string representation of expression being executed */ FILE *output; /**< output file for report generation */ /**@warning signal catching is supported, but not thread safe. Signal * handling really should be made optional, but there is no need for * such a small test suite. */ int caught_signal; /**< The value of any caught signals when running tests*/ } test_t /**< structure used to hold test information */; static test_t tb; #define MAX_SIGNALS (256) /**< maximum number of signals to decode */ static char *(sig_lookup[]) = { /*List of C89 signals and their names*/ [SIGABRT] = "SIGABRT", [SIGFPE] = "SIGFPE", [SIGILL] = "SIGILL", [SIGINT] = "SIGINT", [SIGSEGV] = "SIGSEGV", [SIGTERM] = "SIGTERM", [MAX_SIGNALS] = NULL }; /* static size_t compare(const char *restrict a, const char *restrict b, size_t c) { for (size_t i = 0; i < c; i++) if (a[i] != b[i]) return i; return 0; }*/ static void print_caught_signal_name(test_t *t) { char *sig_name = "UNKNOWN SIGNAL"; if ((t->caught_signal > 0) && (t->caught_signal < MAX_SIGNALS) && sig_lookup[t->caught_signal]) sig_name = sig_lookup[t->caught_signal]; if (!(t->is_silent)) fprintf(t->output, "caught %s (signal number %d)\n", sig_name, t->caught_signal);\ } /**@warning not thread-safe, this function uses internal static state*/ static void sig_abrt_handler(int sig) { /* catches assert() from within functions being exercised */ tb.caught_signal = sig; if (tb.jmpbuf_active) { tb.jmpbuf_active = 0; longjmp(tb.current_test, 1); } } static const char *reset(test_t *t) { return t->color_on ? "\x1b[0m" : ""; } static const char *red(test_t *t) { return t->color_on ? "\x1b[31m" : ""; } static const char *green(test_t *t) { return t->color_on ? "\x1b[32m" : ""; } static const char *yellow(test_t *t) { return t->color_on ? "\x1b[33m" : ""; } static const char *blue(test_t *t) { return t->color_on ? "\x1b[34m" : ""; } static int unit_tester(test_t *t, const int test, const char *msg, unsigned line) { assert(t && msg); if (test) { t->passed++; if (!(t->is_silent)) fprintf(t->output, " %sok%s:\t%s\n", green(t), reset(t), msg); } else { t->failed++; if (!(t->is_silent)) fprintf(t->output, " %sFAILED%s:\t%s (line %u)\n", red(t), reset(t), msg, line); } return test; } static void print_statement(test_t *t, const char *stmt) { assert(t); if (!(t->is_silent)) fprintf(t->output, " %sstate%s:\t%s\n", blue(t), reset(t), stmt); } static void print_must(test_t *t, const char *must) { assert(t); if (!(t->is_silent)) fprintf(t->output, " %smust%s:\t%s\n", blue(t), reset(t), must); } static void print_note(test_t *t, const char *name) { assert(t); if (!(t->is_silent)) fprintf(t->output, "%s%s%s\n", yellow(t), name, reset(t)); } /**@brief Advance the test suite by testing and executing an expression. This * framework can catch assertions that have failed within the expression * being tested. * @param TESTBENCH The test bence to execute under * @param EXPR The expression should yield non zero on success **/ #define test(TESTBENCH, EXPR) _test((TESTBENCH), (EXPR) != 0, #EXPR, __LINE__) static void _test(test_t *t, const int result, const char *expr, const unsigned line) { assert(t && expr); t->current_line = line, t->current_expr = expr; signal(SIGABRT, sig_abrt_handler); if (!setjmp(t->current_test)) { t->jmpbuf_active = 1; t->current_result = unit_tester(t, result, t->current_expr, t->current_line); } else { print_caught_signal_name(t); t->current_result = unit_tester(t, 0, t->current_expr, t->current_line); signal(SIGABRT, sig_abrt_handler); } signal(SIGABRT, SIG_DFL); t->jmpbuf_active = 0; } /**@brief This advances the test suite like the test macro, however this test * must be executed otherwise the test suite will not continue * @param TESTBENCH The test bence to execute under * @param EXPR The expression should yield non zero on success */ #define must(TESTBENCH, EXPR) _must((TESTBENCH), (EXPR) != 0, #EXPR, __LINE__) static void _must(test_t *t, const int result, const char *expr, const unsigned line) { assert(t && expr); print_must(t, expr); _test(t, result, expr, line); if (!(t->current_result)) exit(-1); } /**@brief print out and execute a statement that is needed to further a test * @param TESTBENCH The test bence to execute under * @param STMT A statement to print out (stringify first) and then execute**/ #define state(TESTBENCH, STMT) do{ print_statement((TESTBENCH), #STMT ); STMT; } while (0); static int unit_test_start(test_t *t, const char *unit_name, FILE *output) { assert(t && unit_name && output); memset(t, 0, sizeof(*t)); time_t rawtime; time(&rawtime); t->output = output; if (signal(SIGABRT, sig_abrt_handler) == SIG_ERR) { fprintf(stderr, "signal handler installation failed"); return -1; } t->start_time = clock(); if (!(t->is_silent)) fprintf(t->output, "%s unit tests\n%sbegin:\n\n", unit_name, asctime(localtime(&rawtime))); return 0; } static unsigned unit_test_end(test_t *t, const char *unit_name) { assert(t && unit_name); clock_t end_time = clock(); double total = ((double) (end_time - t->start_time)) / CLOCKS_PER_SEC; if (!(t->is_silent)) fprintf(t->output, "\n\n%s unit tests\npassed %u/%u\ntime %fs\n", unit_name, t->passed, t->passed+t->failed, total); return t->failed; } /*** end minimal test framework ***/ /* forf_1 and forf_2 are test functions that can be called from within the interpreter */ static int forf_1(f_t *f) { f_push(f, 123); return 0; } static int forf_2(f_t *f) { f_push(f, 789); return 0; } int libf_unit_tests(int keep_files, int colorize, int silent) { tb.is_silent = silent; tb.color_on = colorize; unit_test_start(&tb, "libforth", stdout); { test(&tb, 0 == f_blog2(0)); test(&tb, 0 == f_blog2(1)); test(&tb, 1 == f_blog2(2)); test(&tb, 2 == f_blog2(4)); test(&tb, 3 == f_blog2(8)); test(&tb, 3 == f_blog2(10)); test(&tb, 4 == f_blog2(16)); test(&tb, 4 == f_blog2(17)); test(&tb, 1 == f_round_up_pow2(0)); test(&tb, 1 == f_round_up_pow2(1)); test(&tb, 2 == f_round_up_pow2(2)); test(&tb, 4 == f_round_up_pow2(3)); test(&tb, 16 == f_round_up_pow2(9)); test(&tb, 64 == f_round_up_pow2(37)); } { /**@note The following functions will not be tested: * - void f_set_file_output(f_t *o, FILE *out); * - void f_set_args(f_t *o, int argc, char **argv); * - void f_signal(f_t *o, int signal); * - int main_forth(int argc, char **argv); **/ FILE *core; f_cell_t here; f_t *f; print_note(&tb, "libforth.c"); state(&tb, f = f_init(MINIMUM_CORE_SIZE, stdin, stdout, NULL)); must(&tb, f); state(&tb, core = fopen("unit.core", "wb")); must(&tb, core); /* test setup, simple tests of push/pop interface */ test(&tb, 0 == f_stack_position(f)); test(&tb, f_eval(f, "here ") >= 0); state(&tb, here = f_pop(f)); state(&tb, f_push(f, here)); test(&tb, f_eval(f, "2 2 + ") >= 0); test(&tb, f_pop(f) == 4); /* define a word, call that word, pop result */ test(&tb, !f_find(f, "unit-01")); test(&tb, f_eval(f, ": unit-01 69 ; unit-01 ") >= 0); test(&tb, f_find(f, "unit-01")); test(&tb, !f_find(f, "unit-01 ")); /* notice the trailing space */ test(&tb, f_pop(f) == 69); test(&tb, 1 == f_stack_position(f)); /* "here" still on stack */ /* constants */ test(&tb, f_define_constant(f, "constant-1", 0xAA0A) >= 0); test(&tb, f_define_constant(f, "constant-2", 0x5055) >= 0); test(&tb, f_eval(f, "constant-1 constant-2 or") >= 0); test(&tb, f_pop(f) == 0xFA5F); /* string input */ state(&tb, f_set_string_input(f, " 18 2 /")); test(&tb, f_run(f) >= 0); test(&tb, f_pop(f) == 9); state(&tb, f_set_file_input(f, stdin)); /* save core for later tests */ test(&tb, f_save_core_file(f, core) >= 0); state(&tb, fclose(core)); /* more simple tests of arithmetic */ state(&tb, f_push(f, 99)); state(&tb, f_push(f, 98)); test(&tb, f_eval(f, "+") >= 0); test(&tb, f_pop(f) == 197); test(&tb, 1 == f_stack_position(f)); /* "here" still on stack */ test(&tb, here == f_pop(f)); state(&tb, f_free(f)); } { /**@note Previously 'tmpfile()' was used instead of writing to * 'coredump.log', however this causes problems under Windows * as it creates the temporary file in the root directory, * which needs administrator permissions. * * https://stackoverflow.com/questions/6247148/tmpfile-on-windows-7-x64 */ FILE *core_dump; f_t *f = NULL; static const char *name = "coredump.log"; state(&tb, core_dump = fopen(name, "wb")); must(&tb, core_dump); state(&tb, f = f_init(MINIMUM_CORE_SIZE, stdin, stdout, NULL)); must(&tb, f); test(&tb, f_dump_core(f, core_dump) >= 0); state(&tb, fclose(core_dump)); state(&tb, f_free(f)); if (!keep_files) state(&tb, remove(name)); } { /* Test the persistence of word definitions across core loads*/ FILE *core; f_t *f; state(&tb, core = fopen("unit.core", "rb")); must(&tb, core); /* test that definitions persist across core dumps */ state(&tb, f = f_load_core_file(core)); /* stack position does no persist across loads, this might * change, but is the current functionality */ test(&tb, 0 == f_stack_position(f)); must(&tb, f); /* the word "unit-01" was defined earlier */ test(&tb, f_find(f, "unit-01")); test(&tb, f_eval(f, "unit-01 constant-1 *") >= 0); test(&tb, f_pop(f) == 69 * 0xAA0A); test(&tb, 0 == f_stack_position(f)); state(&tb, f_free(f)); state(&tb, fclose(core)); } { /* test invalidation fails */ FILE *core; f_t *f; state(&tb, core = fopen("unit.core", "rb+")); must(&tb, core); state(&tb, rewind(core)); state(&tb, f = f_load_core_file(core)); test(&tb, !f_is_invalid(f)); state(&tb, f_invalidate(f)); test(&tb, f_is_invalid(f)); /* saving should fail as we have invalidated the core */ test(&tb, f_save_core_file(f, core) < 0); state(&tb, f_free(f)); state(&tb, fclose(core)); state(&tb, remove("unit.core")); } { /* test the built in words, there is a set of built in words * that are defined in the interpreter, these must be tested * * The following words need testing: * [ ] :noname * '\n' ')' cr :: */ f_t *f; state(&tb, f = f_init(MINIMUM_CORE_SIZE, stdin, stdout, NULL)); must(&tb, f); /* here we test if...else...then statements and hex conversion, * this also tests >mark indirectly */ test(&tb, f_eval(f, ": if-test if 0x55 else 0xAA then ;") >= 0); test(&tb, f_eval(f, "0 if-test") >= 0); test(&tb, f_pop(f) == 0xAA); state(&tb, f_push(f, 1)); test(&tb, f_eval(f, "if-test") >= 0); test(&tb, f_pop(f) == 0x55); /* simple loop tests */ test(&tb, f_eval(f, " : loop-test begin 1 + dup 10 u> until ;") >= 0); test(&tb, f_eval(f, " 1 loop-test") >= 0); test(&tb, f_pop(f) == 11); test(&tb, f_eval(f, " 39 loop-test") >= 0); test(&tb, f_pop(f) == 40); /* rot and comments */ test(&tb, f_eval(f, " 1 2 3 rot ( 1 2 3 -- 2 3 1 )") >= 0); test(&tb, f_pop(f) == 1); test(&tb, f_pop(f) == 3); test(&tb, f_pop(f) == 2); /* -rot */ test(&tb, f_eval(f, " 1 2 3 -rot ") >= 0); test(&tb, f_pop(f) == 2); test(&tb, f_pop(f) == 1); test(&tb, f_pop(f) == 3); /* nip */ test(&tb, f_eval(f, " 3 4 5 nip ") >= 0); test(&tb, f_pop(f) == 5); test(&tb, f_pop(f) == 3); /* allot */ test(&tb, f_eval(f, " here 32 allot here swap - ") >= 0); test(&tb, f_pop(f) == 32); /* tuck */ test(&tb, f_eval(f, " 67 23 tuck ") >= 0); test(&tb, f_pop(f) == 23); test(&tb, f_pop(f) == 67); test(&tb, f_pop(f) == 23); state(&tb, f_free(f)); } { /* test the forth interpreter internals */ f_t *f = NULL; state(&tb, f = f_init(MINIMUM_CORE_SIZE, stdin, stdout, NULL)); must(&tb, f); /* base should be set to zero, this is a special value * that allows hexadecimal, octal and decimal to be read * in if formatted correctly; * - hex 0x[0-9a-fA-F]* * - octal 0[0-7]* * - decimal [1-9][0-9]* */ test(&tb, f_eval(f, " base @ 0 = ") >= 0); test(&tb, f_pop(f)); /* the invalid flag should not be set */ test(&tb, f_eval(f, " `invalid @ 0 = ") >= 0); test(&tb, f_pop(f)); /* source id should be -1 (reading from string) */ test(&tb, f_eval(f, " `source-id @ -1 = ") >= 0); test(&tb, f_pop(f)); /* 0 call should fail, returning non zero */ test(&tb, f_eval(f, "0 call") >= 0); test(&tb, f_pop(f)); state(&tb, f_free(f)); } { /* tests for CALL */ f_t *f = NULL; struct f_functions *ff; state(&tb, ff = f_new_function_list(2)); must(&tb, ff); state(&tb, ff->functions[0].function = forf_1); state(&tb, ff->functions[1].function = forf_2); state(&tb, f = f_init(MINIMUM_CORE_SIZE, stdin, stdout, ff)); must(&tb, f); /* 0 call should correspond to the first function, which just * pushes 123 */ test(&tb, f_eval(f, "0 call") >= 0); test(&tb, !f_pop(f)); test(&tb, 123 == f_pop(f)); /* 1 call corresponds to the second function... */ test(&tb, f_eval(f, "1 call") >= 0); test(&tb, !f_pop(f)); test(&tb, 789 == f_pop(f)); test(&tb, f_eval(f, "2 call") >= 0); /* call signals failure by returning non zero */ test(&tb, f_pop(f)); state(&tb, f_free(f)); state(&tb, f_delete_function_list(ff)); } { FILE *core = NULL; f_t *f1 = NULL, *f2 = NULL; char *m1 = NULL, *m2 = NULL; f_cell_t size1, size2; must(&tb, f1 = f_init(MINIMUM_CORE_SIZE, stdin, stdout, NULL)); state(&tb, core = fopen("unit.core", "wb+")); must(&tb, core); test(&tb, f_save_core_file(f1, core) >= 0); state(&tb, rewind(core)); must(&tb, m1 = f_save_core_memory(f1, &size1)); must(&tb, f2 = f_load_core_memory(m1, size1)); must(&tb, m2 = f_save_core_memory(f2, &size2)); must(&tb, size2 == size1); test(&tb, size1/sizeof(f_cell_t) > MINIMUM_CORE_SIZE); state(&tb, fclose(core)); state(&tb, f_free(f1)); state(&tb, f_free(f2)); state(&tb, free(m1)); state(&tb, free(m2)); if (!keep_files) state(&tb, remove("unit.core")); } return !!unit_test_end(&tb, "libforth"); }