I know how pointers and pointer arithmetic work, but this is confounding me.
I made a lexer a while back and now I'm modifying it so I can use a parser with it but, for some reason, it will not iterate the char* I pass to the lexer function.
I malloc()'d a char pointer and then passed it to the function where it iterates over it. It does the iteration but it resets back to the original address for some reason. I need the char* to be able to change and STAY at the address it finished at after iterating or else I cannot complete a proper parser...
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lex.h"
/*
 * Driver that simulates a parser pulling tokens one at a time.
 *
 * Loads the file named by argv[1] into a NUL-terminated heap buffer, then
 * repeatedly prompts on stdin; after each answer it asks the lexer for a
 * token and prints the token queue. The loop continues while the answer
 * starts with 'y' and stops on end-of-input.
 *
 * Returns EXIT_SUCCESS after a normal session, EXIT_FAILURE if the file
 * could not be opened, measured, buffered, or read.
 */
int main(int argc, char **argv)
{
    int status = EXIT_FAILURE;
    struct lexer *token_queue = NULL;
    FILE *srcfile = NULL;
    char *srcBuffer = NULL;
    long file_len = 0;
    size_t bytes_read = 0;
    char decision[20] = "";

    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n",
                (argc > 0 && argv[0]) ? argv[0] : "lexer");
        goto hell;
    }

    // The file is only ever read, so do not demand write permission ("r+").
    srcfile = fopen(argv[1], "r");
    if (!srcfile) {
        fprintf(stderr, "file \"%s\" not found, is null\n", argv[1]);
        goto hell;
    }

    // Seek to the end to learn the file size, then go back to the start.
    if (fseek(srcfile, 0, SEEK_END) != 0 || (file_len = ftell(srcfile)) < 0) {
        fprintf(stderr, "could not determine the size of \"%s\"\n", argv[1]);
        goto hell;
    }
    rewind(srcfile);

    // One extra byte for the terminator; calloc zero-fills the whole buffer.
    srcBuffer = calloc((size_t)file_len + 1, sizeof *srcBuffer);
    if (!srcBuffer) {
        fprintf(stderr, "out of memory reading \"%s\"\n", argv[1]);
        goto hell;
    }

    // Read at most file_len bytes; text-mode translation may yield fewer,
    // so terminate at the number of bytes actually read.
    bytes_read = fread(srcBuffer, sizeof *srcBuffer, (size_t)file_len, srcfile);
    if (ferror(srcfile)) {
        fprintf(stderr, "error while reading \"%s\"\n", argv[1]);
        goto hell;
    }
    srcBuffer[bytes_read] = '\0';

    do {
        printf("print a token?\n");
        if (scanf("%19s", decision) != 1)
            break;  // EOF or input error: stop instead of reusing a stale answer
        // NOTE(review): srcBuffer is passed by value, so the lexer cannot
        // advance it; each call starts scanning from the same position.
        lexer_get_single_token(srcBuffer, &token_queue); // tokenize baby!
        print_tokens_colored(&token_queue);
    }
    while (decision[0] == 'y');     // slowly simulate parser asking for another token!

    status = EXIT_SUCCESS;

hell:
    free(srcBuffer);
    srcBuffer = NULL;
    if (srcfile) {
        fclose(srcfile);
        srcfile = NULL;
    }
    lexer_destroy(&token_queue);
    return status;
}
lexer function
void lexer_get_single_token(char *iter, struct lexer **Q)
{
    char wording[512] = "";     // buffer to hold identifiers, keywords, and strings
    unsigned int i = 0;
    const char *keywords[] = {
        "auto", "const", "double", "float", "int", "short", "struct", "unsigned",
        "break", "continue", "else", "for", "long", "signed", "switch", "void",
        "case", "default", "enum", "goto", "register", "sizeof", "typedef", "volatile",
        "char", "do", "extern", "if", "return", "static", "union", "while",
        "inline", "alignof", "_Generic", "bool", "_Bool", "true", "false"
    };
    printf("*iter == %c\n", *iter);
    while ( *iter != '\0' ) {
        while ( is_space(*iter) )
            ++iter;
        if (*iter == '/' && iter[1] == '*') {       // found C style /**/ comment
            do {
                ++iter;
            }
            while ( !(*iter == '*' && iter[1] == '/') );    // continuously skip until we find a */
            iter += 2;  // skip twice to pass over */ and go to the next token.
        }
        if (*iter == '/' && iter[1] == '/') {       // found C++ style // comment
            while ( *iter != '\n' )
                ++iter;     // skip until the next line which will be skipped itself.
        }
        if (*iter == '\\' && iter[1] == '\n') {     // formatting Left slash check
            lexer_add_token(Q, LeftSlash, "\\", 2);
            iter += 2;
            return;
        }
        if (*iter == '\"') {    // found string literal, adjust for "\\" so we won't crash
            wording[i++] = *iter++;     // add the first double quote to buffer
            while ( *iter != '\"' ) {
                if (*iter == '\\' && iter[1] == '\"' && iter[-1] != '\\') {
                    wording[i++] = *iter++;     // add the literal double quote as well
                }
                wording[i++] = *iter++;
            }
            wording[i++] = *iter++;     // found the ending double quote, add that too.
            if (wording[0] != '\0') {
                lexer_add_token(Q, StringConstant, wording, i+1);
                reset_string(wording);
                return;
            }
        }
        if ( *iter == '\'' ) {  // found character literal, adjust for '\\' so we won't crash
            wording[i++] = *iter++;
            int counter=0;
            while (*iter != '\'' && counter < 2) {      // Same operation as the string literal but limit as char
                if (*iter == '\\' && iter[1] == '\'' && iter[-1] != '\\') {
                    wording[i++] = *iter++;
                }
                wording[i++] = *iter++;
                ++counter;
            }
            wording[i++] = *iter++;     // add ending single quote to buffer
            if (wording[0] != '\0') {
                lexer_add_token(Q, CharConstant, wording, i+1);
                reset_string(wording);
                return;
            }
        }
        if (*iter == '0' && (iter[1] == 'x' || iter[1] == 'X')) {   // found hexadecimal constant
            wording[i++] = *iter++;     // copy both 0 and x to buffer
            wording[i++] = *iter++;
            while ( is_numeral(*iter) ) {
                wording[i++] = *iter++;     // copy numbers and letters A to F
            }
            if ( *iter == '.' && is_numeral(iter[1]) ) {    // found hexadecimal float
                wording[i++] = *iter++;
                while ( is_numeral(*iter) )
                    wording[i++] = *iter++;
                if (*iter == 'p' && is_numeral(iter[1])) {  // stuff like 0x0.3p10.
                    wording[i++] = *iter++;
                    while ( is_numeral(*iter) )
                        wording[i++] = *iter++;
                }
                if (wording[0] != '\0') {
                    lexer_add_token(Q, NumConstantHexFloat, wording, i+1);
                    reset_string(wording);
                    return;
                }
            }
            else {      // we didn't find a decimal, so tokenize what we found as a normal hex constant
                if (wording[0] != '\0') {
                    lexer_add_token(Q, NumConstantHex, wording, i+1);
                    reset_string(wording);
                    return;
                }
            }
        }
        while ( is_numeric(*iter) ) {   // found decimal constant
            wording[i++] = *iter++;
        }
        if ( *iter == '.' && is_numeric(iter[1]) ) {    // found floating point number
            wording[i++] = *iter++;     // add in the decimal char
            while ( is_numeric(*iter) )
                wording[i++] = *iter++;
            // add the 'e' constant for large floats as well as 'p' (power) constant
            if ( (*iter == 'p' || *iter == 'P' || *iter == 'e' || *iter == 'E') && is_numeric(iter[1]) )
            {
                wording[i++] = *iter++;
                while ( is_numeric(*iter) )
                    wording[i++] = *iter++;
            }
            if (*iter == 'f' || *iter == 'F')   // stuff like 2.0f, add that into the buffer!
                wording[i++] = *iter++;
            if (wording[0] != '\0') {
                lexer_add_token(Q, NumConstantReal, wording, i+1);
                reset_string(wording);
                return;
            }
        }
        else {      // no decimal, consider it a natural number
            if (wording[0] != '\0') {
                lexer_add_token(Q, NumConstant, wording, i+1);
                reset_string(wording);
                return;
            }
        }
        if (is_alphabetic(*iter)) { // found an identifier or potential keyword
            while (is_potential_identifier(*iter))
                wording[i++] = *iter++;
            if (wording[0] != '\0') {
                int x;
                int found_keyword = 0;
                for ( x=0 ; x<sizeof keywords/sizeof keywords[0] ; ++x ) {
                    if ( !strcmp(wording, keywords[x]) )
                        found_keyword = 1;
                }
                if (found_keyword)
                    lexer_add_token(Q, Keyword, wording, i+1);
                else lexer_add_token(Q, NumIdent, wording, i+1);
                reset_string(wording);
                return;
            }
        }
        switch ( *iter ) {  // add in individual characters
            case '=':
                if (iter[1] == '=') {
                    ++iter;
                    lexer_add_token(Q, EqualCmp, "==", 3);
                }
                else lexer_add_token(Q, Equal, "=", 2);
                ++iter;
                return;
            case ';':
                lexer_add_token(Q, Semicolon, ";", 2);
                ++iter;
                return;
            case ':':
                lexer_add_token(Q, Colon, ";", 2);
                ++iter;
                return;
            case '+':   // possible uses => left unary is positive, twice unary is increment, once binary is addition
                if (iter[1] == '=') {
                    ++iter;
                    lexer_add_token(Q, PlusEqual, "+=", 3);
                }
                else if (iter[1] == '+') {
                    ++iter;
                    lexer_add_token(Q, Increment, "++", 3);
                }
                else lexer_add_token(Q, Plus, "+", 2);
                ++iter;
                return;
            case '-':   // possible uses => left unary is negating, twice unary is decrement, one binary is minus
                if (iter[1] == '=') {
                    ++iter;
                    lexer_add_token(Q, MinusEqual, "-=", 3);
                }
                else if (iter[1] == '-') {
                    ++iter;
                    lexer_add_token(Q, Decrement, "--", 3);
                }
                else if (iter[1] == '>') {
                    ++iter;
                    lexer_add_token(Q, Arrow, "->", 3);
                }
                else lexer_add_token(Q, Dash, "-", 2);
                ++iter;
                return;
            case '*':   // leftward unary is dereferencing ptr, binary be mult. Also check for / as ending comment
                if (iter[1] == '=') {
                    ++iter;
                    lexer_add_token(Q, MultEqual, "*=", 3);
                }
                else lexer_add_token(Q, Asterisk, "*", 2);
                ++iter;
                return;
            case '/':   // check for * and / as comment EDIT: DONE
                if (iter[1] == '=') {
                    ++iter;
                    lexer_add_token(Q, DivEqual, "/=", 3);
                }
                else lexer_add_token(Q, DivSlash, "/", 2);
                ++iter;
                return;
            case '(':
                lexer_add_token(Q, LeftParens, "(", 2);
                ++iter;
                return;
            case ')':
                lexer_add_token(Q, RiteParens, ")", 2);
                ++iter;
                return;
            case '[':
                lexer_add_token(Q, LeftSqBracket, "[", 2);
                ++iter;
                return;
            case ']':
                lexer_add_token(Q, RightSqBracket, "]", 2);
                ++iter;
                return;
            case '{':
                lexer_add_token(Q, LeftCurlBrace, "{", 2);
                ++iter;
                return;
            case '}':
                lexer_add_token(Q, RightCurlBrace, "}", 2);
                ++iter;
                return;
            case '.':
                if (iter[1] == '.' && iter[2] == '.') {
                    iter += 2;
                    lexer_add_token(Q, Ellipses, "...", 4);
                }
                else lexer_add_token(Q, Dot, ".", 2);
                ++iter;
                return;
            case ',':
                lexer_add_token(Q, Comma, ",", 2);
                ++iter;
                return;
            case '<':
                if (iter[1] == '<') {
                    if (iter[2] == '=') {
                        lexer_add_token(Q, LeftBitShiftEqual, "<<=", 4);
                        iter += 2;
                    }
                    else {
                        lexer_add_token(Q, LeftBitShift, "<<", 3);
                        ++iter;
                    }
                }
                else if (iter[1] == '=') {
                    lexer_add_token(Q, LessEqual, "<=", 3);
                    ++iter;
                }
                else lexer_add_token(Q, LeftArrow, "<", 2);
                ++iter;
                return;
            case '>':
                if (iter[1] == '>') {
                    if (iter[2] == '=') {
                        lexer_add_token(Q, RightBitShiftEqual, ">>=", 4);
                        iter += 2;
                    }
                    else {
                        lexer_add_token(Q, RightBitShift, ">>", 3);
                        ++iter;
                    }
                }
                else if (iter[1] == '=') {
                    lexer_add_token(Q, GreaterEqual, ">=", 3);
                    ++iter;
                }
                else lexer_add_token(Q, RightArrow, ">", 2);
                ++iter;
                return;
            case '?':
                lexer_add_token(Q, QuestionMark, "?", 2);
                ++iter;
                return;
            case '#':
                lexer_add_token(Q, HashSym, "#", 2);
                ++iter;
                return;
            case '&':
                if (iter[1] == '=') {
                    ++iter;
                    lexer_add_token(Q, AndEqual, "&=", 3);
                }
                else if (iter[1] == '&') {
                    ++iter;
                    lexer_add_token(Q, BoolAnd, "&&", 3);
                }
                else lexer_add_token(Q, Ampersand, "&", 2);
                ++iter;
                return;
            case '^':
                if (iter[1] == '=') {
                    ++iter;
                    lexer_add_token(Q, XorEqual, "^=", 3);
                }
                else lexer_add_token(Q, Carot, "^", 2);
                ++iter;
                return;
            case '%':
                if (iter[1] == '=') {
                    ++iter;
                lexer_add_token(Q, ModuloEqual, "%=", 3);
            }
            else lexer_add_token(Q, Percent, "%", 2);
            ++iter;
            return;
        case '!':
            if (iter[1] == '=') {
                ++iter;
                lexer_add_token(Q, NotEqual, "!=", 3);
            }
            else lexer_add_token(Q, ExclamationMark, "!", 2);
            ++iter;
            return;
        case '|':
            if (iter[1] == '=') {
                ++iter;
                lexer_add_token(Q, OrEqual, "|=", 3);
            }
            else if (iter[1] == '|') {
                ++iter;
                lexer_add_token(Q, BoolOr, "||", 3);
            }
            else lexer_add_token(Q, VerticalBar, "|", 2);
            ++iter;
            return;
        case '~':
            lexer_add_token(Q, Tilde, "~", 2);
            ++iter;
            return;
        case '@':
            lexer_add_token(Q, AtSign, "@", 2);
            ++iter;
            return;
        case '$':
            lexer_add_token(Q, DollarSign, "$", 2);
            ++iter;
            return;
        case '`':
            lexer_add_token(Q, GraveAccent, "`", 2);
            ++iter;
            return;
    }
    ++iter;
}
}
                        
C functions pass arguments by value, so inside the function
lexer_get_single_token(), iter is a copy of the pointer srcBuffer. This means that changes to the value of iter are not reflected in srcBuffer. If you want to preserve the changes made to iter inside the function, you can either return the pointer to the calling function, or add another layer of indirection.
By changing the function signature to:
    char *lexer_get_single_token(char *iter, struct lexer **Q)
this function returns a pointer to char, and iter can be returned when the function is finished. The function call would then look like:
    srcBuffer = lexer_get_single_token(srcBuffer, &token_queue);
After the function call, srcBuffer points to the location indicated by iter when the end of the function was reached. You should save a copy of the original srcBuffer pointer so that you can free it later.
Alternatively, you can change the function signature to:
    void lexer_get_single_token(char **iter, struct lexer **Q)
Now the function call will look like:
    lexer_get_single_token(&srcBuffer, &token_queue);
The code in the function will need to be modified to account for the additional indirection (for example, *iter becomes **iter and ++iter becomes ++*iter), but because you pass in a pointer to srcBuffer, you will be able to make modifications to srcBuffer.