Tokenization for shunting-yard-calculation algorithm

69 views Asked by At

I have a tokenization algorithm and some math expressions that do not get parsed correctly.

Here is the code:

private void GetTokens()
{
    foreach (char token in test)
    {
        if (token.ToString().IsNumber() || token == '.' || token == ',')
        {
            currentToken.Append(token);
        }
        else if (token.IsOperator())
        {
            if (currentToken.Length > 0)
            {
                tokens.Add(currentToken.ToString());
                currentToken.Clear();
            }

            bool minusToNegative =
                tokens.Length > 0 && token == '-' && 
                tokens[^1].IsOperator() && tokens[^1] != ")";
            
            if (token == '(' && tokens[^1] == "-" && tokens[^2].IsNumber())
            {
                tokens[^1] = "+";
                tokens.Add("-1");
                tokens.Add("*");
            }
            
            if (minusToNegative)
            {
                currentToken.Append(token);
                continue;
            }

            tokens.Add(token.ToString());
        }
    }

    if (currentToken.Length > 0)
    {
        tokens.Add(currentToken.ToString());
    }
}

I have tried to fix for one expression but then it does not work for another. There is this examples: -(((-3)-10)--12) and -23-(2+3). Parse methods for first one does not work to second and vice versa.

How can I fix it for all situations?

1

There are 1 answers

5
Olivier Jacot-Descombes On

I would suggest a slightly different approach:

private static void GetTokens(string input)
{
    var number = new StringBuilder();
    foreach (char ch in input) {
        switch (ch) {
            case >= '0' and <= '9' or '.' or ',':
                number.Append(ch);
                break;
            case '+' or '-' when number.Length == 0 && tokens is [] or [.., "(" or "+" or "-" or "*" or "/"]:
                // Unary minus.
                number.Append(ch);
                break;
            case '+' or '-' or '*' or '/' or '(' or ')':
                // Operator
                AppendNumber();
                tokens.Add(ch.ToString());
                break;
            case ' ':
                AppendNumber();
                // Ignore white space
                break;
            default:
                // We have an unexpected character here!
                break;
        }
    }
    AppendNumber();


    void AppendNumber()
    {
        if (number.Length > 0) {
            tokens.Add(number.ToString());
            number.Clear();
        }
    }
}

Test:

string[] tests = ["-2+3", "+3*-4*+5", "+3--4-+5", "(((-3)-10)--12)", "-23-(2+3)"];
foreach (string t in tests) {
    tokens.Clear();

    Console.Write($"\"{t}\" ==> ");
    GetTokens(t);
    foreach (string token in tokens) {
        Console.Write($""" "{token}" """);
    }
    Console.WriteLine();
}
Console.ReadKey();

Test output:

"-2+3" ==>  "-2"  "+"  "3"
"+3*-4*+5" ==>  "+3"  "*"  "-4"  "*"  "+5"
"+3--4-+5" ==>  "+3"  "-"  "-4"  "-"  "+5"
"(((-3)-10)--12)" ==>  "("  "("  "("  "-3"  ")"  "-"  "10"  ")"  "-"  "-12"  ")"
"-23-(2+3)" ==>  "-23"  "-"  "("  "2"  "+"  "3"  ")"