今天写一个词法分析器,在 vs 下 debug 后生成运行好好的,转到 Ubuntu 用 gcc 编译一下子傻眼了,回 win 下面用 gcc 编译也是一样的结果
这是 vs 的结果(正常): http://chuantu.biz/t6/82/1507355708x3738353725.png
这是 gcc 的结果(无限循环): http://chuantu.biz/t6/82/1507355765x2890174166.png
初学者,实在搞不懂为什么
这是代码
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<ctype.h>
int charClass;
#define MAX_LEN 3000
char lexeme[MAX_LEN];
char nextChar;
char next2Char;
int lexLen;
int token;
int nextToken;
FILE* inFile;
#define LETTER 0
#define DIGIT 1
#define SLASH 2
#define ASTERISK 3
#define DQUOTE 4
#define UNKNOWN 999
enum {ABSTRACT =258, CASE, CATCH, CLASS, DEF,
DO, ELSE, EXTENDS, fALSE, FINAL,
FINALLY, FOR, FORSOME, IF, IMPLICIT,
IMPORT, LAZY, MACRO, MATCH, NEW,
nULL, OBJECT, OVERRIDE, PACKAGE, PRIVATE,
PROTECTED, RETURN, SEALED, SUPER, THIS,
THROW, TRAIT, TRY, tRUE, TYPE,
VAL, VAR, WHILE, WITH, YIELD,
UNDERLINE, COLON, EQU, EQUG, LDE, LCO, LPER, GCO, HASHTAG, AITE,
OPERATOR, ID, INTERGER, ERROR, SYMBOL, COMMENT, STRING};
char* keywords[]={"abstract", "case", "catch", "class", "def",
"do", "else", "extends", "false", "final",
"finally", "for", "forSome", "if", "implicit",
"import", "lazy", "macro", "match", "new",
"null", "object", "override", "package", "private",
"protected", "return", "sealed", "super", "this",
"throw", "trait", "try", "true", "type",
"val", "var", "while", "with", "yield",
"_", ":", "=", "=>", "<-", "<:", "<%", ">:", "#", "@", 0};
void addChar()
{
if(lexLen <= MAX_LEN -2)
{
lexeme[lexLen++] = nextChar;
lexeme[lexLen] = 0;
}
else
printf("ERROR:lexeme is too long. \n" );
}
void getChar()
{
static int firstRun = 1;
if(firstRun)
{
nextChar = getc(inFile);
next2Char = getc(inFile);
firstRun = 0;
}
else
{
nextChar = next2Char;
next2Char = getc(inFile);
}
if(nextChar == EOF)
{
charClass = EOF;
}
else
{
if (nextChar == '\"')
charClass = DQUOTE;
else if(nextChar == '/')
charClass = SLASH;
else if(nextChar == '*')
charClass = ASTERISK;
else if(isalpha(nextChar))
charClass = LETTER;
else if(isdigit(nextChar))
charClass = DIGIT;
else
charClass = UNKNOWN;
}
}
void getNonBlank()
{
while (isspace(nextChar)) {
getChar();
}
}
int checkSymbol(char ch,char nextCh)
{
switch (ch) {
case '(':case ')':case '{':case '}':case '<':case '>':case'&':case '[': case ']':case '.':case '\\':case '_':case '=':case ':':case ';':
addChar();
nextToken = SYMBOL;
break;
case '+':case '-':case '*':case '/':
addChar();
nextToken = OPERATOR;
break;
case EOF:
addChar();
nextToken = EOF;
default:
printf("ERROR:unknown character '%c'. \n",ch );
nextToken = ERROR;
}
return nextToken;
}
void checkKeywords(char* pword)
{
int i = 0;
while(keywords[i]!= 0)
{
char* pkeyword=keywords[i];
if(strcmp(pword,pkeyword) == 0)
{
nextToken = 258 + i;
return;
}
i++;
}
}
int lexer()
{
lexLen =0;
getNonBlank();
switch (charClass) {
case DQUOTE:
addChar();
getChar();
while(charClass != DQUOTE)
{
addChar();
getChar();
}
addChar();
getChar();
nextToken = STRING;
break;
case SLASH:
addChar();
getChar();
if(charClass == ASTERISK)
{
addChar();
getChar();
loop:
while(charClass != ASTERISK)
{
addChar();
getChar();
}
addChar();
getChar();
if(charClass == SLASH)
{
addChar();
getChar();
nextToken = COMMENT;
break;
}
else goto loop;
}
else if(charClass == SLASH)
{
addChar();
getChar();
while(nextChar != '\n')
{
addChar();
getChar();
}
nextToken = COMMENT;
break;
}
else
{
nextToken = OPERATOR;
break;
}
case ASTERISK:
nextToken = OPERATOR;
break;
case LETTER:
addChar();
getChar();
while(charClass == LETTER || charClass == DIGIT)
{
addChar();
getChar();
}
nextToken = ID;
checkKeywords(lexeme);
break;
case DIGIT:
addChar();
getChar();
while(charClass == DIGIT)
{
addChar();
getChar();
}
nextToken = INTERGER;
break;
case UNKNOWN:
checkSymbol(nextChar,next2Char);
getChar();
break;
case EOF:
nextToken = EOF;
lexeme[0] = 'E';
lexeme[1] = 'O';
lexeme[2] = 'F';
lexeme[3] = 0;
break;
}
printf("<%6d, %s >\n",nextToken,lexeme);
return nextToken;
memset(lexeme,0,sizeof(lexeme));
}
int main(int argc,char* argv[])
{
argv++;argc--;
inFile = fopen(argv[0],"r");
if(inFile == NULL)
{
printf("ERROR:cannot open file.\n" );
exit(0);
}
getChar();
while(nextToken != EOF)
lexer();
return 0;
}
1
lcdtyph 2017-10-07 14:09:14 +08:00 via iPhone 1
没看代码,猜测一下,输入文件的行末是\r\n,在 linux 下会 fgetc 两次,win 下只有一次。
|
2
CEBBCAT 2017-10-07 16:24:32 +08:00 via Android 1
跑个题:代码用 gist 贴,图片可以传到 imgur
|
3
virusdefender 2017-10-07 16:49:22 +08:00 1
先单步或者 print 找到死循环的地方,然后排除换行符编码编译器优化( gcc -O0 )之类的问题。
|
4
wevsty 2017-10-07 17:25:27 +08:00 1
这个问题其实是楼主应该是自己去调试的,事实上光贴代码没有测试例子有的时候也不好找到问题。
我这里帮楼主调试了一下,我的结论是这个问题是楼主代码自己的 bug 与编译器无关。 举例,有一个文件内容为如下内容的时候就会有 bug 了。 ''' t1 * t2 t3 ''' 当 nextChar = '*' next2Char = ' ' 的时候函数返回了一个 ASTERISK,但是在 lexer 的 case ASTERISK:中没有继续读取下一个字符直接 break 了,所以就死循环了。 |
5
coderluan 2017-10-07 19:11:37 +08:00 1
自己写的代码自己调试啊,gcc 的调试工具叫 gdb,用不习惯的话弄个 codeblocks 之类的 ide
|
7
daychan OP @virusdefender 谢谢啦,找到 bug 了
|