1.分析组成
C语言的程序中,有很单词多符号和保留字。一些单词符号还有对应的左线性文法。所以我们需要先做出一个单词字符表,给出对应的识别码,然后跟据对应的表格来写出程序
2.程序设计
程序主要有循环判断构成。不需推理即可产生的符号我们可以把它包装在函数中,返回值为对应的识别码即可。但是有线性文法的则需要单独的一遍推倒才可以得出词法分析结果。对于测试样例我们可以存储到txt文件中,使用循环读写可以更高效的测试和输出词法分析结果。最终的结果我们用二元式的形式来表示,存储在txt文件中
3.完整程序
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int english(char ch) {
if((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) return 1;
else return 0;
}
int number(char ch) {
if(ch >= '0' && ch <= '9') return 1;
else return 0;
}
int reserved(char str[]) {
if(strcmp(str, "void") == 0) return 3;
else if(strcmp(str, "int") == 0) return 4;
else if(strcmp(str, "float") == 0) return 5;
else if(strcmp(str, "double") == 0) return 6;
else if(strcmp(str, "if") == 0) return 7;
else if(strcmp(str, "else") == 0) return 8;
else if(strcmp(str, "for") == 0) return 9;
else if(strcmp(str, "do") == 0) return 10;
else if(strcmp(str, "while") == 0) return 11;
else if(strcmp(str, "break") == 0) return 12;
else if(strcmp(str, "return") == 0) return 13;
else return 1;
}
int symbol(char ch) {
if(ch == ';') return 14;
else if(ch == ',') return 15;
else if(ch == '(') return 16;
else if(ch == ')') return 17;
else if(ch == '{') return 18;
else if(ch == '}') return 19;
else if(ch == '[') return 20;
else if(ch == ']') return 21;
else if(ch == '%') return 22;
else if(ch == '?') return 23;
else if(ch == ':') return 24;
else if(ch == '\'') return 25;
else if(ch == '\"') return 26;
else if(ch == '.') return 27;
else return 0;
}
int main(){
for(int i = 1; i <= 4; i++){
char txt1[] = "test";
char num[6];
sprintf(num, "%d.txt", i);
strcat(txt1, num);
char txt2[] = "analyze";
sprintf(num, "%d.txt", i);
strcat(txt2, num);
FILE *fp = fopen(txt1, "r");
FILE *fw = fopen(txt2, "wt+");
int flag = 0;
char ch = fgetc(fp);
while(!feof(fp)) {
char str[32];
int j = 0;
if(ch == ' ' || ch == '\t') {
ch = fgetc(fp);
continue;
}
else if(ch == '\n'){
fprintf(fw, "\n");
ch = fgetc(fp);
continue;
}
else if(english(ch)) {
str[j++] = ch;
do{
ch = fgetc(fp);
str[j++] = ch;
}while(english(ch)||number(ch));
str[j-1] = '\0';
int id = reserved(str);
fprintf(fw, "(%d, %s) ", id, str);
}
else if(number(ch)) {
str[j++] = ch;
do{
ch = fgetc(fp);
str[j++] = ch;
}while(number(ch));
str[j-1] = '\0';
fprintf(fw, "(2, %s) ", str);
}
else if(symbol(ch) != 0) {
fprintf(fw, "(%d, %c) ", symbol(ch), ch);
ch = fgetc(fp);
}
else if(ch == '>') {
ch = fgetc(fp);
if(ch == '=') {
fprintf(fw, "(29, >=) ");
ch = fgetc(fp);
}
else if(ch == '>') {
fprintf(fw, "(30, >>) ");
ch = fgetc(fp);
}
else fprintf(fw, "(28, >) ");
}
else if(ch == '<') {
ch = fgetc(fp);
if(ch == '=') {
fprintf(fw, "(32, <=) ");
ch = fgetc(fp);
}
else if(ch == '<') {
fprintf(fw, "(33, <<) ");
ch = fgetc(fp);
}
else fprintf(fw, "(31, <) ");
}
else if(ch == '!') {
ch = fgetc(fp);
if(ch == '=') {
fprintf(fw, "(35, !=) ");
ch = fgetc(fp);
}
else fprintf(fw, "(34, !) ");
}
else if(ch == '=') {
ch = fgetc(fp);
if(ch == '=') {
fprintf(fw, "(37, ==) ");
ch = fgetc(fp);
}
else fprintf(fw, "(36, =) ");
}
else if(ch == '/') {
ch = fgetc(fp);
if(ch == '*') {
fprintf(fw, "(Start annotate, /*) ");
do {
ch = fgetc(fp);
if(ch == '*') {
ch = fgetc(fp);
if(ch == '/') {
fprintf(fw, "(End annotate, */) ");
ch = fgetc(fp);
break;
}
}
}while(1);
}
else if(ch == '/') {
fprintf(fw, "(annotate, //) ");
do {
ch = fgetc(fp);
}while(ch != '\n');
fprintf(fw, "\n");
ch = fgetc(fp);
}
else if(ch == '=') {
fprintf(fw, "(39, /=) ");
ch = fgetc(fp);
}
else fprintf(fw, "(38, /) ");
}
else if(ch == '&') {
ch = fgetc(fp);
if(ch == '&') {
fprintf(fw, "(41, &&) ");
ch = fgetc(fp);
}
else fprintf(fw, "(40, &) ");
}
else if(ch == '|') {
ch = fgetc(fp);
if(ch == '|') {
fprintf(fw, "(43, ||) ");
ch = fgetc(fp);
}
else fprintf(fw, "(42, |) ");
}
else if(ch == '+') {
ch = fgetc(fp);
if(ch == '=') {
fprintf(fw, "(46, +=) ");
ch = fgetc(fp);
}
else if(ch == '+') {
fprintf(fw, "(45 ++) ");
ch = fgetc(fp);
}
else fprintf(fw, "(44, +) ");
}
else if(ch == '-') {
ch = fgetc(fp);
if(ch == '=') {
fprintf(fw, "(49, -=) ");
ch = fgetc(fp);
}
else if(ch == '-') {
fprintf(fw, "(48, --) ");
ch = fgetc(fp);
}
else fprintf(fw, "(47, -) ");
}
else if(ch == '*') {
ch = fgetc(fp);
if(ch == '=') {
fprintf(fw, "(51, *=) ");
ch = fgetc(fp);
}
else fprintf(fw, "(50, *) ");
}
else if(ch == '\\') {
ch = fgetc(fp);
if(ch == 'n') {
ch = fgetc(fp);
}
}
else {
fprintf(fw, "Undefined symbol!");
printf("test%d: Undefined symbol!\n", i);
flag = 1;
break;
}
}
fclose(fp);
fclose(fw);
if(flag) continue;
printf("test%d: Finish analyzing.\n", i);
}
}
4.测试运行
其中一个测试样例:(剩余的大家可以自主编写)
void test1() {
int a = 5, b = 10;
if (a > b) {
printf("a is greater than b");
} else if (a < b) {
printf("b is greater than a");
} else {
printf("a and b are equal");
}
for (int i = 0; i < 5; i++) {
printf("%d ", i);
}
printf("\n");
int arr[5] = {1, 2, 3, 4, 5};
int sum = 0;
for (int i = 0; i < 5; i++) {
sum += arr[i];
}
printf("The sum of the array is: %d\n", sum);
return 0;
}
运行结果:
(3, void) (1, test1) (16, () (17, )) (18, {)
(4, int) (1, a) (36, =) (2, 5) (15, ,) (1, b) (36, =) (2, 10) (14, ;)
(7, if) (16, () (1, a) (28, >) (1, b) (17, )) (18, {)
(1, printf) (16, () (26, ") (1, a) (1, is) (1, greater) (1, than) (1, b) (26, ") (17, )) (14, ;)
(19, }) (8, else) (7, if) (16, () (1, a) (31, <) (1, b) (17, )) (18, {)
(1, printf) (16, () (26, ") (1, b) (1, is) (1, greater) (1, than) (1, a) (26, ") (17, )) (14, ;)
(19, }) (8, else) (18, {)
(1, printf) (16, () (26, ") (1, a) (1, and) (1, b) (1, are) (1, equal) (26, ") (17, )) (14, ;)
(19, })
(9, for) (16, () (4, int) (1, i) (36, =) (2, 0) (14, ;) (1, i) (31, <) (2, 5) (14, ;) (1, i) (45 ++) (17, )) (18, {)
(1, printf) (16, () (26, ") (22, %) (1, d) (26, ") (15, ,) (1, i) (17, )) (14, ;)
(19, })
(1, printf) (16, () (26, ") (26, ") (17, )) (14, ;)
(4, int) (1, arr) (20, [) (2, 5) (21, ]) (36, =) (18, {) (2, 1) (15, ,) (2, 2) (15, ,) (2, 3) (15, ,) (2, 4) (15, ,) (2, 5) (19, }) (14, ;)
(4, int) (1, sum) (36, =) (2, 0) (14, ;)
(9, for) (16, () (4, int) (1, i) (36, =) (2, 0) (14, ;) (1, i) (31, <) (2, 5) (14, ;) (1, i) (45 ++) (17, )) (18, {)
(1, sum) (46, +=) (1, arr) (20, [) (1, i) (21, ]) (14, ;)
(19, })
(1, printf) (16, () (26, ") (1, The) (1, sum) (1, of) (1, the) (1, array) (1, is) (24, :) (22, %) (1, d) (26, ") (15, ,) (1, sum) (17, )) (14, ;)
(13, return) (2, 0) (14, ;)
(19, })