二叉树统计单词方法-勤学培训网

因为预先不知道出现的单词列表，无法方便地排序并使用折半查找;也不能分别对输入中的每个单词都执行一次线性查找，所以考虑使用二叉树的数据结构来组织这些单词，接下来中山美联小编告诉你具体的二叉树统计单词方法，大家一起来看看吧!

二叉树统计单词方法1：

* My practice of K&R 6.5

#include

#define MAXWORD 100

/* a binary tree node */

typedef struct tnode_ {

char *word;

int count;

struct tnode_ *left;

struct tnode_ *right;

} tnode;

void print_tree(tnode *root);

tnode *add_tree(tnode *root, char *word);

void free_node(tnode *root);

char *copy_string(char *s) {

char *p = (char *)malloc(strlen(s)+1);

if(p != NULL) {

strcpy(p, s);

}

return p;

}

tnode *add_tree(tnode *p, char *word) {

int result;

if(p == NULL) {

p = (tnode *)malloc(sizeof(tnode));

p->word= copy_string(word); // Attention !

p->count = 1;

p->left = NULL;

p->right = NULL;

}

else {

result = strcmp(word, p->word);

if(result < 0) {

p->left = add_tree(p->left, word);

}

else if(result > 0) {

p->right = add_tree(p->right, word);

}

else {

p->count++;

}

return p;

}

void print_tree(tnode *p) {

if(p) {

print_tree(p->left);

printf("%st : %4dn", p->word, p->count);

print_tree(p->right);

}

void free_node(tnode *p) {

if(p) {

free_node(p->left);

free_node(p->right);

free(p->word);

free(p);

}

int getword(char *word, int n) {

int c;

char *w = word;

while(isspace(c = getchar())) {

;

}

if(c != EOF) {

*w++ = c;

}

if(!isalpha(c)) {

*w = '';

return c;

}

while(n > 0) {

c = getchar();

if(isalnum(c)) {

*w++ = c;

}

else {

break;

}

n--;

}

*w = '';

return w[0];

}

int main() {

tnode *root = NULL; // 指针一定要初始化为NULL

char word[MAXWORD];

while((getword(word, MAXWORD)) != EOF) {

if(isalnum(word[0])) {

root = add_tree(root, word);

}

print_tree(root);

free_node(root);

return 0;

}

二叉树统计单词方法

二叉树统计单词方法2：

如果使用二叉数来完成这个问题，我们将会得到更好的时间复杂度O(lgn)，n为节点个数。也可以说，所花费的时间将会和树的高度成正比。二叉数的所有操作的时间复杂度都是这么多，包括插入、删除、建立、查找(来自：算法导论第三版第12章)。所以，对于一个单词而言，如果已经存放在了树中，那么就是查找，将新单词放入的就是插入。然而，实际上这两个动作应该是结合起来的，因为在其该在的位置找不到的时候，我们已经得到新单词该在的位置，所以直接插入即可。具体请看下面代码的实现。

二叉数的数据结构中基本的有三个：

自身数据

左孩子

右孩子

我们该题中，自身数据就是单词，而且还需要多一个统计单词次数的数值。

code如下：

[cpp] view plain copy

struct tnode {

char *word;

int count;

struct tnode *left;

struct tnode *right;

};

面对过程的流程以及代码

作为一般讲c语言的书，很显然是会用面向过程的思维方式来完成的，由下面的main函数可以具体的流程：

[cpp] view plain copy

int main()

{

struct tnode *root;

char word[MAXWORD];

root = NULL;

while (getword(word, MAXWORD) != EOF)//读入一个单词

if (isalpha(word[0]))

root = addtree(root, word);//将单词加入到树中

treeprint(root);//打印树

return 0;

}

main函数中的addtree(root, word)包含了更多的流程，我们来看看addtree函数：

[cpp] view plain copy

struct tnode *addtree(struct tnode *p, char *w)

{

//判断是否是新节点

if (p == NULL) {

/* a new word has arrived */

p = talloc();

/* make a new node */

p->word = strdup1(w);//复制w的过程，如单纯的赋值，就会使p->word和w指向了一处。

p->count = 1;

p->left = p->right = NULL;

return p;

}

//不是新节点

int cond;

cond = strcmp(w, p->word);

if (cond== 0){

p->count++;/* repeated word */

}else if (cond < 0){//就到左子树去找

p->left = addtree(p->left, w);

}else{//就到右子树去找

p->right = addtree(p->right, w);

}

return p;

}

通过上述代码，我相信不仅对本题的流程有了基本的认识，并且也对二叉树的查找和插入有了一定的了解。实际上，而二叉数建立的过程就是查找和插入的过程。

其中的两个函数的代码：

[cpp] view plain copy

/* talloc: make a tnode */

/**

* 为新节点分配内存

struct tnode *talloc(void)

{

return (struct tnode *) malloc(sizeof(struct tnode));

}

/**

* 完成对单词的复制，避免不同指针指向同一个单词，对单词产生干扰

char *strdup1(char *s)

{

char *p;

/* make a duplicate of s */

p = (char *) malloc(strlen(s)+1); /* +1 for ’’ */

if (p != NULL)

strcpy(p, s);

return p;

}

读入单词

[cpp] view plain copy

/* getword: get next word or character from input */

/**

* 没有调用高级的库函数使得这段代码略显复杂

* 但是这些基本的方式能够让我更深刻的体会了一些编程的高级技巧

int getword(char *word, int lim)

{

int c, getch(void);

void ungetch(int);

char *w = word;

while (isspace(c = getch()))

;

if (c != EOF)

*w++ = c;

if (!isalpha(c)) {

*w = '';

return c;

}

for ( ; --lim > 0; w++){

if (!isalnum(*w = getch())) {

ungetch(*w);//读的后一个要放回去，因为后一个不是我们要要的，但是我们又已经读了

break;

}

*w = '';

return word[0];

}

#define BUFSIZE 100

char buf[BUFSIZE];

int bufp = 0;

/* buffer for ungetch */

/* next free position in buf */

int getch(void) /* get a (possibly pushed-back) character */

{

return (bufp > 0) ? buf[--bufp] : getchar();

}

void ungetch(int c)

/* push character back on input */

{

if (bufp >= BUFSIZE)

printf("ungetch: too many charactersn");

else

buf[bufp++] = c;

}

打印结果

都快忘了

先序遍历：先根，再左，后右

中序遍历：先左，再根，后右

后序遍历：先左，再右，后中

[cpp] view plain copy

/* treeprint: in-order print of tree p */

/**

* 递归打印，采用了中序遍历。

void treeprint(struct tnode *p)

{

if (p != NULL) {

treeprint(p->left);

printf("%4d %sn", p->count, p->word);

treeprint(p->right);

}

打印实例

就拿我们编写的代码作一个测试，结果如下，不能给中文做这样测试，因为中文里面的字都没有空格

[cpp] view plain copy

zy@zy:~/Documents/eclipseWorkSpace/test/src$ ./a.out

1 EOF

3 MAXWORD

1 NULL

2 addtree

4 char

1 count

1 ctype

1 define

2 getword

4 h

1 if

4 include

4 int

1 isalpha

1 left

1 main

1 return

1 right

5 root

1 stdio

1 stdlib

1 string

7 struct

7 tnode

2 treeprint

1 void

1 while

5 word

zy@zy:~/Documents/eclipseWorkSpace/test/src$

源代码

[cpp] view plain copy

#include

#define MAXWORD 100

struct tnode *addtree(struct tnode *, char *);

void treeprint(struct tnode *);

int getword(char *, int);

struct tnode {

char *word;

int count;

struct tnode *left;

struct tnode *right;

};

int main()

{

struct tnode *root;

char word[MAXWORD];

root = NULL;

while (getword(word, MAXWORD) != EOF)//读入一个单词

if (isalpha(word[0]))

root = addtree(root, word);//将单词加入到树中

treeprint(root);//打印树

return 0;

}

struct tnode *talloc(void);

char *strdup1(char *s);

struct tnode *addtree(struct tnode *p, char *w)

{

//判断是否是新节点

if (p == NULL) {

/* a new word has arrived */

p = talloc();

/* make a new node */

p->word = strdup1(w);//复制w的过程，如单纯的赋值，就会使p->word和w指向了一处。

p->count = 1;

p->left = p->right = NULL;

return p;

}

//不是新节点

int cond;

cond = strcmp(w, p->word);

if (cond== 0){

p->count++;/* repeated word */

}else if (cond < 0){//就到左子树去找

p->left = addtree(p->left, w);

}else{//就到右子树去找

p->right = addtree(p->right, w);

}

return p;

}

/* treeprint: in-order print of tree p */

/**

* 递归打印，采用了中序遍历。

void treeprint(struct tnode *p)

{

if (p != NULL) {

treeprint(p->left);

printf("%4d %sn", p->count, p->word);

treeprint(p->right);

}

/* talloc: make a tnode */

/**

* 为新节点分配内存

struct tnode *talloc(void)

{

return (struct tnode *) malloc(sizeof(struct tnode));

}

/**

* 完成对单词的复制，避免不同指针指向同一个单词，对单词产生干扰

char *strdup1(char *s)

{

char *p;

/* make a duplicate of s */

p = (char *) malloc(strlen(s)+1); /* +1 for ’’ */

if (p != NULL)

strcpy(p, s);

return p;

}

/* getword: get next word or character from input */

/**

* 没有调用高级的库函数使得这段代码略显复杂

* 但是这些基本的方式能够让我更深刻的体会了一些编程的高级技巧

int getword(char *word, int lim)

{

int c, getch(void);

void ungetch(int);

char *w = word;

while (isspace(c = getch()))

;

if (c != EOF)

*w++ = c;

if (!isalpha(c)) {

*w = '';

return c;

}

for ( ; --lim > 0; w++){

if (!isalnum(*w = getch())) {

ungetch(*w);//读的后一个要放回去，因为后一个不是我们要要的，但是我们又已经读了

break;

}

*w = '';

return word[0];

}

#define BUFSIZE 100

char buf[BUFSIZE];

int bufp = 0;

/* buffer for ungetch */

/* next free position in buf */

int getch(void) /* get a (possibly pushed-back) character */

{

return (bufp > 0) ? buf[--bufp] : getchar();

}

void ungetch(int c)

/* push character back on input */

{

if (bufp >= BUFSIZE)

printf("ungetch: too many charactersn");

else

buf[bufp++] = c;

}

二叉树统计单词方法3：

一、

二、

三、

二叉树统计单词方法4：

一、

二、

三、

与“二叉树统计单词方法”相关文章

12 12 分享：

二叉树统计单词方法

与“二叉树统计单词方法”相关文章

相关课程

发表评论

最新文章

热门推荐