/* -get a word or a collocation of words (in quotes) from command line or CGI -search for the word in the WordNet -print all info about the word form to stdout as HTML */ #define WNHOME "WNHOME=H:\\WN16" #undef PAGES_STATIC /* if defined static urls are generated */ #include #include #include #include "wn.h" #include "../../CGIHTML/cgi-lib.h" #include "../../CGIHTML/html-lib.h" char** CommandLineArguments; struct { char* description; char* separator; } pointers_data[] = { /* ptr_type Value Pointer Symbol Search ANTPTR 1 ! Antonyms HYPERPTR 2 @ Hypernyms HYPOPTR 3 ~ Hyponyms ENTAILPTR 4 * Entailment SIMPTR 5 & Similar ISMEMBERPTR 6 #m Member meronym ISSTUFFPTR 7 #s Substance meronym ISPARTPTR 8 #p Part meronym HASMEMBERPTR 9 %m Member holonym HASSTUFFPTR 10 %s Substance holonym HASPARTPTR 11 %p Part holonym MERONYM 12 % All meronyms HOLONYM 13 # All holonyms CAUSETO 14 > Cause PPLPTR 15 < Participle of verb SEEALSOPTR 16 ^ Also see PERTPTR 17 \ Pertains to noun or derived from adjective ATTRIBUTE 18 = Attribute VERBGROUP 19 $ Verb group Extra: *SYNS 20 n/a Find synonyms *FREQ 21 n/a Polysemy FRAMES 22 n/a Verb example sentences and generic frames COORDS 23 n/a Noun coordinates *RELATIVES 24 n/a Group related senses *HMERONYM 25 n/a Hierarchical meronym search *HHOLONYM 26 n/a Hierarchical holonym search *WNESCORT 27 n/a Not used *WNGREP 28 n/a Find keywords by substring *OVERVIEW 29 n/a Show all synsets for word * not implemented here. Pointers used for different parts of speech in the database: Noun Verb Adjective Adverb ----------- ----------- ----------- ----------- Antonym ! Antonym ! Antonym ! Antonym ! Hyponym ~ Troponym ~ Similar & Derived from \ Hypernym @ Hypernym @ Pertains \ Meronym # Entails * Also see ^ Holonym % Cause > Attribute = Attrinute = Also see ^ */ {"", ""}, /* 0 */ {"%s is opposite to:", "; "}, /* 1 */ {"%s is a type of:", " => "}, /* 2 */ {"Types of %s include:", "; "}, /* 3 */ {"%s entails:", "; "}, /* 4 */ {"%s is similar to: ", "; "}, /* 5 */ {"%s is a member of:", "; "}, /* 6 */ {"Made from %s:", "; "}, /* 7 */ {"%s is a part of:", "; "}, /* 8 */ {"%s includes members:", "; "}, /* 9 */ {"%s contains substances:", "; "}, /* 10 */ {"%s has parts:", "; "}, /* 11 */ {"All meronyms:", "; "}, /* 12 */ {"All holonyms:", "; "}, /* 13 */ {"Cause:", "; "}, /* 14 */ {"Participle of verb: ", "; "}, /* 15 */ {"See also: ", "; "}, /* 16 */ {"%s is derived from:", "; "}, /* 17 */ {"%s is a value of:", "; "}, /* 18 */ {"Verb group:", "; "}, /* 19 */ {"", "; "}, /* 20 */ {"", "; "}, /* 21 */ {"", "; "}, /* 22 */ {"Coordinate terms:", "; "}, /* 23 */ {"", "; "}, /* 24 */ {"", "; "}, /* 25 */ {"", "; "}, /* 26 */ {"", "; "}, /* 27 */ {"", "; "}, /* 28 */ {"", "; "} /* 29 */ }; char* familiar[] = { "","A very rare","A rare", "An uncommon","A common", "A familiar","A very familiar","An extremely familiar" }; void printusage(); int error_message(char *); void find_word(char* word, SynsetPtr* search); void print_results(char* word, SynsetPtr* search); void print_title(SynsetPtr x, int pos); void print_senses(SynsetPtr x); void print_senses_detailed(SynsetPtr x, int pos); char* char2html(char ch); char* str_to_html(char* x); char* word_to_str(char* w); char* word_to_link(char* w); char* word_to_html(char* x); char* defn_to_html(char* x); char* pos_short(int pos); void print_synset_html_linked(SynsetPtr next, int whichword); void print_alternative_forms(char* word, char* prefix); int get_sense_count(SynsetPtr start); void print_pointers(SynsetPtr start); void print_pointers_type(SynsetPtr start, int type, char* prefix); void print_pointers_type_tree(SynsetPtr start, int type, char* prefix); void print_pointer_in_cell(SynsetPtr start, int type, int nest_level, int with_row); void print_coords(SynsetPtr start); void print_verb_frames(SynsetPtr syn); void print_examples(SynsetPtr synptr); char* get_example(char *offset); char* get_synset_html_linked(SynsetPtr next, int pars); int main(int argc,char *argv[]) { llist entries = {NULL}; /* clear list of CGI parameters */ SynsetPtr search[NUMPARTS]; /* search results */ int i; char* word; char* isCGI; CommandLineArguments = argv; /* path command line options to cgi library */ isCGI = getenv("REQUEST_METHOD"); if(!isCGI) { /* Check is there a word as argument */ if(argc < 2) { printusage(); return(1); } if(argc > 2) { display_message("Error: enter a single word or a collocation of words in quotes\n"); return(1); } word = argv[1]; } else { /* export a path for WordNet!!! When called in CGI mode the path is unknown! */ _putenv(WNHOME); /* print HTML header for server */ html_header(); /* read parameters */ read_cgi_input(&entries); word = cgi_val(entries, "q"); if(!word) { /* print start page */ html_begin("WordNet Search"); h1("Search word:"); printf("
\n"); printf("\n"); printf("\n"); printf("
\n"); goto main_exit; } } html_begin(word); /* Initialize the error function pointer */ display_message = error_message; /* Open database */ if(wninit()) { display_message("Error: cannot open WordNet database\n"); goto main_exit; } /* Search the word */ find_word(word, search); /* Print results */ print_results(word, search); /* Release memory */ for(i = 0; i < NUMPARTS; i++) if(search[i]) free_syns(search[i]); main_exit: html_end(); if(isCGI) list_clear(&entries); return(0); } void find_word(char* word, SynsetPtr* search) { int i; char buf[128]; /* clear results*/ for(i = 0; i < NUMPARTS; i++) search[i] = NULL; /* copy word to buffer*/ strncpy(buf, word, 128); word[127] = '\0'; /* make lower case and replace spaces with underscores */ strtolower(strsubst(buf, ' ', '_')); /* get all possible searches for this exact word: 0 - no match 1 - there is a match */ for(i = 0; i < NUMPARTS; i++) { if(is_defined(buf, i + 1)) { search[i] = (struct ss*)1; } } /* perform search for each part of speech */ for(i = 0; i < NUMPARTS; i++) { if(search[i]) { search[i] = findtheinfo_ds(buf, i + 1, OVERVIEW, ALLSENSES); } } } void print_results(char* word, SynsetPtr* search) { int i; int s; /* start html body */ printf("

%s

\n", word); /* check if there are matches */ for(i = 0, s = 0; i < NUMPARTS; i++) if(search[i]) s++; if(s == 0) { printf("

Word not found\n"); } /* print overview data for each part of speech */ for(i = 0; i < NUMPARTS; i++) { if(search[i]) { print_title(search[i], i+1); print_senses(search[i]); } } /* print 'See: alternative spelling forms '*/ if(s) print_alternative_forms(word, "

See also: "); else print_alternative_forms(word, "

See: "); /* print detailed data for each part of speech */ printf("\n\n


\n\n"); for(i = 0; i < NUMPARTS; i++) { if(search[i]) { print_senses_detailed(search[i], i+1); } } } void print_title(SynsetPtr x, int pos) { int i; SynsetPtr next = x; int f; #ifdef PAGES_STATIC char* pos_str[] = { "", "noun", "verb", "adjective", "adverb" }; #else char* pos_str[] = { "", "noun", "verb", "adjective", "adverb" }; #endif /* get number of senses */ i = 1; while(next = next->nextss) { i++; } /* translate number of senses to familiarity index */ f = 7; if(i == 0) f = 0; else if(i == 1) f = 1; else if(i == 2) f = 2; else if(i >= 3 && i <= 4) f = 3; else if(i >= 5 && i <= 8) f = 4; else if(i >= 9 && i <= 16) f = 5; else if(i >= 17 && i <= 32) f = 6; /* print title */ printf("%s %s which %s:\n", familiar[f], pos_str[pos], (i == 1)?"means":"may mean"); } void print_senses(SynsetPtr x) { int i; int j; SynsetPtr next; /* start ordered list */ printf("
    \n"); /* print each sense */ for(i = 1, next = x; next; next = next->nextss, i++) { /* print item number */ printf("
  1. \n\t", i, next->pos); /* print synset */ /* first is our word */ printf("%s%s", word_to_html(next->words[next->whichword-1]), next->wcount == 1 ? "" : ": "); for(j = 0; j < next->wcount; j++) { /* skip our word */ if((j+1) == next->whichword) continue; if(j != 0 && !(j == 1 && next->whichword == 1)) { printf(", "); } /* print next word from the synset */ printf("%s", word_to_html(next->words[j])); } /* if word count is 1 print also the gloss */ if(next->wcount == 1) { printf(" %s", str_to_html(next->defn)); } printf("."); /* close item */ printf("\n"); } /* end ordered list */ printf("
\n"); } char* word_to_str(char* w) { static char buf[128]; /* copy raw word to buffer */ strncpy(buf, w, 128); buf[127] = '\0'; /* remove underscores */ strsubst(buf, '_', ' '); /* remove trailing adjective marker - (...) */ strsubst(buf, '(', '\0'); return(buf); } char* word_to_link(char* w) { static char buf[256]; char* x; #ifdef PAGES_STATIC int s; int i; int j; #endif /* get cleaned word to x */ x = word_to_str(w); /* generate link */ #ifdef PAGES_STATIC s = sprintf(buf, "%s", x, x); #else sprintf(buf, "%s", x, x); #endif return(buf); } char* word_to_html(char* x) { return(str_to_html(word_to_str(x))); } char* char2html(char ch) { static char* ch_html=" "; switch(ch) { case '<': return("<"); case '>': return(">"); case '&': return("&"); case '\"': return("""); default: ch_html[0] = ch; return(ch_html); } } char* str_to_html(char* x) { static char buf[1024]; int i; int s = 0; for(i = 0; x[i]; i++) { s += sprintf(buf + s, "%s", char2html(x[i])); if(s > 1000) break; } return(buf); } void printusage() { printf("\nSearch the WordNet database and print results in HTML.\n\n"); printf("Usage:\twnsearch.exe search_word\n"); printf("\twnsearch.exe \"a collocation of words\"\n"); } int error_message(char *msg) { printf(str_to_html(msg)); return(0); } /* print detailed information about each sense */ void print_senses_detailed(SynsetPtr x, int pos) { int i; SynsetPtr next; /* start defininition list */ printf("
\n"); /* print each sense */ for(i = 1, next = x; next; next = next->nextss, i++) { /* start definition term */ printf("
"); /* our word with anchor for this sense */ printf("%d. %s %s%s", i, pos_short(pos), i, next->pos, word_to_str(next->words[next->whichword-1]), next->wcount > 1 ? ": " : "" ); /* all other words in this synset */ print_synset_html_linked(next, next->whichword); printf(". "); /* print gloss */ printf("
%s\n", defn_to_html(next->defn)); /* print more detailed info */ print_pointers(next); if(getsstype(next->pos) == NOUN) { print_coords(next); } if(getsstype(next->pos) == VERB) { print_verb_frames(next); } } /* end defininition list */ printf("
\n"); } /* make the definition look better and convert it to html */ char* defn_to_html(char* x) { char* h; int i; /* convert string to html */ h = str_to_html(x); /* strip of starting '(' and ending ')' */ for(i = 0; h[i]; i++) if(h[i] == '(') { h[i] = ' '; break; } for(i = strlen(h) - 1; i >= 0; i--) if(h[i] == ')') { h[i] = '.'; break; } /* uppercase the first letter and skip () in the beginning */ for(i = 0; h[i]; i++) if(isalpha(h[i])) { if(h[i] >= 'a' && h[i] <= 'z') h[i] = h[i] - 32; break; } else if(h[i] == '(') { while(h[i++] && h[i] != ')'); /* skip (...) */ if(!h[i]) break; /* quit if end of string */ } return(h); } char* pos_short(int pos) { /* convert POS code to a short decriptive string */ if(pos == 1) return "n."; else if(pos == 2) return "v."; else if(pos == 3) return "adj."; else if(pos == 4) return "adv."; return ""; } void print_synset_html_linked(SynsetPtr next, int whichword) { int j; /* print a string with a list of words in the synset linked to their pages */ for(j = 0; j < next->wcount; j++) { /* skip our word */ if((j+1) == whichword) continue; if(j != 0 && !(j == 1 && whichword == 1)) { printf(", "); } /* print next word from the synset */ printf("%s", word_to_link(next->words[j])); } } void print_pointers(SynsetPtr start) { char buf[256]; /*char buf2[256];*/ int i; /* search for every possible type of pointer */ for(i = 1; i <= MAXPTR; i++) { /* print header for that pointer type */ sprintf(buf, pointers_data[i].description, word_to_html(start->words[start->whichword-1])); if(i == PERTPTR && getsstype(start->pos) == ADJ) { /* resolve double meaning for this pointer */ sprintf(buf, "%s pertains to:", word_to_html(start->words[start->whichword-1])); } /* upper case the first letter of header */ if(buf[0] >= 'a' && buf[0] <= 'z') buf[0] -= 32; /* print list of pointers */ if(i == HYPERPTR) { print_pointers_type_tree(start, i, buf); } else { print_pointers_type(start, i, buf); } } } void print_pointers_type(SynsetPtr start, int type, char* prefix) { SynsetPtr next; int j; int s; int pos; /* are there any pointers of that type */ for(j = 0, s = 0; j < start->ptrcount; j++) if(start->ptrtyp[j] == type) s++; if(!s) return; /* start definition list and print prefix */ printf("\n\t
%s
\n\t", prefix); /* print all pointers separated by a separator*/ for(j = 0, s = 0; j < start->ptrcount; j++) { if(start->ptrtyp[j] == type) { if(s) printf("%s", pointers_data[type].separator); s++; /* what database */ pos = start->ppos[j]; next = read_synset(pos, start->ptroff[j], ""); if(next) { print_synset_html_linked(next, 0); /* do not exlude our word */ free_synset(next); } } } printf("\n\t
\n"); } void print_pointers_type_tree(SynsetPtr start, int type, char* prefix) { int j; int s; /* are there any pointers of that type */ for(j = 0, s = 0; j < start->ptrcount; j++) if(start->ptrtyp[j] == type) s++; if(!s) return; /* start definition list, and start a table */ printf("\n\t
%s
", prefix); printf(""); /* print all pointers as cells */ print_pointer_in_cell(start, type, 0, 1); /* end a table and a list */ printf("\n\t
\n"); } void print_pointer_in_cell(SynsetPtr start, int type, int nest_level, int with_row) { int pos; int j; SynsetPtr next; int s = 0; /* start a new row if necessary */ for(j = 0; j < start->ptrcount; j++) { if(start->ptrtyp[j] == type) { /* what database */ pos = start->ppos[j]; /* read and print synset */ next = read_synset(pos, start->ptroff[j], ""); if(next) { if(with_row || s) { /* end a row */ if(s) printf(""); /* start a row */ printf("\n\t"); /* make an indentation */ if(nest_level > 0) printf("", nest_level * 2); } /* start a cell */ printf("\n\t»"); print_synset_html_linked(next, 0); /* do not exclude a word */ /* end a cell */ printf("\n\t"); /* print recursively all other cells in a row */ if(nest_level < 50) /* protect against infinite loops */ { print_pointer_in_cell(next, type, nest_level+1, 0); } free_synset(next); s++; /* increment branches number */ } } } /* end a row */ if(with_row) printf(""); } void print_alternative_forms(char* word, char* prefix) { int s = 0; int i; char* morphword; /* print all alternative forms, and start the printout from the prefix string if found any */ for(i = 1; i <= NUMPARTS; i++) { morphword = morphstr(word, i); while(morphword) { if(is_defined(morphword, i)) { if(!s) { printf("%s", prefix); } else { printf(", "); } s = 1; printf("%s %s", pos_short(i), word_to_link(morphword)); } /* try next word */ morphword = morphstr(NULL, i); } } if(s) printf(". "); } int get_sense_count(SynsetPtr start) { int i = 1; /* how many senses for the word */ while(start = start->nextss) i++; return i; } void print_coords(SynsetPtr start) { SynsetPtr next; int j; int i; int s; char buf[256]; /* works only for nouns */ if(*start->ppos != NOUN) return; /* are there any coordinate terms except this one */ for(j = 0, s = -1; j < start->ptrcount; j++) if(start->ptrtyp[j] == HYPERPTR) { next = read_synset(NOUN, start->ptroff[j], ""); if(!next) continue; for(i = 0; i < next->ptrcount; i++) if(next->ptrtyp[i] == HYPOPTR) s++; free_synset(next); if(s > 0) break; /* break if at least two hyponyms were found */ } if(!s) return; /* not enough coords */ /* count hypernyms */ for(j = 0, s = 0; j < start->ptrcount; j++) if(start->ptrtyp[j] == HYPERPTR) s++; /* start definition list and print prefix */ sprintf(buf, pointers_data[COORDS].description, word_to_html(start->words[start->whichword-1])); /* upper case the first letter of header */ if(buf[0] >= 'a' && buf[0] <= 'z') buf[0] -= 32; if(s > 1) { printf("\n\t
"); printf("%s", buf); printf("
\n\t"); buf[0] = '\0'; } /* get all immediate hypernyms and print all their hyponyms */ for(j = 0; j < start->ptrcount; j++) { if(start->ptrtyp[j] == HYPERPTR) { if(s > 1) printf("\n\t» "); /* read synset */ next = read_synset(NOUN, start->ptroff[j], ""); if(next) { /* print hypernyms if more than one */ if(s > 1) { print_synset_html_linked(next, 0); /* do not exclude a word */ printf(":"); } /* print its hyponyms */ print_pointers_type(next, HYPOPTR, buf); /* release memory */ free_synset(next); } } } if(s > 1) printf("\n\t
\n"); } void print_verb_frames(SynsetPtr syn) { int i; char* x; char* frame; int p; /* are there examples */ print_examples(syn); /* are there verb frames? */ if(syn->fcount == 0) return; /* start definition list */ printf("\n\t
Verb frame%s:
\n\t", syn->fcount > 1 ? "s" : ""); /* print all frames */ for(i = 0; i < syn->fcount; i++) { if(syn->frmto[i] == syn->whichword) { /* applies only to the selected verb */ x = word_to_link(syn->words[syn->whichword - 1]); } else { /* applies to entire synset */ x = get_synset_html_linked(syn, syn->wcount != 1); /* enclose in (..) if many words*/ } if(i) printf("
\n"); /* print frame and substitute "----" with a verb or synset */ frame = frametext[syn->frmid[i]]; for(p = 0; frame[p]; p++) { if(frame[p] == '-' && frame[p + 1] == '-') { /* "--" detected. print x and advance 3 characters ahead to skip all "----" */ printf(x); p += 3; } else { printf("%.1s", frame + p); } } } /* end definition list */ printf("\n\t
\n"); } void print_examples(SynsetPtr synptr) { char tbuf[256]; char* temp; char* offset; int wdnum; int found = 0; char* ex; int s = 0; if(vidxfilefp != NULL) { wdnum = synptr->whichword - 1; sprintf(tbuf,"%s%%%-1.1d:%-2.2d:%-2.2d::", synptr->words[wdnum], getpos(synptr->pos), synptr->fnum, synptr->lexid[wdnum]); if((temp = bin_search(tbuf, vidxfilefp)) != NULL) { /* skip over sense key and get sentence numbers */ temp += strlen(synptr->words[wdnum]) + 11; strcpy(tbuf, temp); offset = strtok(tbuf, " ,\n"); while(offset) { ex = get_example(offset); offset = strtok(NULL, ",\n"); if(ex) { if(!s) printf("\n\t
Example%s:
\n\t", offset ? "s" : ""); s++; printf(""); printf(ex, synptr->words[wdnum]); printf("
"); } } found = 1; } } if(s) printf("\n\t
\n"); } char* get_example(char *offset) { char* line = NULL; if(vsentfilefp != NULL) if(line = bin_search(offset, vsentfilefp)) while(*line != ' ') line++; return(line); } char* get_synset_html_linked(SynsetPtr next, int pars) { static char buf[1024]; int j; int s = 0; if(pars) s += sprintf(buf + s, "("); /* print a string with a list of words in the synset linked to their pages */ for(j = 0; j < next->wcount; j++) { if(j != 0) s += sprintf(buf + s, ", "); /* print next word from the synset */ s += sprintf(buf + s, "%s", word_to_link(next->words[j])); } if(pars) s += sprintf(buf + s, ")"); return buf; }