|
读取英文单词,按顺序排列(C++学习题目)(1)
/*
 * Original article text, preserved verbatim.
 *
 * 读取英文单词,按顺序排列(C++学习题目)
 *
 * 题目 (problem statement):
 * 1. Implement a function that counting Word frequency. It reads an English article from an user-specified txt file(article.txt) and counts their number. Those exclusive words should not be counted. Output the words and counts to 2 files. One(3_1_1out.txt) is in lexicographic order, and the other (3_1_2out.txt) is in descending frequency order.
 *    – Exclusive words:
 *    – Using map
 *
 * 代码 (code):
 */

#pragma warning(disable:4786)

#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
#include <functional>
#include <algorithm>
#include <vector>
#include <map>
#include <set>

using namespace std;

typedef map<string, int>::value_type sival_type;

namespace {

// Width of the left-aligned word column in both report files.
const string::size_type kWordColumnWidth = 15;

// Words shorter than this many characters are never counted.
const string::size_type kMinWordLength = 3;

// Lower-cases the ASCII letters 'A'-'Z' of `text` in place.
// Every other character is left untouched.
void to_lower_ascii(string &text)
{
    for (string::size_type i = 0; i < text.size(); ++i) {
        if (text[i] >= 'A' && text[i] <= 'Z') {
            text[i] = static_cast<char>(text[i] - 'A' + 'a');
        }
    }
}

// Writes one report line: the word padded with spaces up to
// kWordColumnWidth, followed by its count.
// The padding is computed only when the word is shorter than the column;
// subtracting a larger size from the width would wrap around (size_type is
// unsigned) and produce a practically endless run of spaces.
void write_entry(ostream &out, const string &word, int count)
{
    out << word;
    if (word.size() < kWordColumnWidth) {
        out << string(kWordColumnWidth - word.size(), ' ');
    }
    out << "出现 " << count << "\t次" << '\n';
}

} // namespace

// Reads the file `file_name` line by line.
// Returns a heap-allocated vector holding one string per line; the caller
// owns it and must delete it.
// Terminates the program with exit(1) if the file cannot be opened.
vector<string> *retrieve_text(string file_name)
{
    ifstream article_file(file_name.c_str(), ios::in);
    if (!article_file) {
        cerr << "Can't open " << file_name << " !" << endl;
        exit(1);
    }

    vector<string> *lines_of_text = new vector<string>;
    string textline;
    while (getline(article_file, textline)) {
        lines_of_text->push_back(textline);
    }
    return lines_of_text;
}

// Converts every ASCII upper-case letter in every line of `text_file`
// to lower case, in place.
void strip_caps(vector<string> *text_file)
{
    for (vector<string>::iterator line = text_file->begin();
         line != text_file->end(); ++line) {
        to_lower_ascii(*line);
    }
}

// Splits every line of `text_file` into words, where a word is a maximal run
// of lower-case ASCII letters; any other character acts as a separator.
// Upper-case letters are separators too, so call strip_caps() first.
// Returns a heap-allocated vector of the words in order of appearance; the
// caller owns it and must delete it.
vector<string> *separate_words(const vector<string> *text_file)
{
    // The full alphabet: leaving a letter out would split words at it.
    const string letters("abcdefghijklmnopqrstuvwxyz");

    vector<string> *words = new vector<string>;
    for (vector<string>::const_iterator line = text_file->begin();
         line != text_file->end(); ++line) {
        string::size_type start = line->find_first_of(letters);
        while (start != string::npos) {
            const string::size_type stop = line->find_first_not_of(letters, start);
            if (stop == string::npos) {
                // The word runs to the end of the line.
                words->push_back(line->substr(start));
                break;
            }
            words->push_back(line->substr(start, stop - start));
            start = line->find_first_of(letters, stop);
        }
    }
    return words;
}

// Counts how often each word of `words` occurs.
// Words shorter than kMinWordLength characters and words listed in the
// exclusion file "pkg95.txt" are skipped.
// Returns a heap-allocated map from word to count; the caller owns it and
// must delete it.
// Terminates the program with exit(1) if "pkg95.txt" cannot be opened.
map< string, int > *appear_total(const vector<string> *words)
{
    // Build the set of excluded words.
    set<string> exclusion_set;
    ifstream exclusion_file("pkg95.txt", ios::in);
    if (!exclusion_file) {
        cerr << "Can't open pkg95.txt !" << endl;
        exit(1);
    }

    // Read whitespace-separated tokens rather than whole lines, so that the
    // list works with one word per line or several, and so that trailing
    // blanks or '\r' characters never become part of an entry.
    // Entries are lower-cased because the words produced by
    // separate_words() contain lower-case letters only.
    string excluded;
    while (exclusion_file >> excluded) {
        to_lower_ascii(excluded);
        exclusion_set.insert(excluded);
    }

    map<string, int> *word_map = new map<string, int>;
    for (vector<string>::const_iterator word = words->begin();
         word != words->end(); ++word) {
        if (word->size() < kMinWordLength || exclusion_set.count(*word)) {
            continue;
        }
        // operator[] value-initialises a missing entry to 0 before the increment.
        ++(*word_map)[*word];
    }
    return word_map;
}

// Re-indexes `text_map` by count in descending order and writes the result to
// "3_1_2out.txt" in the current directory.
// Returns the heap-allocated multimap (count -> word); the caller owns it and
// must delete it. It is returned even when the output file cannot be opened;
// in that case an error is reported and nothing is written.
multimap< int, string, greater<int> > *
multimap_total(map<string, int> *text_map)
{
    typedef multimap< int, string, greater<int> > count_index;

    count_index *word_map = new count_index;
    for (map<string, int>::const_iterator entry = text_map->begin();
         entry != text_map->end(); ++entry) {
        word_map->insert(make_pair(entry->second, entry->first));
    }

    const string ofile("3_1_2out.txt");
    ofstream outfile(ofile.c_str());
    if (!outfile) {
        cerr << "error: unable to open output file: " << ofile << endl;
        return word_map;
    }

    for (count_index::const_iterator entry = word_map->begin();
         entry != word_map->end(); ++entry) {
        write_entry(outfile, entry->second, entry->first);
    }
    cout << "程序已将处理结果写入3_1_2out.txt,该文件保存在当前目录" << endl;
    return word_map;
}

// Writes `text_map` in its own (lexicographic) key order to "3_1_1out.txt" in
// the current directory. Reports an error and writes nothing if the file
// cannot be opened.
void map_output(map<string, int> *text_map)
{
    const string ofile("3_1_1out.txt");
    ofstream outfile(ofile.c_str());
    if (!outfile) {
        cerr << "error: unable to open output file: " << ofile << endl;
        return;
    }

    for (map<string, int>::const_iterator entry = text_map->begin();
         entry != text_map->end(); ++entry) {
        write_entry(outfile, entry->first, entry->second);
    }
    cout << "程序已将处理结果写入3_1_1out.txt,该文件保存在当前目录" << endl;
}

// Reads "article.txt", counts its words and writes both reports.
int main()
{
    vector<string> *text_file = retrieve_text("article.txt");
    strip_caps(text_file);

    vector<string> *words = separate_words(text_file);
    map< string, int > *text_map = appear_total(words);

    map_output(text_map);
    multimap< int, string, greater<int> > *by_count = multimap_total(text_map);

    // Every helper above hands back an owning raw pointer; release them here.
    delete by_count;
    delete text_map;
    delete words;
    delete text_file;
    return 0;
}

/*
 * Original article text, preserved verbatim.
 *
 * 程序执行结果 (program output)
 *
 * 3_1_1out.txt
 *   article 出现 2 次
 *   counted 出现 1 次
 *   counting 出现 1 次
 *   counts 出现 2 次
 *   descending 出现 1 次
 *   english 出现 1 次
 *   exclusive 出现 1 次
 *   file 出现 1 次
 *   files 出现 1 次
 *   frequency 出现 2 次
 *   function 出现 1 次
 *   implement 出现 1 次
 *   lexicographic 出现 1 次
 *   output 出现 1 次
 *   reads 出现 1 次
 *   specified 出现 1 次
 *   txt 出现 4 次
 *   user 出现 1 次
 *   word 出现 1 次
 *   words 出现 2 次
 *
 * 3_1_2out.txt
 *   txt 出现 4 次
 *   article 出现 2 次
 *   counts 出现 2 次
 *   frequency 出现 2 次
 *   words 出现 2 次
 *   counted 出现 1 次
 *   counting 出现 1 次
 *   descending 出现 1 次
 *   english 出现 1 次
 *   exclusive 出现 1 次
 *   file 出现 1 次
 *   files 出现 1 次
 *   function 出现 1 次
 *   implement 出现 1 次
 *   lexicographic 出现 1 次
 *   output 出现 1 次
 *   reads 出现 1 次
 *   specified 出现 1 次
 *   user 出现 1 次
 *   word 出现 1 次
 *
 * 附PKG95.TXT 文件内容 (contents of PKG95.TXT; the list continues on the
 * following lines of this document):
 * different necessary need needed needing newest next no nobody non none not nothing now nowhere of off often new old older oldest on once one only open again among already about above against alone after also although along always an across and another ask asking asks backed away
 */
should show came all almost before began back backing be became because becomes been at behind being best better between big showed ended ending both but by asked backs can cannot number numbers case few find finds cases clearly her herself come could did here beings fact far felt become first for four from full fully furthers gave general generally get gets gives facts go going good goods certain certainly clear great greater greatest group grouped grouping groups got has have having he further furthered had furthering itself faces highest him himself his how however if important interests into is it its anyone anything anywhere are area areas around as seconds see seem seemed seeming seems sees right several shall she enough even evenly over part parted parting parts per down place places point pointed pointing points possible present presented presenting ends high mrs mUCh must my myself presents down problem problems put puts quite will with within rather really room rooms said same right showing shows side sides since small smaller smallest so some somebody someone something somewhere state states such sure take taken than that the their then there therefore these thought thoughts three through thus to today together too took toward turn turned turning turns two still under until up others upon us use used uses very want wanted wanting wants was way we well wells went were what when where whether which while who whole year years yet you everyone everything everywhere young younger youngest your yours ever works every everybody face other our out just interesting high might keep keeps give given higher kind knew know known knows large largely last later latest least less needs never newer let lets like likely long high longer longest made make making man many may me member members men more in interest interested most mostly mr opened opening new opens or perhaps order ordered ordering orders differ differently do does done downed downing downs they thing things 
think thinks this those ways why without work worked working would during each early either end though still whose saw say says them second any anybody
|