Задача на файлы. Частотный словарь

@T_R_M · Регистрация: 13.03.2016

Author24 — интернет-сервис помощи студентам

Очень нужна помощь, уже просто не соображаю.
условие:
Напишите программу, которая составляет частотный словарь заданного
текста, пропуская при этом символы-разделители. Словом будем считать
непрерывную последовательность символов латинского алфавита. Слова,
различающиеся только регистром символов, считаются одинаковыми.
Элемент частотного словаря содержит слово и количество повторений этого
слова, разделенные одним пробелом. Слова, встречающиеся менее, чем K
раз, в словарь не включаются (параметр K вводится с консоли). Частотный
словарь должен быть упорядочен по убыванию количества повторений
слова. Исходный текст программа должна читать из файла input.txt, а
сформированный словарь – помещать в файл output.txt.

Работает всё, кроме упорядочивания по убыванию, то есть сортировки. Не могу понять, где и какая именно проблема!

C++

#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <clocale>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// One record of the frequency dictionary: a word together with its counter.
struct dictionary
{
    int num;           // number of occurrences; starts at 1 for a new record
    std::string word;  // the word this record describes

    // A freshly created record counts as a single occurrence.
    dictionary() : num(1) {}
};
// Empty tag types thrown inside main() and caught at its end; each one selects
// the diagnostic message that is printed to the console.
struct In{};      // the input file cannot be opened
struct Out{};     // the output file cannot be opened
struct Empty{};   // the input file is empty
struct Negativ{}; // a negative repetition threshold was entered
struct Div{};     // the input contains only separator characters
struct No{};      // no word is repeated at least K times
int Sort(const void* one,const void* two)
{
    if (((dictionary*)(one))->num>(((dictionary*)(two))->num))
        return 1;
    if (((dictionary*)(one))->num==(((dictionary*)(two))->num))
        return 0;
    if (((dictionary*)(one))->num<(((dictionary*)(two))->num))
        return -1;
}
int main()
{
    setlocale (LC_ALL, ".1251");
    string s,l,x;
    const int nmax=301;
    const char *probel=" ";
    int n=50, K=0,count=0;
    char text[nmax-1];
    FILE *in=fopen("input.txt","rt");
    FILE *out=fopen("output.txt","w+");
    
    fgets(text,n,in);
    
    try
    {
        if (in==NULL) 
            throw In();
        if (feof(in))
            throw Empty();
        int i=0;
        do
        {
            if(text[strlen(text)-1]=='\n')
            {
                strcat(text,probel);
            }
            fgets(text,n,in);
        }       
        while(!feof(in)); 
        fseek(in,NULL,SEEK_SET);
 
            fclose(in);
            cout<<"Ведите минимальное количество раз: ";
            cin>>K;
            if (K<0)
                throw Negativ();
            for(i=1;i<257;i++)
                if ((i<65)||(i>90 && i<97)||(i>122 && i<256))
                    l+=(char)(i);
 
        fgets(text,n,in);
         while(!feof(in))
             {
 
             for(int i = 0; i < strlen(text); i++)
                text[i] = tolower(text[i]);
 
            fgets(text,n,in);
            }       
         fseek(in,NULL,SEEK_SET);
            fgets(text,n,in);
            string texts;
        int p(0),p2(0),z(0);
         while(!feof(in))
         {
             texts=text;
                while((p=texts.find_first_not_of(l,p2))!=-1)
                {
                    p2=texts.find_first_of(l,p);
                    x=texts.substr(p, p2-p);
                    z++;
                }
                p2=0;
            
                if (z==0)
                {
                    throw Div();
            
                }
                fgets(text,n,in);
            }
        dictionary *D=new dictionary[z];
        int j=0;
 
 
        fseek(in,NULL,SEEK_SET);
        fgets(text,n,in);
        while(!feof(in))
        {
            texts=text;
            //{     
                while((p=texts.find_first_not_of(l,p2))!=-1)
                {
                    p2=texts.find_first_of(l,p);
                    D[j++].word=texts.substr(p, p2-p);
                }
                p2=0;
                fgets(text,n,in);
            
        }
        
        for(int k=0;k<j;k++)
            for(i=k+1;i<j;i++)
                if(( D[k].word==D[i].word)&&(D[k].num!=-1))
                    {
                        D[k].num++;
                        D[i].num=-1;
                    }
        qsort(D,j, sizeof (dictionary),Sort);
        int v=0;    
        for(i=0;i<j;i++)
            {
                if ((D[i].num!=-1)&&(D[i].num>=K))
                {   
                    cout<<D[i].word<<" "<<D[i].num<<endl;
                    fputs(D[i].word.c_str(),out);
                    fprintf(out," %d\n",D[i].num);
                    v++;
                }
            }
        if (out==NULL) 
            throw Out();
        if (v==0) 
        {
            throw No(); 
            delete []D;
        }
        delete []D;
}
    catch (In)
    {
        cout<<"Входной файл не открывается!"<<endl;
    }
    catch (Out)
    {
        cout<<"Выходной файл не открывается!"<<endl;
    }
    catch (Empty)
    {
        cout<<"Входной файл пуст!"<<endl;
    }
    catch (Negativ)
    {
        cout<<"Введите неотрицательное число повторений!"<<endl;
    }
    catch (Div)
    {
        cout<<"В файле одни разделители!"<<endl;
    }
    catch (No)
    {
        cout<<"В файле нет слов, которые повторяются не меньше "<<K<<" раз."<<endl;
    }
    return 0;
}

@Fixer_84 · 02.12.2016, 00:04

T_R_M, все, что я могу сделать - это показать вам мой частотный словарь. Он прекрасно работает и может быть с легкостью переделан под ваши запросы. Напишите, если вам интересно.

Добавлено через 39 минут
Вот словарь. Может кому-то еще будет интересно...

C++

#include <iostream>
#include <cmath>
#include <string>
#include <fstream>
 
using namespace std;
 
/**
 * Builds an alphabetically ordered frequency dictionary of input.txt.
 *
 * Every maximal run of alphabetic characters (as classified by isalpha under
 * the locale selected below) is a word; words are compared case-sensitively.
 * output.txt receives a "PROCESSED WORDS:" header, one "<word>: <count>" line
 * per distinct word and a final "QUANTITY OF WORDS: <n>" line.
 *
 * @return always 0
 */
int main()
{
    setlocale(LC_ALL, "Russian");
    std::ifstream fin("input.txt");
    std::ofstream fout("output.txt");

    struct point
    {
        std::string x;
    };

    // Static storage: 450000 std::string objects are far larger than a
    // typical thread stack, so an automatic array would overflow it.
    const int MAX_WORDS = 450000;
    static point res[MAX_WORDS];
    int p = 0;  // number of words stored in res

    // Looping on getline() itself terminates both at EOF and when the file
    // could not be opened (eof() alone never becomes true in that case).
    std::string a;
    while (p < MAX_WORDS && getline(fin, a))
    {
        a += ' ';  // sentinel separator so the last word of the line is flushed
        std::string b;
        for (std::string::size_type i = 0; i < a.length() && p < MAX_WORDS; i++)
        {
            // isalpha() requires a value representable as unsigned char.
            if (isalpha(static_cast<unsigned char>(a[i])))
            {
                b += a[i];
            }
            else if (!b.empty())
            {
                res[p].x = b;
                p++;
                b.clear();
            }
        }
    }
    if (p == MAX_WORDS)
    {
        std::cerr << "warning: word limit reached, the rest of the input is ignored" << std::endl;
    }

    // Sort the words (bubble sort).
    // NOTE(review): O(n^2); std::sort would be preferable once <algorithm>
    // is included by this program.
    for (int i = 0; i < p; i++)
    {
        for (int j = p - 1; j > i; j--)
        {
            if (res[j].x < res[j - 1].x)
            {
                res[j - 1].x.swap(res[j].x);
            }
        }
    }
    // End of sorting

    fout << "PROCESSED WORDS:" << "\n\n";
    int d = 0;  // number of distinct words written
    for (int i = 0; i < p; )
    {
        // Equal words are adjacent after sorting; measure the run without
        // ever reading past the filled part of the array.
        int j = i + 1;
        while (j < p && res[j].x == res[i].x)
        {
            j++;
        }
        fout << res[i].x << ": " << (j - i) << '\n';  // number of repetitions
        d++;
        i = j;
    }
    fout << '\n';
    fout << "QUANTITY OF WORDS: " << d << '\n';  // number of processed words
    fin.close();
    fout.close();
    system("pause");
    return 0;
}

@T_R_M · 02.12.2016, 00:06 **[ТС]**

Fixer_84, извините за такой вопрос, но я правильно понимаю, что здесь всё через потоки записано?

@Renji · 02.12.2016, 00:33

Дальше сами.

C++

#include<iostream>
#include<string>
#include<fstream>
#include<map>
 
// Counts the lower-cased alphabetic words of input.txt and prints
// "<count> <word>" lines to standard output, highest count first.
int main()
{
    std::map<std::string, int> occurrences;

    {
        // The stream lives only for the duration of the scan.
        std::ifstream input("input.txt");
        while (input)
        {
            // Discard everything up to the next letter (or the end of input).
            while (input && !isalpha(input.peek()))
                input.get();

            // Collect one run of letters, folding it to lower case.
            std::string token;
            while (input && isalpha(input.peek()))
                token.push_back(tolower(input.get()));

            if (token.empty())
                continue;
            ++occurrences[token];
        }
    }

    // Re-key by count; std::greater puts the largest counts first.
    std::multimap<int, std::string, std::greater<int>> byCount;
    for (auto& entry : occurrences)
        byCount.emplace(entry.second, entry.first);

    for (auto it = byCount.begin(); it != byCount.end(); ++it)
        std::cout << it->first << " " << it->second << std::endl;

    return 0;
}

PS Нужен C++11. Под более старый стандарт перепиливайте сами.

@Mr.X · 02.12.2016, 00:57

T_R_M, что-то вы сишню написали какую-то!
Ежели на С++, то так можно:

C++

//Напишите программу, которая составляет частотный словарь заданного
//текста, пропуская при этом символы-разделители. Словом будем считать
//непрерывную последовательность символов латинского алфавита. Слова,
//различающиеся только регистром символов, считаются одинаковыми.
//Элемент частотного словаря содержит слово и количество повторений этого
//слова, разделенные одним пробелом. Слова, встречающиеся менее, чем K
//раз, в словарь не включаются (параметр K вводится с консоли). Частотный
//словарь должен быть упорядочен по убыванию количества повторений
//слова. Исходный текст программа должна читать из файла input.txt, а
//сформированный словарь – помещать в файл output.txt.
///////////////////////////////////////////////////////////////////////////////
#include <cctype>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
///////////////////////////////////////////////////////////////////////////////
// The blank character used by this program as its single separator symbol.
constexpr char SPACE_SYMB = ' ';
///////////////////////////////////////////////////////////////////////////////
using T_str            = std::string;
using T_count_of_word  = std::map  < T_str, int >;   // word -> number of occurrences
using T_word_and_count = std::pair < T_str, int >;   // (word, number of occurrences)
///////////////////////////////////////////////////////////////////////////////
// Strict weak ordering for (word, count) pairs: larger counts come first and
// pairs with equal counts are ordered alphabetically by word.
struct  T_word_and_count_compare
{
    //-------------------------------------------------------------------------
    // The call operator is const: associative containers invoke their
    // comparator through a const object, and libstdc++ rejects a non-const
    // operator() in C++17 mode ("comparison object must be invocable as const").
    // The fields are compared directly, so no temporary string copies are made.
    bool    operator()
        (
            T_word_and_count    const   &   L,
            T_word_and_count    const   &   R
        )
        const
    {
        if( L.second != R.second )
        {
            return  L.second > R.second;    // descending by count
        }
        return  L.first < R.first;          // ascending by word
    }
    //-------------------------------------------------------------------------
};
///////////////////////////////////////////////////////////////////////////////
// Set of (word, count) pairs kept in dictionary output order.
using T_word_and_count_set = std::set
    <
        T_word_and_count,
        T_word_and_count_compare
    >;
///////////////////////////////////////////////////////////////////////////////
void    count_words_frequency
    (
        int                 min_count,
        std::istream    &   istr,
        std::ostream    &   ostr
    )
{
    T_count_of_word     count_of_word;
    T_str               line_cur;
 
    while   (
                getline     (
                                istr,
                                line_cur
                            )
            )
    {
        for( auto   &   symb    :   line_cur )
        {
            symb    =   std::isalpha( symb )
                            ?   std::tolower( symb )
                            :   SPACE_SYMB;
        }//for
 
        std::istringstream  ssin( line_cur );
 
        T_str   word_cur;
 
        while( ssin >> word_cur )
        {
            ++count_of_word[ word_cur ];
        }
    }//while
 
    T_word_and_count_set    word_and_count_set;
 
    for( auto   const   &   word_and_count  :   count_of_word )
    {
        word_and_count_set.emplace( word_and_count );
    }
 
    for( auto   const   &   word_and_count  :   word_and_count_set )
    {
        if  (
                    word_and_count.second
                >=  min_count
            )
        {
            ostr    <<  word_and_count.first
                    <<  SPACE_SYMB
                    <<  word_and_count.second
                    <<  std::endl;
        }//if
    }//for
}
///////////////////////////////////////////////////////////////////////////////
int     main()
{
    const   T_str   IFILE_NAME  { "input.txt"   };
    const   T_str   OFILE_NAME  { "output.txt"  };
 
    std::ifstream   ifile       ( IFILE_NAME    );
 
    if( !ifile )
    {
        std::cout   <<  "bad ifile"
                    <<  std::endl;
    }//if
 
    std::ofstream   ofile   ( OFILE_NAME );
    int     min_count{};
    std::cout   <<  "min count = ";
    std::cin    >>  min_count;
 
    count_words_frequency   (
                                min_count,
                                ifile,
                                ofile
                            );
 
    std::cout   <<  "finish"
                <<  std::endl;
}

@T_R_M 0 / 0 / 0 Регистрация: 13.03.2016 Сообщений: 23
	02.12.2016, 00:06 [ТС]	3
	Fixer_84, извините за такой вопрос, но я правильно понимаю, что здесь всё через потоки записано? 0

	02.12.2016, 00:57