vectorstring,allocator::iterator iter_end = words-end();
while ( iter != iter_end )
{
string::size_type pos = 0;
while ((pos = (*iter).find_first_of(filt_elems, pos))
!= string::npos )
(*iter).erase(pos,l);
++iter;
}
}
void
TextQuery::
suffix_text()
{
vectorstring,allocator *words = text_locations-first;
vectorstring,allocator::iterator iter = words-begin();
vectorstring,allocator::iterator iter_end = words-end() ;
while ( iter != iter_end ) {
if ( (*iter).size() = 3 )
{ iter++; continue; }
if ( (*iter)[ (*iter).size()-l ] == 's' )
suffix_s( *iter );
// дополнительная обработка суффиксов...
iter++;
}
}
void
TextQuery::
suffix_s( string word )
{
string::size_type spos = 0;
string::size_type pos3 = word.size()-3;
// "ous", "ss", "is", "ius"
string suffixes( "oussisius" );
if ( ! word.compare( pos3, 3, suffixes, spos, 3 ) ||
! word.compare( pos3, 3, suffixes, spos+6, 3) ||
! word.compare( pos3+l, 2, suffixes, spos+2, 2 ) ||
! word.compare( pos3+l, 2, suffixes, spos+4, 2 ))
return;
string ies( "ies" );
if ( ! word.compare( pos3, 3, ies ))
{
word.replace( pos3, 3, 1, 'у' );
return;
}
string ses( "ses" );
if ( ! word.compare( pos3, 3, ses ))
{
word.erase( pos3+l, 2 );
return;
}
// удалим 's' в конце
word.erase( pos3+2 );
// удалим "'s"
if ( word[ pos3+l ] == '\'' )
word.erase( pos3+l );
}
void
TextQuery::
strip_caps()
{
vectorstring,allocator *words = text_locations-first;
vectorstring,allocator::iterator iter = words-begin();
vectorstring,allocator::iterator iter_end = words-end();
string caps( "ABCDEFGHI3KLMNOPQRSTUVWXYZ" );
while ( iter != iter_end ) {
string::size_type pos = 0;
while (( pos = (*iter).find_first_of( caps, pos ))
!= string::npos )
(*iter)[ pos ] = to1ower( (*iter)[pos] );
++iter;
}
}
void
TextQuery::
build_word_map()
{
word_map = new mapstring,loc*,lessstring,allocator;
typedef mapstring,loc*,lessstring,allocator::value_type
value_type;
typedef setstring,lessstring,allocator::difference_type
diff_type;
setstring,lessstring,allocator exclusion_set;
ifstream infile( "exclusion_set" );
if ( !infile )
{
static string default_excluded_words[25] = {
"the","and","but","that","then","are","been",
"can","can't","cannot","could","did","for",
"had","have","him","his","her","its"."into",
"were","which","when","with","would"
};
cerr "warning! unable to open word exclusion file! -- "
"using default set\n";
copy( default_excluded_words,
default_excluded_words+25,
inserter(exclusion_set, exclusion_set.begin()));
}
else {
istream_iterator string, diff_type
input_set( infile ), eos;
copy( input_set, eos,
inserter( exclusion_set, exclusion_set.begin() ));
}