diff -c v0/Common.cxx v3/Common.cxx
*** v0/Common.cxx	Tue Sep  5 11:22:32 2000
--- v3/Common.cxx	Wed Sep 20 16:44:38 2000
***************
*** 23,26 ****
  
  const int Version  = 3;
  const int Revision = 0;
! 
--- 23,26 ----
  
  const int Version  = 3;
  const int Revision = 0;
! const char *RevComment = ".3";
diff -c v0/Common.h v3/Common.h
*** v0/Common.h	Tue Sep  5 11:22:34 2000
--- v3/Common.h	Wed Sep 20 16:44:45 2000
***************
*** 34,41 ****
  
  extern const int Version;
  extern const int Revision;
  
- 
  inline char look_ahead( istream &is )
  {
    char ch;
--- 34,41 ----
  
  extern const int Version;
  extern const int Revision;
+ extern const char *RevComment;
  
  inline char look_ahead( istream &is )
  {
    char ch;
diff -c v0/GetOptClass.cxx v3/GetOptClass.cxx
*** v0/GetOptClass.cxx	Tue Sep  5 11:22:34 2000
--- v3/GetOptClass.cxx	Wed Sep 20 16:44:38 2000
***************
*** 21,26 ****
--- 21,30 ----
   *	Timbl@kub.nl
   */
  
+ //
+ // 10 april 2000: Fixed a problem with get_N_option
+ // 23-08-2000: free the result of verbosity_to_string
+ 
  #include <fstream>
  #include <string>
  #include <cstdio>
***************
*** 180,186 ****
      }
      if ( TestFileName != "" ){
        cerr << "Multiple testfile name " << TestFileName << " ignored.\n"
! 	   << "since -f leave_one_out is also specified" << endl;
      }
      if ( LocalAlgo != IB1 ) {
        cerr << "option -a ignored, since it conflicts with -t leave_one_out\n"
--- 184,190 ----
      }
      if ( TestFileName != "" ){
        cerr << "Multiple testfile name " << TestFileName << " ignored.\n"
! 	   << "since -t leave_one_out is also specified" << endl;
      }
      if ( LocalAlgo != IB1 ) {
        cerr << "option -a ignored, since it conflicts with -t leave_one_out\n"
***************
*** 337,344 ****
      if ( *Option++ == '-' ){
        while( isspace(*Option) ) Option++;
        if ( *Option++ == 'N' ) {
- 	Result = atoi( Option );
  	MaxFeatsSet = true;
  	break;
        }
      }
--- 341,355 ----
      if ( *Option++ == '-' ){
        while( isspace(*Option) ) Option++;
        if ( *Option++ == 'N' ) {
  	MaxFeatsSet = true;
+ 	Result = 0;
+ 	if ( *Option )
+ 	  Result = atoi( Option );
+ 	else if (opt < Argc )
+ 	  Result = atoi( Argv[opt] );
+ 	if ( Result == 0 ){
+ 	  cerr << "missing a number after N option" << endl;
+ 	}
  	break;
        }
      }
***************
*** 462,474 ****
  
  inline void show_version_info(void)
  {
!   cerr << "TiMBL " << Version << "." << Revision
         << ", compiled on " << __DATE__ << ", " << __TIME__ << endl;
    exit(1);
  }
  
  bool GetOptClass::definitive_options( TimblExperiment *Exp, 
! 					     bool first ){
    char optline[3 * OPT_LENGTH];
    if ( !sparse_bin_set && !to_late_for_bin && 
         (LocalInputFormat != UnknownInputFormat || LocalAlgo != LOO ) ){
--- 473,485 ----
  
  inline void show_version_info(void)
  {
!   cerr << "TiMBL " << Version << "." << Revision << RevComment
         << ", compiled on " << __DATE__ << ", " << __TIME__ << endl;
    exit(1);
  }
  
  bool GetOptClass::definitive_options( TimblExperiment *Exp, 
! 				      bool first ){
    char optline[3 * OPT_LENGTH];
    if ( !sparse_bin_set && !to_late_for_bin && 
         (LocalInputFormat != UnknownInputFormat || LocalAlgo != LOO ) ){
***************
*** 522,529 ****
  	  if ( Exp->SetOption( optline ) ){
  	    sprintf( optline, "PROGRESS: %d", local_progress );
  	    if ( Exp->SetOption( optline ) ){
! 	      sprintf( optline, "VERBOSITY: %s", 
! 		       verbosity_to_string(MyVerbosity) );
  	      if ( Exp->SetOption( optline ) ){
  		int i;
  		for ( i=0; i < MaxFeats+1; i++ ){
--- 533,541 ----
  	  if ( Exp->SetOption( optline ) ){
  	    sprintf( optline, "PROGRESS: %d", local_progress );
  	    if ( Exp->SetOption( optline ) ){
! 	      const char *tmp = verbosity_to_string(MyVerbosity);
! 	      sprintf( optline, "VERBOSITY: %s", tmp );
! 	      free( const_cast<char *>(tmp) );
  	      if ( Exp->SetOption( optline ) ){
  		int i;
  		for ( i=0; i < MaxFeats+1; i++ ){
***************
*** 532,542 ****
  		  if (!Exp->SetOption( optline ) )
  		    return false;
  		}
! 		if ( do_query ){
! 		  Exp->ShowSettings( );
! 		  do_query = false;
  		}
- 		return true;
  	      }
  	    }
  	  }
--- 544,558 ----
  		  if (!Exp->SetOption( optline ) )
  		    return false;
  		}
! 		sprintf( optline, "TREE_ORDER: %s", 
! 			 order_to_string(LocalOrder) );
! 		if ( Exp->SetOption( optline ) ){
! 		  if ( do_query ){
! 		    Exp->ShowSettings( );
! 		    do_query = false;
! 		  }
! 		  return true;
  		}
  	      }
  	    }
  	  }
diff -c v0/IBtree.cxx v3/IBtree.cxx
*** v0/IBtree.cxx	Tue Sep  5 10:51:21 2000
--- v3/IBtree.cxx	Wed Sep 20 16:44:38 2000
***************
*** 19,24 ****
--- 19,30 ----
   *	Timbl@kub.nl
   */
  
+ // bug fixed 23-08-2000 Ko: replaced "magic number" 80 with MAX_BUFFER_SIZE
+ //                          in read_local and read_list
+ //           20-09-2000 Ko: Changed read_local. Shouldn't add targetvalues but
+ //                          look them up! (they are added by read_distribution)
+ //
+ 
  #include <cstdlib>
  #include <cstdio>
  #include <string>
***************
*** 206,212 ****
  {
    IBtree *result = new IBtree;
    tree_size++;
!   char buf[80];
    is >> buf;
    result->FValue = Feats[level]->add_value( buf, NULL );
    is >> buf;
--- 212,218 ----
  {
    IBtree *result = new IBtree;
    tree_size++;
!   char buf[MAX_BUFFER_SIZE];
    is >> buf;
    result->FValue = Feats[level]->add_value( buf, NULL );
    is >> buf;
***************
*** 216,229 ****
      return NULL;
    }
    is >> buf;
!   result->TValue = Targ->add_value( buf );
    if ( look_ahead(is) == '{' ){
      result->TDistribution = new ValueDistribution();
!     result->TDistribution->read_distribution( is, Targ, false );
      //    cerr << "Dist = " << TopDistribution << endl;
      // also we have to update the targetinformation of the featurevalue
      // so we can recalculate the statistics later on.
!     result->FValue->ReconstructDistribution( result->TDistribution );
    }
    if ( look_ahead(is) == '[' ){
      result->link = read_list( is, Feats, Targ, level+1 );
--- 222,237 ----
      return NULL;
    }
    is >> buf;
!   result->TValue = (TargetValue*)Targ->Lookup( buf );
    if ( look_ahead(is) == '{' ){
      result->TDistribution = new ValueDistribution();
!     if ( !result->TDistribution->read_distribution( is, Targ, false ) )
!       Error( "problems reading a distribution from InstanceBase file" );
      //    cerr << "Dist = " << TopDistribution << endl;
      // also we have to update the targetinformation of the featurevalue
      // so we can recalculate the statistics later on.
!     if ( result->FValue->ValFreq() > 0 )
!       result->FValue->ReconstructDistribution( result->TDistribution );
    }
    if ( look_ahead(is) == '[' ){
      result->link = read_list( is, Feats, Targ, level+1 );
***************
*** 256,262 ****
    IBtree *result = NULL, **pnt;
    pnt = &result;
    bool goon = true;
!   char buf[80];
    if ( Version < 3 )
      is >> buf; // skip the opening `[` (old style)
    while ( goon ) {
--- 264,270 ----
    IBtree *result = NULL, **pnt;
    pnt = &result;
    bool goon = true;
!   char buf[MAX_BUFFER_SIZE];
    if ( Version < 3 )
      is >> buf; // skip the opening `[` (old style)
    while ( goon ) {
***************
*** 300,306 ****
      is >> buf;
      // Now read the TopDistribution, to get the Targets
      // in the right order in Targ
!     TopDistribution->read_distribution( is, Targs, true );
      //    cerr << "Top = " << TopDistribution << endl;
      if ( look_ahead( is ) == '[' ){
        InstBase = read_list( is, Feats, Targs, 0 );
--- 308,315 ----
      is >> buf;
      // Now read the TopDistribution, to get the Targets
      // in the right order in Targ
!     if ( !TopDistribution->read_distribution( is, Targs, true ) )
!       Error( "problems reading Top Distribution " );
      //    cerr << "Top = " << TopDistribution << endl;
      if ( look_ahead( is ) == '[' ){
        InstBase = read_list( is, Feats, Targs, 0 );
***************
*** 506,512 ****
      result = false;
    }
    else if ( !InvAss ){
-     //    IBtree *Arr[Depth];
      IBtree **Arr = new IBtree *[Depth+1];
      IBtree *pnt = InstBase;
      int CurI = 0, Feat = 0;
--- 515,520 ----
diff -c v0/Instance.cxx v3/Instance.cxx
*** v0/Instance.cxx	Tue Sep  5 11:22:35 2000
--- v3/Instance.cxx	Wed Sep 20 16:44:39 2000
***************
*** 19,24 ****
--- 19,31 ----
   *	Timbl@kub.nl
   */
  
+ //
+ // bug fixed 27-03-2000 Ko: ChiSquareStatistics: Resizing of n_i_dot was wrong
+ //           03-07-2000 Ko: NumStatistics: Test for ValFreq() >=0 should be
+ //                             a test for ValFreq() > 0.
+ //           20-09-2000 Ko: Repaired problem with Target::add_value, which made
+ //                          TotalValues() useless. Also affected: read_distribution
+ //
  #include <iostream>
  #include <strstream>
  #include <iomanip>
***************
*** 41,48 ****
  #define BIN_SIZE 20
  
  
! ostream& operator<<(ostream& s, const FeatureValue* fv )
! { 
   if ( fv )
      s << fv->TokVal->Name();
    else
--- 48,54 ----
  #define BIN_SIZE 20
  
  
! ostream& operator<<(ostream& s, const FeatureValue* fv ){ 
   if ( fv )
      s << fv->TokVal->Name();
    else
***************
*** 50,57 ****
    return s;
  }
  
! ostream& operator<<(ostream& s, const TargetValue* tv )
! { 
    if ( tv )
      s << tv->TokVal->Name();
    else
--- 56,62 ----
    return s;
  }
  
! ostream& operator<<(ostream& s, const TargetValue* tv ){ 
    if ( tv )
      s << tv->TokVal->Name();
    else
***************
*** 230,257 ****
    }
  }
  
! void ValueDistribution::read_distribution( istream &is, 
  					   Target *Targ,
! 					   bool do_fr )
! {
    // read a distribution from stream is into Target
    // if do_f we also adjust the value of Frequency of the Target, which is
    // otherwise 1. Special case when reading the TopDistribution.
    //
    TargetValue *target;
    char buf[STR_LEN];
    int freq;
    is >> buf;   // skip {
    do {
      is >> buf;
!     target = Targ->add_value( buf );
!     is >> buf;
!     freq = atoi( buf );
!     if ( do_fr )
!       target->ValFreq( freq );
      (*this)[target] = freq;
    } while ( look_ahead(is) != '}' );
    is >> buf;   // skip }
  }
  
  
--- 235,270 ----
    }
  }
  
! bool ValueDistribution::read_distribution( istream &is, 
  					   Target *Targ,
! 					   bool do_fr ){
    // read a distribution from stream is into Target
    // if do_f we also adjust the value of Frequency of the Target, which is
    // otherwise 1. Special case when reading the TopDistribution.
    //
+   bool result = true;
    TargetValue *target;
    char buf[STR_LEN];
+   char buf2[STR_LEN];
    int freq;
    is >> buf;   // skip {
    do {
      is >> buf;
!     is >> buf2;
!     freq = atoi( buf2 );
!     if ( do_fr ){
!       target = Targ->add_value( buf, freq );
!     }
!     else
!       target = (TargetValue*)Targ->Lookup( buf );
!     if ( !target ){
!       result = false;
!       break;
!     }
      (*this)[target] = freq;
    } while ( look_ahead(is) != '}' );
    is >> buf;   // skip }
+   return result;
  }
  
  
***************
*** 263,269 ****
    double randnum = (double)rand()/(double)RAND_MAX;
    randnum *= (Max-Min);
    randnum += Min;
!   return (int)rint(randnum);
  }
  
  const TargetValue *ValueDistribution::BestTarget( bool& tie,
--- 276,282 ----
    double randnum = (double)rand()/(double)RAND_MAX;
    randnum *= (Max-Min);
    randnum += Min;
!   return (int)floor(randnum+0.5);
  }
  
  const TargetValue *ValueDistribution::BestTarget( bool& tie,
***************
*** 357,364 ****
    index = I;
  }
  
! FeatureValue::FeatureValue( char *s )
! {
    SValueClassProb = NULL;
    Frequency = 0;
    index = -1;
--- 370,376 ----
    index = I;
  }
  
! FeatureValue::FeatureValue( char *s ){
    SValueClassProb = NULL;
    Frequency = 0;
    index = -1;
***************
*** 380,387 ****
    if ((map_iterator = Mapping.find( ID )) == Mapping.end()){
      // Token was not found in map, so we insert a new copy of it here
      //
!     Mapping.insert( IImaptype::value_type( ID, NumVals ));
!     index = NumVals++;
    }
    else{
      index = (*map_iterator).second;
--- 392,399 ----
    if ((map_iterator = Mapping.find( ID )) == Mapping.end()){
      // Token was not found in map, so we insert a new copy of it here
      //
!     Mapping.insert( IImaptype::value_type( ID, array_size ));
!     index = array_size++;
    }
    else{
      index = (*map_iterator).second;
***************
*** 389,399 ****
    return index;
  }
  
! FeatureValue *Feature::add_value( char *valstr, TargetValue *tv )
! {
    HashInfo *Tok = TokenTree->Hash( valstr );
    int index = localmapping( Tok->Index() );
-   TotalVals++;
    if ( index >= CurSize )
      enlarge_values_array( index );
    if(  ValuesArray[index] == NULL ){
--- 401,427 ----
    return index;
  }
  
! int BaseFeatTargClass::EffectiveValues() {
!   int result = 0;
!   int i;
!   for ( i=0; i < ArraySize(); i++ )
!     if ( ValuesArray[i]->ValFreq() > 0 )
!       result++;
!   return result;
! }
! 
! unsigned int BaseFeatTargClass::TotalValues() {
!   unsigned int result = 0;
!   int i;
!   for ( i=0; i < ArraySize(); i++ )
!     result += ValuesArray[i]->ValFreq();
!   return result;
! }
! 
! FeatureValue *Feature::add_value( char *valstr, 
! 				  TargetValue *tv ){
    HashInfo *Tok = TokenTree->Hash( valstr );
    int index = localmapping( Tok->Index() );
    if ( index >= CurSize )
      enlarge_values_array( index );
    if(  ValuesArray[index] == NULL ){
***************
*** 406,419 ****
    return (FeatureValue *)ValuesArray[index];
  }
  
  BaseFeatTargClass::BaseFeatTargClass( int Size, int Inc, StringHash *T )
  {
    CurSize = Size;
    Increment = Inc;
    TokenTree = T;
!   Scraped = 0;
!   NumVals = 0;
!   TotalVals = 0;
    ValuesArray = (ValueClass **)malloc( sizeof(ValueClass*) * CurSize );
    assert( ValuesArray != NULL );
    int i;
--- 434,469 ----
    return (FeatureValue *)ValuesArray[index];
  }
  
+ bool Feature::increment_value( FeatureValue *FV, TargetValue *tv )
+ {
+   bool result = false;
+   if ( FV ){
+     FV->incr_val_freq();
+     if ( tv )
+       FV->TargetDist[tv] += 1;
+     result = true;
+   }
+   return result;
+ }
+ 
+ bool Feature::decrement_value( FeatureValue *FV, TargetValue *tv )
+ {
+   bool result = false;
+   if ( FV ){
+     FV->decr_val_freq();
+     if ( tv )
+       FV->TargetDist[tv] -= 1;
+     result = true;
+   }
+   return result;
+ }
+ 
  BaseFeatTargClass::BaseFeatTargClass( int Size, int Inc, StringHash *T )
  {
    CurSize = Size;
    Increment = Inc;
    TokenTree = T;
!   array_size = 0;
    ValuesArray = (ValueClass **)malloc( sizeof(ValueClass*) * CurSize );
    assert( ValuesArray != NULL );
    int i;
***************
*** 470,480 ****
  
  bool Feature::prepare_numeric_stats( )
  { 
!   double tmp, Total = 0.0;
    int freq;
    bool first = true;
    int i;
!   for ( i=0; i < NumOfValues(); i++ ){
      freq = ValuesArray[i]->ValFreq();
      if ( freq ){
        if ( !string2double( ValuesArray[i]->Name(), tmp ) ){
--- 520,530 ----
  
  bool Feature::prepare_numeric_stats( )
  { 
!   double tmp;
    int freq;
    bool first = true;
    int i;
!   for ( i=0; i < ArraySize(); i++ ){
      freq = ValuesArray[i]->ValFreq();
      if ( freq ){
        if ( !string2double( ValuesArray[i]->Name(), tmp ) ){
***************
*** 492,498 ****
  	  Min = tmp;
  	else if ( tmp > Max )
  	  Max = tmp;
- 	Total += tmp * freq;
        }
      }
    }
--- 542,547 ----
***************
*** 503,508 ****
--- 552,558 ----
  {
    int place;
    double tmp, Prob, FVEntropy, Freq;
+   unsigned int TotalVals = TotalValues();
    char dumname[STR_LEN];
    FeatureValue **FVBin = new FeatureValue *[BIN_SIZE];
    Entropy = 0.0;
***************
*** 512,521 ****
      FVBin[i] = new FeatureValue( dumname );
    }
    int j;
!   for ( j=0; j < NumOfValues(); j++ ){
!     string2double( ValuesArray[j]->Name(), tmp );
!     place = (int)(floor((tmp - Min)/(Max-Min) * (BIN_SIZE-1)) );
!     FVBin[place]->TargetDist.Merge( &((FeatureValue *)ValuesArray[j])->TargetDist );
    }
    int k;
    for ( k=0; k < BIN_SIZE; k++ ){
--- 562,573 ----
      FVBin[i] = new FeatureValue( dumname );
    }
    int j;
!   for ( j=0; j < ArraySize(); j++ ){
!     if ( ValuesArray[j]->ValFreq() > 0 ){
!       string2double( ValuesArray[j]->Name(), tmp );
!       place = (int)(floor((tmp - Min)/(Max-Min) * (BIN_SIZE-1)) );
!       FVBin[place]->TargetDist.Merge( &((FeatureValue *)ValuesArray[j])->TargetDist );
!     }
    }
    int k;
    for ( k=0; k < BIN_SIZE; k++ ){
***************
*** 532,538 ****
  	FVEntropy += Prob * Log2(Prob);
  	hlp = hlp->Next();
        }
!       Entropy += -FVEntropy * Freq / (double)TotalValues();
      }
    }
    Entropy = fabs( Entropy );
--- 584,590 ----
  	FVEntropy += Prob * Log2(Prob);
  	hlp = hlp->Next();
        }
!       Entropy += -FVEntropy * Freq / (double)TotalVals;
      }
    }
    Entropy = fabs( Entropy );
***************
*** 547,553 ****
    for ( m=0; m < BIN_SIZE; m++ ){
      Freq = ((FeatureValue*)FVBin[m])->ValFreq();
      if ( Freq ){
!       Prob = Freq / (double)TotalValues();
        SplitInfo += Prob * Log2(Prob);
      } 
      delete (FeatureValue*)FVBin[m];
--- 599,605 ----
    for ( m=0; m < BIN_SIZE; m++ ){
      Freq = ((FeatureValue*)FVBin[m])->ValFreq();
      if ( Freq ){
!       Prob = Freq / (double)TotalVals;
        SplitInfo += Prob * Log2(Prob);
      } 
      delete (FeatureValue*)FVBin[m];
***************
*** 568,584 ****
  void Feature::Statistics( double DBentropy )
  {
    double Prob = 0.0;
    Entropy = 0.0;
    // Loop over the values.
    VDlist *hlp;
    FeatureValue **pnt = (FeatureValue**)ValuesArray;
    int i;
!   for ( i=0; i < NumOfValues(); i++ ){
      // Entropy for this FV pair.
      //
      double FVEntropy = 0.0;
      double Freq = pnt[i]->ValFreq();
!     if ( Freq ){
        hlp = pnt[i]->TargetDist.First();
        while ( hlp ){
  	Prob = hlp->Freq() / Freq;
--- 620,637 ----
  void Feature::Statistics( double DBentropy )
  {
    double Prob = 0.0;
+   unsigned int TotalVals = TotalValues();
    Entropy = 0.0;
    // Loop over the values.
    VDlist *hlp;
    FeatureValue **pnt = (FeatureValue**)ValuesArray;
    int i;
!   for ( i=0; i < ArraySize(); i++ ){
      // Entropy for this FV pair.
      //
      double FVEntropy = 0.0;
      double Freq = pnt[i]->ValFreq();
!     if ( Freq > 0 ){
        hlp = pnt[i]->TargetDist.First();
        while ( hlp ){
  	Prob = hlp->Freq() / Freq;
***************
*** 585,591 ****
  	FVEntropy += Prob * Log2(Prob);
  	hlp = hlp->Next();
        }
!       Entropy += -FVEntropy * Freq / (double)TotalValues();
      }
    } //end i
    
--- 638,644 ----
  	FVEntropy += Prob * Log2(Prob);
  	hlp = hlp->Next();
        }
!       Entropy += -FVEntropy * Freq / (double)TotalVals;
      }
    } //end i
    
***************
*** 599,607 ****
    SplitInfo = 0.0;
    pnt = (FeatureValue **)ValuesArray;
    int j;
!   for ( j=0; j < NumOfValues(); j++ ){
!     Prob = pnt[j]->ValFreq() / (double)TotalValues();
!     if ( Prob )
        SplitInfo += Prob * Log2(Prob);
    } 
    SplitInfo = -SplitInfo;
--- 652,660 ----
    SplitInfo = 0.0;
    pnt = (FeatureValue **)ValuesArray;
    int j;
!   for ( j=0; j < ArraySize(); j++ ){
!     Prob = pnt[j]->ValFreq() / (double)TotalVals;
!     if ( Prob > 0 )
        SplitInfo += Prob * Log2(Prob);
    } 
    SplitInfo = -SplitInfo;
***************
*** 617,624 ****
  {
    VDlist *pnt;
    FeatureValue **FVA = (FeatureValue **)ValuesArray;
!   int Size = Targets->NumOfValues();
!   int Num_Vals = NumOfValues();
    long int n_dot_dot = 0;
    double tmp;
    if ( !n_dot_j ) {
--- 670,677 ----
  {
    VDlist *pnt;
    FeatureValue **FVA = (FeatureValue **)ValuesArray;
!   int Size = Targets->ArraySize();
!   int Num_Vals = ArraySize();
    long int n_dot_dot = 0;
    double tmp;
    if ( !n_dot_j ) {
***************
*** 635,641 ****
      }
      if ( SaveNum < Num_Vals ){
        delete [] n_i_dot;
!       n_i_dot = new long int[Size];
        SaveNum = Num_Vals;
      }
    }
--- 688,694 ----
      }
      if ( SaveNum < Num_Vals ){
        delete [] n_i_dot;
!       n_i_dot = new long int[Num_Vals];
        SaveNum = Num_Vals;
      }
    }
***************
*** 695,702 ****
  void Feature::SharedVarianceStatistics( Target *Targ )
  {
    int NumInst = Targ->TotalValues();
!   int NumCats = Targ->NumOfValues();
!   int k = min( NumCats, NumOfValues() ) - 1 ;
    if ( k == 0 )
      SharedVariance = 0;
    else
--- 748,755 ----
  void Feature::SharedVarianceStatistics( Target *Targ )
  {
    int NumInst = Targ->TotalValues();
!   int NumCats = Targ->EffectiveValues();
!   int k = min( NumCats, EffectiveValues() ) - 1 ;
    if ( k == 0 )
      SharedVariance = 0;
    else
***************
*** 707,713 ****
  {
    if ( VDmatrix ){
      int i;
!     for ( i = 0; i < NumOfValues(); i++)
        free(VDmatrix[i]);
      free(VDmatrix);
    }
--- 760,766 ----
  {
    if ( VDmatrix ){
      int i;
!     for ( i = 0; i < ArraySize(); i++)
        free(VDmatrix[i]);
      free(VDmatrix);
    }
***************
*** 722,738 ****
  {
    int i, j;
    if ( !Prestore_failed && VDmatrix == NULL ){
!     if ( NumOfValues() > 1000 ){
        Prestore_failed = true;
        return false;
      }
!     VDmatrix = (double**)malloc( NumOfValues() * sizeof(double*) );
      if ( VDmatrix == NULL ){
        Prestore_failed = true;
        return false;
      }
!     for (i = 0; i < NumOfValues(); i++) {
!       VDmatrix[i] = (double *)malloc( NumOfValues() * sizeof(double) );
        if ( VDmatrix[i] == NULL ){
  	int si;
  	for ( si = i-1; si >= 0; si-- )
--- 775,791 ----
  {
    int i, j;
    if ( !Prestore_failed && VDmatrix == NULL ){
!     if ( ArraySize() > 1000 ){
        Prestore_failed = true;
        return false;
      }
!     VDmatrix = (double**)malloc( ArraySize() * sizeof(double*) );
      if ( VDmatrix == NULL ){
        Prestore_failed = true;
        return false;
      }
!     for (i = 0; i < ArraySize(); i++) {
!       VDmatrix[i] = (double *)malloc( ArraySize() * sizeof(double) );
        if ( VDmatrix[i] == NULL ){
  	int si;
  	for ( si = i-1; si >= 0; si-- )
***************
*** 745,752 ****
      }
      // Now we have the memory. Calculate and store.
      //
!     for ( i = 0; i< NumOfValues(); i++ ){
!       for ( j=0; j < NumOfValues(); j++ ){
  	double result = 0.0;
  	SA_list<double> *p1 = 
  	  ((FeatureValue *)ValuesArray[i])->SValueClassProb->First();
--- 798,805 ----
      }
      // Now we have the memory. Calculate and store.
      //
!     for ( i = 0; i< ArraySize(); i++ ){
!       for ( j=0; j < ArraySize(); j++ ){
  	double result = 0.0;
  	SA_list<double> *p1 = 
  	  ((FeatureValue *)ValuesArray[i])->SValueClassProb->First();
***************
*** 785,791 ****
  {
    FeatureValue **FVA = (FeatureValue **)ValuesArray;
    int i;
!   for ( i = 0; i < NumOfValues(); i++ ){
      if ( FVA[i]->SValueClassProb )
        os << FVA[i] << FVA[i]->SValueClassProb << endl;
    }
--- 838,844 ----
  {
    FeatureValue **FVA = (FeatureValue **)ValuesArray;
    int i;
!   for ( i = 0; i < ArraySize(); i++ ){
      if ( FVA[i]->SValueClassProb )
        os << FVA[i] << FVA[i]->SValueClassProb << endl;
    }
***************
*** 802,808 ****
    FeatureValue **FVA = (FeatureValue **)ValuesArray;
    // clear all existing arrays
    int i;
!   for ( i = 0; i < NumOfValues(); i++ ){
      if ( FVA[i]->SValueClassProb ){
        delete FVA[i]->SValueClassProb;
      }
--- 855,861 ----
    FeatureValue **FVA = (FeatureValue **)ValuesArray;
    // clear all existing arrays
    int i;
!   for ( i = 0; i < ArraySize(); i++ ){
      if ( FVA[i]->SValueClassProb ){
        delete FVA[i]->SValueClassProb;
      }
***************
*** 853,859 ****
    }
    // check if we've got all the values, assign a default if not so
    int j;
!   for ( j = 0; j < NumOfValues(); j++ ){
      if ( FVA[j]->SValueClassProb == NULL ){
        FVA[j]->SValueClassProb = new SparseArrayClass<double>( Num );
        FVA[j]->SValueClassProb->Assign( 0, -1.0 );
--- 906,912 ----
    }
    // check if we've got all the values, assign a default if not so
    int j;
!   for ( j = 0; j < ArraySize(); j++ ){
      if ( FVA[j]->SValueClassProb == NULL ){
        FVA[j]->SValueClassProb = new SparseArrayClass<double>( Num );
        FVA[j]->SValueClassProb->Assign( 0, -1.0 );
***************
*** 869,875 ****
    //
  {
    if ( shrt ){
!     cout << " a " << NumOfValues() << "x" << NumOfValues() 
  	 << " matrix" << endl;
    }
    else {
--- 922,928 ----
    //
  {
    if ( shrt ){
!     cout << " a " << ArraySize() << "x" << ArraySize() 
  	 << " matrix" << endl;
    }
    else {
***************
*** 876,882 ****
      int old_prec = cout.precision();
      cout.setf( ios::scientific );
      int i;
!     for ( i=0; i< NumOfValues(); i++ ){
        cout.width(6);
        cout.setf(ios::left, ios::adjustfield);
        cout << (FeatureValue *)ValuesArray[i] << ":";
--- 929,935 ----
      int old_prec = cout.precision();
      cout.setf( ios::scientific );
      int i;
!     for ( i=0; i< ArraySize(); i++ ){
        cout.width(6);
        cout.setf(ios::left, ios::adjustfield);
        cout << (FeatureValue *)ValuesArray[i] << ":";
***************
*** 884,890 ****
        cout.precision(3);
        cout.setf(ios::right, ios::adjustfield);
        int j;
!       for ( j = 0; j < NumOfValues(); j++) {
  	cout.width(12); 
  	cout.precision(3);
  	cout.setf(ios::right, ios::adjustfield);
--- 937,943 ----
        cout.precision(3);
        cout.setf(ios::right, ios::adjustfield);
        int j;
!       for ( j = 0; j < ArraySize(); j++) {
  	cout.width(12); 
  	cout.precision(3);
  	cout.setf(ios::right, ios::adjustfield);
***************
*** 896,916 ****
    }
  }
  
! TargetValue *Target::add_value( char *valstr )
! {
    HashInfo *Tok = TokenTree->Hash( valstr );
    int index = localmapping( Tok->Index() );
-   TotalVals++;
    if ( index >= CurSize )
      enlarge_values_array( index );
    if(  ValuesArray[index] == NULL ){
      ValuesArray[index] = new TargetValue( Tok, index );
    }
    else
!     ValuesArray[index]->incr_val_freq();
    return (TargetValue *)ValuesArray[index];
  }
  
  Instance::Instance( int len )
  {
    FV = new FeatureValue*[len];
--- 949,988 ----
    }
  }
  
! TargetValue *Target::add_value( char *valstr, int freq ){
    HashInfo *Tok = TokenTree->Hash( valstr );
    int index = localmapping( Tok->Index() );
    if ( index >= CurSize )
      enlarge_values_array( index );
    if(  ValuesArray[index] == NULL ){
      ValuesArray[index] = new TargetValue( Tok, index );
+     ValuesArray[index]->ValFreq( freq );
    }
    else
!     ValuesArray[index]->ValFreq( ValuesArray[index]->ValFreq() + freq );
    return (TargetValue *)ValuesArray[index];
  }
  
+ bool Target::increment_value( TargetValue *TV )
+ {
+   bool result = false;
+   if ( TV ){
+     TV->incr_val_freq();
+     result = true;
+   }
+   return result;
+ }
+ 
+ bool Target::decrement_value( TargetValue *TV )
+ {
+   bool result = false;
+   if ( TV ){
+     TV->decr_val_freq();
+     result = true;
+   }
+   return result;
+ }
+ 
  Instance::Instance( int len )
  {
    FV = new FeatureValue*[len];
diff -c v0/Instance.h v3/Instance.h
*** v0/Instance.h	Tue Sep  5 11:22:36 2000
--- v3/Instance.h	Wed Sep 20 16:44:45 2000
***************
*** 20,26 ****
  #define INSTANCE_H
  
  
! #define OWN_MEM_HANDLING YES
  
  
  #include "Stack.h"
--- 20,26 ----
  #define INSTANCE_H
  
  
! // #define OWN_MEM_HANDLING YES
  
  
  #include "Stack.h"
***************
*** 77,83 ****
    const TargetValue* BestTarget( ) const;
    void Merge( ValueDistribution *vd );
    void Clear(){ delete distribution; distribution = NULL; };
!   void read_distribution( istream &, Target *, bool );
    void DistToString( string& ) const;
    bool ZeroDist() 
      { return ( distribution == 0 || 
--- 77,83 ----
    const TargetValue* BestTarget( ) const;
    void Merge( ValueDistribution *vd );
    void Clear(){ delete distribution; distribution = NULL; };
!   bool read_distribution( istream &, Target *, bool );
    void DistToString( string& ) const;
    bool ZeroDist() 
      { return ( distribution == 0 || 
***************
*** 150,160 ****
  
  template <class ContentType>
  void SparseArrayClass<ContentType>::Assign( const int I, 
! 					    const ContentType val )
    // search for I, if not there: add entry 
    // assigns prob with val;
    // adjusts Dimension when necessary
- {
    SA_list<ContentType> *tmp, **pnt = &contents;
    while ( *pnt ){
      if ( I < (*pnt)->index ){
--- 150,159 ----
  
  template <class ContentType>
  void SparseArrayClass<ContentType>::Assign( const int I, 
! 					    const ContentType val ){
    // search for I, if not there: add entry 
    // assigns prob with val;
    // adjusts Dimension when necessary
    SA_list<ContentType> *tmp, **pnt = &contents;
    while ( *pnt ){
      if ( I < (*pnt)->index ){
***************
*** 178,185 ****
  };
  
  template <class ContentType> ostream& operator<<
! (ostream& os, SparseArrayClass<ContentType>* SArr )
! {
    if ( SArr ) {
      int old_prec = os.precision();
      os.precision(3);
--- 177,183 ----
  };
  
  template <class ContentType> ostream& operator<<
! (ostream& os, SparseArrayClass<ContentType>* SArr ){
    if ( SArr ) {
      int old_prec = os.precision();
      os.precision(3);
***************
*** 201,208 ****
    return os;
  };
  
! class FeatureValue: public ValueClass 
! {
    friend ostream& operator<<( ostream&, const FeatureValue * );
   public:
    FeatureValue( HashInfo *, int );
--- 199,206 ----
    return os;
  };
  
! class FeatureValue: public ValueClass {
!   friend class Feature;
    friend ostream& operator<<( ostream&, const FeatureValue * );
   public:
    FeatureValue( HashInfo *, int );
***************
*** 226,246 ****
    ValueClass **ValuesArray;
    BaseFeatTargClass( int, int, StringHash * );
    ~BaseFeatTargClass();
!   int NumOfValues() { return NumVals; };
!   unsigned int TotalValues() { return TotalVals - Scraped; };
!   int Scraped;
!   int localmapping( int );
    ValueClass *Lookup( char * );
   protected:
    int CurSize;
    int Increment;
!   unsigned int TotalVals;
!   int NumVals;
    StringHash *TokenTree;
    IImaptype Mapping;
    void enlarge_values_array( int );
!   ValueClass *get_value_index( int I )
!   { return ValuesArray[I]; };
  };
  
  
--- 224,242 ----
    ValueClass **ValuesArray;
    BaseFeatTargClass( int, int, StringHash * );
    ~BaseFeatTargClass();
!   int ArraySize() { return array_size; };
!   int EffectiveValues();
!   unsigned int TotalValues();
    ValueClass *Lookup( char * );
   protected:
    int CurSize;
    int Increment;
!   int array_size;
    StringHash *TokenTree;
    IImaptype Mapping;
    void enlarge_values_array( int );
!   int localmapping( int );
!   ValueClass *get_value_index( int I ) { return ValuesArray[I]; };
  };
  
  
***************
*** 247,253 ****
  class Target: public BaseFeatTargClass {
   public:
    Target( int a, int b, StringHash *T ): BaseFeatTargClass(a,b,T) {};
!   TargetValue *add_value( char * );
  };
  
  class Feature: public BaseFeatTargClass {
--- 243,251 ----
  class Target: public BaseFeatTargClass {
   public:
    Target( int a, int b, StringHash *T ): BaseFeatTargClass(a,b,T) {};
!   TargetValue *add_value( char *, int freq = 1 );
!   bool decrement_value( TargetValue * );
!   bool increment_value( TargetValue * );
  };
  
  class Feature: public BaseFeatTargClass {
***************
*** 288,293 ****
--- 286,293 ----
      };
    ~Feature();
    FeatureValue *add_value( char *, TargetValue * );
+   bool decrement_value( FeatureValue *, TargetValue * );
+   bool increment_value( FeatureValue *, TargetValue * );
    bool store_vd_matrix( );
    void delete_vd_matrix( void );
    void print_vd_matrix( bool s = false );
diff -c v0/MBLClass.cxx v3/MBLClass.cxx
*** v0/MBLClass.cxx	Tue Sep  5 11:22:36 2000
--- v3/MBLClass.cxx	Wed Sep 20 16:44:39 2000
***************
*** 18,23 ****
--- 18,31 ----
   *	Timbl@kub.nl
   */
  
+ //
+ // 27-3-2000: Ko : UnHideInstance: Features[i] should be PermFeatures[i]
+ //                                 The TargetDist must be increased as well
+ //                 HideInstance: The TargetDist should be decreased.
+ // 10-4-2000: Ko : Solved a problem with large output in write_perm
+ // 20-09-2000 Ko : made LearningInfo global and correct (I hope).
+ //
+ 
  #include <fstream>
  #include <strstream>
  #include <iomanip>
***************
*** 415,421 ****
    return true;
  };
  
! #endif
  
  bool MBLClass::ShowWeights( ostream &os )
  {
--- 423,429 ----
    return true;
  };
  
! #endif // BUILD_SERVER
  
  bool MBLClass::ShowWeights( ostream &os )
  {
***************
*** 500,516 ****
  inline void MBLClass::write_perm(void)
  {
    int j;
!   ostrstream outstr;
!   outstr << "Feature Permutation based on "
! 	 << order_to_string(TreeOrder, true) << " :\n";
!   outstr << "< ";
    for ( j=0; j < NumOfFeatures-1; j++ ){
!     outstr << Permutation[j]+1 << ", ";
    }
!   outstr << Permutation[j]+1 << " >" << ends;
!   char *pnt = outstr.str();
!   Info( pnt );
!   delete [] pnt;
  }
  
  inline void MBLClass::write_perm_special( ostream &os )
--- 508,520 ----
  inline void MBLClass::write_perm(void)
  {
    int j;
!   Info( "Feature Permutation based on %s :",
! 	order_to_string(TreeOrder, true) );
!   cerr << "< ";
    for ( j=0; j < NumOfFeatures-1; j++ ){
!     cerr << Permutation[j]+1 << ", ";
    }
!   cerr << Permutation[j]+1 << " >" << endl;
  }
  
  inline void MBLClass::write_perm_special( ostream &os )
***************
*** 565,572 ****
  
  void MBLClass::InitWeights(void)
  {
!   if ( TreeOrder == DataFile )
!     Weighting = UserDefined;
    int i;
    for ( i=0; i< NumOfFeatures; i++ ){
      if ( Features[i]->Ignore )
--- 569,576 ----
  
  void MBLClass::InitWeights(void)
  {
! //    if ( TreeOrder == DataFile )
! //      Weighting = UserDefined;
    int i;
    for ( i=0; i< NumOfFeatures; i++ ){
      if ( Features[i]->Ignore )
***************
*** 709,715 ****
  	time_stamp( "Calculating Entropy " );
  	calculate_fv_entropy( false );
  	if ( verbosity & FEAT_W ){
! 	  learning_info( cerr, DataLines, SkippedLines );
  	  result = true;
  	}
        }
--- 713,722 ----
  	time_stamp( "Calculating Entropy " );
  	calculate_fv_entropy( false );
  	if ( verbosity & FEAT_W ){
! 	  cerr << "Lines of data     : " << DataLines << endl;
! 	  if ( SkippedLines != 0 )
! 	    cerr << "SkippedLines      : " << SkippedLines << endl;
! 	  LearningInfo( cerr );
  	  result = true;
  	}
        }
***************
*** 743,761 ****
        Order[i] = Features[i]->SharedVariance;
        break;
      case OneoverFeature:
!       Order[i] =  1.0 / Features[i]->NumOfValues();
        break;
      case GRoverFeature:
!       Order[i] =  Features[i]->GainRatio / Features[i]->NumOfValues();
        break;
      case IGoverFeature:
!       Order[i] =  Features[i]->InfoGain / Features[i]->NumOfValues();
        break;
      case X2overFeature:
!       Order[i] =  Features[i]->ChiSquare / Features[i]->NumOfValues();
        break;
      case SVoverFeature:
!       Order[i] =  Features[i]->SharedVariance / Features[i]->NumOfValues();
        break;
      case OneoverSplitInfo:
        Order[i] =  1.0 / Features[i]->SplitInfo;
--- 750,768 ----
        Order[i] = Features[i]->SharedVariance;
        break;
      case OneoverFeature:
!       Order[i] =  1.0 / Features[i]->ArraySize();
        break;
      case GRoverFeature:
!       Order[i] =  Features[i]->GainRatio / Features[i]->ArraySize();
        break;
      case IGoverFeature:
!       Order[i] =  Features[i]->InfoGain / Features[i]->ArraySize();
        break;
      case X2overFeature:
!       Order[i] =  Features[i]->ChiSquare / Features[i]->ArraySize();
        break;
      case SVoverFeature:
!       Order[i] =  Features[i]->SharedVariance / Features[i]->ArraySize();
        break;
      case OneoverSplitInfo:
        Order[i] =  1.0 / Features[i]->SplitInfo;
***************
*** 973,982 ****
        //
        out << "Targets : ";
        int t;
!       for ( t=0; t < Targets->NumOfValues()-1; t++ ) {
  	out << tv[t] << ", ";
        } 
!       out << tv[Targets->NumOfValues()-1] << "." << endl << endl;
        int i;
        for ( i = 0; i < NumOfFeatures; i++ )
  	if ( Features[i]->Ignore )
--- 980,989 ----
        //
        out << "Targets : ";
        int t;
!       for ( t=0; t < Targets->ArraySize()-1; t++ ) {
  	out << tv[t] << ", ";
        } 
!       out << tv[Targets->ArraySize()-1] << "." << endl << endl;
        int i;
        for ( i = 0; i < NumOfFeatures; i++ )
  	if ( Features[i]->Ignore )
***************
*** 997,1003 ****
  
  bool MBLClass::allocate_arrays()
  {
!   int Dim = Targets->NumOfValues();
    int i,j;
    for ( j = 0; j < NumOfFeatures; j++) {
      if ( !Features[j]->Ignore ) {
--- 1004,1010 ----
  
  bool MBLClass::allocate_arrays()
  {
!   int Dim = Targets->ArraySize();
    int i,j;
    for ( j = 0; j < NumOfFeatures; j++) {
      if ( !Features[j]->Ignore ) {
***************
*** 1004,1010 ****
        FeatureValue **FVA = (FeatureValue **)Features[j]->ValuesArray;
        // Loop over all values.
        //
!       for ( i=0; i < Features[j]->NumOfValues(); i++ ){
  	// Loop over all classes.
  	if ( FVA[i]->SValueClassProb == NULL ){
  	  if ( !(FVA[i]->SValueClassProb = 
--- 1011,1017 ----
        FeatureValue **FVA = (FeatureValue **)Features[j]->ValuesArray;
        // Loop over all values.
        //
!       for ( i=0; i < Features[j]->ArraySize(); i++ ){
  	// Loop over all classes.
  	if ( FVA[i]->SValueClassProb == NULL ){
  	  if ( !(FVA[i]->SValueClassProb = 
***************
*** 1034,1042 ****
  	  FeatureValue **FVA = (FeatureValue **)Features[j]->ValuesArray;
  	  // Loop over all values.
  	  //
! 	  for ( i=0; i < Features[j]->NumOfValues(); i++ ){
  	    double freq = FVA[i]->ValFreq();
! 	    if ( freq ){
  	      // Loop over all classes.
  	      //
  	      VDlist *tmp = FVA[i]->TargetDist.First( );
--- 1041,1049 ----
  	  FeatureValue **FVA = (FeatureValue **)Features[j]->ValuesArray;
  	  // Loop over all values.
  	  //
! 	  for ( i=0; i < Features[j]->ArraySize(); i++ ){
  	    double freq = FVA[i]->ValFreq();
! 	    if ( freq > 0 ){
  	      // Loop over all classes.
  	      //
  	      VDlist *tmp = FVA[i]->TargetDist.First( );
***************
*** 1325,1331 ****
      for ( k = 0; k < EffectiveFeatures; k++ ){
        register int j = Permutation[k];
        CurrInst->FV[k] = (FeatureValue*)Features[j]->Lookup( OrgInput[j] );
!     } // i
      // and the Target
      CurrInst->TV = (TargetValue*)Targets->Lookup( OrgInput[NumOfFeatures] );
      break;
--- 1332,1338 ----
      for ( k = 0; k < EffectiveFeatures; k++ ){
        register int j = Permutation[k];
        CurrInst->FV[k] = (FeatureValue*)Features[j]->Lookup( OrgInput[j] );
!     } // k
      // and the Target
      CurrInst->TV = (TargetValue*)Targets->Lookup( OrgInput[NumOfFeatures] );
      break;
***************
*** 1410,1425 ****
    }
  }
  
! void MBLClass::learning_info( ostream& s, int Total, int Skipped )
! {
    int i;
!   s << "Lines of data     : " << Total << endl;
!   if ( Skipped != 0 )
!   s << "SkippedLines      : " << Skipped << endl;
    s.precision(8);
    s.setf(ios::showpoint );
    s << "DB Entropy        : " << DBEntropy << endl;
!   s << "Number of Classes : " << Targets->NumOfValues() << endl;
    s << endl;
    if ( verbosity & FEAT_W ){
      s << "Feats\tVals\tX-square\tVariance\tInfoGain\tGainRatio" << endl;
--- 1417,1430 ----
    }
  }
  
! void MBLClass::LearningInfo( ostream& s ){
    int i;
!   calculate_fv_entropy( true );
    s.precision(8);
    s.setf(ios::showpoint );
    s << "DB Entropy        : " << DBEntropy << endl;
!   //  s << "Number of Classes : " << Targets->ArraySize() << endl;
!   s << "Number of Classes : " << Targets->EffectiveValues() << endl;
    s << endl;
    if ( verbosity & FEAT_W ){
      s << "Feats\tVals\tX-square\tVariance\tInfoGain\tGainRatio" << endl;
***************
*** 1433,1439 ****
        else{
  	s.width(7);
  	s.setf(ios::right, ios::adjustfield);
! 	s << Features[i]->NumOfValues()
  	  << "\t" << Features[i]->ChiSquare
  	  << "\t" << Features[i]->SharedVariance
  	  << "\t" << Features[i]->InfoGain
--- 1438,1445 ----
        else{
  	s.width(7);
  	s.setf(ios::right, ios::adjustfield);
! 	//	s << Features[i]->ArraySize()
! 	s << Features[i]->EffectiveValues()
  	  << "\t" << Features[i]->ChiSquare
  	  << "\t" << Features[i]->SharedVariance
  	  << "\t" << Features[i]->InfoGain
***************
*** 1470,1476 ****
        if ( verbosity )
  	Info( "Saving Weights in %s", FileName );
        outfile << "# DB Entropy: " << DBEntropy << endl;
!       outfile << "# Classes: " << Targets->NumOfValues() << endl;
        outfile << "# Lines of data: " << Targets->TotalValues() << endl;
        outfile << "# Fea." << "\t" << "Weight" << endl;
        InitWeights( );
--- 1476,1482 ----
        if ( verbosity )
  	Info( "Saving Weights in %s", FileName );
        outfile << "# DB Entropy: " << DBEntropy << endl;
!       outfile << "# Classes: " << Targets->ArraySize() << endl;
        outfile << "# Lines of data: " << Targets->TotalValues() << endl;
        outfile << "# Fea." << "\t" << "Weight" << endl;
        InitWeights( );
***************
*** 1839,1848 ****
      double Entropy = 0.0, Ratio;
      // first get the Database Entropy
      int i;
!     for ( i=0; i < Targets->NumOfValues(); i++ ) {
        Ratio = Targets->ValuesArray[i]->ValFreq() / 
  	(double)Targets->TotalValues();
!       if ( Ratio )
  	Entropy += Ratio * Log2(Ratio);
      }
      DBEntropy = fabs(-Entropy);
--- 1845,1854 ----
      double Entropy = 0.0, Ratio;
      // first get the Database Entropy
      int i;
!     for ( i=0; i < Targets->ArraySize(); i++ ) {
        Ratio = Targets->ValuesArray[i]->ValFreq() / 
  	(double)Targets->TotalValues();
!       if ( Ratio > 0 )
  	Entropy += Ratio * Log2(Ratio);
      }
      DBEntropy = fabs(-Entropy);
***************
*** 1962,1971 ****
        // Print the possible classes.
        //
        int t;
!       for ( t=0; t < Targets->NumOfValues()-1; t++ ) {
  	namesfile << tv[t] << ",";
        } 
!       namesfile << tv[Targets->NumOfValues()-1] << "." << endl << endl;
        
        // Loop over the Features.
        //
--- 1968,1977 ----
        // Print the possible classes.
        //
        int t;
!       for ( t=0; t < Targets->ArraySize()-1; t++ ) {
  	namesfile << tv[t] << ",";
        } 
!       namesfile << tv[Targets->ArraySize()-1] << "." << endl << endl;
        
        // Loop over the Features.
        //
***************
*** 1991,2000 ****
  	    //
  	    vf = (FeatureValue **)Features[f]->ValuesArray;
  	    int i;
! 	    for ( i=0; i< Features[f]->NumOfValues()-1; i++ ){
  	      namesfile << vf[i] << ",";
  	    }
! 	    namesfile << vf[Features[f]->NumOfValues()-1]
  		      << "." << endl;
  	  }
  	}
--- 1997,2006 ----
  	    //
  	    vf = (FeatureValue **)Features[f]->ValuesArray;
  	    int i;
! 	    for ( i=0; i< Features[f]->ArraySize()-1; i++ ){
  	      namesfile << vf[i] << ",";
  	    }
! 	    namesfile << vf[Features[f]->ArraySize()-1]
  		      << "." << endl;
  	  }
  	}
***************
*** 2450,2470 ****
    InstanceBase->RemoveInstance( Inst );
    int i;
    for ( i=0; i < EffectiveFeatures && result; i++ ){
!     if ( !Inst->FV[i] ){
        FatalError( "Unable to Hide an Instance!" );
        result = false;
      }
-     else { 
-       Inst->FV[i]->decr_val_freq();
-       PermFeatures[i]->delete_vd_matrix();
-       PermFeatures[i]->Scraped = 1;
-     }
    }
    if ( result ){
!     Inst->TV->decr_val_freq();
!     Targets->Scraped = 1;
!     MBLInit = false;
!     result = prepare_MBL_test();
    }
    return result;
  }
--- 2456,2471 ----
    InstanceBase->RemoveInstance( Inst );
    int i;
    for ( i=0; i < EffectiveFeatures && result; i++ ){
!     if ( PermFeatures[i]->decrement_value( Inst->FV[i], Inst->TV ) ){
!       PermFeatures[i]->delete_vd_matrix();
!     }
!     else {
        FatalError( "Unable to Hide an Instance!" );
        result = false;
      }
    }
    if ( result ){
!     Targets->decrement_value( Inst->TV );
    }
    return result;
  }
***************
*** 2474,2484 ****
    InstanceBase->AddInstance( Inst );
    int i;
    for ( i=0; i < EffectiveFeatures; i++ ){    
!     Inst->FV[i]->incr_val_freq();
!     Features[i]->Scraped = 0;
    }
!   Inst->TV->incr_val_freq();
!   Targets->Scraped = 0;
  }
  
  ValueDistribution *LOOClass::LocalClassify( int &status,
--- 2475,2483 ----
    InstanceBase->AddInstance( Inst );
    int i;
    for ( i=0; i < EffectiveFeatures; i++ ){    
!     PermFeatures[i]->increment_value( Inst->FV[i], Inst->TV );
    }
!   Targets->increment_value( Inst->TV );
  }
  
  ValueDistribution *LOOClass::LocalClassify( int &status,
***************
*** 2487,2493 ****
    status = -1;
    ValueDistribution *ResultDist = NULL;
    strings_to_instance( TestWords );
!   if ( HideInstance( CurrInst ) ){
      if ( do_exact_match )
        ResultDist = InstanceBase->ExactMatch( CurrInst );
      if ( ResultDist ){
--- 2486,2493 ----
    status = -1;
    ValueDistribution *ResultDist = NULL;
    strings_to_instance( TestWords );
!   MBLInit = false;
!   if ( HideInstance( CurrInst ) && prepare_MBL_test() ){
      if ( do_exact_match )
        ResultDist = InstanceBase->ExactMatch( CurrInst );
      if ( ResultDist ){
***************
*** 2500,2508 ****
        ResultDist = test_instance_full( CurrInst, Distance );
      }
      UnHideInstance( CurrInst );
    }
    else {
!     Warning( " Hiding failed!\n" );
    }
    return ResultDist;
  }
--- 2500,2509 ----
        ResultDist = test_instance_full( CurrInst, Distance );
      }
      UnHideInstance( CurrInst );
+     MBLInit = false;
    }
    else {
!     FatalError( " Hiding failed!\n" );
    }
    return ResultDist;
  }
***************
*** 3075,3080 ****
--- 3076,3083 ----
  	if ( isspace(Buffer[j]) ){
  	  result++;
  	  while ( isspace( Buffer[++j] ) );
+ 	  if ( Buffer[j] == '\0' )
+ 	    result--; // we had some trailing spaces
  	}
        };
        break;
diff -c v0/MBLClass.h v3/MBLClass.h
*** v0/MBLClass.h	Tue Sep  5 10:51:22 2000
--- v3/MBLClass.h	Wed Sep 20 16:44:45 2000
***************
*** 123,128 ****
--- 123,129 ----
    void FatalError( const char*... );
    int SockNum() const { return Socket; };
    int MaxFeats() { return MaxFeatures; };
+   void LearningInfo( ostream& );
  protected:
    MBLClass() {};
    MBLClass( AlgorithmType, const int N = 1000, const char * = NULL );
***************
*** 241,247 ****
    inline bool set_input_format( InputFormatType );
    inline void show_input_format( );
    inline void set_order(void);
-   void learning_info( ostream&, int, int );
    void calc_perm( double * );
    inline void write_perm(void);
    inline void write_perm_special(ostream&);
--- 242,247 ----
diff -c v0/Makefile v3/Makefile
*** v0/Makefile	Tue Sep  5 11:22:37 2000
--- v3/Makefile	Tue Sep  5 11:41:09 2000
***************
*** 9,14 ****
--- 9,21 ----
  OBJS	= $(SRCS:.cxx=.o)
  EXES	= $(MSRCS:.cxx=)
  
+ #INSTALL_DIR = /usr/local/bin
+ #LIB_INSTALL_DIR = /usr/local/lib
+ INSTALL_DIR = ./SPARC
+ LIB_INSTALL_DIR = ./SPARC
+ #INSTALL_DIR = ./X86
+ #LIB_INSTALL_DIR = ./X86
+ 
  all:
  	-@if [ $(OSTYPE) = linux ] ||\
  	 [ $(OSTYPE) = Linux ] || \
***************
*** 44,48 ****
  	-rm *.tmp
  
  install:
! 	cp $(EXES) /usr/local/bin
! 	cp libTimbl.a /usr/local/lib
--- 51,55 ----
  	-rm *.tmp
  
  install:
! 	cp $(EXES) $(INSTALL_DIR)
! 	cp libTimbl.a $(LIB_INSTALL_DIR)
diff -c v0/Options.h v3/Options.h
*** v0/Options.h	Tue Sep  5 10:51:33 2000
--- v3/Options.h	Wed Sep 20 16:44:45 2000
***************
*** 16,21 ****
--- 16,23 ----
   *	Timbl@kub.nl
   */
  
+ // Bug fixed 23-08-2000 Ko: Freeing the result of verbosity_to_string
+ 
  #ifndef OPTIONS_H
  #define OPTIONS_H
  
***************
*** 108,122 ****
      return result;
    };
    ostream& show_opt( ostream &os ){
      os.width(20);
      os.setf( ios::left, ios::adjustfield );
!     os << Name << " : " << verbosity_to_string(*V);
      return os;
    };
    ostream& show_full( ostream &os ){
      os.width(20);
      os.setf( ios::left, ios::adjustfield );
!     os << Name << " : " << verbosity_to_string(*V);
      return os;
    };
  };
--- 110,128 ----
      return result;
    };
    ostream& show_opt( ostream &os ){
+     const char *tmp = verbosity_to_string(*V);
      os.width(20);
      os.setf( ios::left, ios::adjustfield );
!     os << Name << " : " << tmp;
!     free( const_cast<char *>(tmp) );
      return os;
    };
    ostream& show_full( ostream &os ){
+     const char *tmp = verbosity_to_string(*V);
      os.width(20);
      os.setf( ios::left, ios::adjustfield );
!     os << Name << " : " << tmp;
!     free(  const_cast<char *>(tmp) );
      return os;
    };
  };
Common subdirectories: v0/SPARC and v3/SPARC
diff -c v0/ServerProcs.cxx v3/ServerProcs.cxx
*** v0/ServerProcs.cxx	Tue Sep  5 10:51:34 2000
--- v3/ServerProcs.cxx	Wed Sep 20 16:44:39 2000
***************
*** 19,24 ****
--- 19,25 ----
   *	Timbl@kub.nl
   */
  
+ #ifdef BUILD_SERVER
  #define _REENTRANT
  #include <iostream>
  #include <strstream>
***************
*** 455,457 ****
--- 456,459 ----
    }
  }
  
+ #endif // BUILD_SERVER
diff -c v0/SocketBasics.cxx v3/SocketBasics.cxx
*** v0/SocketBasics.cxx	Tue Sep  5 10:51:34 2000
--- v3/SocketBasics.cxx	Wed Sep 20 16:44:39 2000
***************
*** 17,23 ****
   * or send mail to:
   *	Timbl@kub.nl
   */
! 
  #include <iostream>
  #include <cstdio>
  #include <string>
--- 17,23 ----
   * or send mail to:
   *	Timbl@kub.nl
   */
! #ifdef BUILD_SERVER
  #include <iostream>
  #include <cstdio>
  #include <string>
***************
*** 233,235 ****
--- 233,236 ----
    return  write_line( socknum, line.c_str() );
  }
  
+ #endif // BUILD_SERVER
diff -c v0/Timbl.cxx v3/Timbl.cxx
*** v0/Timbl.cxx	Tue Sep  5 11:22:38 2000
--- v3/Timbl.cxx	Wed Sep 20 16:44:39 2000
***************
*** 47,53 ****
    string TheTestFile, PercFile;
    // Start.
    //
!   cerr << "TiMBL " << Version << "." << Revision
         << " (c) ILK 1998, 1999, 2000.\n" 
         << "Tilburg Memory Based Learner\n"
         << "Induction of Linguistic Knowledge Research Group\n"
--- 47,53 ----
    string TheTestFile, PercFile;
    // Start.
    //
!   cerr << "TiMBL " << Version << "." << Revision << RevComment
         << " (c) ILK 1998, 1999, 2000.\n" 
         << "Tilburg Memory Based Learner\n"
         << "Induction of Linguistic Knowledge Research Group\n"
***************
*** 91,99 ****
        cerr << "nothing to test or train!\n" << endl;
        exit(1);
      }
!     if ( !GlobalOptPars.definitive_options( Run, false ) ||
  	 !Run->GetInstanceBase( GlobalOptPars.TreeInFileName ) )
        exit(1);
      if ( GlobalOptPars.NamesFileName != "" )
        Run->WriteNamesFile( GlobalOptPars.NamesFileName );
      bool more_to_do;
--- 91,102 ----
        cerr << "nothing to test or train!\n" << endl;
        exit(1);
      }
!     if ( !GlobalOptPars.definitive_options( Run, true ) ||
  	 !Run->GetInstanceBase( GlobalOptPars.TreeInFileName ) )
        exit(1);
+     //    Run->LearningInfo( cerr );
+     if ( GlobalOptPars.SaveTree() )
+       Run->WriteInstanceBase( GlobalOptPars.TreeOutFileName );
      if ( GlobalOptPars.NamesFileName != "" )
        Run->WriteNamesFile( GlobalOptPars.NamesFileName );
      bool more_to_do;
***************
*** 132,141 ****
      } while ( more_to_do );
    }
    else {
-     char optline[3 * OPT_LENGTH];
-     sprintf( optline, "TREE_ORDER: %s", 
- 	     order_to_string(GlobalOptPars.Order()) );
-     Run->SetOption( optline );
      if ( !GlobalOptPars.definitive_options( Run, true ) )
        exit(1);
      Run->PrepareExperiment( GlobalOptPars.DataFileName );
--- 135,140 ----
diff -c v0/TimblClient.cxx v3/TimblClient.cxx
*** v0/TimblClient.cxx	Tue Sep  5 10:51:35 2000
--- v3/TimblClient.cxx	Wed Sep 20 16:44:39 2000
***************
*** 26,34 ****
  #include <cctype>
  #include <ctime>
  
  
  #ifdef BUILD_SERVER
- #include "TimblExperiment.h"
  #include "ServerProcs.h"
  #else
  void RunClient( istream& Input, ostream& Output, 
--- 26,34 ----
  #include <cctype>
  #include <ctime>
  
+ #include "TimblExperiment.h"
  
  #ifdef BUILD_SERVER
  #include "ServerProcs.h"
  #else
  void RunClient( istream& Input, ostream& Output, 
diff -c v0/Types.cxx v3/Types.cxx
*** v0/Types.cxx	Tue Sep  5 10:51:35 2000
--- v3/Types.cxx	Wed Sep 20 16:44:39 2000
***************
*** 19,24 ****
--- 19,26 ----
   *	Timbl@kub.nl
   */
  
+ // bug fixed 23-08-2000 Ko: verbosity_to_string now returns a copy of c_str()
+ 
  #include <string>
  #include <cstdlib>
  #include <cstring>
***************
*** 275,281 ****
  	Outline += VerbStrings[i][(full?1:0)];
        }
    }
!   return Outline.c_str();
  }
  
  const char *verbosity_to_string( VerbosityFlags v, bool full )
--- 277,283 ----
  	Outline += VerbStrings[i][(full?1:0)];
        }
    }
!   return strdup( Outline.c_str() );
  }
  
  const char *verbosity_to_string( VerbosityFlags v, bool full )
Common subdirectories: v0/X86 and v3/X86
Only in v0: makefile
Only in v3: makefile.ilk
Only in v3: patch300-301
Only in v3: patch300-302
Only in v3: patch301-302

