//------------------------------------------------------------------------------
// name: x-validate.ck
// desc: a cross validation example using extracted features
//
// version: need chuck version 1.5.0.0 or higher
// sorting: part of ChAI (ChucK for AI)
//
// USAGE: this is purely for cross validation on extracted features, so ...
// run chuck in non-real-time mode (this actually can be much
// faster than real-time mode, since it doesn't synch to audio):
//
// cross validation from FILE
//       > chuck --silent x-validate.ck:FILE
//
// date: Spring 2023
// authors: Yikai Li
// Ge Wang (https://ccrma.stanford.edu/~ge/)
//------------------------------------------------------------------------------
// input: pre-extracted features file with labels
"" => string FEATURES_FILE;
// if have arguments, override filename
if( me.args() > 0 ) me.arg(0) => FEATURES_FILE;
else
{
    // print usage and exit
    <<< "[usage]: chuck --silent x-validate.ck:FILE", "" >>>;
    me.exit();
}
//------------------------------------------------------------------------------
// expected features file format:
//------------------------------------------------------------------------------
// VALUE VALUE ... VALUE LABEL
// VALUE VALUE ... VALUE LABEL
// ... ... ... ... LABEL
// VALUE VALUE ... VALUE LABEL
//------------------------------------------------------------------------------
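// for example, a single line with three feature dimensions might look like
// (hypothetical values and label name; an actual file will differ):
//
//   0.1254 -0.8832 0.4417 snare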
//------------------------------------------------------------------------------
// load feature data; read important global values like numPoints and numCoeffs
//------------------------------------------------------------------------------
// values to be read from file
0 => int numPoints; // number of points in data
0 => int numCoeffs; // number of dimensions in data
// file read PART 1: read over the file to get numPoints and numCoeffs
loadFile( FEATURES_FILE ) @=> FileIO @ fin;
// check
if( !fin.good() ) me.exit();
// labels of all data points
string inLabels[numPoints];
// label indices of all data points
int inLabelsInt[inLabels.size()];
// feature vectors of data points
float inFeatures[numPoints][numCoeffs];
// keys
string labels[0];
// use as map: labels to numbers
int label2int[0];
//------------------------------------------------------------------------------
// read the data
//------------------------------------------------------------------------------
readData( fin );
//------------------------------------------------------------------------------
// set up our KNN object to use for classification
// (KNN2 is a fancier version of the KNN object)
// -- run KNN2.help(); in a separate program to see its available functions --
//------------------------------------------------------------------------------
KNN2 knn;
// k nearest neighbors
10 => int K;
// results vector
float knnResult[labels.size()];
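// note (a minimal sketch using the same calls that appear below): knn.train()
// fits the model from a feature matrix plus one integer label per row, and
// knn.predict( query, K, knnResult ) fills knnResult with one weight per label,
// which the loop below treats as a per-class score when aggregating accuracy, e.g.:
//
//     knn.train( trainFeatures, trainLabelsInt );
//     knn.predict( testFeatures[0], K, knnResult );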
//------------------------------------------------------------------------------
// cross validation
//------------------------------------------------------------------------------
// number of folds
20 => int numFolds;
// number of folds to use for testing
4 => int numTestFolds;
// number of folds to use for training
numFolds - numTestFolds => int numTrainFolds;
// number of points in each fold
(numPoints / numFolds) $ int => int numPointsPerFold;
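// worked example (hypothetical count): with 1000 data points and 20 folds,
// each fold holds 1000 / 20 = 50 points; each round then tests on
// 4 * 50 = 200 points and trains on the remaining 16 * 50 = 800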
// feature vectors of training data
float trainFeatures[numTrainFolds * numPointsPerFold][numCoeffs];
// labels of training data
int trainLabelsInt[numTrainFolds * numPointsPerFold];
// feature vectors of testing data
float testFeatures[numTestFolds * numPointsPerFold][numCoeffs];
// labels of testing data
int testLabelsInt[numTestFolds * numPointsPerFold];
// normalize the data
normalizeData();
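// (normalizeData(), defined below, applies per-dimension min-max scaling:
//  each value x becomes (x - min) / (max - min), so every dimension lies in
//  [0,1]; this assumes no dimension is constant, otherwise range would be 0)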
// shuffle the data
shuffleData();
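// (shuffleData(), defined below, is an in-place Fisher-Yates shuffle; shuffling
//  first keeps any ordering in the input file -- e.g., all points of one label
//  grouped together -- from biasing which points land in the test folds)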
// cross validation
for( 0 => int i; i < numFolds / numTestFolds; i++ )
{
    // prepare training and testing data
    prepareData( i );
    // train
    knn.train( trainFeatures, trainLabelsInt );
    // test
    0.0 => float accuracy;
    for( 0 => int j; j < testLabelsInt.size(); j++ )
    {
        // predict
        knn.predict( testFeatures[j], K, knnResult );
        // aggregate accuracy
        knnResult[ testLabelsInt[j] ] +=> accuracy;
    }
    // print accuracy
    chout <= "fold " + i + " accuracy: " + ( accuracy / testLabelsInt.size() ) <= IO.newline();
}


//------------------------------------------------------------------------------
// function: normalizeData()
//------------------------------------------------------------------------------
fun void normalizeData()
{
    // for each dimension
    for( 0 => int i; i < numCoeffs; i++ )
    {
        // find min and max
        inFeatures[0][i] => float min;
        inFeatures[0][i] => float max;
        for( 1 => int j; j < numPoints; j++ )
        {
            if( inFeatures[j][i] < min ) inFeatures[j][i] => min;
            if( inFeatures[j][i] > max ) inFeatures[j][i] => max;
        }
        max - min => float range;
        // normalize
        for( 0 => int j; j < numPoints; j++ )
            (inFeatures[j][i] - min) / range => inFeatures[j][i];
    }
}
//------------------------------------------------------------------------------
// function: shuffleData()
//------------------------------------------------------------------------------
fun void shuffleData()
{
// prepare swap data
float swapFeatures[numCoeffs];
int swapLabelInt;
// shuffle the data
    for( numPoints - 1 => int i; i > 0; i-- )
    {
        // random index
        Math.random2( 0, i ) => int j;
        // swap features
        for( 0 => int k; k < numCoeffs; k++ )
        {
            inFeatures[i][k] => swapFeatures[k];
            inFeatures[j][k] => inFeatures[i][k];
            swapFeatures[k] => inFeatures[j][k];
        }
        // swap labels
        inLabelsInt[i] => swapLabelInt;
        inLabelsInt[j] => inLabelsInt[i];
        swapLabelInt => inLabelsInt[j];
    }
}
//------------------------------------------------------------------------------
// function: prepareData( int fold )
//------------------------------------------------------------------------------
fun void prepareData( int fold )
{
// test indices
fold * numTestFolds * numPointsPerFold => int testStart;
testStart + numTestFolds * numPointsPerFold => int testEnd;
// index
0 => int train_i;
0 => int test_i;
// prepare training and testing data
    for( 0 => int i; i < numPoints; i++ )
    {
        // test
        if( i >= testStart && i < testEnd )
        {
            // copy features
            for( 0 => int j; j < numCoeffs; j++ )
                inFeatures[i][j] => testFeatures[test_i][j];
            // copy label
            inLabelsInt[i] => testLabelsInt[test_i];
            // increment
            test_i++;
        }
        // train
        else
        {
            // copy features
            for( 0 => int j; j < numCoeffs; j++ )
                inFeatures[i][j] => trainFeatures[train_i][j];
            // copy label
            inLabelsInt[i] => trainLabelsInt[train_i];
            // increment
            train_i++;
        }
    }
}
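// note: for round i of the cross-validation loop above, prepareData( i ) holds
// out the contiguous block of points [ i * numTestFolds * numPointsPerFold,
// (i+1) * numTestFolds * numPointsPerFold ) for testing and trains on the rest;
// since the data was shuffled beforehand, each held-out block is effectively random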
//------------------------------------------------------------------------------
// function: load data file
//------------------------------------------------------------------------------
fun FileIO loadFile( string filepath )
{
// reset
0 => numPoints;
0 => numCoeffs;
// load data
FileIO fio;
if( !fio.open( filepath, FileIO.READ ) )
{
// error
<<< "cannot open file:", filepath>>>;
// close
fio.close();
// return
return fio;
}
string str;
string line;
// read file int array
while( fio.more() )
{
// read each line
fio.readLine().trim() => str;
// check if empty line
if( str != "" )
{
numPoints++;
str => line;
}
}
// a string tokenizer
StringTokenizer tokenizer;
// set to last non-empty line
tokenizer.set( line );
// -1 (to account for label)
-1 => numCoeffs;
// see how many, including label name
while( tokenizer.more() )
{
tokenizer.next();
numCoeffs++;
}
// check
    if( numPoints == 0 || numCoeffs <= 0 )
    {
        <<< "no data in file:", filepath >>>;
        fio.close();
        return fio;
    }
// print
<<< "# of data points:", numPoints, "dimensions:", numCoeffs>>>;
// done for now
return fio;
}
//------------------------------------------------------------------------------
// function: read the data
//------------------------------------------------------------------------------
fun void readData( FileIO fio )
{
// rewind the file reader
fio.seek( 0 );
// read file int array
string str;
int ci, ri;
while( fio => str )
{
// check for last
if( (ci != 0) && ((ci % numCoeffs) == 0) )
{
// read in label
str => inLabels[ri];
// set in map
1 => label2int[str];
// increment row
ri++;
// reset column
0 => ci;
}
else
{
// store feature value
Std.atof(str) => inFeatures[ri][ci];
// increment column
ci++;
}
}
// get keys from map
label2int.getKeys( labels );
// assign index
for( int i; i < labels.size(); i++ ) { i => label2int[labels[i]]; }
// convert in labels to ints
    for( int i; i < inLabels.size(); i++ )
    {
        // get index as int
        label2int[inLabels[i]] => inLabelsInt[i];
    }
}
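//------------------------------------------------------------------------------
// optional next step (a minimal sketch, commented out so this file's behavior
// is unchanged): after cross validation, one might train on ALL the data and
// classify a new feature vector `query` -- hypothetical here, e.g. extracted
// elsewhere with the same feature set and dimension numCoeffs -- using the
// same calls already used above
//------------------------------------------------------------------------------
// knn.train( inFeatures, inLabelsInt );
// float query[numCoeffs];
// knn.predict( query, K, knnResult );
// // find the label index with the largest weight
// 0 => int best;
// for( 1 => int c; c < knnResult.size(); c++ )
//     if( knnResult[c] > knnResult[best] ) c => best;
// <<< "predicted label:", labels[best] >>>;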