I am working on to create recommendation system in C# using FFM (Field-aware Factorization Machines), where my data set have 4 feature column of string type, and Label column if int type. The dataset and code is as following:
public IDataView LoadData(MLContext mlContext)
{
var partnerhubDataPath = Path.Combine("C://ABB Work//Projects//CustomerPortal-Sitecore10//Sitecore.PartnerHub//MLNetApp", "DataSet", "DummySet.csv");
var data = File.ReadAllLines(partnerhubDataPath).Select(x => new PartnerhubPredictionData()
{
ProjectTool = x[1].ToString(),
DiscoverTag = x[2].ToString(),
UserEmailId = x[0].ToString(),
VideoName = x[4].ToString(),
ViIndexTag = x[3].ToString(),
Label = x[6] != 0? true: false
}) ;
IDataView trainingDataView = mlContext.Data.LoadFromEnumerable(data);
//mlContext.Data.LoadFromTextFile<PartnerhubPredictionData>(partnerhubDataPath,
// hasHeader: true, separatorChar: ',');
var debug = trainingDataView.Preview();
return (trainingDataView);
}
public ITransformer BuildAndTrainModel(MLContext mlContext, IDataView trainingDataView)
{
var option = new FieldAwareFactorizationMachineTrainer.Options
{
LabelColumnName = "Label",
FeatureColumnName = "Features",
Shuffle = false
};
IEstimator<ITransformer> estimator = mlContext.Transforms.Categorical.OneHotEncoding("UserEmailIdOneHot", "UserEmailId").
Append(mlContext.Transforms.Categorical.OneHotEncoding("ProjectToolOneHot", "ProjectTool")).
Append(mlContext.Transforms.Categorical.OneHotEncoding("DiscoverTagOneHot", "DiscoverTag")).
Append(mlContext.Transforms.Categorical.OneHotEncoding("ViIndexTagOneHot", "ViIndexTag")).
Append(mlContext.Transforms.Concatenate("Features",new string[]{ "UserEmailIdOneHot", "ProjectToolOneHot", "DiscoverTagOneHot",
"ViIndexTagOneHot" })).
Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(option));
var model = estimator.Fit(trainingDataView);
return model;
}
void EvaluateModel(MLContext mlContext, IDataView trainingData, ITransformer model)
{
Console.WriteLine("=============== Evaluating the model ===============");
var testingData = mlContext.Data.ShuffleRows(trainingData);
testingData = mlContext.Data.TakeRows(testingData, 8);
var scoredData = model.Transform(testingData);
IEstimator<ITransformer> sdcaEstimator = mlContext.Regression.Trainers.Sdca();
var cvResults = mlContext.Regression.CrossValidate(trainingData, sdcaEstimator, labelColumnName: "Label", numberOfFolds: 5);
IEnumerable<double> rSquared = cvResults.OrderByDescending(fold => fold.Metrics.RSquared).
Select(fold => fold.Metrics.RSquared).ToArray();
IEnumerable<double> rootMeanSquareSquared = cvResults.Select(fold => fold.Metrics.RootMeanSquaredError);
Console.WriteLine("RSquared : " + rSquared.ToString());
Console.WriteLine(" Root Mean Squared Error: " + rootMeanSquareSquared.ToString());
}
As I don't have test dataset , I am trying cross validation here, but I am constantly getting error while evaluating the model. Even though the sdcaEstimator
have the Features
Column
I want to know where I the code is going wrong, since I have done everything following this blog.