/// <summary>
/// Aggregate accuracy statistics produced by <c>SentimentAnalyst.CrossValidate</c>.
/// </summary>
public class CrossValidationResult
{
/// <summary>Name of the trainer the statistics belong to (the enum member name as text).</summary>
public string Trainer;
/// <summary>Mean of the per-fold accuracy values.</summary>
public double AccuracyAverage;
/// <summary>Standard deviation of the per-fold accuracy values.</summary>
public double AccuraciesStdDeviation;
/// <summary>Value returned by <c>CalculateConfidenceInterval95</c> for the per-fold accuracy values.</summary>
public double AccuraciesConfidenceInterval95;
}
/// <summary>
/// One labelled row of the data set: a review text together with its boolean sentiment label.
/// </summary>
public class Data
{
    /// <summary>Review text; loaded from file column 0 and exposed as the "review" column.</summary>
    [ColumnName("review"), LoadColumn(0)]
    public string Review { get; set; }

    /// <summary>
    /// Sentiment label; loaded from file column 1 and exposed as the "sentiment" column.
    /// NOTE(review): presumably <c>true</c> means a positive review - confirm against the data set.
    /// </summary>
    [ColumnName("sentiment"), LoadColumn(1)]
    public bool Sentiment { get; set; }
}
/// <summary>
/// Binary-classification trainers that <c>SentimentAnalyst</c> can build a pipeline for.
/// The numeric values are the same ones the compiler assigns implicitly (0 through 4).
/// </summary>
public enum Trainers
{
    LbfgsLogisticRegression = 0,
    SgdCalibrated = 1,
    SdcaLogisticRegression = 2,
    AveragedPerceptron = 3,
    LinearSvm = 4
}
/// <summary>
/// Evaluation metrics for one trained model, as filled in by <c>SentimentAnalyst.Evaluate</c>.
/// </summary>
public class LearningMethodResult
{
/// <summary>Name of the trainer that produced the model (the enum member name as text).</summary>
public string Trainer;
/// <summary>Accuracy metric reported by the binary-classification evaluator on the test set.</summary>
public double Accuracy;
/// <summary>Area under the ROC curve reported by the evaluator.</summary>
public double AreaUnderRocCurve;
/// <summary>F1 score reported by the evaluator.</summary>
public double F1Score;
}
/// <summary>
/// Model output for one input row. Inherits the input columns from <see cref="Data"/>
/// and adds the prediction columns.
/// </summary>
public class Prediction : Data
{
/// <summary>Predicted label, bound to the "PredictedLabel" output column.</summary>
[ColumnName("PredictedLabel")]
public bool PredictionValue { get; set; }
/// <summary>
/// Score value for the row.
/// NOTE(review): presumably bound by name to the model's "Score" output column - confirm.
/// </summary>
public float Score { get; set; }
}
// NOTE(review): this run of declarations duplicates the field list of the SentimentAnalyst
// class that appears later in the file, but sits outside any class body here - presumably
// an extraction artifact; confirm against the original source.

// Path of the data set file read by LoadData.
private readonly string _dataPath;
// Path the trained model is saved to by Train.
private readonly string _modelPath;
// ML.NET context used for loading, training and saving.
private readonly MLContext _mlContext;
// Most recently trained model.
private ITransformer _model;
// Full (unsplit) data view produced by LoadData.
private IDataView _dataViewPrimary;
// NOTE(review): the five bare identifiers below are the member names of the Trainers enum and
// are not valid declarations at this position. The later copy of this field list has
// `private Trainers _targetTrainer;` here instead - presumably that line was garbled; confirm.
LbfgsLogisticRegression,
SgdCalibrated,
SdcaLogisticRegression,
AveragedPerceptron,
LinearSvm
// Training pipeline whose final transformer wraps a Platt-calibrated linear model.
private EstimatorChain<BinaryPredictionTransformer<
CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>>> _trainingPipelinePlat;
// Training pipeline whose final transformer wraps an uncalibrated linear model.
private EstimatorChain<BinaryPredictionTransformer<LinearBinaryModelParameters>> _trainingPipeline;
/// <summary>
/// Creates an analyst bound to a data set file and a model file.
/// </summary>
/// <param name="dataPath">Path of the data set to train on; may be omitted when only predicting.</param>
/// <param name="modelPath">Path the trained model is saved to.</param>
public SentimentAnalyst(string dataPath = null, string modelPath = null)
{
    // Paths are stored as given; they are checked later by the methods that need them.
    _dataPath = dataPath;
    _modelPath = modelPath;

    _mlContext = new MLContext();
}
/// <summary>
/// Loads the comma-separated data set at <c>_dataPath</c> and splits it into a training
/// and a test partition.
/// </summary>
/// <returns>The train/test split; the test fraction passed to the splitter is 0.2.</returns>
/// <exception cref="Exception">Thrown when no data path was supplied.</exception>
private DataOperationsCatalog.TrainTestData LoadData()
{
    if (_dataPath == null)
    {
        throw new Exception("Data Path is undefined");
    }

    // The unsplit view is kept in a field because other members read it after loading.
    _dataViewPrimary = _mlContext.Data.LoadFromTextFile<Data>(
        _dataPath,
        separatorChar: ',',
        hasHeader: true,
        allowQuoting: true);

    // 80% of the rows are used for training, 20% are held out for testing.
    return _mlContext.Data.TrainTestSplit(_dataViewPrimary, 0.2);
}
/// <summary>
/// Trains a model with the selected trainer, evaluates it on the held-out test partition
/// and saves it to the configured model path.
/// </summary>
/// <param name="targetTrainer">Trainer to build the pipeline with.</param>
/// <returns>Metrics measured on the test partition.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown by the <see cref="FileInfo"/> constructor when the model path is <c>null</c>.
/// </exception>
public LearningMethodResult Train(Trainers targetTrainer = Trainers.SdcaLogisticRegression)
{
    // Load data
    var splitDataView = LoadData();

    // Resolve the output location BEFORE fitting: a null or malformed model path now fails
    // here instead of after the whole training run has already been paid for.
    var modelDirectory = new FileInfo(_modelPath).Directory;

    // Build and train
    _targetTrainer = targetTrainer;
    _model = BuildAndTrainModel(splitDataView.TrainSet);

    // Evaluate
    var learningMethodResult = Evaluate(_model, splitDataView.TestSet);

    // Directory is null when the path has no parent directory. CreateDirectory is a no-op
    // for an existing directory, so no separate existence check is needed.
    if (modelDirectory != null)
    {
        Directory.CreateDirectory(modelDirectory.FullName);
    }

    // Save the model together with the schema of the full input data view
    _mlContext.Model.Save(_model, _dataViewPrimary.Schema, _modelPath);
    return learningMethodResult;
}
public class SentimentAnalyst
{
// Path of the data set file read by LoadData.
private readonly string _dataPath;
// Path the model is saved to by Train and loaded from by LoadTrainedModel.
private readonly string _modelPath;
// ML.NET context used for loading, training, evaluating and saving.
private readonly MLContext _mlContext;
// Current model: set by Train, TrainMultiple (last trainer wins) and LoadTrainedModel.
private ITransformer _model;
// Full (unsplit) data view produced by LoadData; also used by CrossValidate and when saving.
private IDataView _dataViewPrimary;
// Trainer selected for the most recent build; also used to label results.
private Trainers _targetTrainer;
// Pipeline for trainers whose model is Platt-calibrated; null when the other pipeline is active.
private EstimatorChain<BinaryPredictionTransformer<
CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>>> _trainingPipelinePlat;
// Pipeline for trainers with an uncalibrated linear model; null when the other pipeline is active.
private EstimatorChain<BinaryPredictionTransformer<LinearBinaryModelParameters>> _trainingPipeline;
/// <summary>
/// Creates an analyst bound to a data set file and a model file.
/// </summary>
/// <param name="dataPath">Path of the data set to train on; may be omitted when only predicting.</param>
/// <param name="modelPath">Path the trained model is saved to and loaded from.</param>
public SentimentAnalyst(string dataPath = null, string modelPath = null)
{
    // Paths are stored as given; they are checked later by the methods that need them.
    _dataPath = dataPath;
    _modelPath = modelPath;

    _mlContext = new MLContext();
}
/// <summary>
/// Loads a previously saved model from the configured model path so it can be used for prediction.
/// </summary>
/// <remarks>
/// When the model file does not exist the method returns without loading anything and
/// the current model is left unchanged.
/// </remarks>
/// <exception cref="Exception">Thrown when no model path was supplied.</exception>
public void LoadTrainedModel()
{
    if (_modelPath == null)
    {
        throw new Exception("Model Path is undefined");
    }

    // A missing file is not treated as an error.
    if (!File.Exists(_modelPath))
    {
        return;
    }

    // The input schema returned through the out parameter is not needed here.
    _model = _mlContext.Model.Load(_modelPath, out _);
}
/// <summary>
/// Trains a model with the selected trainer, evaluates it on the held-out test partition
/// and saves it to the configured model path.
/// </summary>
/// <param name="targetTrainer">Trainer to build the pipeline with.</param>
/// <returns>Metrics measured on the test partition.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown by the <see cref="FileInfo"/> constructor when the model path is <c>null</c>.
/// </exception>
public LearningMethodResult Train(Trainers targetTrainer = Trainers.SdcaLogisticRegression)
{
    // Load data
    var splitDataView = LoadData();

    // Resolve the output location BEFORE fitting: a null or malformed model path now fails
    // here instead of after the whole training run has already been paid for.
    var modelDirectory = new FileInfo(_modelPath).Directory;

    // Build and train
    _targetTrainer = targetTrainer;
    _model = BuildAndTrainModel(splitDataView.TrainSet);

    // Evaluate
    var learningMethodResult = Evaluate(_model, splitDataView.TestSet);

    // Directory is null when the path has no parent directory. CreateDirectory is a no-op
    // for an existing directory, so no separate existence check is needed.
    if (modelDirectory != null)
    {
        Directory.CreateDirectory(modelDirectory.FullName);
    }

    // Save the model together with the schema of the full input data view
    _mlContext.Model.Save(_model, _dataViewPrimary.Schema, _modelPath);
    return learningMethodResult;
}
/// <summary>
/// Trains and evaluates one model per member of <see cref="Trainers"/>, all on the same data split.
/// </summary>
/// <remarks>
/// Nothing is saved to disk. After the call the current model is the one built by the
/// last trainer in the enumeration.
/// </remarks>
/// <returns>One evaluation result per trainer, in enum order.</returns>
public List<LearningMethodResult> TrainMultiple()
{
    // A single split is shared by every trainer.
    var split = LoadData();
    var results = new List<LearningMethodResult>();

    foreach (Trainers candidate in Enum.GetValues(typeof(Trainers)))
    {
        _targetTrainer = candidate;
        Console.WriteLine("Trainer:{0}", _targetTrainer);

        // Build and train, then evaluate on the held-out partition
        _model = BuildAndTrainModel(split.TrainSet);
        results.Add(Evaluate(_model, split.TestSet));
    }

    return results;
}
/// <summary>
/// Cross-validates the most recently built training pipeline on the full data view and
/// summarises the per-fold accuracy.
/// </summary>
/// <param name="folds">Number of cross-validation folds.</param>
/// <returns>Average, standard deviation and confidence-interval value of the fold accuracies.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when no training pipeline has been built yet.
/// </exception>
public CrossValidationResult CrossValidate(int folds = 5)
{
    IReadOnlyList<TrainCatalogBase.CrossValidationResult<BinaryClassificationMetrics>> foldResults;

    // Exactly one of the two pipeline fields is populated by BuildAndTrainModel.
    if (_trainingPipelinePlat != null)
    {
        foldResults = _mlContext.BinaryClassification.CrossValidateNonCalibrated(
            _dataViewPrimary, _trainingPipelinePlat, folds, "sentiment");
    }
    else if (_trainingPipeline != null)
    {
        foldResults = _mlContext.BinaryClassification.CrossValidateNonCalibrated(
            _dataViewPrimary, _trainingPipeline, folds, "sentiment");
    }
    else
    {
        throw new InvalidOperationException(
            "No training pipeline is available; call Train or TrainMultiple before CrossValidate.");
    }

    if (foldResults == null)
    {
        throw new InvalidOperationException("Cross-validation returned no results.");
    }

    // Materialise once: each statistic below walks the sequence, and a deferred query
    // would be re-evaluated on every pass.
    var accuracies = foldResults.Select(fold => fold.Metrics.Accuracy).ToList();

    return new CrossValidationResult
    {
        Trainer = _targetTrainer.ToString(),
        AccuracyAverage = accuracies.Average(),
        AccuraciesStdDeviation = CalculateStandardDeviation(accuracies),
        AccuraciesConfidenceInterval95 = CalculateConfidenceInterval95(accuracies)
    };
}
/// <summary>
/// Predicts the sentiment of a single input row using the current model.
/// </summary>
/// <param name="sentiment">Input row to score.</param>
/// <returns>The prediction produced by the model for <paramref name="sentiment"/>.</returns>
public Prediction Predicate(Data sentiment)
{
    // The prediction engine is disposable; release it as soon as the single prediction is done
    // instead of leaving one undisposed engine behind per call.
    using (var predictionEngine = _mlContext.Model.CreatePredictionEngine<Data, Prediction>(_model))
    {
        return predictionEngine.Predict(sentiment);
    }
}
/// <summary>
/// Predicts the sentiment of several input rows in one batch using the current model.
/// </summary>
/// <param name="sentiments">Input rows to score.</param>
/// <returns>
/// One prediction per input row. Only the predicted label and the score are populated;
/// the inherited input properties are left at their defaults.
/// </returns>
public IEnumerable<Prediction> MultiPredicate(IEnumerable<Data> sentiments)
{
    var inputView = _mlContext.Data.LoadFromEnumerable(sentiments);
    var scoredView = _model.Transform(inputView);

    // Read the scored rows back and copy just the two prediction columns into fresh objects.
    return _mlContext.Data
        .CreateEnumerable<Prediction>(scoredView, false)
        .Select(row => new Prediction
        {
            PredictionValue = row.PredictionValue,
            Score = row.Score
        })
        .ToList();
}
/// <summary>
/// Loads the comma-separated data set at <c>_dataPath</c> and splits it into a training
/// and a test partition.
/// </summary>
/// <returns>The train/test split; the test fraction passed to the splitter is 0.2.</returns>
/// <exception cref="Exception">Thrown when no data path was supplied.</exception>
private DataOperationsCatalog.TrainTestData LoadData()
{
    if (_dataPath == null)
    {
        throw new Exception("Data Path is undefined");
    }

    // The unsplit view is kept in a field because other members read it after loading.
    _dataViewPrimary = _mlContext.Data.LoadFromTextFile<Data>(
        _dataPath,
        separatorChar: ',',
        hasHeader: true,
        allowQuoting: true);

    // 80% of the rows are used for training, 20% are held out for testing.
    return _mlContext.Data.TrainTestSplit(_dataViewPrimary, 0.2);
}
/// <summary>
/// Builds the featurisation + trainer pipeline for the currently selected trainer and fits it.
/// </summary>
/// <remarks>
/// The built pipeline is stored in one of the two pipeline fields and the other field is
/// cleared, so a later cross-validation can tell which one is active.
/// </remarks>
/// <param name="splitTrainSet">Training data set.</param>
/// <returns>The fitted transformer chain.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when the selected trainer is not one of the handled <see cref="Trainers"/> members.
/// </exception>
private ITransformer BuildAndTrainModel(IDataView splitTrainSet)
{
    // Text featurisation -> copy into "Features" -> min/max normalisation (with a cache checkpoint).
    var featurePipeline = _mlContext.Transforms.Text.FeaturizeText("review_tf", "review")
        .Append(_mlContext.Transforms.CopyColumns("Features", "review_tf"))
        .Append(_mlContext.Transforms.NormalizeMinMax("Features", "Features")
            .AppendCacheCheckpoint(_mlContext));

    var catalog = _mlContext.BinaryClassification.Trainers;

    switch (_targetTrainer)
    {
        // Trainers that go into the Platt-calibrated pipeline field
        case Trainers.LbfgsLogisticRegression:
            _trainingPipelinePlat = featurePipeline.Append(catalog.LbfgsLogisticRegression("sentiment"));
            _trainingPipeline = null;
            break;

        case Trainers.SgdCalibrated:
            _trainingPipelinePlat = featurePipeline.Append(catalog.SgdCalibrated("sentiment"));
            _trainingPipeline = null;
            break;

        case Trainers.SdcaLogisticRegression:
            _trainingPipelinePlat = featurePipeline.Append(catalog.SdcaLogisticRegression("sentiment"));
            _trainingPipeline = null;
            break;

        // Trainers that go into the uncalibrated pipeline field
        case Trainers.AveragedPerceptron:
            _trainingPipeline = featurePipeline.Append(catalog.AveragedPerceptron("sentiment"));
            _trainingPipelinePlat = null;
            break;

        case Trainers.LinearSvm:
            _trainingPipeline = featurePipeline.Append(catalog.LinearSvm("sentiment"));
            _trainingPipelinePlat = null;
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(_targetTrainer), _targetTrainer, null);
    }

    // Every handled case leaves exactly one pipeline field non-null; fit that one.
    if (_trainingPipelinePlat != null)
    {
        return _trainingPipelinePlat.Fit(splitTrainSet);
    }

    return _trainingPipeline.Fit(splitTrainSet);
}
/// <summary>
/// Scores the test data set with the given model and collects the evaluation metrics.
/// </summary>
/// <param name="model">Model to evaluate.</param>
/// <param name="splitTestSet">Test data set.</param>
/// <returns>Accuracy, area under the ROC curve and F1 score, labelled with the current trainer name.</returns>
private LearningMethodResult Evaluate(ITransformer model, IDataView splitTestSet)
{
    var scoredTestSet = model.Transform(splitTestSet);
    var metrics = _mlContext.BinaryClassification.EvaluateNonCalibrated(scoredTestSet, "sentiment");

    return new LearningMethodResult
    {
        Trainer = _targetTrainer.ToString(),
        Accuracy = metrics.Accuracy,
        AreaUnderRocCurve = metrics.AreaUnderRocCurve,
        F1Score = metrics.F1Score
    };
}
/// <summary>
/// Calculates the sample standard deviation (divisor <c>n - 1</c>) of the given values.
/// </summary>
/// <param name="values">Values to measure, e.g. the per-fold accuracies of a cross validation.</param>
/// <returns>
/// The sample standard deviation; <see cref="double.NaN"/> when exactly one value is supplied,
/// because the divisor is then zero.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="values"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="values"/> is empty.</exception>
private static double CalculateStandardDeviation(IEnumerable<double> values)
{
    // Snapshot the input so a lazy or one-shot sequence is walked exactly once
    // (mean, sum of squares and count would otherwise each re-enumerate it).
    var samples = values.ToList();

    var mean = samples.Average();
    var sumOfSquaredDeviations = samples.Sum(sample => (sample - mean) * (sample - mean));

    return Math.Sqrt(sumOfSquaredDeviations / (samples.Count - 1));
}
/// <summary>
/// Calculates the 95% confidence-interval half-width used for the cross-validation accuracies:
/// <c>1.96 * standardDeviation / sqrt(n - 1)</c>.
/// </summary>
/// <param name="values">Values to measure, e.g. the per-fold accuracies of a cross validation.</param>
/// <returns>The computed value; <see cref="double.NaN"/> when exactly one value is supplied.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="values"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="values"/> is empty.</exception>
private static double CalculateConfidenceInterval95(IEnumerable<double> values)
{
    // Snapshot the input so it is enumerated once here rather than once per use.
    var samples = values.ToList();

    // NOTE(review): the standard error is conventionally sd / sqrt(n); this divides by
    // sqrt(n - 1). Kept unchanged so reported numbers do not shift - confirm whether intended.
    return 1.96 * CalculateStandardDeviation(samples) / Math.Sqrt(samples.Count - 1);
}
}
Classification
K-Nearest Neighbor is one of the well-known and easy machine learning algorithms ...
K-Nearest Neighbor is one of the well-known and easy machine learning algorithms, well suited to many real-world problems such as product recommendation and social media friend recommendation based on a person's interests or social network.
Classification
The Naive Bayes algorithm enables machines to m...
The Naive Bayes algorithm enables machines to make predictions about events they have no direct knowledge of, using only prior knowledge.
Classification
In the previous article, we finished the first part of our example project,...
In the previous article, we finished the first part of our example project; we now have a SentimentAnalyst class that we can use to train on data and to make predictions by passing real data to it. Today we are going to work on Part 2, the trainer project, which will handle the training process.