HA_DIGSIG/RCNN_for_traficsigns.m

% 2022-12-29

% script for training/evalutaing faster- RCNN for Traficsign discovery
% with data provided in PicturesResizedLabelsResizedSignsCutted.zip
% based on script from the lecture

% Requirements:
% data has to be provided as PicturesResizedLabelsResizedSignsCutted.zip
% in script location
% (this script unzips data and renames two files, but there is unlabeled
% image-material, that has to be removed by hand after unzipping)

% additional scriptfiles:
% - func_setupData.m
%   unpack data etc.
% - func_groundTruthFromLabelPic.m
%   (generate groundtruthtablle from image-data)
% - augmentData.m
%   (dataaugmentation für RCNN)
% - helperSanitizeBoxes.m
%   (part of augmentation)
% - preprocessData.m
%   (Resize image and bounding boxes to targetSize.
%

% required add-on(s):
% - 'Deep Learning Toolbox Model for ResNet-50 Network'
% - 'image processing toolbox'
% - 'Computer Vision Toolbox '

% recommended add-on(s) - if gpu is apt for the job....
%  - 'Parallel Computing Toolbox'

% adjustable parameters
% - if there is no trained net, it can be trained with this script:
%   set doTraining to true
% - the training can use augmentation or not:
%   set doAugmentation accordingly

close all;
clear;

doTraining = false;
doAugmentation = false;

% first we need the data...
dataDir = 'Picturedata'; % Destination-Folder for provided (img) Data
zippedDataFile = 'PicturesResizedLabelsResizedSignsCutted.zip'; %Data provided by TA
grDataFile = 'signDatasetGroundTruth.mat';
func_setupData(dataDir, zippedDataFile, grDataFile);

%load data
data = load(grDataFile);
traficSignDataset = data.DataSet;


% ----- split the dataset into training, validation, and test sets.
% Select 60% of the data for training, 10% for validation, and the
% rest for testing the trained detector

rng(0)
shuffledIndices = randperm(height(traficSignDataset));
idx = floor(0.6 * height(traficSignDataset));

trainingIdx = 1:idx;
trainingDataTbl = traficSignDataset(shuffledIndices(trainingIdx),:);

validationIdx = idx+1 : idx + 1 + floor(0.1 * length(shuffledIndices) );
validationDataTbl = traficSignDataset(shuffledIndices(validationIdx),:);

testIdx = validationIdx(end)+1 : length(shuffledIndices);
testDataTbl = traficSignDataset(shuffledIndices(testIdx),:);

% ----- use imageDatastore and boxLabelDatastore to create datastores
% for loading the image and label data during training and evaluation.

imdsTrain = imageDatastore(trainingDataTbl{:,'imageFilename'});
bldsTrain = boxLabelDatastore(trainingDataTbl(:,'sign'));

imdsValidation = imageDatastore(validationDataTbl{:,'imageFilename'});
bldsValidation = boxLabelDatastore(validationDataTbl(:,'sign'));

imdsTest = imageDatastore(testDataTbl{:,'imageFilename'});
bldsTest = boxLabelDatastore(testDataTbl(:,'sign'));

% combine image and box label datastores.

trainingData = combine(imdsTrain,bldsTrain); % erzeugt 'CombinedDatastore
validationData = combine(imdsValidation,bldsValidation);
testData = combine(imdsTest,bldsTest);

% display one of the training images and box labels.

data = read(trainingData);
I = data{1};
bbox = data{2};
annotatedImage = insertShape(I,'Rectangle',bbox);
annotatedImage = imresize(annotatedImage,4);  % nur fuer Darstellung
figure
imshow(annotatedImage)

% ----- Create Faster R-CNN Detection Network

inputSize = [224 224 3];

preprocessedTrainingData = transform(trainingData, @(data)preprocessData(data,inputSize));
% Achtung: dieser DS wird nur zur Ermittlung der BoundingBoxes verwendet

% display one of the training images and box labels.
while 1==0  %hasdata(preprocessedTrainingData)
    data = read(preprocessedTrainingData);
    I = data{1};
    bbox = data{2};
    annotatedImage = insertShape(I,'Rectangle',bbox);
    annotatedImage = imresize(annotatedImage,4);  % nur fuer Darstellung
    figure(1)
    imshow(annotatedImage)
    pause(0.100)
end

% Auswahl der anchor boxes
%   Infos dazu: https://de.mathworks.com/help/vision/ug/estimate-anchor-boxes-from-training-data.html
numAnchors = 3;
anchorBoxes = estimateAnchorBoxes(preprocessedTrainingData,numAnchors);

% und das feature CNN
featureExtractionNetwork = resnet50;
featureLayer = 'activation_40_relu';
numClasses = width(traficSignDataset)-1;    % also hier: 1, es sollen nur Verkehrsschilder erkannt werden

lgraph = fasterRCNNLayers(inputSize,numClasses,anchorBoxes,featureExtractionNetwork,featureLayer);

% Netzwerk ansehen
% analyzeNetwork(lgraph)
if doAugmentation

    augmentedTrainingData = transform(trainingData,@augmentData);
    trainingData = transform(augmentedTrainingData,@(data)preprocessData(data,inputSize));
    validationData = transform(validationData,@(data)preprocessData(data,inputSize));

end

options = trainingOptions('sgdm',...
    'MaxEpochs',10,...
    'MiniBatchSize',2,...
    'InitialLearnRate',1e-3,...
    'CheckpointPath',tempdir,...
    'ValidationData',validationData);

netname = "netDetectorResNet50.mat"

if doAugmentation
    netname = "netDetectorResNet50_2.mat"
end


if doTraining
    % Train the Faster R-CNN detector.
    % * Adjust NegativeOverlapRange and PositiveOverlapRange to ensure
    %   that training samples tightly overlap with ground truth.
    [detector, info] = trainFasterRCNNObjectDetector(trainingData,lgraph,options, ...
        'NegativeOverlapRange',[0 0.3], ...
        'PositiveOverlapRange',[0.6 1]);
    save netname detector;
else
    % Load pretrained detector for the example.
    load netname detector;
end

% ----- quick check/test

I = imresize(I,inputSize(1:2));
[bboxes,scores] = detect(detector,I);
% Display the results.

sfigTitle = ""
if height(bboxes) > 0
    I = insertObjectAnnotation(I,'rectangle',bboxes,scores);
    sfigTitle = "Detected";
else
    sfigTitle = "Not Detected";
end


figure;
imshow(I);
annotation('textbox', [0.5, 0.2, 0.1, 0.1], 'String', sfigTitle)

% ----- Testing

testData = transform(testData,@(data)preprocessData(data,inputSize));

% Run the detector on all the test images.

detectionResults = detect(detector,testData,'MinibatchSize',4);

% Evaluate the object detector using the average precision metric.

[ap, recall, precision] = evaluateDetectionPrecision(detectionResults,testData);
% The precision/recall (PR) curve highlights how precise a detector is at varying levels of recall. The ideal precision is 1 at all recall levels. The use of more data can help improve the average precision but might require more training time. Plot the PR curve.

figure
plot(recall,precision)
xlabel('Recall')
ylabel('Precision')
grid on
title(sprintf('Average Precision = %.2f', ap))


% ----- Helper functions

% function data = augmentData(data)
% % Randomly flip images and bounding boxes horizontally.
% tform = randomAffine2d('XReflection',true);
% sz = size(data{1});
% rout = affineOutputView(sz,tform);
% data{1} = imwarp(data{1},tform,'OutputView',rout);
%
% % Sanitize box data, if needed.
% data{2} = helperSanitizeBoxes(data{2}, sz);
%
% % Warp boxes.
% data{2} = bboxwarp(data{2},tform,rout);
% end
%
% function data = preprocessData(data,targetSize)
% % Resize image and bounding boxes to targetSize.
% sz = size(data{1},[1 2]);
% scale = targetSize(1:2)./sz;
% data{1} = imresize(data{1},targetSize(1:2));
%
% % Sanitize box data, if needed.
% data{2} = helperSanitizeBoxes(data{2}, sz);
%
% % Resize boxes.
% data{2} = bboxresize(data{2},scale);
% end