% ID_IN_EX_to_BF
% Converts traditional fingerprint conclusions (identification, inconclusive, exclusion) to Bayes factors
% Uses a beta-binomial model
% 
% Geoffrey Stewart Morrison
% 2022-08-11a
%
% Tested on Matlab version 9.9.0.1538559 (R2020b) Update 3
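%   Requires the Statistics and Machine Learning Toolbox (betapdf) when plot_example_distributions is true;
%   otherwise no toolboxes required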
%
% Makes use of a reformatted version of the data from:
%   Langenburg G., Champod C., Genessay T. (2012). 
%   Informing the judgements of fingerprint analysts using quality metric and statistical assessment tools. 
%   Forensic Science International, 219, 183–198. http://dx.doi.org/10.1016/j.forsciint.2011.12.017

% start from a clean console, workspace, and set of figure windows
clc
clear all
close all

% plotting options
resize_figures = true; % enlarge the figure windows before plotting

plot_example_distributions = true; % also plot prior/posterior densities for one participant
example_participant = 7; % specify row of dataset

% read in the data
filename = 'Langenberg, et al (2021) data - edited 2022-07-31a.csv';
data_raw = readmatrix(filename);

% columns 3 to 14 hold the conclusions for the 12 comparison pairs (one row per participant)
conclusions_raw = data_raw(:,3:14);

% column 1 holds the group each participant belonged to
group_IDs = data_raw(:,1); % 1 = Control, 2 = Expert Minutiae, 3 = LR Tool, 4 = Quality Map, 5 = Expert Decisions, 6 = Quality Map & LR Tool
groups = unique(group_IDs);
num_groups = length(groups);

% truth as to which pairs are same source and which different source
% these values were fixed by the Langenburg et al experiment
truth_II = logical([1 1 0 0 0 1 1 0 1 1 1 0]); % 1 = same source, 0 = different source
% derive the pair counts from truth_II so that they cannot disagree with it
num_s = sum(truth_II);  % number of same-source pairs (7)
num_d = sum(~truth_II); % number of different-source pairs (5)
num_total = num_s + num_d;

% per-participant counts of each conclusion, separately for same-source and different-source pairs
% conclusions: 1 = identification, 2 = exclusion, 3 = inconclusive
% columns of the count matrices follow the conclusion codes: [identification, exclusion, inconclusive]
conclusions_s = conclusions_raw(:,truth_II);
conclusions_d = conclusions_raw(:,~truth_II);
counts_truth_s = [sum(conclusions_s == 1, 2), sum(conclusions_s == 2, 2), sum(conclusions_s == 3, 2)];
counts_truth_d = [sum(conclusions_d == 1, 2), sum(conclusions_d == 2, 2), sum(conclusions_d == 3, 2)];

% uninformative priors
% symmetric beta priors (a = b), described by the author as weighted versions of Jeffreys reference priors:
% each parameter equals the share of same-source (or different-source) pairs in the test set
% numerator (same-source pairs)
a_s = num_s / num_total;
b_s = a_s;
m_s = a_s + b_s; % total prior pseudo-count
% denominator (different-source pairs)
a_d = num_d / num_total;
b_d = a_d;
m_d = a_d + b_d; % total prior pseudo-count

% Bayes factor based on posterior means of beta-binomial models (expected values of thetas)
% each column (conclusion type) is treated as its own "this conclusion vs any other" model
% numerator: posterior total, then posterior a, posterior mean, and posterior b
num_s_posterior = m_s + num_s;
a_s_posterior = a_s + counts_truth_s;
theta_mean_s = a_s_posterior / num_s_posterior;
b_s_posterior = num_s_posterior - a_s_posterior;
% denominator: same steps for the different-source pairs
num_d_posterior = m_d + num_d;
a_d_posterior = a_d + counts_truth_d;
theta_mean_d = a_d_posterior / num_d_posterior;
b_d_posterior = num_d_posterior - a_d_posterior;
% Bayes factor: ratio of the posterior means, also expressed in bits
BF_uninformative_prior = theta_mean_s ./ theta_mean_d;
log2BF_uninformative_prior = log2(BF_uninformative_prior);


% informative priors
% For each practitioner, the prior "a" parameters are the means of the posterior "a" parameters
% (obtained above with the uninformative priors) of the OTHER practitioners in the same group
% (leave one out). The prior totals are num_s_posterior and num_d_posterior.
% All results are stored in the practitioner's own dataset row, so they line up with
% group_IDs, log2BF_uninformative_prior, and example_participant regardless of row order.
size_results = size(BF_uninformative_prior);
BF_informative_prior = NaN(size_results);
a_s_prior_informative = NaN(size_results);
a_d_prior_informative = NaN(size_results);
a_s_posterior_posterior = NaN(size_results);
a_d_posterior_posterior = NaN(size_results);
theta_mean_s_posterior = NaN(size_results);
theta_mean_d_posterior = NaN(size_results);
I_count = 0; % running count of practitioners processed
% posterior totals: the practitioner's own number of pairs plus the prior total
num_s_posterior_posterior = num_s + num_s_posterior; % these values were fixed by the Langenburg et al experiment
num_d_posterior_posterior = num_d + num_d_posterior;

for I_group = 1:num_groups
    II_group = group_IDs == groups(I_group);
    rows_group = find(II_group); % dataset rows of the practitioners in this group

    % numerator prior parameters
    a_s_posterior_temp = a_s_posterior(II_group, :);
    counts_truth_s_temp = counts_truth_s(II_group, :);
    % denominator prior parameters
    a_d_posterior_temp = a_d_posterior(II_group, :);
    counts_truth_d_temp = counts_truth_d(II_group, :);

    % leave one out
    num_practitioners_temp = sum(II_group);
    II_practitioners = false(num_practitioners_temp,1);

    for I_practitioner = 1:num_practitioners_temp
        I_count = I_count+1;
        I_row = rows_group(I_practitioner); % dataset row of the left-out practitioner

        % mask (within the group) selecting only the left-out practitioner
        II_temp = II_practitioners;
        II_temp(I_practitioner) = true;

        % numerator
        % average over the other members of THIS group; the mask has group length, so it must index
        % the group subset, not the whole dataset
        % mean(...,1) keeps one value per conclusion even when only one other member remains
        % (a group with a single member has no others, so its results are NaN)
        a_s_temp = mean(a_s_posterior_temp(~II_temp,:), 1);
        a_s_temp_posterior = counts_truth_s_temp(II_temp,:) + a_s_temp;
        theta_mean_s_temp = a_s_temp_posterior ./ num_s_posterior_posterior;
        % denominator
        a_d_temp = mean(a_d_posterior_temp(~II_temp,:), 1);
        a_d_temp_posterior = counts_truth_d_temp(II_temp,:) + a_d_temp;
        theta_mean_d_temp = a_d_temp_posterior ./ num_d_posterior_posterior;
        % Bayes factor
        BF_informative_prior(I_row,:) = theta_mean_s_temp ./ theta_mean_d_temp;

        % save prior and posterior values
        a_s_prior_informative(I_row,:) = a_s_temp;
        a_d_prior_informative(I_row,:) = a_d_temp;
        a_s_posterior_posterior(I_row,:) = a_s_temp_posterior;
        a_d_posterior_posterior(I_row,:) = a_d_temp_posterior;
        theta_mean_s_posterior(I_row,:) = theta_mean_s_temp;
        theta_mean_d_posterior(I_row,:) = theta_mean_d_temp;
    end
end
log2BF_informative_prior = log2(BF_informative_prior);

% "b" parameters: total minus "a", for the informative priors and for their posteriors
b_s_prior_informative = num_s_posterior - a_s_prior_informative;
b_d_prior_informative = num_d_posterior - a_d_prior_informative;
b_s_posterior_posterior = num_s_posterior_posterior - a_s_posterior_posterior;
b_d_posterior_posterior = num_d_posterior_posterior - a_d_posterior_posterior;


% plot selected results - edit the following lines for desired options
groups_to_plot = 1;%[1 3 5];
plot_titles = {'control', 'likelihood ratio provided', 'other experts'' decisions provided'};
x_labels = {'identification', 'inconclusive', 'exclusion'};
y_label = 'log_{2}(B)';
order_conclusions = [1 3 2]; % column order that matches x_labels

y_range = [-5 5];
axis_x_increase = 1.2; % factor by which each axes is widened

num_groups_to_plot = length(groups_to_plot);
num_practitioners = NaN(num_groups_to_plot,1);

% enlarged window positions, derived from the default position of figure 1
if resize_figures % this may need to be adjusted depending on screen size
    figure(1);
    pos = get(1, 'Position');
    pos_new = [pos(1)-pos(3)*1.5, pos(2)-pos(4), pos(3)*4, pos(4)*2];
    pos_new_2 = [pos(1)-pos(3)*1.5, pos(2)-pos(4), pos(3)*3, pos(4)*2];
end

% the two panels of each figure differ only in the items listed here
panel_data = {log2BF_uninformative_prior, log2BF_informative_prior};
panel_title_suffix = {' (uninformative priors)', ' (informative priors)'};
panel_tag = {'(a)', '(b)'};
num_panels = 2;
marker_colour = [0 0.4471 0.7412];

% one figure per selected group: left panel uninformative priors, right panel informative priors
for I_group = 1:num_groups_to_plot
    fig_group = figure(I_group);
    if resize_figures
        set(fig_group, 'Position', pos_new);
    end

    II_group = group_IDs == groups_to_plot(I_group);
    num_practitioners(I_group) = sum(II_group);
    x = ones(num_practitioners(I_group),1);

    for I_panel = 1:num_panels
        data_temp = panel_data{I_panel}(II_group, :);

        ax_panel = subplot(1,num_panels,I_panel);
        % one swarm per conclusion, placed at x = 1, 2, 3
        for I_conclusion = 1:3
            swarmchart(x*I_conclusion, data_temp(:,order_conclusions(I_conclusion)), 150, marker_colour, 'filled', 'MarkerEdgeColor', 'none', 'MarkerFaceAlpha', 1/3, 'XJitter', 'density', 'XJitterWidth', 1/3);
            hold on
        end
        title([plot_titles{I_group}, panel_title_suffix{I_panel}]);

        % only the left panel carries the y-axis label; the right panel hides its y tick labels
        axes_options = {'XTick', [1 2 3], 'XTickLabel', x_labels};
        if I_panel == 1
            ylabel(y_label);
        else
            axes_options = [axes_options, {'YTickLabel', []}];
        end

        % reference line at log2(B) = 0
        plot([0.5 3.5], [0 0], '-k', 'LineWidth', 1);
        set(gca, axes_options{:}, 'YGrid', 'on', 'FontSize', 16);
        box on
        ylim(y_range);
        ax_panel.Position(3) = ax_panel.Position(3)*axis_x_increase;
        text(0.95, 0.96, panel_tag{I_panel}, 'Units', 'normalized', 'FontSize', 18, 'FontName', 'Arial');
    end
end


% plot example distributions
% one figure per conclusion (in the order given by order_conclusions) for the example participant;
% each figure has four panels:
%   (a) same-source, uninformative prior        (c) same-source, informative prior
%   (b) different-source, uninformative prior   (d) different-source, informative prior
% each panel shows the prior density (dotted), the posterior density (solid),
% the observed proportion (dashed vertical line), and the posterior mean (solid vertical line)
if plot_example_distributions
    y_range_example = [0 6];
    axis_y_increase = 1.17; % factor by which each axes is made taller
    label_pdf = 'probability density';
    label_theta = '\theta';

    xx = 0:0.0025:1; % theta values at which the densities are evaluated

    I_row = example_participant;

    for I_conclusion = 1:3
        I_col = order_conclusions(I_conclusion);

        % observed proportions of this conclusion for the example participant
        proportion_s = counts_truth_s(I_row,I_col) / num_s;
        proportion_d = counts_truth_d(I_row,I_col) / num_d;

        % one row per panel, listed in the order in which the panels are drawn:
        % subplot index, line colour, prior a, prior b, posterior a, posterior b, observed proportion, posterior mean, panel tag
        panels = {
            1, 'b', a_s, b_s, a_s_posterior(I_row,I_col), b_s_posterior(I_row,I_col), proportion_s, theta_mean_s(I_row,I_col), '(a)'
            3, 'r', a_d, b_d, a_d_posterior(I_row,I_col), b_d_posterior(I_row,I_col), proportion_d, theta_mean_d(I_row,I_col), '(b)'
            2, 'b', a_s_prior_informative(I_row,I_col), b_s_prior_informative(I_row,I_col), a_s_posterior_posterior(I_row,I_col), b_s_posterior_posterior(I_row,I_col), proportion_s, theta_mean_s_posterior(I_row,I_col), '(c)'
            4, 'r', a_d_prior_informative(I_row,I_col), b_d_prior_informative(I_row,I_col), a_d_posterior_posterior(I_row,I_col), b_d_posterior_posterior(I_row,I_col), proportion_d, theta_mean_d_posterior(I_row,I_col), '(d)'
            };

        fig_example = figure(10+I_conclusion);
        if resize_figures
            set(fig_example, 'Position', pos_new_2);
        end

        for I_panel = 1:size(panels,1)
            [I_subplot, colour, a_prior, b_prior, a_post, b_post, proportion, theta_mean, tag] = panels{I_panel,:};
            in_left_column = mod(I_subplot,2) == 1;
            in_bottom_row = I_subplot > 2;

            subplot(2,2,I_subplot);
            plot(xx, betapdf(xx, a_prior, b_prior), [':' colour], 'LineWidth', 1);
            hold on
            plot(xx, betapdf(xx, a_post, b_post), ['-' colour], 'LineWidth', 1);
            plot([proportion proportion], y_range_example, ['--' colour], 'LineWidth', 1);
            plot([theta_mean theta_mean], y_range_example, ['-' colour], 'LineWidth', 1);
            ylim(y_range_example);

            % y-axis label only on the left column
            if in_left_column
                ylabel(label_pdf);
            end

            % hide x tick labels on the top row and y tick labels on the right column
            ax_panel = gca;
            axes_options = {};
            if ~in_bottom_row
                axes_options = [axes_options, {'XTickLabel', []}];
            end
            if ~in_left_column
                axes_options = [axes_options, {'YTickLabel', []}];
            end
            set(ax_panel, axes_options{:}, 'XGrid', 'on', 'FontSize', 14);
            ax_panel.Position(4) = ax_panel.Position(4)*axis_y_increase;
            ax_panel.Position(3) = ax_panel.Position(3)*axis_x_increase;

            % x-axis label only on the bottom row (added after the axes font size has been set)
            if in_bottom_row
                xlabel(label_theta, 'FontSize', 24);
            end
            text(0.93, 0.9, tag, 'Units', 'normalized', 'FontSize', 16, 'FontName', 'Arial');
        end

    end
end