%% FOOD CHAIN MODEL % % Developed by Andy Hill at BAE Systems for a project funded by the UK Food % Standards Agency % There are two stages to the model: 1) develop a simulated food chain % dataset to generate the baseline food chain dataset % 2) Assume various surveiallnce strategies that can access relevant fields % of the simulated dataset, and determine what insights can be generated. %% SIMULATE FOOD CHAIN DATA ARRAY % Preliminaries and parameter estimation/generation rng(5) % Set random number seed ns = 100; % Number of genotypes % Generate phenotypes att = single(betarnd(3,99,[ns 1])); % 2/100 - Swart et al (2016) D60w = single(wblrnd(.4,2,[ns 1])); % Roughly mean of 0.4 going out to 1.2 - Swart et al Zw = single(unifrnd(5,7, [ns 1])); % Z-values between 5 and 7 log(D) per C - Swart et al D60p = single(wblrnd(.4,2,[ns 1])); % Roughly mean of 0.4 going out to 1.2 - Swart et al Zp = single(unifrnd(5,7, [ns 1])); % Z-values between 5 and 7 log(D) per C - Swart et al pm = single(betarnd(4,999998,[ns 1])); % Indivdual probability of infection from one bug v = betarnd(4,8,[ns 1]); %Categorical for genes - split distribution of genotypes into "alleles" att1 = ordinal(att,{'A1','A2','A3','A4','A5'},[],[0,0.04,0.08,0.12,0.16,1]); D60w1 = ordinal(D60w,{'Dw1','Dw2','Dw3','Dw4','Dw5'},[],[0,0.4,0.8,1.2,1.6,2]); Zw1 = ordinal(Zw,{'zw1','zw2'},[],[5,6,7]); D60p1 = ordinal(D60w,{'Dp1','Dp2','Dp3','Dp4','Dp5'},[],[0,0.4,0.8,1.2,1.6,2]); Zp1 = ordinal(Zw,{'zp1','zp2'},[],[5,6,7]); pm1 = ordinal(pm,{'p1','p2','p3','p4','p5'},[],[0,3e-6,5e-6,7e-6,9e-6,2.5e-5]); v1 = ordinal(v,{'v1','v2','v3','v4','v5'},[],[0,0.2,0.4,0.6,0.8,1]); % Generate processing conditions ndays = single(1000); % number of days processing minsday = single(1001); % minutes per day processing t = repmat(1:minsday,[ndays 1]); % Replicate running of plant minute-by-minute for each day of operation mcold = repmat(single(unifrnd(0,0.01, [ndays 1])),[1 minsday]); % gradient of temp decay over course of a day tempdecay=55-mcold.*t; % Linear model to reduce initial temp over time argh = (73*7):(80*7); % something bad happens at this week tempdecay(argh,:) = 55-mcold(argh,:).*t(argh,:); temps = tempdecay+(5*sin(2*pi*0.12*t)); % Add cyclical nature to running temp TT2 = single(unifrnd(5,7,[ndays minsday])); TT2 = repmat(TT2,[1 1 ns]); % Generate burdens on entrance to Stage 1 processing machine - 10 genotypes % per day wbs = zeros([ndays minsday ns],'single'); %Pre-allocate array (quicker) for j = 1:ndays whichbugs = mnrnd(10,repmat(ns/10000,[100 1]),minsday)>0; wbs(j,:,:) = whichbugs; end N0 = wbs.*single(lognrnd(3,3,[ndays minsday ns])); % Random number of bacteria within each of ns unique populations % STAGE 1 PROCESSING %Time spent in stage 1 processing TT1 = single(unifrnd( 1,2, [ndays minsday] )); %2d matrix varies in days and carcasses TT1 = repmat(TT1, [ 1 1 ns]); beta1 = single(betarnd( 19, 100-18+1, [ns 1])); % Trasnfer to machine - 1d varies in bug alpha1s = single(unifrnd(0,2e-5,[ndays minsday])); % on to the pig alpha1 = repmat(alpha1s,[1 1 ns]); aa2 = exp( -alpha1 .* TT1 ); % part of the solution equation B2 = wbs.*single(lognrnd(3,3,[ndays minsday ns])); % Contamination leaked from interior % Gneerate beta1s for all food units beta1s = zeros([ndays minsday ns],'single'); %Pre-allocate array (quicker) for j = 1:ns beta1s(:,:,j) = repmat(beta1(j),[ndays minsday]); % replicate aDR end M1 = beta1s.*( N0 + B2); % washed off on to equip. B2 amount bugs in the feces (similar to L36 generate bugs in feces - RV atm). % Environmental contamination W1 = zeros([ndays minsday ns],'single'); %Pre-allocate array (quicker) for k=1:minsday-1 W1(:,k+1,:) = round(( M1(:,k,:) + W1(:,k,:) ).* aa2(:,k,:)); end % Contamination at end of Stage 1 processing N1 = ( 1./aa2 - 1 ).*W1 + (1-beta1s).*M1./beta1s; N1 = round(N1); % Stage 2 processing % Inactivation coefficients in water and on food unit % In paper, notation is reduced to gamma2 only, as both tau2 and gamma2 % have the same paramterisation taus = zeros([ndays minsday ns],'single'); % Pre-allocate matrix gamma2s = zeros([ndays minsday ns],'single'); % Pre-allocate matrix for i = 1:ns taus(:,:,i) = (1/D60p(i)) * 10.^((temps(:,:)-60)/Zp(i)); % Inactivation on carcass gamma2s(:,:,i) = (1/D60w(i)) * 10.^((temps(:,:)-60)/Zw(i)); % Inactivation in water end % Various components of scalding equation for W1 and N0 alpha2s = single(unifrnd(0,2e-5,[ndays minsday])); % Water -> pig alpha2 = repmat(alpha2s,[1 1 ns]); alpha2(alpha2<0)=0; % Proportion of firmly-attached bacteria beta2s = zeros(ndays,minsday,ns,'single'); for j = 1:ns beta2s(:,:,j) = repmat(att(j),[ndays minsday]); %disassociation parameter end a2 = exp( (alpha2 - 1) .* (gamma2s .* TT2 )); % TT2 - time spent in scalding tank eta = (1 - alpha2) .* gamma2s - taus ; N02 = N0 .* beta2s .* ( 1 - beta2s ); % intial contamination of ns unqiue bacterial populations on each carcass as they enter scalding machine % Environmental contamination during Stage 2 processing W2 = zeros([ndays minsday ns],'single'); %Pre-allocate array (quicker) for k=1:minsday W2(:,k+1,:) = round(( N02(:,k,:)./(1-beta2s(:,k,:)) + W2(:,k,:) ) .* a2(:,k,:)); end % Contamination of food unit at end of Stage 2 processing N2 = exp( -taus .* TT2 ) .* ... N02 ./ beta2s - alpha2 .* W2(:,2:end,:) .* ... ( 1 - exp( eta .* TT2 ) ) ./ eta; N2 = round(N2); %___ % INFECTION MODEL Pinf = zeros([ndays minsday ns],'single'); % Pre-allocate matrix pms = zeros([ndays minsday ns],'single'); for j = 1:ns pms(:,:,j) = repmat(pm(j),[ndays minsday]); % replicate aDR end %N50 = bDR.*(2^(1/aDR) - 1); Pinf(:,:,:) = 1- (1-pms).^N2; % REPORTING % Apply virulence factor to each genotype vs = zeros([ndays minsday ns],'single'); for l = 1:ns vs(:,:,l) = repmat(v(l),[ndays minsday]); % replicate aDR end % Is case reported or not? Case = binornd(1,Pinf.*vs); %% ANALYSIS OF SIMULATED DATA ARRAY % Understand the data - DRAW SCATTERS N1_t_b =reshape(sum(N1,2),[ndays ns]); N1_b = log10(sum(N1_t_b,2)); x_N1 = linspace(min(N1_b),max(N0_b)); x = N1_b; y = sum_cases_b; xval = x_N1; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; subplot(2,3,1) plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); B2_t_b =reshape(sum(B2,2),[ndays ns]); B2_b = log10(sum(B2_t_b,2)); x_B2 = linspace(min(B2_b),max(B2_b)); x = B2_b; y = sum_cases_b; xval = x_B2; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; subplot(2,3,2) plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); TT1_b=mean(TT1(:,:,1),2); mdl_TT1 = fitlm(TT1_b',sum_cases_b'); x_TT1 = linspace(min(TT1_b),max(TT1_b)); x = TT1_b; y = sum_cases_b; xval = x_TT1; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; subplot(2,3,3) plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); TT2_b=mean(TT2(:,:,1),2); x_TT2 = linspace(min(TT2_b),max(TT2_b)); x = TT2_b; y = sum_cases_b; xval = x_TT2; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; subplot(2,3,4) plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); alpha1_b=mean(alpha1(:,:,1),2)*10000; x_alpha1 = linspace(min(alpha1_b),max(alpha1_b)); x = alpha1_b; y = sum_cases_b; xval = x_alpha1; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; subplot(2,3,5) plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); temps_b=mean(temps,2); x_temps = linspace(min(temps_b),max(temps_b)); x = temps_b; y = sum_cases_b; xval = x_temps; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; subplot(2,3,6) plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); % Understand the data - DRAW SCATTERS FOR BUGS sum_cases_bugs=reshape(sum(sum(Case)),[100 1]); figure(1) subplot(2,4,1) x_att = linspace(min(att),max(att)); x = att; y = sum_cases_bugs; xval = x_att; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('\it{\beta_1}'); ylabel('Total cases per genotype') subplot(2,4,2) x_D60w = linspace(min(D60w),max(D60w)); x = D60w; y = sum_cases_bugs; xval = x_D60w; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('D60w'); ylabel('Total cases per genotype') subplot(2,4,3) x_Zw = linspace(min(Zw),max(Zw)); x = Zw; y = sum_cases_bugs; xval = x_Zw; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('Zw'); ylabel('Total cases per genotype') subplot(2,4,4) x_D60p = linspace(min(D60p),max(D60p)); x = D60p; y = sum_cases_bugs; xval = x_D60p; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('D60p'); ylabel('Total cases per genotype') subplot(2,4,5) x_Zp = linspace(min(Zp),max(Zp)); x = Zp; y = sum_cases_bugs; xval = x_Zp; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('Zp'); ylabel('Total cases per genotype') subplot(2,4,6) x_pm = linspace(min(pm*1e5),max(pm*1e5)); x = pm*1e5; y = sum_cases_bugs; xval = x_pm; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('pm'); ylabel('Total cases per genotype') subplot(2,4,7) x_v = linspace(min(v),max(v)); x = v; y = sum_cases_bugs; xval = x_D60p; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('v'); ylabel('Total cases per genotype') subplot(2,4,8) x = pm.*v*1e5; x_pmv = linspace(min(x),max(x)); y = sum_cases_bugs; xval = x_pmv; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('pmv'); ylabel('Total cases per genotype') % Scenario 1 - WGS data at human end only % Track BASELINE incidence over time Case_b = Case; Case_t_b =reshape(sum(Case_b,2),[ndays ns]); sum_cases_b = sum(Case_t_b,2); sum_cases_bugs_b = mean(Case_t_b); w_cases_b = zeros( [ceil(ndays/7) ns] ); % weekly cases by genotype w_sum_cases_b = zeros( [ceil(ndays/7) 1] ); % overall sum of weekly cases q=0; for w = 1:7:988 q=q+1; w_cases_b(q,:) = max(cumsum(Case_t_b(w:(w+6),:))); w_sum_cases_b(q) = max(cumsum(sum(Case_t_b(w:(w+6),:),2))); end % t_cases_w95 = prctile(w_sum_cases_b,95); % Exceedance score t_cases_w99 = prctile(w_sum_cases_b,99); %% Scenario 2 - Micro sampling picked = 143; n = 250; % Samples per week daysampling = randsample(ndays,picked); % days sampled units = zeros([picked,n]); % units sampled for j = 1:picked units(j,:) = randsample(minsday,n); % units sampled end units2=repmat(units,[1 1 ns]); % units sampled samples = zeros(picked,n,ns); tempsam = zeros(picked,n,ns); w_cases_sam = zeros(picked,n,ns); picked2 = zeros(ndays,minsday,ns); % Info for each sample for k = 1:picked samples(k,:,:) = N1(daysampling(k),units(k,:),:); tempsam(k,:,:) = repmat(temps(daysampling(k),units(k,:)),[1 1 ns]); w_cases_sam(k,:,:) = Case(daysampling(k),units(k,:),:); end detected = samples>100; MS = reshape(sum(sum(detected)),[ns 1]); scatter(MS,sum(Case_t_b)); x = MS(1:100,1); x_pmv = linspace(min(x),max(x)); y = sum(Case_t_b)'; xval = x_pmv; [b,bint] = regress(y,[ones(size(x)) x]); yhat = b(1)+b(2)*xval; ylow = bint(1,1)+bint(2,1)*xval; yupp = bint(1,2)+bint(2,2)*xval; plot(x,y,'k*'); hold on; plot(xval,ylow,'r-.'); plot(xval,yupp,'r-.'); plot(xval,yhat,'b','linewidth',2); xlabel('pmv'); ylabel('Total cases per genotype') for i = 1:picked for j = 1:n picked2(daysampling(i),units(i,j),:) = 1; end end Casesres = reshape(Case,[ndays*minsday*ns 1]); N1s = N1.*(N1>100); N1res = reshape(N1,[ndays*minsday*ns 1]); B2res = reshape(B2,[ndays*minsday*ns 1]); % Scenario 3 - epidemiological big data % Set up data array to enter into machine learning algorithm D60ps = zeros([ndays minsday ns]); for j = 1:ns D60ps(:,:,j) = repmat(D60p(j),[ndays minsday]); %disassociation parameter end Zps = zeros([ndays minsday ns]); for j = 1:ns Zps(:,:,j) = repmat(Zp(j),[ndays minsday]); %disassociation parameter end temps1 = repmat(temps,[1 1 ns]); D60pres = reshape(D60ps,[ndays*minsday*ns 1]); Zpres = reshape(Zps,[ndays*minsday*ns 1]); tempres = reshape(temps1,[ndays*minsday*ns 1]); pres = reshape(pms, [ndays*minsday*ns 1]); vres = reshape(vs, [ndays*minsday*ns 1]); pickres = reshape(picked2,[ndays*minsday*ns 1]); N1data = zeros([ndays*minsday*ns 1]); B2data = zeros([ndays*minsday*ns 1]); D60data = zeros([ndays*minsday*ns 1]); zpdata = zeros([ndays*minsday*ns 1]); tempdata = zeros([ndays*minsday*ns 1]); pdata = zeros([ndays*minsday*ns 1]); vdata = zeros([ndays*minsday*ns 1]); sampled = pickres==1; N1data(sampled) = N1res(sampled); D60data(sampled) = D60pres(sampled); zpdata(sampled) = Zpres(sampled); tempdata(sampled) = tempres(sampled); pdata(sampled) = pres(sampled); vdata(sampled) = vres(sampled); D60p1 = discretize(D60data,[0,0.4,0.8,1.2,1.6,2]); Zp1 = discretize(zpdata,[5,6,7]); pm1 = discretize(pdata,[0,3e-6,5e-6,7e-6,9e-6,2.5e-5]); v1 = discretize(vdata,[0,0.2,0.4,0.6,0.8,1]); data = [N1data(sampled) tempdata(sampled) D60p1(sampled) Zp1(sampled) pm1(sampled) v1(sampled)]; Casespos = Casesres(sampled); % Machine learning algorithm lr = [0.1 0.5 1]; maxNumSplits = [1 4 7]; numTrees = 300; Mdl = cell(numel(maxNumSplits),numel(lr)); rng(1); % For reproducibility for k = 1:numel(lr); for j = 1:numel(maxNumSplits); t = templateTree('MaxNumSplits',maxNumSplits(j),'Surrogate','on'); Mdl{j,k} = fitensemble(data,Casespos,'RUSBoost',numTrees,t,... 'CategoricalPredictors',3:6,'PredictorNames',{'N1' 'Temp' 'D60' 'Z' 'pm' 'v'},... 'KFold',5,'LearnRate',lr(k)); end end MdlDeep = fitctree(data,Casespos,'CrossVal','on','MergeLeaves','off',... 'MinParentSize',1,'Surrogate','on'); MdlStump = fitctree(data,cases_q,'MaxNumSplits',1,'CrossVal','on','Surrogate','on'); kflAll = @(x)kfoldLoss(x,'Mode','cumulative'); errorCell = cellfun(kflAll,Mdl,'Uniform',false); error = reshape(cell2mat(errorCell),[numTrees numel(maxNumSplits) numel(lr)]); errorDeep = kfoldLoss(MdlDeep); errorStump = kfoldLoss(MdlStump); mnsPlot = 1:4; %[1 round(numel(maxNumSplits)/2) numel(maxNumSplits)]; figure; for k = 1:3; subplot(2,2,k); plot(squeeze(error(:,mnsPlot(k),:)),'LineWidth',2); axis tight; hold on; h = gca; % plot(h.XLim,[errorDeep errorDeep],'-.b','LineWidth',2); %plot(h.XLim,[errorStump errorStump],'-.r','LineWidth',2); plot(h.XLim,min(min(error(:,mnsPlot(k),:))).*[1 1],'--k'); h.YLim = [0 0.04]; xlabel 'Number of trees'; ylabel 'Cross-validated MSE'; title(sprintf('MaxNumSplits = %0.3g', maxNumSplits(mnsPlot(k)))); hold off; end; hL = legend([cellstr(num2str(lr','Learning Rate = %0.2f'));'Min. MSE']); hL.Position(1) = 0.6; % Decision surfaces % N1 vs temp [xx1, xx2] = meshgrid(0:.01:7,40:.01:60); q=numel(xx1(:)); X = [10.^xx1(:) xx2(:) ones([q 1]) ones([q 1]) repmat(2,[q 1]) repmat(2,[q 1])]; Ynew = predict(Mdl2,X); figure(1) subplot(2,2,1) surface(xx1,xx2,reshape(Ynew,size(xx1))) %gscatter(xx1(:), xx2(:), Ynew); colormap(jet) % N1 vs pm [xx1, xx2] = meshgrid(0:.01:7,1:.01:5); q=numel(xx1(:)); X = [10.^xx1(:) repmat(52.39,[q 1]) ones([q 1]) ones([q 1]) xx2(:) repmat(2,[q 1])]; Ynew = predict(Mdl2,X); subplot(2,2,2) surface(xx1,xx2,reshape(Ynew,size(xx1))) colormap(jet) % Temp vs pm [xx1, xx2] = meshgrid(40:.01:60,1:.01:5); q=numel(xx1(:)); X = [repmat(1.4e4,[q 1]) xx1(:) ones([q 1]) ones([q 1]) xx2(:) repmat(2,[q 1])]; Ynew = predict(Mdl2,X); subplot(2,2,3) surface(xx1,xx2,reshape(Ynew,size(xx1))) colormap(jet) % pm vs v [xx1, xx2] = meshgrid(1:5,1:5); q=numel(xx1(:)); X = [repmat(1.4e4,[q 1]) repmat(52,[q 1]) ones([q 1]) ones([q 1]) xx1(:) xx2(:)]; Ynew = predict(Mdl2,X); subplot(2,2,4) surface(xx1,xx2,reshape(Ynew,size(xx1))) colormap(jet)