% GASplineFit- Fits splines to data using a genetic algorithm % Last modified: Apr-16-2008 % % by Will Dwinnell % % [PP KnotY ApparentError] = ... % GASplineFit(X,Y,KnotX,SplineType,ErrorMeasure,Passes,nPopulation,YLimit) % % PP = fitted model (MATLAB piecewise polynomial) % KnotY = discovered knot ordinates % ApparentError = apparent error of fitted model % % X = independent variable % Y = dependent variable % KnotX = knot abscissae % SplineType = 'spline' or 'pchip' % ErrorMeasure = 'L-1', 'L-2', etc. (see 'SampleError' routine) % Passes = number of optimization generations % nPopulation = count of candidate solutions to employ in % GA population (must be an even number >= 40) % YLimit = Optional: limit on ordinate range for knots: [LowLimit HighLimit] % % Note: Requies 'SampleError' routine. % % Example: % X = sort(abs(randn(1500,1))); % YTrue = cos(X); % Y = YTrue + 0.1 * randn(size(YTrue)); % KnotX = linspace(0,3.5,6)'; % [PP KnotY MSE] = GASplineFit(X,Y,KnotX,'pchip','L-1',100,40) % XX = linspace(min([X; KnotX]),max([X; KnotX]),2500); % Z = ppval(PP,XX); % figure % plot(X,YTrue,'b-',X,Y,'b.',KnotX,KnotY,'rs',XX,Z,'r-') % grid on % zoom on % axis tight function [PP KnotY ApparentError] = ... GASplineFit(X,Y,KnotX,SplineType,ErrorMeasure,Passes,nPopulation,YLimit) % Housekeeping, initialization, parameter creation ----------------------- % Count observations n = size(X,1); % Pre-calculate nPopulationOver2 = nPopulation / 2; % Establish number of knots nKnot = length(KnotX); % Find target variable distribution parameters MinY = min(Y); MaxY = max(Y); % Over-ride distributions, if ordinate limit specified if (nargin > 7) MinY = YLimit(1); MaxY = YLimit(2); end % Calculate allowable ordinate range RangeY = MaxY - MinY; % Allocate storage for solution population S = zeros(nPopulation,nKnot); E = zeros(size(S,1),1); % Initialize solution population ----------------------------------------- % 20 flat solutions, equally spaced S(1:20,:) = repmat(linspace(MinY,MaxY,20)',[1 nKnot]); % least-squares linear regression B1 = [ones(n,1) X] \ Y; S(21,:) = B1(1) + B1(2) * KnotX; % least-squares quadratic regression B2 = [ones(n,1) X X .^ 2] \ Y; S(22,:) = B2(1) + B2(2) * KnotX + B2(3) * KnotX .^ 2; % Low-High, High-Low Ramps S(23,:) = linspace(MinY,MaxY,nKnot); S(24,:) = linspace(MaxY,MinY,nKnot); % Step-Up and Step-Down S(25,:) = [repmat(MinY,[1 round(nKnot/2)]) repmat(MaxY,[1 nKnot - round(nKnot/2)])]; S(26,:) = [repmat(MaxY,[1 round(nKnot/2)]) repmat(MinY,[1 nKnot - round(nKnot/2)])]; % Left Spike, Right Spike S(27,:) = [MaxY repmat(MinY,[1 nKnot-1])]; S(28,:) = [repmat(MinY,[1 nKnot-1]) MaxY]; % Left Dip, Right Dip S(29,:) = [MinY repmat(MaxY,[1 nKnot-1])]; S(30,:) = [repmat(MaxY,[1 nKnot-1]) MinY]; % Fill in the rest with random values rand('twister',325249); S(31:nPopulation,:) = MinY + rand(nPopulation-30,nKnot) * RangeY; % Constrain ordinate range, if specified if (nargin > 7) S = max(min(S,YLimit(2)),YLimit(1)); end % Evaluation for i = 1:nPopulation % Loop over all solutions % Execute model ModelOutput = interp1(KnotX,S(i,:),X,SplineType); % Assess initial candidate E(i) = SampleError(ModelOutput,Y,ErrorMeasure); end % Selection % Sort candidate population and respective performance in descending order of performance [E I] = sort(E); S = S(I,:); % Make indicated number of passes over data ------------------------------ % Main loop for Pass = 1:Passes % Crossover ----- % Generate scrambled ordering of parents R = randperm(nPopulationOver2)'; for i = 1:2:nPopulationOver2-1 % Loop over consecutive pairs of parent solutions % Single-point crossover Cut = ceil((nKnot - 1) * rand); % Child 1 S(i+nPopulationOver2,:) = [S(R(i),1:Cut) S(R(i+1),Cut+1:end)]; % Child 2 S(i+nPopulationOver2+1,:) = [S(R(i+1),1:Cut) S(R(i),Cut+1:end)]; end % Mutation ----- % Additive noise % Yes, this next line is somewhat inefficient... % Generate mutation Mutation = 0.05 * RangeY * randn([nPopulationOver2 nKnot]) .* ... double(rand([nPopulationOver2 nKnot]) < 0.05); % Apply mutation S(nPopulationOver2+1:nPopulation,:) = ... S(nPopulationOver2+1:nPopulation,:) + Mutation; % Constrain mutation, if specified if (nargin > 7) S(nPopulationOver2+1:nPopulation,:) = ... max(min(S(nPopulationOver2+1:nPopulation,:),YLimit(2)),YLimit(1)); end % Evaluation ----- for i = nPopulationOver2+1:nPopulation % Loop over new solutions % Execute model ModelOutput = interp1(KnotX,S(i,:),X,SplineType); % Assess initial candidate E(i) = SampleError(ModelOutput,Y,ErrorMeasure); end % Selection ----- % Sort population in descending order of performance [E I] = sort(E); S = S(I,:); end % End main loop % Assign best candidate solution to output KnotY = S(1,:); ApparentError = E(1); % Construct piecewise polynomial version of solution switch lower(SplineType) case 'spline' PP = spline(KnotX,KnotY); case 'pchip' PP = pchip(KnotX,KnotY); end % EOF