Tuesday, April 3, 2012

calculating mass spec similarity direct in postgres using stored procedures

since I'm currently debugging a rather annoying bug and for this reason need to be able to execute queries like:




CREATE OR REPLACE FUNCTION binbase.calculatesimilarity (in unknown text, in library text) RETURNS float8 AS
$BODY$

DECLARE
    result float8;
    sameIons int[];
    sameSpectraRelativeValuesunknown float8[];
    sameSpectraAbsoluteValuesunknown float8[];

    sameSpectraRelativeValueslibrary float8[];
    sameSpectraAbsoluteValueslibrary float8[];

    unknownSpectra float8[];
    unknownSpectraRel float8[];

    librarySpectra float8[];
    librarySpectraRel float8[];

    unknownSpectraLength int :=0;

    f1 float8 := 0;
    f2 float8 := 0;

    lib float8 := 0;
    unk float8 := 0;

    sqrt1 float8 := 0;
    summ float8 := 0;
    summ4 float8 := 0;
    summ2 float8 := 0;
    summ3 float8 := 0;

    array_len int;
    sameIon int;

BEGIN

    unknownSpectra = convertSpectra(unknown);
    unknownSpectraRel = createRelavieSpectra(unknownSpectra);

    librarySpectra = convertSpectra(library);
    librarySpectraRel = createRelavieSpectra(librarySpectra);

    array_len = 1000;

    sameIon = 0;

    for i in 1 .. array_len
    LOOP
        -- this will contain all the identical ions --
        IF unknownSpectra[i] is not null and librarySpectra[i] is not null
        then
            sameIons[sameIon] = i;
            sameSpectraRelativeValuesunknown[sameIon] = unknownSpectraRel[i];
            sameSpectraAbsoluteValuesunknown[sameIon] = unknownSpectra[i];
            sameSpectraRelativeValueslibrary[sameIon] = librarySpectraRel[i];
            sameSpectraAbsoluteValueslibrary[sameIon] = librarySpectra[i];
            sameIon = sameIon + 1;
        END IF;
       
    END LOOP;


    -- calculate f1 --
    for i in 1 .. sameIon
    LOOP
        -- this will contain all the identical ions --
        IF sameIons[i] is not null 
        then
            sqrt1 = sqrt(sameSpectraRelativeValueslibrary[i] * sameSpectraRelativeValuesunknown[i]);
            summ4 = summ4 + (sqrt1 * sameIons[i]);

            IF i >
            THEN
                unk = sameSpectraRelativeValuesunknown[i]/sameSpectraRelativeValuesunknown[i-1];
                lib = sameSpectraRelativeValueslibrary[i]/sameSpectraRelativeValueslibrary[i-1];

                if unk <= lib
                then
                    summ = summ + (unk/lib);
                else
                    summ = summ + (lib/unk);
                end if;
            END IF;
        END IF;
    END LOOP;

    unknownSpectraLength = 0;

    for i in 1 .. array_len
    LOOP
        IF librarySpectra[i] is not null and librarySpectra[i] > 0
        then
            summ2 = summ2 + (librarySpectraRel[i] * i);
        END IF;
       
        IF unknownSpectra[i] is not null and unknownSpectra[i] > 0
        then
            unknownSpectraLength = unknownSpectraLength + 1;
            summ3 = summ3 + (unknownSpectraRel[i] * i);
        END IF;
    END LOOP;

    f1 = summ4 / sqrt(summ2 * summ3);
    f2 = 1.0/sameIon * summ;

    result = (1000.0/(unknownSpectraLength + sameIon))*((unknownSpectraLength * f1) + (sameIon * f2));

    RETURN result;

EXCEPTION
    WHEN division_by_zero THEN
        RAISE NOTICE 'caught division_by_zero';
        RETURN 0;
END;
$BODY$
LANGUAGE 'plpgsql'




It's definitely not the fastest or most beautiful implementation I could think off. But it's quick enough for now.

It definitely makes the debugging of my algorithm easier and allows me to test new algorithms quicker.

No comments:

Post a Comment