Wednesday, April 25, 2012

osx - set disk as startup disk via terminal

to set a disk as startup disk is quite simple just execute:


sudo bless -mount /Volumes/"name of your startup disk" -setBoot

BinBase - simulate the sifter algorithm directly on the database

currently I'm generating statistics for the BinBase system and so needed to simulate the annotation algorithm in sql.

It was rather simple....



-- 0.05 settings
select
count(*) as "count",
lower('0 < 0.05') as purity, lower('> 500') as signalNoise,
lower('> 800') as similarity

from spectra where purity > 0 and purity < 0.05 and signal_noise > 500 and match > 800

UNION

select
count(*) as "count",
lower('0 < 0.05') as purity, lower('> 50 && < 500') as signalNoise, lower('> 700') as similarity

from spectra where purity > 0 and purity < 0.05 and signal_noise > 50 and signal_noise <= 500 and match > 700


UNION

select
count(*) as "count",
lower('0 < 0.05') as purity, lower('> 5 && < 50') as signalNoise, lower('> 600') as similarity

from spectra where purity > 0 and purity < 0.05 and signal_noise > 5 and signal_noise <= 50 and match > 600

-- 0.3 settings

UNION

select
count(*) as "count",
lower('0 < 0.3') as purity, lower('> 500') as signalNoise,
lower('> 700') as similarity

from spectra where purity > 0 and purity < 0.3 and signal_noise > 500 and match > 700

UNION

select
count(*) as "count",
lower('0 < 0.3') as purity, lower('> 50 && < 500') as signalNoise, lower('> 600') as similarity

from spectra where purity > 0 and purity < 0.3 and signal_noise > 50 and signal_noise <= 500 and match > 600

UNION

select
count(*) as "count",
lower('0 < 0.3') as purity, lower('> 5 && < 50') as signalNoise, lower('> 500') as similarity

from spectra where purity > 0 and purity < 0.03 and signal_noise > 5 and signal_noise <= 50 and match > 500

-- all ither settings

UNION

select
count(*) as "count",
lower('> 0.3') as purity,
lower('> 500') as signalNoise,
lower('> 600') as similarity

from spectra where purity > 0.3 and signal_noise > 500 and match > 600

UNION

select
count(*) as "count",
lower('> 0.3') as purity,
lower('> 50 && < 500') as signalNoise, lower('> 500') as similarity

from spectra where purity > 0.3 and signal_noise > 50 and signal_noise <= 500 and match > 500

UNION

select
count(*) as "count",
lower('> 0.3') as purity,
lower('> 5 && < 50') as signalNoise, lower('> 450') as similarity

from spectra where purity > 0.03 and signal_noise > 5 and signal_noise <= 50 and match > 450

-- total --

union

select
count(*) as "count",
lower('no filter') as purity,
lower('no filter') as signalNoise,
lower('no filter') as similarity

from spectra



this are 3.25 settings btw

Tuesday, April 17, 2012

useful linux tools

getting ip address:

just needed a quick way to get the ip address of a certain network interface in a script. To dynamically update the bind address of a jboss application server.


ifconfig | grep eth2 -A 1 | grep inet | awk '{print $2}' | awk -F ':' '{print $2}'



gzipping all txt files in a directory with a lot of files, when the error:


-bash: /usr/local/fink/bin/gzip: Argument list too long


occurs.


find /Volumes/Thunderbolt\ SSD/data/ -name '*.txt' -exec gzip -vf {} \;

Tuesday, April 3, 2012

calculating mass spec similarity direct in postgres using stored procedures

since I'm currently debugging a rather annoying bug and for this reason need to be able to execute queries like:




CREATE OR REPLACE FUNCTION binbase.calculatesimilarity (in unknown text, in library text) RETURNS float8 AS
$BODY$

DECLARE
    result float8;
    sameIons int[];
    sameSpectraRelativeValuesunknown float8[];
    sameSpectraAbsoluteValuesunknown float8[];

    sameSpectraRelativeValueslibrary float8[];
    sameSpectraAbsoluteValueslibrary float8[];

    unknownSpectra float8[];
    unknownSpectraRel float8[];

    librarySpectra float8[];
    librarySpectraRel float8[];

    unknownSpectraLength int :=0;

    f1 float8 := 0;
    f2 float8 := 0;

    lib float8 := 0;
    unk float8 := 0;

    sqrt1 float8 := 0;
    summ float8 := 0;
    summ4 float8 := 0;
    summ2 float8 := 0;
    summ3 float8 := 0;

    array_len int;
    sameIon int;

BEGIN

    unknownSpectra = convertSpectra(unknown);
    unknownSpectraRel = createRelavieSpectra(unknownSpectra);

    librarySpectra = convertSpectra(library);
    librarySpectraRel = createRelavieSpectra(librarySpectra);

    array_len = 1000;

    sameIon = 0;

    for i in 1 .. array_len
    LOOP
        -- this will contain all the identical ions --
        IF unknownSpectra[i] is not null and librarySpectra[i] is not null
        then
            sameIons[sameIon] = i;
            sameSpectraRelativeValuesunknown[sameIon] = unknownSpectraRel[i];
            sameSpectraAbsoluteValuesunknown[sameIon] = unknownSpectra[i];
            sameSpectraRelativeValueslibrary[sameIon] = librarySpectraRel[i];
            sameSpectraAbsoluteValueslibrary[sameIon] = librarySpectra[i];
            sameIon = sameIon + 1;
        END IF;
       
    END LOOP;


    -- calculate f1 --
    for i in 1 .. sameIon
    LOOP
        -- this will contain all the identical ions --
        IF sameIons[i] is not null 
        then
            sqrt1 = sqrt(sameSpectraRelativeValueslibrary[i] * sameSpectraRelativeValuesunknown[i]);
            summ4 = summ4 + (sqrt1 * sameIons[i]);

            IF i >
            THEN
                unk = sameSpectraRelativeValuesunknown[i]/sameSpectraRelativeValuesunknown[i-1];
                lib = sameSpectraRelativeValueslibrary[i]/sameSpectraRelativeValueslibrary[i-1];

                if unk <= lib
                then
                    summ = summ + (unk/lib);
                else
                    summ = summ + (lib/unk);
                end if;
            END IF;
        END IF;
    END LOOP;

    unknownSpectraLength = 0;

    for i in 1 .. array_len
    LOOP
        IF librarySpectra[i] is not null and librarySpectra[i] > 0
        then
            summ2 = summ2 + (librarySpectraRel[i] * i);
        END IF;
       
        IF unknownSpectra[i] is not null and unknownSpectra[i] > 0
        then
            unknownSpectraLength = unknownSpectraLength + 1;
            summ3 = summ3 + (unknownSpectraRel[i] * i);
        END IF;
    END LOOP;

    f1 = summ4 / sqrt(summ2 * summ3);
    f2 = 1.0/sameIon * summ;

    result = (1000.0/(unknownSpectraLength + sameIon))*((unknownSpectraLength * f1) + (sameIon * f2));

    RETURN result;

EXCEPTION
    WHEN division_by_zero THEN
        RAISE NOTICE 'caught division_by_zero';
        RETURN 0;
END;
$BODY$
LANGUAGE 'plpgsql'




It's definitely not the fastest or most beautiful implementation I could think off. But it's quick enough for now.

It definitely makes the debugging of my algorithm easier and allows me to test new algorithms quicker.

converting an absolute massspec to a relative massspec using postgres stored functions

the next step to a working similarity calculation is to convert our absolute massspec to a relative spectra. Which is rather simple using stored procedures



-- calculates a relative spectra
create or replace function createRelavieSpectra(spectra float8[]) returns float8[] AS $$

DECLARE
result float8[1000];
array_len int;
maxValue float8 := 0;

BEGIN

array_len = array_upper(spectra,1);

for i in 1 .. array_len
LOOP
IF spectra[i] is not null and spectra[i] > maxValue
THEN
maxValue := spectra[i];
END IF;
END LOOP;

for i in 1 .. array_len
LOOP
if spectra[i] is not null
then
result[i] := spectra[i]/maxValue * 100;
end if;
END LOOP;

RETURN result;

END;
$$ LANGUAGE plpgsql;

converting massspecs to an array using postgres stored functions

currently I'm in the progress of writing a couple of stored procedures to query our BinBase database directly by spectral similarity and so the first step was to transform a standard leco spectra string to a standard double array, which represents a spectra.



-- converts a spectra string to a float array for further use --
create or replace function convertSpectra(spectra text) returns float8[] AS $$

DECLARE
result float8[1000];
spectraSplit text[];
ionSplit text[];
array_len int;

ion int;
value float8;

BEGIN

spectraSplit = regexp_split_to_array(spectra,' ');
array_len = array_upper(spectraSplit,1);

for i in 1 .. array_len
LOOP
ionSplit = regexp_split_to_array(spectraSplit[i],':');

ion := ionSplit[1];
value := ionSplit[2];


result[ion] = value;

END LOOP;

RETURN result;

END;
$$ LANGUAGE plpgsql;