<!--
    Place this command file and all input files in the same directory
    as the lam_conv executable, then run it like this to convert
    automatically

        ./lam_conv -b -c lamarc-converter-commands.xml

    Or like this to explore in the GUI

        ./lam_conv -c lamarc-converter-commands.xml
-->

<lamarc-converter-cmd>

    <!--
         Specify here the name of the lamarc input file that will be
         produced. If not present, it defaults to infile.xml
    -->
    <outfile>lamarc-input.xml</outfile>

    <!--
        The comment below will be at the top of the outfile produced.
        This is a useful way to distinguish different lamarc infiles
    -->
    <lamarc-header-comment>Example output for 3 chromosomes, some with multiple segments</lamarc-header-comment>

    <!-- ********************************************************* -->
    <!--
        The <regions> section is where you specify both the type of
        data you have and its relative location (and therefore its
        likelihood of being co-inherited).
    -->
    <regions>

        <!--
            Each region contains a specification of data types and
            relative locations of data which are "close enough" to
            each other to be modeled as co-inherited.

            As a rule of thumb, data samples should be in the same
            region if:
                (a) they are within 1/1000 of a centimorgan, or
                (b) they are within 1 centimorgan and you plan
                    to estimate recombination.
        -->
        <region>

            <!--
                All region, segment, and population names must be unique.
            -->
            <name>chrom1</name>

            <!--
                 The effective population size defaults to 1. You can
                 probably ignore it unless you're working with
                 sex chromosomes or mixing mtDNA with chromosomal data.
            -->
            <effective-popsize>1</effective-popsize>

            <!--
                Within a region, different segments will occur where
                    (a) data types are different,
                    (b) mutation rates are different, or
                    (c) the samples are separated by unsampled stretches
                        of the genome.
            -->
            <segments>
                <!--
                    The region for chrom1 is not terribly interesting.
                    It contains only a single stretch of DNA data,
                    the easiest and simplest to model in lamarc.

                    Allowed datatypes are "snp", "dna", "microsat",
                    and "kallele".
                -->
                <segment datatype="dna">
                    <name>chrom1-segment</name>

                    <!--
                        For DNA data, the number of markers is the number
                        of sites in the data.
                    -->
                    <markers>9</markers>
                </segment>
            </segments>
        </region>

        <region>
            <!--
                Region "chrom2" models two sets of SNP data on the
                same chromosome, separated by other unknown data.
            -->

            <name>chrom2</name>
            <effective-popsize>1</effective-popsize>

            <segments>

                <!--
                    A SNP segment requires that we provide more information
                    in order to model it correctly.
                -->
                <segment datatype="snp">

                    <name>chrom2-segment1</name>

                    <!--
                        For SNP data, the number of markers is the number
                        of SNP sites in the data.
                    -->
                    <markers>5</markers>

                    <!--
                        The position of this segment within region chrom2,
                        on a region-wide scale. Lamarc needs this
                        information to model recombination events occurring
                        between segments.
                    -->
                    <map-position>1000</map-position>

                    <!--
                        The position, in segment coordinates, at which you
                        started scanning for SNPs. Position "1" in segment
                        coordinates is taken to be identical to
                        <map-position> in region coordinates.
                    -->
                    <first-position-scanned>-5</first-position-scanned>

                    <!--
                        Total data length (in nucleotides) scanned, starting
                        at <first-position-scanned>.
                    -->
                    <length>500</length>

                    <!--
                        Locations of the SNP markers, in segment
                        coordinates. Here there is one location for each
                        of the 5 markers.
                    -->
                    <locations> 2 88 125 173 443 </locations>

                </segment>

                <!--
                    Second SNP segment of chrom2, described with the same
                    elements as chrom2-segment1 above. Its <map-position>
                    places it further along the region-wide scale.
                -->
                <segment datatype="snp">
                    <name>chrom2-segment2</name>
                    <markers>7</markers>
                    <map-position>5000</map-position>
                    <first-position-scanned>-5</first-position-scanned>
                    <length>250</length>
                    <locations> 13 19 35 77 102 112 204</locations>
                </segment>
            </segments>
        </region>

        <region>
            <!--
                Here we have a microsat next to a SNP.  The SNP was found
                at the 23rd site of a 100-base stretch scanned just after
                the microsat.

                No <effective-popsize> is given for this region, so the
                default of 1 applies.
            -->

            <name>chrom3</name>
            <segments>
                <!-- The single microsatellite marker. -->
                <segment datatype="microsat">
                    <name>chrom3-micro</name>
                    <markers>1</markers>
                    <map-position>500</map-position>
                    <first-position-scanned>1</first-position-scanned>
                </segment>
                <!--
                    The single SNP, at segment position 23 of the 100
                    nucleotides scanned.
                -->
                <segment datatype="snp">
                    <name>chrom3-snp</name>
                    <markers>1</markers>
                    <map-position>501</map-position>
                    <length>100</length>
                    <locations> 23 </locations>
                    <first-position-scanned>1</first-position-scanned>
                </segment>
            </segments>
        </region>
    </regions>

    <!-- ********************************************************* -->
    <!--
        If you want to make sure your populations have nice names,
        here is the place to do it.
    -->
    <populations>
        <!--
            One <population> element per population name. As with region
            and segment names, population names must be unique.
        -->
        <population>North</population>
        <population>South</population>
    </populations>

    <!-- ********************************************************* -->
    <!--
        You may need to include the <individuals> tag if you:
            (a) have samples which include unresolved haplotypes, or
            (b) are combining both allelic and nucleotide
                segments in a single region, or
            (c) are doing trait mapping
    -->

    <individuals>
        <individual>
            <!--
                If you have specified diploid (or higher ploidy) data
                in a migrate microsat or kallele file, your individual
                names are probably the sequence name labels from that file.
            -->
            <name>n_ind0</name>

            <!--
                If you have dna or snp data from a phylip or migrate
                file, your sample names are probably the sequence
                name labels from that file.
            -->
            <sample><name>n_ind0_a</name></sample>

            <sample><name>n_ind0_b</name></sample>

            <!--
                Use the <phase> tag to indicate that you don't know
                which haploid (or higher ploidy) sample has which
                marker. The scale here is the same as for the <locations>
                tag, i.e. relative to the numbering system of the
                segment in question.  The first valid position is the
                segment's <first-position-scanned> value, and positions
                may exceed it by up to the length of the segment.

                The specification below indicates that, for this
                individual, we're not sure which of the two haplotypes
                the data values at positions 13 and 19 (the first and
                second markers of chrom2-segment2) should be assigned to.
            -->
            <phase>
                <segment-name>chrom2-segment2</segment-name>
                <unresolved-markers> 13 19 </unresolved-markers>
            </phase>

        </individual>
        <!--
            The remaining individuals each list two samples and have
            no <phase> entry.
        -->
        <individual>
            <name>n_ind1</name>
            <sample><name>n_ind1_a</name></sample>
            <sample><name>n_ind1_b</name></sample>
        </individual>
        <individual>
            <name>n_ind2</name>
            <sample><name>n_ind2_a</name></sample>
            <sample><name>n_ind2_b</name></sample>
        </individual>
        <individual>
            <name>s_ind0</name>
            <sample><name>s_ind0_a</name></sample>
            <sample><name>s_ind0_b</name></sample>
        </individual>
        <individual>
            <name>s_ind1</name>
            <sample><name>s_ind1_a</name></sample>
            <sample><name>s_ind1_b</name></sample>
        </individual>
    </individuals>

    <!-- ********************************************************* -->
    <!--
        Use the <infiles> tag to tell the converter how your data
        corresponds to the <region> and <segment> elements
    -->
    <infiles>

        <!--
            All attributes given for the <infile> tag are required.
            The legal values are given below.

            format              : "migrate" or "phylip"
            datatype            : "dna", "snp", "kallele", or "microsat"
            sequence-alignment  : "sequential" or "interleaved"
        -->
        <infile format="migrate" datatype="dna" sequence-alignment="sequential">

            <!--
                The file name is relative to the directory the converter
                was invoked from.
            -->
            <name>chrom1.mig</name>

            <!--
                The <population-matching> tag tells the converter how
                to assign data samples to populations.

                Legal types are:
                    "single"    : assign all data to the single population
                                  whose name is enclosed within this tag
                    "byList"    : a list of population names appears, enclosed
                                  in <population-name> tags. Assign populations
                                  in the file to the named populations in order
                    "byName"    : use the name in the comment of the infile
            -->
            <population-matching type="byName"/>

            <!--
                The <segments-matching> tag tells the converter how
                to assign data samples to segments.

                Legal types are:
                    "single"    : assign all data to the single segment
                                  whose name is enclosed within this tag
                    "byList"    : a list of segment names appears, enclosed
                                  in <segment-name> tags. Assign segments
                                  in the file to the named segments in order
            -->
            <segments-matching type="byList">
                <!-- Segments from the input file are assigned in the order given here. -->
                <segment-name>chrom1-segment</segment-name>
            </segments-matching>

        </infile>

        <!--
            SNP data for region chrom2. The segments found in chrom2.mig
            are assigned, in order, to chrom2-segment1 and then
            chrom2-segment2.
        -->
        <infile format="migrate" datatype="snp" sequence-alignment="sequential">
            <name>chrom2.mig</name>
            <population-matching type="byName"/>
            <segments-matching type="byList">
                <segment-name>chrom2-segment1</segment-name>
                <segment-name>chrom2-segment2</segment-name>
            </segments-matching>
        </infile>

        <!--
            Note that while both segments of chrom2 could be specified
            in a single file, the segments of chrom3 are in different
            files since they have different data types.
        -->
        <infile format="migrate" datatype="snp" sequence-alignment="sequential">
            <name>chrom3snp.mig</name>
            <population-matching type="byName"/>
            <segments-matching type="byList">
                <segment-name>chrom3-snp</segment-name>
            </segments-matching>
        </infile>

        <!-- Microsatellite data for the chrom3-micro segment. -->
        <infile format="migrate" datatype="microsat" sequence-alignment="sequential">
            <name>chrom3microsat.mig</name>
            <population-matching type="byName"/>
            <segments-matching type="byList">
                <segment-name>chrom3-micro</segment-name>
            </segments-matching>
        </infile>
    </infiles>

</lamarc-converter-cmd>