<!--
  Usage: put this command file, all of the input files, and the lam_conv
  executable in the same directory.

  To convert automatically:
      ./lam_conv -b -c lamarc-converter-commands.xml

  To explore in the GUI instead:
      ./lam_conv -c lamarc-converter-commands.xml
-->
<lamarc-converter-cmd>

  <!-- Name of the lamarc input file you will produce. If this element is
       not present, the name defaults to infile.xml. -->
  <outfile>lamarc-input.xml</outfile>

  <!-- The text below is placed at the top of the outfile produced. It is a
       useful way to tell different lamarc infiles apart. -->
  <lamarc-header-comment>Example output for 3 chromosomes, some with multiple segments</lamarc-header-comment>

  <!-- ********************************************************* -->
  <!-- The <regions> section specifies both the type of data you have and
       its relative location (and therefore its likelihood of being
       co-inherited). -->
  <regions>

    <!-- Each region specifies the data types and relative locations of
         data that are "close enough" to each other to be modeled as
         co-inherited. As a rule of thumb, data samples belong in the same
         region if:
           (a) they are within 1/1000 of a centimorgan, or
           (b) they are within 1 centimorgan and you plan to estimate
               recombination. -->
    <region>
      <!-- Every region, segment, and population name must be unique. -->
      <name>chrom1</name>

      <!-- Effective population size; defaults to 1. You can probably
           ignore it unless you are working with sex chromosomes or mixing
           mtDNA with chromosomal data. -->
      <effective-popsize>1</effective-popsize>

      <!-- Within a region, a new segment is needed wherever
             (a) the data types are different,
             (b) the mutation rates are different, or
             (c) the samples are separated by unsampled stretches of the
                 genome. -->
      <segments>
        <!-- The region for chrom1 is the plain case: it holds only a
             single stretch of DNA data, the easiest and simplest thing to
             model in lamarc.
             Allowed datatypes are "snp" "dna" "microsat" and "kallele". -->
        <segment datatype="dna">
          <name>chrom1-segment</name>
          <!-- For DNA data, the number of markers is the number of sites
               in the data. -->
          <markers>9</markers>
        </segment>
      </segments>
    </region>

    <region>
      <!-- Region "chrom2" models two sets of SNP data that lie on the same
           chromosome and are separated by other, unknown data. -->
      <name>chrom2</name>
      <effective-popsize>1</effective-popsize>
      <segments>
        <!-- A SNP segment needs additional information before it can be
             modeled correctly. -->
        <segment datatype="snp">
          <name>chrom2-segment1</name>
          <!-- For SNP data, the number of markers is the number of SNP
               sites in the data. -->
          <markers>5</markers>
          <!-- Position of this segment within region chrom2, on a
               region-wide scale. Lamarc needs this to model recombination
               events occurring between segments. -->
          <map-position>1000</map-position>
          <!-- Where you started scanning for SNPs, assuming that "1" in
               segment coordinates is identical to <map-position> in region
               coordinates. -->
          <first-position-scanned>-5</first-position-scanned>
          <!-- Total data length (in nucleotides) scanned, starting at
               <first-position-scanned>. -->
          <length>500</length>
          <!-- Relative locations of the SNP markers, in segment
               coordinates. -->
          <locations> 2 88 125 173 443 </locations>
        </segment>
        <segment datatype="snp">
          <name>chrom2-segment2</name>
          <markers>7</markers>
          <map-position>5000</map-position>
          <first-position-scanned>-5</first-position-scanned>
          <length>250</length>
          <locations> 13 19 35 77 102 112 204</locations>
        </segment>
      </segments>
    </region>

    <region>
      <!-- Here a microsat sits next to a SNP. The SNP was found in a
           100-base region, at the 23rd site after the microsat. -->
      <name>chrom3</name>
      <segments>
        <segment datatype="microsat">
          <name>chrom3-micro</name>
          <markers>1</markers>
          <map-position>500</map-position>
          <first-position-scanned>1</first-position-scanned>
        </segment>
        <segment datatype="snp">
          <name>chrom3-snp</name>
          <markers>1</markers>
          <map-position>501</map-position>
          <length>100</length>
          <locations> 23 </locations>
          <first-position-scanned>1</first-position-scanned>
        </segment>
      </segments>
    </region>

  </regions>

  <!-- ********************************************************* -->
  <!-- If you want to make sure your populations have nice names, this is
       the place to do it. -->
  <populations>
    <population>North</population>
    <population>South</population>
  </populations>

  <!-- ********************************************************* -->
  <!-- You may need to include the <individuals> tag if you:
         (a) have samples which include unresolved haplotypes, or
         (b) are combining both allelic and nucleotide segments in a
             single region, or
         (c) are doing trait mapping. -->
  <individuals>

    <individual>
      <!-- If you specified diploid (or higher ploidy) data in a migrate
           microsat or kallele file, your individual names are probably the
           sequence name labels from that file. -->
      <name>n_ind0</name>

      <!-- If you have dna or snp data from a phylip or migrate file, your
           sample names are probably the sequence name labels from that
           file. -->
      <sample><name>n_ind0_a</name></sample>
      <sample><name>n_ind0_b</name></sample>

      <!-- Use the <phase> tag to indicate that you do not know which
           haploid (or greater ploidy) sample has which marker. The scale
           is the same as for the 'locations' tag, i.e. relative to the
           numbering system of the segment in question. The first valid
           position is the first-position-scanned value, and valid
           positions extend from there for the length of the segment.

           The specification below says that, for this individual, we are
           not sure which of the two haplotypes the first and second data
           sample values should be assigned to. -->
      <phase>
        <segment-name>chrom2-segment2</segment-name>
        <unresolved-markers> 13 19 </unresolved-markers>
      </phase>
    </individual>

    <individual>
      <name>n_ind1</name>
      <sample><name>n_ind1_a</name></sample>
      <sample><name>n_ind1_b</name></sample>
    </individual>

    <individual>
      <name>n_ind2</name>
      <sample><name>n_ind2_a</name></sample>
      <sample><name>n_ind2_b</name></sample>
    </individual>

    <individual>
      <name>s_ind0</name>
      <sample><name>s_ind0_a</name></sample>
      <sample><name>s_ind0_b</name></sample>
    </individual>

    <individual>
      <name>s_ind1</name>
      <sample><name>s_ind1_a</name></sample>
      <sample><name>s_ind1_b</name></sample>
    </individual>

  </individuals>

  <!-- ********************************************************* -->
  <!-- Use the <infiles> tag to tell the converter how your data
       corresponds to the <region> and <segment> elements. -->
  <infiles>

    <!-- All attributes given for the <infile> tag are required. The legal
         values are:
           format             : "migrate", "phylip"
           datatype           : "dna", "snp", "kallele", "microsat"
           sequence-alignment : "sequential" or "interleaved" -->
    <infile format="migrate" datatype="dna" sequence-alignment="sequential">
      <!-- The file name is relative to the directory the converter was
           invoked from. -->
      <name>chrom1.mig</name>

      <!-- The <population-matching> tag tells the converter how to assign
           data samples to populations. Legal types are:
             "single" : assign all data to the single population whose
                        name is enclosed within this tag
             "byList" : a list of population names appears, enclosed in
                        <population-name> tags; populations in the file
                        are assigned to the named populations in order
             "byName" : use the name in the comment of the infile -->
      <population-matching type="byName"/>

      <!-- The <segments-matching> tag tells the converter how to assign
           data samples to segments. Legal types are:
             "single" : assign all data to the single segment whose name
                        is enclosed within this tag
             "byList" : a list of segment names appears, enclosed in
                        <segment-name> tags; segments in the file are
                        assigned to the named segments in order -->
      <segments-matching type="byList">
        <!-- Segments from the input file are assigned in the order given
             here. -->
        <segment-name>chrom1-segment</segment-name>
      </segments-matching>
    </infile>

    <infile format="migrate" datatype="snp" sequence-alignment="sequential">
      <name>chrom2.mig</name>
      <population-matching type="byName"/>
      <segments-matching type="byList">
        <segment-name>chrom2-segment1</segment-name>
        <segment-name>chrom2-segment2</segment-name>
      </segments-matching>
    </infile>

    <!-- Note that while both segments of chrom2 could be specified in a
         single file, the segments of chrom3 are in different files because
         they have different data types. -->
    <infile format="migrate" datatype="snp" sequence-alignment="sequential">
      <name>chrom3snp.mig</name>
      <population-matching type="byName"/>
      <segments-matching type="byList">
        <segment-name>chrom3-snp</segment-name>
      </segments-matching>
    </infile>

    <infile format="migrate" datatype="microsat" sequence-alignment="sequential">
      <name>chrom3microsat.mig</name>
      <population-matching type="byName"/>
      <segments-matching type="byList">
        <segment-name>chrom3-micro</segment-name>
      </segments-matching>
    </infile>

  </infiles>

</lamarc-converter-cmd>