-
Notifications
You must be signed in to change notification settings - Fork 0
/
FastaFile.h
165 lines (146 loc) · 5.33 KB
/
FastaFile.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
/*
* FastaFile.h
*
* This is the header file for the FastaFile object. The FastaFile object
* is a utility object designed to read in a Fasta File and keep the
* information for the file in memory.
*
* ************* WARNING ***********************************
* * There is no error handling in place for this object! *
* * This means that if the file does not exist or is *
* * formatted incorrectly, you will get an error and *
* * will not be able to use this object. *
* * *
* * If this code gets moved to a production setting *
* * appropriate error handling should be implemented! *
* *********************************************************
*
* Typical use for the file would be to use the FastaFile(pathName, fileName)
* constructor to create the object. This will automatically open the
* Fasta File specified by the pathName and fileName, and read its contents
* storing them in the firstLine, and dnaSequence attributes.
*
* buildGraphFile(graphFileName, weightFileName) is a convenience method that
* will create a sequence graph file for the dnaSequence. See the method for
* more details on what is created.
*
* Created on: 1-10-13
* Modified: 1-26-13
* Author: tomkolar
*/
#ifndef FASTAFILE_H
#define FASTAFILE_H
#include <string>
#include <vector>
using namespace std;
// FastaFile
// Purpose:
// Utility class that reads a Fasta file and keeps its first line and DNA
// sequence in memory (see the file header for typical use).
// Notes:
// - Per the warning in the file header, no error handling is implemented
// for missing or badly formatted files.
// - Only declarations live here; all member functions are defined in the
// corresponding implementation file.
class FastaFile {
public:
// Constructors
// ==============================================
// Default constructor. Takes no file location; what state it leaves the
// object in is defined by the implementation file.
FastaFile();
// Constructs the object for the Fasta file identified by filePath and
// fileName. Per the file header, this opens the file and reads its
// contents into the firstLine and dnaSequence attributes.
FastaFile(string filePath, string fileName);
// Destructor
// =============================================
// Declared virtual so that a derived object can be destroyed correctly
// through a FastaFile pointer.
virtual ~FastaFile();
// Public Methods
// =============================================
// buildGraphFile(string& graphFileName, string& weightFileName)
// Purpose:
// Build a sequence graph file from this fasta file. The graph defined
// will essentially be a linked list with vertices specified in between
// each nucleotide in the dnaSequence and edges being the nucleotide
// with a weight as specified in the weightFileName.
//
// The file will be created such that all vertices are listed first
// followed by all edges.
//
// Vertex format:
//
// V <sequential number as identifier>
//
// Edge format:
//
// E <nucleotide> <start vertex id> <end vertex id> <weight>
//
// Parameters:
// graphFileName - name of the graph file to be written
// weightFileName - name of the file supplying the edge weights
// Both are taken by non-const reference, so callers must pass named
// string objects (string literals and temporaries will not bind).
// Preconditions:
// Fasta File has been read and dnaSequence has been populated
// Postconditions:
// File named graphFileName will be populated with the sequence graph
// associated with the dnaSequence from the fasta file.
void buildGraphFile(string& graphFileName, string& weightFileName);
// string firstLineResultString()
// Purpose:
// Returns the string value of an XML element representing the first line of
// the Fasta file.
//
// format:
// <result type='first line' file='<<fileName>>' >
// <<firstLine>>
// </result>
// Preconditions:
// Fasta File has been read and firstLine has been populated
string firstLineResultString();
// string baseCountsResultString()
// Purpose:
// Returns the string value of an XML element representing the base counts
// of the dnaSequence.
//
// format:
// <result type='nucleotide histogram' file='<<fileName>>' >
// A=<<baseCountForA>>,C=<<baseCountForC>>,G=<<baseCountForG>>,
// T=<<baseCountForT>>,N=<<countForOtherChars>>
// </result>
// NOTE(review): this comment previously showed "A=" in front of
// <<baseCountForT>>; presumably a typo for "T=" - confirm against the
// implementation.
// Preconditions:
// Fasta File has been read and dnaSequence has been populated
string baseCountsResultString();
// Public Accessors
// =============================================
// NOTE(review): none of the accessors below is a const member function, so
// they cannot be called on a const FastaFile. The two string accessors hand
// back non-const references, so callers are able to modify whatever string
// is returned (presumably the matching private attribute - confirm in the
// implementation file).
const int getSequenceLength(); // length of dnaSequence
string& getFileName();
string& getDnaSequence();
private:
// Attributes
// =============================================
// filePath and fileName identify the Fasta file; the remaining attributes
// are filled in by populate() as described below. reverseComplement has no
// public accessor in this header.
string filePath;
string fileName;
string firstLine;
string dnaSequence;
string reverseComplement;
// Private Methods
// =============================================
// populate()
// Purpose:
// Reads in the Fasta File specified by filePath and fileName and populates
// the object with its contents
// Preconditions:
// fileName and filePath have been set
// Postconditions:
// firstLine - populated with first line from file
// dnaSequence - populated with dnaSequence from file
// reverseComplement - populated with reverse complement of dnaSequence
void populate();
// createReverseComplement()
// Purpose:
// populates the reverseComplement attribute with the reverse complement
// of the dnaSequence
// Preconditions:
// dnaSequence has been set
// Postconditions:
// reverseComplement - populated with reverse complement of dnaSequence
void createReverseComplement();
// char complement(char aChar)
// Purpose: returns the dna complement of aChar
char complement(char aChar);
// countBases(int counts[])
// Purpose:
// populates the counts array with the counts for base occurrences
// in dnaSequence. The array is populated with the following
// scheme:
// counts[0] = counts for A
// counts[1] = counts for C
// counts[2] = counts for G
// counts[3] = counts for T
// counts[4] = counts for other characters encountered
// Preconditions:
// counts must refer to at least 5 ints. The parameter is a plain array
// (it is passed as a pointer), so the size cannot be checked here.
void countBases(int counts[]);
};
#endif /* FASTAFILE_H */