-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_scienceA.aux
198 lines (198 loc) · 20.1 KB
/
main_scienceA.aux
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{cdc}
\citation{pmid24529515}
\citation{howsmon2017classification,li2018high,hicks2018validation,smith2020metabolomics}
\citation{volkmar2014practice,hyman2020identification}
\citation{kalb2012determinants}
\citation{bisgaier2011access}
\citation{fenikile2015barriers}
\citation{fenikile2015barriers}
\citation{gordon2016whittling}
\citation{pmid31562252}
\citation{robins2014validation,hyman2020identification}
\citation{gordon2016whittling}
\citation{cdc}
\citation{pmid30733689,pmid22511918}
\providecommand \oddpage@label [2]{}
\citation{hyman2020identification}
\citation{pmid31562252}
\citation{pmid31562252}
\citation{hyde2019applications,abbas2020multi,duda2016clinical,duda2014testing,fusaro2014potential,wall2012use,wall2012use2}
\citation{smith2020metabolomics,howsmon2017classification}
\citation{hicks2018validation}
\citation{doshi2014comorbidity,bishop2018using}
\citation{lingren2016electronic}
\citation{hansen2017truven}
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces \color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont Patient Counts In De-identified Data \& The Fraction of Datasets Excluded By Our Exclusion Criteria$^\star $ }}{2}{table.1}}
\newlabel{tab2}{{I}{2}{\color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont Patient Counts In De-identified Data \& The Fraction of Datasets Excluded By Our Exclusion Criteria$^\star $}{table.1}{}}
\citation{pmid29701730}
\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces \color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont Engineered Features (Total Count: 165) }}{3}{table.2}}
\newlabel{EXT-tab1}{{II}{3}{\color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont Engineered Features (Total Count: 165)}{table.2}{}}
\@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces \color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont Standalone PPV Achieved at 100, 112 and 150 Weeks For Each Dataset and Gender {\bf (M-CHAT/F: sensitivity=$38.8\%$, specificity=$95\%$, PPV=$14.6\%$ between 16 and 26 months ($\approx $112 weeks))} }}{3}{table.3}}
\newlabel{EXT-tabssp}{{III}{3}{\color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont Standalone PPV Achieved at 100, 112 and 150 Weeks For Each Dataset and Gender {\bf (M-CHAT/F: sensitivity=$38.8\%$, specificity=$95\%$, PPV=$14.6\%$ between 16 and 26 months ($\approx $112 weeks))}}{table.3}{}}
\citation{CR08}
\citation{CL12g,Chattopadhyay20140826}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces \color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont \textbf {Standalone Predictive Performance of ACoR\xspace .} Panel A shows the ROC curves for males and females (Truven data shown, UCM is similar, see Fig.\nobreakspace {}\ref {fig2}a). Panel B shows the feature importance inferred by our prediction pipeline. The detailed description of the features is given in Table\nobreakspace {}\ref {EXT-tab1}. The most import feature is related to immunologic disorders, and we note that in addition to features related to individual disease categories, we also have the mean control likelihood (rank 3), which may be interpreted as the average likelihood of the diagnostic patterns corresponding to the control category as opposed to the positive\xspace category. Panels C and D show the spatial variation in the achieved predictive performance at 150 weeks, measured by AUC, for males and females, respectively. Gray areas lack data on either positive or negative cases. These county-specific AUC plots show that the performance of the algorithm has relatively weak geospatial dependence, which is important in the light of the current uneven distribution of diagnostic resources. Importantly, not all counties have nonzero number of ASD patients; high performance in those counties reflects a small number of false positives with zero false negatives. }}{4}{figure.1}}
\newlabel{fig1}{{1}{4}{\color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont \textbf {Standalone Predictive Performance of \acor .} Panel A shows the ROC curves for males and females (Truven data shown, UCM is similar, see Fig.~\ref {fig2}a). Panel B shows the feature importance inferred by our prediction pipeline. The detailed description of the features is given in Table~\ref {EXT-tab1}. The most import feature is related to immunologic disorders, and we note that in addition to features related to individual disease categories, we also have the mean control likelihood (rank 3), which may be interpreted as the average likelihood of the diagnostic patterns corresponding to the control category as opposed to the \treatment category. Panels C and D show the spatial variation in the achieved predictive performance at 150 weeks, measured by AUC, for males and females, respectively. Gray areas lack data on either positive or negative cases. These county-specific AUC plots show that the performance of the algorithm has relatively weak geospatial dependence, which is important in the light of the current uneven distribution of diagnostic resources. Importantly, not all counties have nonzero number of ASD patients; high performance in those counties reflects a small number of false positives with zero false negatives}{figure.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces \color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont \textbf {More Details on Standalone Predictive Performance of ACoR\xspace and Variation of Inferred Risk.} Panel A illustrates AUC achieved as a function of patient age, for the Truven and UCM datasets. The shaded area outlines the 2 - 2.5 years of age, and shows that we achieve $>80\%$ AUC for either sex from shortly after 2 years. Panel B illustrates how inferred models differ between the control vs. the positive\xspace cohorts. Panel C illustrates how the average risk changes with time for the control and the positive cohorts. Note that the risk progressions are somewhat monotonic, and the computed confidence bounds suggest that the odds of a child with low risk upto 100 or 150 weeks of age abruptly shifting to a high risk trajectory is low. Panel D shows the distribution of the prediction horizon: the time to a clinical diagnosis after inferred relative risk crosses $90\%$. Panel E shows that for each new disease code for a low-risk child, ASD risk increases by approximately $2\%$ for either sex. Panel F illustrates the risk progression of a specific, ultimately autistic male child in the Truven database. Abbreviations in the legend: ill defn. (Symptoms, Signs, And Ill-Defined Conditions), musc. skltl. (Diseases Of The Musculoskeletal System And Connective Tissue), cond. orig. in perintl. (Certain Conditions Originating In The Perinatal Period), immun. (Endocrine, Nutritional And Metabolic Diseases, And Immunity Disorders), nerv. \& sensory (Diseases Of The Nervous System And Sense Organs), respir. (Respiratory Disorders), and digest. (Digestive Disorders). On average, models get less complex, implying the exposures get more statistically independent. }}{5}{figure.2}}
\newlabel{fig2}{{2}{5}{\color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont \textbf {More Details on Standalone Predictive Performance of \acor and Variation of Inferred Risk.} Panel A illustrates AUC achieved as a function of patient age, for the Truven and UCM datasets. The shaded area outlines the 2 - 2.5 years of age, and shows that we achieve $>80\%$ AUC for either sex from shortly after 2 years. Panel B illustrates how inferred models differ between the control vs. the \treatment cohorts. Panel C illustrates how the average risk changes with time for the control and the positive cohorts. Note that the risk progressions are somewhat monotonic, and the computed confidence bounds suggest that the odds of a child with low risk upto 100 or 150 weeks of age abruptly shifting to a high risk trajectory is low. Panel D shows the distribution of the prediction horizon: the time to a clinical diagnosis after inferred relative risk crosses $90\%$. Panel E shows that for each new disease code for a low-risk child, ASD risk increases by approximately $2\%$ for either sex. Panel F illustrates the risk progression of a specific, ultimately autistic male child in the Truven database. Abbreviations in the legend: ill defn. (Symptoms, Signs, And Ill-Defined Conditions), musc. skltl. (Diseases Of The Musculoskeletal System And Connective Tissue), cond. orig. in perintl. (Certain Conditions Originating In The Perinatal Period), immun. (Endocrine, Nutritional And Metabolic Diseases, And Immunity Disorders), nerv. \& sensory (Diseases Of The Nervous System And Sense Organs), respir. (Respiratory Disorders), and digest. (Digestive Disorders). On average, models get less complex, implying the exposures get more statistically independent}{figure.2}{}}
\citation{huang2019data}
\citation{lingren2016electronic}
\citation{Cover,kullback1951}
\citation{doob1953stochastic}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces \color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont \textbf {Metrics relevant to clinical practice: PPV vs Sensitivity trade-offs.} Panel A shows the precision/recall curves, $i.e.$, the trade-off between PPV and sensitivity for \textbf {standalone operation} with ACoR\xspace . Panel B shows how we can \textbf {boost ACoR\xspace performance} using population stratification from the distribution of M-CHAT/F scores in the population, as reported by the CHOP study\nobreakspace {}\cite {pmid31562252}. This is possible because ACoR\xspace and M-CHAT/F use independent information (co-morbidities vs questionnaire responses). Note that the population prevalence impacts this optimization, and hence we have a distinct curve for each prevalence value ($1.7\%$ is the CDC estimate, while $2.23\%$ is reported by the CHOP study). The two extreme operating zones marked as High Precision (HP) and High Recall (HR): if we choose to operate in HR, then we do not reduce the number of positive screens by much, but maximize sensitivity, while by operating in HP, we increase sensitivity by 20-40\% (depending on the prevalence) but double the PPV achieved in current practice. In contrast, when choosing to maximize sensitivity by operating in the HR zone, we only cut down positive flags to about $70\%$ of what we get with M-CHAT/F, but boost sensitivity by $50-90\%$ (Reaching sensitivities over $70\%$). Note in all these zones, we maintain specificity above $95\%$, which is the current state of art, implying that by doubling the PPV, we can halve the number of positive screens currently reported, thus potentially sharply reducing the queues and wait-times. }}{6}{figure.3}}
\newlabel{figprc}{{3}{6}{\color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont \textbf {Metrics relevant to clinical practice: PPV vs Sensitivity trade-offs.} Panel A shows the precision/recall curves, $i.e.$, the trade-off between PPV and sensitivity for \textbf {standalone operation} with \acor . Panel B shows how we can \textbf {boost \acor performance} using population stratification from the distribution of M-CHAT/F scores in the population, as reported by the CHOP study~\cite {pmid31562252}. This is possible because \acor and M-CHAT/F use independent information (co-morbidities vs questionnaire responses). Note that the population prevalence impacts this optimization, and hence we have a distinct curve for each prevalence value ($1.7\%$ is the CDC estimate, while $2.23\%$ is reported by the CHOP study). The two extreme operating zones marked as High Precision (HP) and High Recall (HR): if we choose to operate in HR, then we do not reduce the number of positive screens by much, but maximize sensitivity, while by operating in HP, we increase sensitivity by 20-40\% (depending on the prevalence) but double the PPV achieved in current practice. In contrast, when choosing to maximize sensitivity by operating in the HR zone, we only cut down positive flags to about $70\%$ of what we get with M-CHAT/F, but boost sensitivity by $50-90\%$ (Reaching sensitivities over $70\%$). Note in all these zones, we maintain specificity above $95\%$, which is the current state of art, implying that by doubling the PPV, we can halve the number of positive screens currently reported, thus potentially sharply reducing the queues and wait-times}{figure.3}{}}
\citation{Cover}
\@writefile{lot}{\contentsline {table}{\numberline {IV}{\ignorespaces \color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont Personalized Operation Conditioned on M-CHAT/F Scores at 26 months }}{7}{table.4}}
\newlabel{EXT-tabboost}{{IV}{7}{\color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont Personalized Operation Conditioned on M-CHAT/F Scores at 26 months}{table.4}{}}
\newlabel{eqR}{{3}{7}{Materials \& Methods}{equation.0.3}{}}
\newlabel{eq6}{{4}{7}{Materials \& Methods}{equation.0.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces \color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont \textbf {Co-morbidity Patterns} Panel A and B. Difference in occurrence frequencies of diagnostic codes between true positive (TP) and true negative (TN) predictions. The dotted line on panel B shows the abscissa lower cut-off in Panel A, illustrating the lower prevalence of codes in females. Panel C illustrates log-odds ratios for ICD9 disease categories at different ages. Importantly, the negative associations disappear when we consider older children, consistent with the lack of such reports in the literature which lack studies on very young cohorts. }}{8}{figure.4}}
\newlabel{EXT-fig3}{{4}{8}{\color {CadetBlue4!50!black} \sffamily \fontsize {9}{10}\selectfont \textbf {Co-morbidity Patterns} Panel A and B. Difference in occurrence frequencies of diagnostic codes between true positive (TP) and true negative (TN) predictions. The dotted line on panel B shows the abscissa lower cut-off in Panel A, illustrating the lower prevalence of codes in females. Panel C illustrates log-odds ratios for ICD9 disease categories at different ages. Importantly, the negative associations disappear when we consider older children, consistent with the lack of such reports in the literature which lack studies on very young cohorts}{figure.4}{}}
\citation{pmid31562252}
\citation{pmid31562252}
\citation{gordon2016whittling,althouse2006pediatric}
\newlabel{eqscpop}{{5}{9}{Materials \& Methods}{equation.0.5}{}}
\citation{gordon2016whittling}
\citation{pmid31562252}
\citation{pmid22511918,pmid30733689,pmid25681541}
\citation{hyman2020identification}
\citation{pmid23935565}
\citation{pmid30646068,pmid21651783,pmid30823414,pmid21282636,pmid29028817,pmid30109601}
\citation{pmid24729779}
\citation{pmid30337860}
\citation{pmid30646068,pmid21651783,pmid30823414,pmid21282636,pmid29028817,pmid30109601}
\citation{pmid30971960,pmid30941018,pmid29691724,pmid29307081,pmid27351598,pmid26793298,pmid30095240,pmid25681541}
\citation{pmid30178105,pmid27957319,pmid29028817}
\citation{Satterstrom484113,pmid25038753}
\citation{pmid15546155,pmid21595886,pmid21629840,pmid26793298,pmid30483058,pmid29691724}
\citation{Pearce2000}
\citation{pmid23537858}
\citation{pmid23637569}
\citation{smith2020metabolomics,howsmon2017classification,hicks2018validation}
\citation{hyman2020identification}
\citation{cdccp,christensen2014prevalence}
\citation{pmid30823414,pmid27957319}
\citation{hyman2020identification}
\citation{berkson1946limitations}
\citation{10.1001/jamapsychiatry.2019.1956}
\citation{cheng2020there}
\bibstyle{Science}
\citation{*}
\bibdata{mergedbib}
\bibcite{cdc}{{1}{}{{}}{{}}}
\bibcite{pmid24529515}{{2}{}{{}}{{}}}
\bibcite{howsmon2017classification}{{3}{}{{}}{{}}}
\bibcite{li2018high}{{4}{}{{}}{{}}}
\bibcite{hicks2018validation}{{5}{}{{}}{{}}}
\bibcite{smith2020metabolomics}{{6}{}{{}}{{}}}
\bibcite{volkmar2014practice}{{7}{}{{}}{{}}}
\bibcite{hyman2020identification}{{8}{}{{}}{{}}}
\bibcite{kalb2012determinants}{{9}{}{{}}{{}}}
\bibcite{bisgaier2011access}{{10}{}{{}}{{}}}
\bibcite{fenikile2015barriers}{{11}{}{{}}{{}}}
\bibcite{gordon2016whittling}{{12}{}{{}}{{}}}
\bibcite{pmid31562252}{{13}{}{{}}{{}}}
\bibcite{robins2014validation}{{14}{}{{}}{{}}}
\bibcite{pmid30733689}{{15}{}{{}}{{}}}
\bibcite{pmid22511918}{{16}{}{{}}{{}}}
\bibcite{hyde2019applications}{{17}{}{{}}{{}}}
\bibcite{abbas2020multi}{{18}{}{{}}{{}}}
\bibcite{duda2016clinical}{{19}{}{{}}{{}}}
\bibcite{duda2014testing}{{20}{}{{}}{{}}}
\bibcite{fusaro2014potential}{{21}{}{{}}{{}}}
\bibcite{wall2012use}{{22}{}{{}}{{}}}
\bibcite{wall2012use2}{{23}{}{{}}{{}}}
\bibcite{doshi2014comorbidity}{{24}{}{{}}{{}}}
\bibcite{bishop2018using}{{25}{}{{}}{{}}}
\bibcite{lingren2016electronic}{{26}{}{{}}{{}}}
\bibcite{hansen2017truven}{{27}{}{{}}{{}}}
\bibcite{pmid29701730}{{28}{}{{}}{{}}}
\bibcite{CR08}{{29}{}{{}}{{}}}
\bibcite{CL12g}{{30}{}{{}}{{}}}
\bibcite{Chattopadhyay20140826}{{31}{}{{}}{{}}}
\bibcite{huang2019data}{{32}{}{{}}{{}}}
\bibcite{Cover}{{33}{}{{}}{{}}}
\bibcite{kullback1951}{{34}{}{{}}{{}}}
\bibcite{doob1953stochastic}{{35}{}{{}}{{}}}
\bibcite{althouse2006pediatric}{{36}{}{{}}{{}}}
\bibcite{pmid25681541}{{37}{}{{}}{{}}}
\bibcite{pmid23935565}{{38}{}{{}}{{}}}
\bibcite{pmid30646068}{{39}{}{{}}{{}}}
\bibcite{pmid21651783}{{40}{}{{}}{{}}}
\bibcite{pmid30823414}{{41}{}{{}}{{}}}
\bibcite{pmid21282636}{{42}{}{{}}{{}}}
\bibcite{pmid29028817}{{43}{}{{}}{{}}}
\bibcite{pmid30109601}{{44}{}{{}}{{}}}
\bibcite{pmid24729779}{{45}{}{{}}{{}}}
\bibcite{pmid30337860}{{46}{}{{}}{{}}}
\bibcite{pmid30971960}{{47}{}{{}}{{}}}
\bibcite{pmid30941018}{{48}{}{{}}{{}}}
\bibcite{pmid29691724}{{49}{}{{}}{{}}}
\bibcite{pmid29307081}{{50}{}{{}}{{}}}
\bibcite{pmid27351598}{{51}{}{{}}{{}}}
\bibcite{pmid26793298}{{52}{}{{}}{{}}}
\bibcite{pmid30095240}{{53}{}{{}}{{}}}
\bibcite{pmid30178105}{{54}{}{{}}{{}}}
\bibcite{pmid27957319}{{55}{}{{}}{{}}}
\bibcite{Satterstrom484113}{{56}{}{{}}{{}}}
\bibcite{pmid25038753}{{57}{}{{}}{{}}}
\bibcite{pmid15546155}{{58}{}{{}}{{}}}
\bibcite{pmid21595886}{{59}{}{{}}{{}}}
\bibcite{pmid21629840}{{60}{}{{}}{{}}}
\bibcite{pmid30483058}{{61}{}{{}}{{}}}
\bibcite{Pearce2000}{{62}{}{{}}{{}}}
\bibcite{pmid23537858}{{63}{}{{}}{{}}}
\bibcite{pmid23637569}{{64}{}{{}}{{}}}
\bibcite{cdccp}{{65}{}{{}}{{}}}
\bibcite{christensen2014prevalence}{{66}{}{{}}{{}}}
\bibcite{berkson1946limitations}{{67}{}{{}}{{}}}
\bibcite{10.1001/jamapsychiatry.2019.1956}{{68}{}{{}}{{}}}
\bibcite{cheng2020there}{{69}{}{{}}{{}}}
\bibcite{GEMS}{{70}{}{{}}{{}}}
\bibcite{baio2014prevalence}{{71}{}{{}}{{}}}
\bibcite{bolton2012autism}{{72}{}{{}}{{}}}
\bibcite{bondy2008graph}{{73}{}{{}}{{}}}
\bibcite{breiman}{{74}{}{{}}{{}}}
\bibcite{cdc0}{{75}{}{{}}{{}}}
\bibcite{chattopadhyay2008structural}{{76}{}{{}}{{}}}
\bibcite{chlebowski2010using}{{77}{}{{}}{{}}}
\bibcite{doob1990stochastic}{{78}{}{{}}{{}}}
\bibcite{esler2015autism}{{79}{}{{}}{{}}}
\bibcite{falkmer2013diagnostic}{{80}{}{{}}{{}}}
\bibcite{friedman}{{81}{}{{}}{{}}}
\bibcite{hardy1992divergent}{{82}{}{{}}{{}}}
\bibcite{hochreiter}{{83}{}{{}}{{}}}
\bibcite{hopcroft2008introduction}{{84}{}{{}}{{}}}
\bibcite{jarquin2011racial}{{85}{}{{}}{{}}}
\bibcite{johnson2007identification}{{86}{}{{}}{{}}}
\bibcite{kai1967markov_stdis}{{87}{}{{}}{{}}}
\bibcite{kleinman2008diagnostic}{{88}{}{{}}{{}}}
\bibcite{klenke2013probability}{{89}{}{{}}{{}}}
\bibcite{kozlowski2011parents}{{90}{}{{}}{{}}}
\bibcite{lord2006autism}{{91}{}{{}}{{}}}
\bibcite{ltgranger80}{{92}{}{{}}{{}}}
\bibcite{matthews2016sparse}{{93}{}{{}}{{}}}
\bibcite{nimh}{{94}{}{{}}{{}}}
\bibcite{penner2018practice}{{95}{}{{}}{{}}}
\bibcite{trahtman2008road}{{96}{}{{}}{{}}}
\bibcite{vidyasagar2014hidden}{{97}{}{{}}{{}}}
\bibcite{zwaigenbaum2015early}{{98}{}{{}}{{}}}
\providecommand\NAT@force@numbers{}\NAT@force@numbers