Skip to content

Commit

Permalink
move everything to a single funding-acknowledgement model
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Aug 2, 2023
1 parent cd4f305 commit 02c60dc
Show file tree
Hide file tree
Showing 15 changed files with 133 additions and 1,583 deletions.
2 changes: 0 additions & 2 deletions doc/Benchmarking-biorxiv.md
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,6 @@ Evaluation on 2000 random PDF files out of 2000 PDF (ratio 1.0).
|--- |--- |--- |--- |--- |
| availability_stmt | 0 | 0 | 0 | 0 |
| figure_title | 4.24 | 2.01 | 2.72 | 22978 |
| funding_stmt | 0 | 0 | 0 | 0 |
| reference_citation | 71.04 | 71.33 | 71.18 | 147470 |
| reference_figure | 70.59 | 67.74 | 69.13 | 47984 |
| reference_table | 48.12 | 83.06 | 60.94 | 5957 |
Expand All @@ -282,7 +281,6 @@ Evaluation on 2000 random PDF files out of 2000 PDF (ratio 1.0).
|--- |--- |--- |--- |--- |
| availability_stmt | 0 | 0 | 0 | 0 |
| figure_title | 69.47 | 32.89 | 44.65 | 22978 |
| funding_stmt | 0 | 0 | 0 | 0 |
| reference_citation | 83.03 | 83.37 | 83.2 | 147470 |
| reference_figure | 71.21 | 68.34 | 69.75 | 47984 |
| reference_table | 48.57 | 83.83 | 61.51 | 5957 |
Expand Down
4 changes: 2 additions & 2 deletions grobid-core/src/main/java/org/grobid/core/GrobidModels.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ public enum GrobidModels implements GrobidModel {
ASTRO("astro"),
SOFTWARE("software"),
DATASEER("dataseer"),
ACKNOWLEDGEMENT("acknowledgement"),
FUNDING("funding"),
//ACKNOWLEDGEMENT("acknowledgement"),
FUNDING_ACKNOWLEDGEMENT("funding-acknowledgement"),
INFRASTRUCTURE("infrastructure"),
DUMMY("none");

Expand Down
9 changes: 9 additions & 0 deletions grobid-core/src/main/java/org/grobid/core/data/Funder.java
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,13 @@ public void setLayoutTokens(List<LayoutToken> layoutTokens) {
public void addLayoutTokens(List<LayoutToken> layoutTokens) {
this.layoutTokens.addAll(layoutTokens);
}

/**
 * Returns a short human-readable label for this funder, built from its names.
 *
 * <p>When both names are set the result is {@code "fullName (abbreviatedName)"};
 * when only one is set, that name alone is returned; when neither is set the
 * result is the empty string.
 *
 * @return the display form of this funder, never {@code null}
 */
@Override
public String toString() {
    StringBuilder builder = new StringBuilder();
    if (fullName != null) {
        builder.append(fullName);
    }
    if (abbreviatedName != null) {
        if (builder.length() > 0) {
            // Keep the two names visually distinct instead of running them together.
            builder.append(" (").append(abbreviatedName).append(')');
        } else {
            builder.append(abbreviatedName);
        }
    }
    return builder.toString();
}
}
21 changes: 21 additions & 0 deletions grobid-core/src/main/java/org/grobid/core/data/Funding.java
Original file line number Diff line number Diff line change
Expand Up @@ -182,4 +182,25 @@ public boolean isValid() {
else
return false;
}

/**
 * Returns a multi-line, human-readable dump of this funding.
 *
 * <p>Each non-null field contributes one {@code "label: value"} line terminated
 * by a newline, in the fixed order: funder, grant name, grant number, project
 * name, project abbreviated name, program name, program abbreviated name, url.
 * Null fields are skipped, so the result may be the empty string.
 *
 * @return the textual dump of the populated fields, never {@code null}
 */
@Override
public String toString() {
    StringBuilder builder = new StringBuilder();
    appendField(builder, "funder", funder);
    appendField(builder, "grant name", grantName);
    appendField(builder, "grant number", grantNumber);
    appendField(builder, "project name", projectFullName);
    appendField(builder, "project abbreviated name", projectAbbreviatedName);
    appendField(builder, "program name", programFullName);
    appendField(builder, "program abbreviated name", programAbbreviatedName);
    appendField(builder, "url", url);
    return builder.toString();
}

/**
 * Appends {@code "label: value\n"} to {@code builder} when {@code value} is non-null;
 * does nothing otherwise.
 *
 * @param builder destination buffer
 * @param label   field label written before the colon
 * @param value   field value; rendered through its {@code toString()}
 */
private static void appendField(StringBuilder builder, String label, Object value) {
    if (value != null) {
        // append(Object) goes through String.valueOf, i.e. value.toString() for non-null values.
        builder.append(label).append(": ").append(value).append('\n');
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class EngineParsers implements Closeable {
private FigureParser figureParser = null;
private TableParser tableParser = null;
private MonographParser monographParser = null;
private FundingAcknowledgementParser fundingAcknowledgementParser = null;

public AffiliationAddressParser getAffiliationAddressParser() {
if (affiliationAddressParser == null) {
Expand Down Expand Up @@ -170,6 +171,17 @@ public MonographParser getMonographParser() {
return monographParser;
}

/**
 * Returns the funding-acknowledgement parser held by this instance, creating it
 * on first call.
 *
 * <p>Creation happens inside a {@code synchronized (this)} block after a second
 * null check, so at most one parser is instantiated per instance.
 *
 * NOTE(review): this is a double-checked locking pattern; the backing field is
 * declared without {@code volatile} — confirm whether safe publication to other
 * threads is required here.
 *
 * @return the lazily created {@code FundingAcknowledgementParser}
 */
public FundingAcknowledgementParser getFundingAcknowledgementParser() {
    // Fast path: already created, no locking needed.
    if (fundingAcknowledgementParser != null) {
        return fundingAcknowledgementParser;
    }
    synchronized (this) {
        // Re-check under the lock: another thread may have created it meanwhile.
        if (fundingAcknowledgementParser == null) {
            fundingAcknowledgementParser = new FundingAcknowledgementParser();
        }
    }
    return fundingAcknowledgementParser;
}

/**
* Init all model, this will also load the model into memory
*/
Expand All @@ -186,6 +198,7 @@ public void initAll() {
figureParser = getFigureParser();
tableParser = getTableParser();
//MonographParser monographParser = getMonographParser();
fundingAcknowledgementParser = getFundingAcknowledgementParser();
}

@Override
Expand Down Expand Up @@ -268,7 +281,12 @@ public void close() throws IOException {
LOGGER.debug("CLOSING monographParser");
}

LOGGER.debug("==> All resources closed");
if (fundingAcknowledgementParser != null) {
fundingAcknowledgementParser.close();
fundingAcknowledgementParser = null;
LOGGER.debug("CLOSING fundingAcknowledgementParser");
}

LOGGER.debug("==> All resources closed");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import org.grobid.core.data.Figure;
import org.grobid.core.data.Table;
import org.grobid.core.data.Equation;
import org.grobid.core.data.Funding;
import org.grobid.core.data.Funder;
import org.grobid.core.document.Document;
import org.grobid.core.document.DocumentPiece;
import org.grobid.core.document.DocumentPointer;
Expand Down Expand Up @@ -1909,7 +1911,7 @@ protected List<Figure> processFigures(String rese, List<LayoutToken> layoutToken
for (TaggingTokenCluster cluster : Iterables.filter(clusteror.cluster(),
new TaggingTokenClusteror.LabelTypePredicate(TaggingLabels.FIGURE))) {
List<LayoutToken> tokenizationFigure = cluster.concatTokens();
Figure result = parsers.getFigureParser().processing(
Figure result = this.parsers.getFigureParser().processing(
tokenizationFigure,
cluster.getFeatureBlock()
);
Expand Down Expand Up @@ -2470,6 +2472,42 @@ private void toTEI(Document doc,

tei.append("\t\t<back>\n");

// funding in header
StringBuilder fundingStmt = new StringBuilder();
if (StringUtils.isNotBlank(resHeader.getFunding())) {
List<LayoutToken> headerFundingTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_FUNDING);

List<Funding> fundings = this.parsers.getFundingAcknowledgementParser().processing(headerFundingTokens);
for (Funding funding : fundings) {
System.out.println(funding.toString());
}

Pair<String, List<LayoutToken>> headerFundingProcessed = processShort(headerFundingTokens, doc);
if (headerFundingProcessed != null) {
fundingStmt = teiFormatter.processTEIDivSection("funding",
"\t\t\t",
headerFundingProcessed.getLeft(),
headerFundingProcessed.getRight(),
resCitations,
config);
}
if (fundingStmt.length() > 0) {
tei.append(fundingStmt.toString());
}
}

// funding statements in non-header part
fundingStmt = getSectionAsTEI("funding",
"\t\t\t",
doc,
SegmentationLabels.FUNDING,
teiFormatter,
resCitations,
config);
if (fundingStmt.length() > 0) {
tei.append(fundingStmt);
}

// acknowledgement is in the back
StringBuilder acknowledgmentStmt = getSectionAsTEI("acknowledgement", "\t\t\t", doc, SegmentationLabels.ACKNOWLEDGEMENT,
teiFormatter, resCitations, config);
Expand Down Expand Up @@ -2508,36 +2546,6 @@ private void toTEI(Document doc,
tei.append(availabilityStmt.toString());
}

// funding in header
StringBuilder fundingStmt = new StringBuilder();
if (StringUtils.isNotBlank(resHeader.getFunding())) {
List<LayoutToken> headerFundingTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_FUNDING);
Pair<String, List<LayoutToken>> headerFundingProcessed = processShort(headerFundingTokens, doc);
if (headerFundingProcessed != null) {
fundingStmt = teiFormatter.processTEIDivSection("funding",
"\t\t\t",
headerFundingProcessed.getLeft(),
headerFundingProcessed.getRight(),
resCitations,
config);
}
if (fundingStmt.length() > 0) {
tei.append(fundingStmt.toString());
}
}

// funding statements in non-header part
fundingStmt = getSectionAsTEI("funding",
"\t\t\t",
doc,
SegmentationLabels.FUNDING,
teiFormatter,
resCitations,
config);
if (fundingStmt.length() > 0) {
tei.append(fundingStmt);
}

tei = teiFormatter.toTEIAnnex(tei, reseAnnex, resHeader, resCitations,
tokenizationsAnnex, markerTypes, doc, config);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@

import static org.grobid.core.engines.label.TaggingLabels.*;

public class FundingParser extends AbstractParser {
public class FundingAcknowledgementParser extends AbstractParser {

private static final Logger LOGGER = LoggerFactory.getLogger(FundingParser.class);
private static final Logger LOGGER = LoggerFactory.getLogger(FundingAcknowledgementParser.class);

protected FundingParser() {
super(GrobidModels.FUNDING);
protected FundingAcknowledgementParser() {
super(GrobidModels.FUNDING_ACKNOWLEDGEMENT);
}

/**
Expand Down Expand Up @@ -76,7 +76,7 @@ private List<Funding> getExtractionResult(List<LayoutToken> tokenizations, Strin
// first funding
Funding funding = new Funding();

TaggingTokenClusteror clusteror = new TaggingTokenClusteror(GrobidModels.FUNDING, result, tokenizations);
TaggingTokenClusteror clusteror = new TaggingTokenClusteror(GrobidModels.FUNDING_ACKNOWLEDGEMENT, result, tokenizations);
List<TaggingTokenCluster> clusters = clusteror.cluster();
TaggingLabel previousLabel = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ public class TaggingLabels {
public final static String PROJECT_NAME_LABEL = "<projectName>";
public final static String PROJECT_ABBRV_NAME_LABEL = "<projectAbbrv>";
public final static String URL_LABEL = "<url>";
public final static String PERSON_LABEL = "<person>";

/* title page (secondary title page)
* publisher page (publication information, including usually the copyrights info)
Expand Down Expand Up @@ -260,16 +261,19 @@ public class TaggingLabels {
public static final TaggingLabel MONOGRAPH_BACK = new TaggingLabelImpl(GrobidModels.MONOGRAPH, BACK_LABEL);
public static final TaggingLabel MONOGRAPH_OTHER = new TaggingLabelImpl(GrobidModels.MONOGRAPH, OTHER_LABEL);

public static final TaggingLabel FUNDING_FUNDER_NAME = new TaggingLabelImpl(GrobidModels.FUNDING, FUNDER_NAME_LABEL);
public static final TaggingLabel FUNDING_FUNDER_ABBRV_NAME = new TaggingLabelImpl(GrobidModels.FUNDING, FUNDER_ABBRV_NAME_LABEL);
public static final TaggingLabel FUNDING_PROGRAM_NAME = new TaggingLabelImpl(GrobidModels.FUNDING, PROGRAM_NAME_LABEL);
public static final TaggingLabel FUNDING_PROGRAM_ABBRV_NAME = new TaggingLabelImpl(GrobidModels.FUNDING, PROGRAM_ABBRV_NAME_LABEL);
public static final TaggingLabel FUNDING_GRANT_NUMBER = new TaggingLabelImpl(GrobidModels.FUNDING, GRANT_NUMBER_LABEL);
public static final TaggingLabel FUNDING_GRANT_NAME = new TaggingLabelImpl(GrobidModels.FUNDING, GRANT_NAME_LABEL);
public static final TaggingLabel FUNDING_PROJECT_NAME = new TaggingLabelImpl(GrobidModels.FUNDING, PROJECT_NAME_LABEL);
public static final TaggingLabel FUNDING_PROJECT_ABBRV_NAME = new TaggingLabelImpl(GrobidModels.FUNDING, PROJECT_ABBRV_NAME_LABEL);
public static final TaggingLabel FUNDING_URL = new TaggingLabelImpl(GrobidModels.FUNDING, URL_LABEL);
public static final TaggingLabel FUNDING_OTHER = new TaggingLabelImpl(GrobidModels.FUNDING, OTHER_LABEL);
public static final TaggingLabel FUNDING_FUNDER_NAME = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, FUNDER_NAME_LABEL);
public static final TaggingLabel FUNDING_FUNDER_ABBRV_NAME = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, FUNDER_ABBRV_NAME_LABEL);
public static final TaggingLabel FUNDING_PROGRAM_NAME = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, PROGRAM_NAME_LABEL);
public static final TaggingLabel FUNDING_PROGRAM_ABBRV_NAME = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, PROGRAM_ABBRV_NAME_LABEL);
public static final TaggingLabel FUNDING_GRANT_NUMBER = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, GRANT_NUMBER_LABEL);
public static final TaggingLabel FUNDING_GRANT_NAME = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, GRANT_NAME_LABEL);
public static final TaggingLabel FUNDING_PROJECT_NAME = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, PROJECT_NAME_LABEL);
public static final TaggingLabel FUNDING_PROJECT_ABBRV_NAME = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, PROJECT_ABBRV_NAME_LABEL);
public static final TaggingLabel FUNDING_URL = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, URL_LABEL);
public static final TaggingLabel FUNDING_PERSON = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, PERSON_LABEL);
public static final TaggingLabel FUNDING_INSTITUTION = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, INSTITUTION_LABEL);
public static final TaggingLabel FUNDING_AFFILIATION = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, AFFILIATION_LABEL);
public static final TaggingLabel FUNDING_OTHER = new TaggingLabelImpl(GrobidModels.FUNDING_ACKNOWLEDGEMENT, OTHER_LABEL);

protected static void register(TaggingLabel label) {
cache.putIfAbsent(new Pair<>(label.getGrobidModel(), label.getLabel()), label);
Expand Down Expand Up @@ -404,7 +408,7 @@ protected static void register(TaggingLabel label) {
register(MONOGRAPH_BACK);
register(MONOGRAPH_OTHER);

// funding
// funding-acknowledgement
register(FUNDING_FUNDER_NAME);
register(FUNDING_FUNDER_ABBRV_NAME);
register(FUNDING_PROGRAM_NAME);
Expand All @@ -414,6 +418,9 @@ protected static void register(TaggingLabel label) {
register(FUNDING_PROJECT_NAME);
register(FUNDING_PROJECT_ABBRV_NAME);
register(FUNDING_URL);
register(FUNDING_PERSON);
register(FUNDING_AFFILIATION);
register(FUNDING_INSTITUTION);
register(FUNDING_OTHER);
}

Expand Down
2 changes: 1 addition & 1 deletion grobid-home/config/grobid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ grobid:
epsilon: 0.0001
window: 20

- name: "funding"
- name: "funding-acknowledgement"
engine: "wapiti"
#engine: "delft"
wapiti:
Expand Down
Loading

0 comments on commit 02c60dc

Please sign in to comment.