diff --git a/.circleci/config.yml b/.circleci/config.yml index 86260223a..ca44d645f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -38,8 +38,8 @@ jobs: command: | mkdir data/ cd data - wget https://ldbc.github.io/ldbc_snb_data_converter/csv-composite-projected-fk-legacy-filenames.zip - unzip csv-composite-projected-fk-legacy-filenames.zip + wget -q https://ldbc.github.io/ldbc_snb_data_converter/csv-composite-projected-fk.zip + unzip csv-composite-projected-fk.zip cd .. - run: name: Load @@ -49,7 +49,7 @@ jobs: # Cypher cd cypher . scripts/environment-variables-default.sh - export NEO4J_CSV_DIR=`pwd`/../data/csv-composite-projected-fk-legacy-filenames + export NEO4J_CSV_DIR=`pwd`/../data/csv-composite-projected-fk export NEO4J_CSV_POSTFIX=.csv scripts/load-in-one-step.sh cd .. diff --git a/cypher/README.md b/cypher/README.md index 7466704f6..aeaaf7dd9 100644 --- a/cypher/README.md +++ b/cypher/README.md @@ -26,16 +26,13 @@ This script replaces the headers in the input CSVs, load them, starts Neo4j, and ## Loading the example data set -Transform the example data set in the [data converter](https://github.com/ldbc/ldbc_snb_data_converter) repository, then rename it: +Transform the example data set in the [data converter](https://github.com/ldbc/ldbc_snb_data_converter) repository. -```bash -./rename.sh -``` +Then, in this repository, run: -In this repository, run ```bash . 
scripts/environment-variables-default.sh -export NEO4J_CSV_DIR=${DATA_CONVERTER_DIR}/ldbc_snb_data_converter/data/csv-composite-projected-fk-legacy-filenames +export NEO4J_CSV_DIR=${DATA_CONVERTER_DIR}/ldbc_snb_data_converter/data/csv-composite-projected-fk export NEO4J_CSV_POSTFIX=.csv scripts/load-in-one-step.sh ``` diff --git a/cypher/scripts/convert-csvs.sh b/cypher/scripts/convert-csvs.sh index fd2626070..cb8ceb64d 100755 --- a/cypher/scripts/convert-csvs.sh +++ b/cypher/scripts/convert-csvs.sh @@ -25,6 +25,10 @@ while read line; do echo ${FILENAME}: ${HEADER} # replace header (no point using sed to save space as it creates a temporary file as well) + if [ ! -f ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX} ]; then + echo "${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX} does not exist" + exit 1 + fi echo ${HEADER} | ${SNB_CAT} - <(tail -n +2 ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX}) > tmpfile.csv && mv tmpfile.csv ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX} done < headers.txt diff --git a/cypher/scripts/headers.txt b/cypher/scripts/headers.txt index aac9e1d3e..872fdf173 100644 --- a/cypher/scripts/headers.txt +++ b/cypher/scripts/headers.txt @@ -1,31 +1,31 @@ -static/organisation id:ID(Organisation)|:LABEL|name:STRING|url:STRING -static/place id:ID(Place)|name:STRING|url:STRING|:LABEL -static/tagclass id:ID(TagClass)|name:STRING|url:STRING -static/tag id:ID(Tag)|name:STRING|url:STRING -static/tagclass_isSubclassOf_tagclass :START_ID(TagClass)|:END_ID(TagClass) -static/tag_hasType_tagclass :START_ID(Tag)|:END_ID(TagClass) -static/organisation_isLocatedIn_place :START_ID(Organisation)|:END_ID(Place) -static/place_isPartOf_place :START_ID(Place)|:END_ID(Place) -dynamic/comment creationDate:DATETIME|id:ID(Comment)|locationIP:STRING|browserUsed:STRING|content:STRING|length:LONG -dynamic/forum creationDate:DATETIME|id:ID(Forum)|title:STRING -dynamic/person 
creationDate:DATETIME|id:ID(Person)|firstName:STRING|lastName:STRING|gender:STRING|birthday:DATE|locationIP:STRING|browserUsed:STRING|speaks:STRING[]|email:STRING[] -dynamic/post creationDate:DATETIME|id:ID(Post)|imageFile:STRING|locationIP:STRING|browserUsed:STRING|language:STRING|content:STRING|length:LONG -dynamic/comment_hasCreator_person creationDate:DATETIME|:START_ID(Comment)|:END_ID(Person) -dynamic/comment_isLocatedIn_country creationDate:DATETIME|:START_ID(Comment)|:END_ID(Place) -dynamic/comment_replyOf_comment creationDate:DATETIME|:START_ID(Comment)|:END_ID(Comment) -dynamic/comment_replyOf_post creationDate:DATETIME|:START_ID(Comment)|:END_ID(Post) -dynamic/forum_containerOf_post creationDate:DATETIME|:START_ID(Forum)|:END_ID(Post) -dynamic/forum_hasMember_person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person) -dynamic/forum_hasModerator_person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person) -dynamic/forum_hasTag_tag creationDate:DATETIME|:START_ID(Forum)|:END_ID(Tag) -dynamic/person_hasInterest_tag creationDate:DATETIME|:START_ID(Person)|:END_ID(Tag) -dynamic/person_isLocatedIn_city creationDate:DATETIME|:START_ID(Person)|:END_ID(Place) -dynamic/person_knows_person creationDate:DATETIME|:START_ID(Person)|:END_ID(Person) -dynamic/person_likes_comment creationDate:DATETIME|:START_ID(Person)|:END_ID(Comment) -dynamic/person_likes_post creationDate:DATETIME|:START_ID(Person)|:END_ID(Post) -dynamic/person_studyAt_organisation creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|classYear:LONG -dynamic/person_workAt_organisation creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|workFrom:LONG -dynamic/post_hasCreator_person creationDate:DATETIME|:START_ID(Post)|:END_ID(Person) -dynamic/comment_hasTag_tag creationDate:DATETIME|:START_ID(Comment)|:END_ID(Tag) -dynamic/post_hasTag_tag creationDate:DATETIME|:START_ID(Post)|:END_ID(Tag) -dynamic/post_isLocatedIn_country creationDate:DATETIME|:START_ID(Post)|:END_ID(Place) 
+static/Organisation id:ID(Organisation)|:LABEL|name:STRING|url:STRING +static/Place id:ID(Place)|name:STRING|url:STRING|:LABEL +static/TagClass id:ID(TagClass)|name:STRING|url:STRING +static/Tag id:ID(Tag)|name:STRING|url:STRING +static/TagClass_isSubclassOf_TagClass :START_ID(TagClass)|:END_ID(TagClass) +static/Tag_hasType_TagClass :START_ID(Tag)|:END_ID(TagClass) +static/Organisation_isLocatedIn_Place :START_ID(Organisation)|:END_ID(Place) +static/Place_isPartOf_Place :START_ID(Place)|:END_ID(Place) +dynamic/Comment creationDate:DATETIME|id:ID(Comment)|locationIP:STRING|browserUsed:STRING|content:STRING|length:LONG +dynamic/Forum creationDate:DATETIME|id:ID(Forum)|title:STRING +dynamic/Person creationDate:DATETIME|id:ID(Person)|firstName:STRING|lastName:STRING|gender:STRING|birthday:DATE|locationIP:STRING|browserUsed:STRING|speaks:STRING[]|email:STRING[] +dynamic/Post creationDate:DATETIME|id:ID(Post)|imageFile:STRING|locationIP:STRING|browserUsed:STRING|language:STRING|content:STRING|length:LONG +dynamic/Comment_hasCreator_Person creationDate:DATETIME|:START_ID(Comment)|:END_ID(Person) +dynamic/Comment_isLocatedIn_Country creationDate:DATETIME|:START_ID(Comment)|:END_ID(Place) +dynamic/Comment_replyOf_Comment creationDate:DATETIME|:START_ID(Comment)|:END_ID(Comment) +dynamic/Comment_replyOf_Post creationDate:DATETIME|:START_ID(Comment)|:END_ID(Post) +dynamic/Forum_containerOf_Post creationDate:DATETIME|:START_ID(Forum)|:END_ID(Post) +dynamic/Forum_hasMember_Person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person) +dynamic/Forum_hasModerator_Person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person) +dynamic/Forum_hasTag_Tag creationDate:DATETIME|:START_ID(Forum)|:END_ID(Tag) +dynamic/Person_hasInterest_Tag creationDate:DATETIME|:START_ID(Person)|:END_ID(Tag) +dynamic/Person_isLocatedIn_City creationDate:DATETIME|:START_ID(Person)|:END_ID(Place) +dynamic/Person_knows_Person creationDate:DATETIME|:START_ID(Person)|:END_ID(Person) 
+dynamic/Person_likes_Comment creationDate:DATETIME|:START_ID(Person)|:END_ID(Comment) +dynamic/Person_likes_Post creationDate:DATETIME|:START_ID(Person)|:END_ID(Post) +dynamic/Person_studyAt_University creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|classYear:LONG +dynamic/Person_workAt_Company creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|workFrom:LONG +dynamic/Post_hasCreator_Person creationDate:DATETIME|:START_ID(Post)|:END_ID(Person) +dynamic/Comment_hasTag_Tag creationDate:DATETIME|:START_ID(Comment)|:END_ID(Tag) +dynamic/Post_hasTag_Tag creationDate:DATETIME|:START_ID(Post)|:END_ID(Tag) +dynamic/Post_isLocatedIn_Country creationDate:DATETIME|:START_ID(Post)|:END_ID(Place) diff --git a/cypher/scripts/import-to-neo4j.sh b/cypher/scripts/import-to-neo4j.sh index c1f8d45ab..d6653d3f2 100755 --- a/cypher/scripts/import-to-neo4j.sh +++ b/cypher/scripts/import-to-neo4j.sh @@ -29,35 +29,35 @@ docker run --rm \ neo4j:${NEO4J_VERSION} \ neo4j-admin import \ --id-type=INTEGER \ - --nodes=Place="/import/static/place${NEO4J_CSV_POSTFIX}" \ - --nodes=Organisation="/import/static/organisation${NEO4J_CSV_POSTFIX}" \ - --nodes=TagClass="/import/static/tagclass${NEO4J_CSV_POSTFIX}" \ - --nodes=Tag="/import/static/tag${NEO4J_CSV_POSTFIX}" \ - --nodes=Forum="/import/dynamic/forum${NEO4J_CSV_POSTFIX}" \ - --nodes=Person="/import/dynamic/person${NEO4J_CSV_POSTFIX}" \ - --nodes=Message:Comment="/import/dynamic/comment${NEO4J_CSV_POSTFIX}" \ - --nodes=Message:Post="/import/dynamic/post${NEO4J_CSV_POSTFIX}" \ - --relationships=IS_PART_OF="/import/static/place_isPartOf_place${NEO4J_CSV_POSTFIX}" \ - --relationships=IS_SUBCLASS_OF="/import/static/tagclass_isSubclassOf_tagclass${NEO4J_CSV_POSTFIX}" \ - --relationships=IS_LOCATED_IN="/import/static/organisation_isLocatedIn_place${NEO4J_CSV_POSTFIX}" \ - --relationships=HAS_TYPE="/import/static/tag_hasType_tagclass${NEO4J_CSV_POSTFIX}" \ - 
--relationships=HAS_CREATOR="/import/dynamic/comment_hasCreator_person${NEO4J_CSV_POSTFIX}" \ - --relationships=IS_LOCATED_IN="/import/dynamic/comment_isLocatedIn_country${NEO4J_CSV_POSTFIX}" \ - --relationships=REPLY_OF="/import/dynamic/comment_replyOf_comment${NEO4J_CSV_POSTFIX}" \ - --relationships=REPLY_OF="/import/dynamic/comment_replyOf_post${NEO4J_CSV_POSTFIX}" \ - --relationships=CONTAINER_OF="/import/dynamic/forum_containerOf_post${NEO4J_CSV_POSTFIX}" \ - --relationships=HAS_MEMBER="/import/dynamic/forum_hasMember_person${NEO4J_CSV_POSTFIX}" \ - --relationships=HAS_MODERATOR="/import/dynamic/forum_hasModerator_person${NEO4J_CSV_POSTFIX}" \ - --relationships=HAS_TAG="/import/dynamic/forum_hasTag_tag${NEO4J_CSV_POSTFIX}" \ - --relationships=HAS_INTEREST="/import/dynamic/person_hasInterest_tag${NEO4J_CSV_POSTFIX}" \ - --relationships=IS_LOCATED_IN="/import/dynamic/person_isLocatedIn_city${NEO4J_CSV_POSTFIX}" \ - --relationships=KNOWS="/import/dynamic/person_knows_person${NEO4J_CSV_POSTFIX}" \ - --relationships=LIKES="/import/dynamic/person_likes_comment${NEO4J_CSV_POSTFIX}" \ - --relationships=LIKES="/import/dynamic/person_likes_post${NEO4J_CSV_POSTFIX}" \ - --relationships=HAS_CREATOR="/import/dynamic/post_hasCreator_person${NEO4J_CSV_POSTFIX}" \ - --relationships=HAS_TAG="/import/dynamic/comment_hasTag_tag${NEO4J_CSV_POSTFIX}" \ - --relationships=HAS_TAG="/import/dynamic/post_hasTag_tag${NEO4J_CSV_POSTFIX}" \ - --relationships=IS_LOCATED_IN="/import/dynamic/post_isLocatedIn_country${NEO4J_CSV_POSTFIX}" \ - --relationships=STUDY_AT="/import/dynamic/person_studyAt_organisation${NEO4J_CSV_POSTFIX}" \ - --relationships=WORK_AT="/import/dynamic/person_workAt_organisation${NEO4J_CSV_POSTFIX}" \ + --nodes=Place="/import/static/Place${NEO4J_CSV_POSTFIX}" \ + --nodes=Organisation="/import/static/Organisation${NEO4J_CSV_POSTFIX}" \ + --nodes=TagClass="/import/static/TagClass${NEO4J_CSV_POSTFIX}" \ + --nodes=Tag="/import/static/Tag${NEO4J_CSV_POSTFIX}" \ + 
--nodes=Forum="/import/dynamic/Forum${NEO4J_CSV_POSTFIX}" \ + --nodes=Person="/import/dynamic/Person${NEO4J_CSV_POSTFIX}" \ + --nodes=Message:Comment="/import/dynamic/Comment${NEO4J_CSV_POSTFIX}" \ + --nodes=Message:Post="/import/dynamic/Post${NEO4J_CSV_POSTFIX}" \ + --relationships=IS_PART_OF="/import/static/Place_isPartOf_Place${NEO4J_CSV_POSTFIX}" \ + --relationships=IS_SUBCLASS_OF="/import/static/TagClass_isSubclassOf_TagClass${NEO4J_CSV_POSTFIX}" \ + --relationships=IS_LOCATED_IN="/import/static/Organisation_isLocatedIn_Place${NEO4J_CSV_POSTFIX}" \ + --relationships=HAS_TYPE="/import/static/Tag_hasType_TagClass${NEO4J_CSV_POSTFIX}" \ + --relationships=HAS_CREATOR="/import/dynamic/Comment_hasCreator_Person${NEO4J_CSV_POSTFIX}" \ + --relationships=IS_LOCATED_IN="/import/dynamic/Comment_isLocatedIn_Country${NEO4J_CSV_POSTFIX}" \ + --relationships=REPLY_OF="/import/dynamic/Comment_replyOf_Comment${NEO4J_CSV_POSTFIX}" \ + --relationships=REPLY_OF="/import/dynamic/Comment_replyOf_Post${NEO4J_CSV_POSTFIX}" \ + --relationships=CONTAINER_OF="/import/dynamic/Forum_containerOf_Post${NEO4J_CSV_POSTFIX}" \ + --relationships=HAS_MEMBER="/import/dynamic/Forum_hasMember_Person${NEO4J_CSV_POSTFIX}" \ + --relationships=HAS_MODERATOR="/import/dynamic/Forum_hasModerator_Person${NEO4J_CSV_POSTFIX}" \ + --relationships=HAS_TAG="/import/dynamic/Forum_hasTag_Tag${NEO4J_CSV_POSTFIX}" \ + --relationships=HAS_INTEREST="/import/dynamic/Person_hasInterest_Tag${NEO4J_CSV_POSTFIX}" \ + --relationships=IS_LOCATED_IN="/import/dynamic/Person_isLocatedIn_City${NEO4J_CSV_POSTFIX}" \ + --relationships=KNOWS="/import/dynamic/Person_knows_Person${NEO4J_CSV_POSTFIX}" \ + --relationships=LIKES="/import/dynamic/Person_likes_Comment${NEO4J_CSV_POSTFIX}" \ + --relationships=LIKES="/import/dynamic/Person_likes_Post${NEO4J_CSV_POSTFIX}" \ + --relationships=HAS_CREATOR="/import/dynamic/Post_hasCreator_Person${NEO4J_CSV_POSTFIX}" \ + 
--relationships=HAS_TAG="/import/dynamic/Comment_hasTag_Tag${NEO4J_CSV_POSTFIX}" \ + --relationships=HAS_TAG="/import/dynamic/Post_hasTag_Tag${NEO4J_CSV_POSTFIX}" \ + --relationships=IS_LOCATED_IN="/import/dynamic/Post_isLocatedIn_Country${NEO4J_CSV_POSTFIX}" \ + --relationships=STUDY_AT="/import/dynamic/Person_studyAt_University${NEO4J_CSV_POSTFIX}" \ + --relationships=WORK_AT="/import/dynamic/Person_workAt_Company${NEO4J_CSV_POSTFIX}" \ --delimiter '|'