Skip to content

Commit

Permalink
Use new filenames (with PascalCase node label names)
Browse files Browse the repository at this point in the history
  • Loading branch information
szarnyasg committed Mar 16, 2021
1 parent bf0bc7f commit 0c90eae
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 71 deletions.
6 changes: 3 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ jobs:
command: |
mkdir data/
cd data
wget https://ldbc.github.io/ldbc_snb_data_converter/csv-composite-projected-fk-legacy-filenames.zip
unzip csv-composite-projected-fk-legacy-filenames.zip
wget -q https://ldbc.github.io/ldbc_snb_data_converter/csv-composite-projected-fk.zip
unzip csv-composite-projected-fk.zip
cd ..
- run:
name: Load
Expand All @@ -49,7 +49,7 @@ jobs:
# Cypher
cd cypher
. scripts/environment-variables-default.sh
export NEO4J_CSV_DIR=`pwd`/../data/csv-composite-projected-fk-legacy-filenames
export NEO4J_CSV_DIR=`pwd`/../data/csv-composite-projected-fk
export NEO4J_CSV_POSTFIX=.csv
scripts/load-in-one-step.sh
cd ..
Expand Down
9 changes: 3 additions & 6 deletions cypher/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,13 @@ This script replaces the headers in the input CSVs, loads them, starts Neo4j, and

## Loading the example data set

Transform the example data set in the [data converter](https://github.com/ldbc/ldbc_snb_data_converter) repository, then rename it:
Transform the example data set in the [data converter](https://github.com/ldbc/ldbc_snb_data_converter) repository:

```bash
./rename.sh
```
Then, in this repository, run

In this repository, run
```bash
. scripts/environment-variables-default.sh
export NEO4J_CSV_DIR=${DATA_CONVERTER_DIR}/ldbc_snb_data_converter/data/csv-composite-projected-fk-legacy-filenames
export NEO4J_CSV_DIR=${DATA_CONVERTER_DIR}/ldbc_snb_data_converter/data/csv-composite-projected-fk
export NEO4J_CSV_POSTFIX=.csv
scripts/load-in-one-step.sh
```
4 changes: 4 additions & 0 deletions cypher/scripts/convert-csvs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ while read line; do

echo ${FILENAME}: ${HEADER}
# replace header (no point using sed to save space as it creates a temporary file as well)
if [ ! -f ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX} ]; then
echo "${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX} does not exist"
exit 1
fi
echo ${HEADER} | ${SNB_CAT} - <(tail -n +2 ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX}) > tmpfile.csv && mv tmpfile.csv ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX}
done < headers.txt

Expand Down
62 changes: 31 additions & 31 deletions cypher/scripts/headers.txt
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
static/organisation id:ID(Organisation)|:LABEL|name:STRING|url:STRING
static/place id:ID(Place)|name:STRING|url:STRING|:LABEL
static/tagclass id:ID(TagClass)|name:STRING|url:STRING
static/tag id:ID(Tag)|name:STRING|url:STRING
static/tagclass_isSubclassOf_tagclass :START_ID(TagClass)|:END_ID(TagClass)
static/tag_hasType_tagclass :START_ID(Tag)|:END_ID(TagClass)
static/organisation_isLocatedIn_place :START_ID(Organisation)|:END_ID(Place)
static/place_isPartOf_place :START_ID(Place)|:END_ID(Place)
dynamic/comment creationDate:DATETIME|id:ID(Comment)|locationIP:STRING|browserUsed:STRING|content:STRING|length:LONG
dynamic/forum creationDate:DATETIME|id:ID(Forum)|title:STRING
dynamic/person creationDate:DATETIME|id:ID(Person)|firstName:STRING|lastName:STRING|gender:STRING|birthday:DATE|locationIP:STRING|browserUsed:STRING|speaks:STRING[]|email:STRING[]
dynamic/post creationDate:DATETIME|id:ID(Post)|imageFile:STRING|locationIP:STRING|browserUsed:STRING|language:STRING|content:STRING|length:LONG
dynamic/comment_hasCreator_person creationDate:DATETIME|:START_ID(Comment)|:END_ID(Person)
dynamic/comment_isLocatedIn_country creationDate:DATETIME|:START_ID(Comment)|:END_ID(Place)
dynamic/comment_replyOf_comment creationDate:DATETIME|:START_ID(Comment)|:END_ID(Comment)
dynamic/comment_replyOf_post creationDate:DATETIME|:START_ID(Comment)|:END_ID(Post)
dynamic/forum_containerOf_post creationDate:DATETIME|:START_ID(Forum)|:END_ID(Post)
dynamic/forum_hasMember_person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person)
dynamic/forum_hasModerator_person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person)
dynamic/forum_hasTag_tag creationDate:DATETIME|:START_ID(Forum)|:END_ID(Tag)
dynamic/person_hasInterest_tag creationDate:DATETIME|:START_ID(Person)|:END_ID(Tag)
dynamic/person_isLocatedIn_city creationDate:DATETIME|:START_ID(Person)|:END_ID(Place)
dynamic/person_knows_person creationDate:DATETIME|:START_ID(Person)|:END_ID(Person)
dynamic/person_likes_comment creationDate:DATETIME|:START_ID(Person)|:END_ID(Comment)
dynamic/person_likes_post creationDate:DATETIME|:START_ID(Person)|:END_ID(Post)
dynamic/person_studyAt_organisation creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|classYear:LONG
dynamic/person_workAt_organisation creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|workFrom:LONG
dynamic/post_hasCreator_person creationDate:DATETIME|:START_ID(Post)|:END_ID(Person)
dynamic/comment_hasTag_tag creationDate:DATETIME|:START_ID(Comment)|:END_ID(Tag)
dynamic/post_hasTag_tag creationDate:DATETIME|:START_ID(Post)|:END_ID(Tag)
dynamic/post_isLocatedIn_country creationDate:DATETIME|:START_ID(Post)|:END_ID(Place)
static/Organisation id:ID(Organisation)|:LABEL|name:STRING|url:STRING
static/Place id:ID(Place)|name:STRING|url:STRING|:LABEL
static/TagClass id:ID(TagClass)|name:STRING|url:STRING
static/Tag id:ID(Tag)|name:STRING|url:STRING
static/TagClass_isSubclassOf_TagClass :START_ID(TagClass)|:END_ID(TagClass)
static/Tag_hasType_TagClass :START_ID(Tag)|:END_ID(TagClass)
static/Organisation_isLocatedIn_Place :START_ID(Organisation)|:END_ID(Place)
static/Place_isPartOf_Place :START_ID(Place)|:END_ID(Place)
dynamic/Comment creationDate:DATETIME|id:ID(Comment)|locationIP:STRING|browserUsed:STRING|content:STRING|length:LONG
dynamic/Forum creationDate:DATETIME|id:ID(Forum)|title:STRING
dynamic/Person creationDate:DATETIME|id:ID(Person)|firstName:STRING|lastName:STRING|gender:STRING|birthday:DATE|locationIP:STRING|browserUsed:STRING|speaks:STRING[]|email:STRING[]
dynamic/Post creationDate:DATETIME|id:ID(Post)|imageFile:STRING|locationIP:STRING|browserUsed:STRING|language:STRING|content:STRING|length:LONG
dynamic/Comment_hasCreator_Person creationDate:DATETIME|:START_ID(Comment)|:END_ID(Person)
dynamic/Comment_isLocatedIn_Country creationDate:DATETIME|:START_ID(Comment)|:END_ID(Place)
dynamic/Comment_replyOf_Comment creationDate:DATETIME|:START_ID(Comment)|:END_ID(Comment)
dynamic/Comment_replyOf_Post creationDate:DATETIME|:START_ID(Comment)|:END_ID(Post)
dynamic/Forum_containerOf_Post creationDate:DATETIME|:START_ID(Forum)|:END_ID(Post)
dynamic/Forum_hasMember_Person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person)
dynamic/Forum_hasModerator_Person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person)
dynamic/Forum_hasTag_Tag creationDate:DATETIME|:START_ID(Forum)|:END_ID(Tag)
dynamic/Person_hasInterest_Tag creationDate:DATETIME|:START_ID(Person)|:END_ID(Tag)
dynamic/Person_isLocatedIn_City creationDate:DATETIME|:START_ID(Person)|:END_ID(Place)
dynamic/Person_knows_Person creationDate:DATETIME|:START_ID(Person)|:END_ID(Person)
dynamic/Person_likes_Comment creationDate:DATETIME|:START_ID(Person)|:END_ID(Comment)
dynamic/Person_likes_Post creationDate:DATETIME|:START_ID(Person)|:END_ID(Post)
dynamic/Person_studyAt_University creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|classYear:LONG
dynamic/Person_workAt_Company creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|workFrom:LONG
dynamic/Post_hasCreator_Person creationDate:DATETIME|:START_ID(Post)|:END_ID(Person)
dynamic/Comment_hasTag_Tag creationDate:DATETIME|:START_ID(Comment)|:END_ID(Tag)
dynamic/Post_hasTag_Tag creationDate:DATETIME|:START_ID(Post)|:END_ID(Tag)
dynamic/Post_isLocatedIn_Country creationDate:DATETIME|:START_ID(Post)|:END_ID(Place)
62 changes: 31 additions & 31 deletions cypher/scripts/import-to-neo4j.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,35 +29,35 @@ docker run --rm \
neo4j:${NEO4J_VERSION} \
neo4j-admin import \
--id-type=INTEGER \
--nodes=Place="/import/static/place${NEO4J_CSV_POSTFIX}" \
--nodes=Organisation="/import/static/organisation${NEO4J_CSV_POSTFIX}" \
--nodes=TagClass="/import/static/tagclass${NEO4J_CSV_POSTFIX}" \
--nodes=Tag="/import/static/tag${NEO4J_CSV_POSTFIX}" \
--nodes=Forum="/import/dynamic/forum${NEO4J_CSV_POSTFIX}" \
--nodes=Person="/import/dynamic/person${NEO4J_CSV_POSTFIX}" \
--nodes=Message:Comment="/import/dynamic/comment${NEO4J_CSV_POSTFIX}" \
--nodes=Message:Post="/import/dynamic/post${NEO4J_CSV_POSTFIX}" \
--relationships=IS_PART_OF="/import/static/place_isPartOf_place${NEO4J_CSV_POSTFIX}" \
--relationships=IS_SUBCLASS_OF="/import/static/tagclass_isSubclassOf_tagclass${NEO4J_CSV_POSTFIX}" \
--relationships=IS_LOCATED_IN="/import/static/organisation_isLocatedIn_place${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_TYPE="/import/static/tag_hasType_tagclass${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_CREATOR="/import/dynamic/comment_hasCreator_person${NEO4J_CSV_POSTFIX}" \
--relationships=IS_LOCATED_IN="/import/dynamic/comment_isLocatedIn_country${NEO4J_CSV_POSTFIX}" \
--relationships=REPLY_OF="/import/dynamic/comment_replyOf_comment${NEO4J_CSV_POSTFIX}" \
--relationships=REPLY_OF="/import/dynamic/comment_replyOf_post${NEO4J_CSV_POSTFIX}" \
--relationships=CONTAINER_OF="/import/dynamic/forum_containerOf_post${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_MEMBER="/import/dynamic/forum_hasMember_person${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_MODERATOR="/import/dynamic/forum_hasModerator_person${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_TAG="/import/dynamic/forum_hasTag_tag${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_INTEREST="/import/dynamic/person_hasInterest_tag${NEO4J_CSV_POSTFIX}" \
--relationships=IS_LOCATED_IN="/import/dynamic/person_isLocatedIn_city${NEO4J_CSV_POSTFIX}" \
--relationships=KNOWS="/import/dynamic/person_knows_person${NEO4J_CSV_POSTFIX}" \
--relationships=LIKES="/import/dynamic/person_likes_comment${NEO4J_CSV_POSTFIX}" \
--relationships=LIKES="/import/dynamic/person_likes_post${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_CREATOR="/import/dynamic/post_hasCreator_person${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_TAG="/import/dynamic/comment_hasTag_tag${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_TAG="/import/dynamic/post_hasTag_tag${NEO4J_CSV_POSTFIX}" \
--relationships=IS_LOCATED_IN="/import/dynamic/post_isLocatedIn_country${NEO4J_CSV_POSTFIX}" \
--relationships=STUDY_AT="/import/dynamic/person_studyAt_organisation${NEO4J_CSV_POSTFIX}" \
--relationships=WORK_AT="/import/dynamic/person_workAt_organisation${NEO4J_CSV_POSTFIX}" \
--nodes=Place="/import/static/Place${NEO4J_CSV_POSTFIX}" \
--nodes=Organisation="/import/static/Organisation${NEO4J_CSV_POSTFIX}" \
--nodes=TagClass="/import/static/TagClass${NEO4J_CSV_POSTFIX}" \
--nodes=Tag="/import/static/Tag${NEO4J_CSV_POSTFIX}" \
--nodes=Forum="/import/dynamic/Forum${NEO4J_CSV_POSTFIX}" \
--nodes=Person="/import/dynamic/Person${NEO4J_CSV_POSTFIX}" \
--nodes=Message:Comment="/import/dynamic/Comment${NEO4J_CSV_POSTFIX}" \
--nodes=Message:Post="/import/dynamic/Post${NEO4J_CSV_POSTFIX}" \
--relationships=IS_PART_OF="/import/static/Place_isPartOf_Place${NEO4J_CSV_POSTFIX}" \
--relationships=IS_SUBCLASS_OF="/import/static/TagClass_isSubclassOf_TagClass${NEO4J_CSV_POSTFIX}" \
--relationships=IS_LOCATED_IN="/import/static/Organisation_isLocatedIn_Place${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_TYPE="/import/static/Tag_hasType_TagClass${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_CREATOR="/import/dynamic/Comment_hasCreator_Person${NEO4J_CSV_POSTFIX}" \
--relationships=IS_LOCATED_IN="/import/dynamic/Comment_isLocatedIn_Country${NEO4J_CSV_POSTFIX}" \
--relationships=REPLY_OF="/import/dynamic/Comment_replyOf_Comment${NEO4J_CSV_POSTFIX}" \
--relationships=REPLY_OF="/import/dynamic/Comment_replyOf_Post${NEO4J_CSV_POSTFIX}" \
--relationships=CONTAINER_OF="/import/dynamic/Forum_containerOf_Post${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_MEMBER="/import/dynamic/Forum_hasMember_Person${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_MODERATOR="/import/dynamic/Forum_hasModerator_Person${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_TAG="/import/dynamic/Forum_hasTag_Tag${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_INTEREST="/import/dynamic/Person_hasInterest_Tag${NEO4J_CSV_POSTFIX}" \
--relationships=IS_LOCATED_IN="/import/dynamic/Person_isLocatedIn_City${NEO4J_CSV_POSTFIX}" \
--relationships=KNOWS="/import/dynamic/Person_knows_Person${NEO4J_CSV_POSTFIX}" \
--relationships=LIKES="/import/dynamic/Person_likes_Comment${NEO4J_CSV_POSTFIX}" \
--relationships=LIKES="/import/dynamic/Person_likes_Post${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_CREATOR="/import/dynamic/Post_hasCreator_Person${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_TAG="/import/dynamic/Comment_hasTag_Tag${NEO4J_CSV_POSTFIX}" \
--relationships=HAS_TAG="/import/dynamic/Post_hasTag_Tag${NEO4J_CSV_POSTFIX}" \
--relationships=IS_LOCATED_IN="/import/dynamic/Post_isLocatedIn_Country${NEO4J_CSV_POSTFIX}" \
--relationships=STUDY_AT="/import/dynamic/Person_studyAt_University${NEO4J_CSV_POSTFIX}" \
--relationships=WORK_AT="/import/dynamic/Person_workAt_Company${NEO4J_CSV_POSTFIX}" \
--delimiter '|'

0 comments on commit 0c90eae

Please sign in to comment.