# Skip to content
#
# Update data_parse.yml #29
#
# Update data_parse.yml
#
# Update data_parse.yml #29
#
# Workflow file for this run

# On every push to main: scrape the first table of the LOLManuscriptMonday
# README, export it (with its hyperlinks) to LOLMonday_latest.csv / .json,
# then commit and push the refreshed files.
name: manuscript_export
on:
  push:
    branches:
      - main
jobs:
  build:
    runs-on: ubuntu-latest
    # The last step pushes a commit with GITHUB_TOKEN, which needs write
    # access to repository contents.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.8"
      - name: upgrade pip and install python packages
        run: |
          python -m pip install --upgrade pip setuptools
          python -m pip install requests beautifulsoup4 pandas lxml html5lib
      - name: output_render
        uses: jannekem/run-python-script-action@v1
        with:
          script: |
            import io
            import json

            import pandas as pd
            import requests
            from bs4 import BeautifulSoup

            README_URL = "https://github.com/ladiesoflandsat/LOLManuscriptMonday/blob/main/README.md"
            LINK_COLUMNS = ["link_article", "link_handle", "link_tweet"]


            def readme_parse():
                """Export the first table on the README page to CSV and JSON.

                The table's cell text is read with pandas; the first hyperlink
                of every cell is collected separately and appended as the
                columns in LINK_COLUMNS. Writes LOLMonday_latest.csv and
                LOLMonday_latest.json to the working directory and prints the
                JSON.

                Raises:
                    requests.HTTPError: the page request returned an error status.
                    RuntimeError: the page contains no <table> element.
                """
                # Fetch once so the table text and the links come from the
                # same snapshot of the page.
                response = requests.get(README_URL, timeout=30)
                response.raise_for_status()
                html = response.text

                df = pd.read_html(io.StringIO(html))[0]

                table = BeautifulSoup(html, "html.parser").find("table")
                if table is None:
                    raise RuntimeError(f"no <table> found at {README_URL}")

                links = []
                for row in table.find_all("tr"):
                    for cell in row.find_all("td"):
                        anchor = cell.find("a")
                        # Cells without a hyperlink are skipped.
                        if anchor is not None and anchor.has_attr("href"):
                            links.append(anchor["href"])

                # Consecutive links are grouped three per table row
                # (article, handle, tweet); a trailing partial group is dropped.
                # NOTE(review): assumes every row carries exactly three links;
                # confirm against the README layout.
                width = len(LINK_COLUMNS)
                row_count = len(links) // width
                link_rows = [
                    links[i * width:(i + 1) * width] for i in range(row_count)
                ]
                df_links = pd.DataFrame(link_rows, columns=LINK_COLUMNS)

                df = pd.concat([df, df_links], axis=1)
                for column in LINK_COLUMNS:
                    df[column] = df[column].map(str)

                df.to_csv("LOLMonday_latest.csv", index=False, encoding="utf-8-sig")

                # Round-trip through json so the output uses json.dumps formatting.
                records = json.dumps(
                    json.loads(df.to_json(orient="records")), indent=2
                )
                print(records)
                with open("LOLMonday_latest.json", "w", encoding="utf-8") as f:
                    f.write(records)


            readme_parse()
      - name: commit files
        continue-on-error: true
        run: |
          today=$(date -u +"%Y-%m-%d")
          # NOTE(review): address restored from a redacted placeholder; confirm.
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          git add -A
          # Commit only when the export actually changed a tracked file.
          if git diff --cached --quiet; then
            echo "no dataset changes to commit"
          else
            git commit -m "updated datasets ${today} UTC"
          fi
      - name: push changes
        continue-on-error: true
        # NOTE(review): version tag restored from a redacted value; confirm
        # the intended release of this action.
        uses: ad-m/github-push-action@v0.6.0
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: main