-
-
Notifications
You must be signed in to change notification settings - Fork 18
/
ingredients.py
113 lines (86 loc) · 5 KB
/
ingredients.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import bs4
import json
import httpx
import asyncio
import aiofiles
# ----------------------------------------
# FUNCTIONS
# ----------------------------------------
async def scan(url):
    """Scan *url* for known "ingredients" (technology fingerprints).

    Loads every ingredient definition from the local ``ingredients/``
    directory tree (one JSON file per ingredient, grouped in category
    sub-directories), fetches the page at *url*, and matches the page's
    HTML tags and response headers against each ingredient's checks.

    Args:
        url: Address of the page to scan. Redirects are followed.

    Returns:
        list[str]: Matching ingredients as ``"category/ingredient"``
        strings, each listed at most once, in discovery order.

    Raises:
        httpx.RequestError: If the final response status is not 200.
            Other network exceptions propagate (handled in main.py).
    """
    matching_ingredients = []

    def add_ingredient(category: str, ingredient: str):
        # Record a match once only — an ingredient may satisfy several checks.
        if f"{category}/{ingredient}" not in matching_ingredients:
            matching_ingredients.append(f"{category}/{ingredient}")

    async def listdir_async(path):
        # os.listdir blocks on disk I/O; run it on a worker thread so the
        # event loop stays responsive (same pattern as BeautifulSoup below).
        # asyncio.to_thread replaces the deprecated
        # get_event_loop()/run_in_executor combination.
        return await asyncio.to_thread(os.listdir, path)

    categories = await listdir_async("ingredients")
    if "categories.json" in categories:
        categories.remove("categories.json")
    if ".DS_Store" in categories:
        categories.remove(".DS_Store")  # macOS only, improves compatibility

    async with httpx.AsyncClient() as client:
        r = await client.get(url, follow_redirects=True)
    # other exceptions handled in main.py
    if r.status_code != 200:
        raise httpx.RequestError(f"Invalid Request Status Code ({r.status_code})")
    # Run BeautifulSoup in a separate thread to avoid blocking the event loop
    soup = await asyncio.to_thread(bs4.BeautifulSoup, r.text, "html.parser")
    headers = r.headers

    # ----------------------------------------
    # INGREDIENTS SCANNER
    # ----------------------------------------
    for category in categories:
        ingredients = await listdir_async(f"ingredients/{category}")
        if ".DS_Store" in ingredients:
            ingredients.remove(".DS_Store")  # macOS only, improves compatibility
        for ingredient in ingredients:
            async with aiofiles.open(f"ingredients/{category}/{ingredient}", "r") as f:
                f_content = await f.read()
            ingredient_data = json.loads(f_content)
            # ----- STATS -----
            # increment total scans for each ingredient
            # <PLACEHOLDER>
            # -----------------
            for tag_check in ingredient_data["checks"]["tags"]:
                tags = soup.find_all(tag_check["tag"])
                for tag in tags:
                    # Hoisted: the checked attribute's value on this tag
                    # (None when the attribute is absent or the check's
                    # "attribute" key is null).
                    attr_value = tag.get(tag_check["attribute"])
                    # check for tag attribute presence (value is None)
                    if tag_check["value"] is None and attr_value is not None:
                        add_ingredient(category, ingredient)
                    # check for tag content with "*" wildcards: every
                    # fragment between wildcards must appear in the attribute
                    elif attr_value is not None and "*" in tag_check["value"]:
                        fragments = tag_check["value"].split("*")
                        if all(fragment in attr_value for fragment in fragments):
                            add_ingredient(category, ingredient)
                    # check for tag content (attribute is not None)
                    elif attr_value is not None and tag_check["value"] in attr_value:
                        add_ingredient(category, ingredient)
                    # check for tag text content (attribute is None)
                    elif tag_check["attribute"] is None and tag_check["value"] in tag.text:
                        add_ingredient(category, ingredient)
                    # check for <meta name="generator"|"platform" content="...">.
                    # To enable this check, set the tag to "meta" and the
                    # attribute to "generator" or "platform".
                    elif tag_check["tag"] == "meta" and tag.get("name") in ("generator", "platform"):
                        # `or ""` guards against a meta tag with no content
                        # attribute, which would otherwise raise TypeError.
                        if tag_check["value"] in (tag.get("content") or ""):
                            add_ingredient(category, ingredient)
            # NOTE: httpx.Headers is case-insensitive, so header-name
            # capitalization in the ingredient files does not matter here.
            for header_check in ingredient_data["checks"]["headers"]:
                # check request header
                if header_check["header"] in headers:
                    if header_check["value"] is None:
                        # header presence alone is enough
                        add_ingredient(category, ingredient)
                    elif header_check["value"] in headers[header_check["header"]]:
                        add_ingredient(category, ingredient)
    # ----------------------------------------
    return matching_ingredients