Skip to content

Commit

Permalink
Logging overhaul & cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jpahm committed Feb 7, 2024
1 parent e274649 commit 63adbab
Show file tree
Hide file tree
Showing 18 changed files with 250 additions and 98 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ check:
gofmt -w ./..
goimports -w ./..

build: ./main/main.go
go build -o $(EXEC_NAME) ./main/main.go
build: ./main.go
go build -o $(EXEC_NAME) ./main.go

clean: $(EXEC_NAME)
rm $(EXEC_NAME)
2 changes: 1 addition & 1 deletion build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ echo[

::build
echo Building...
go build -o %EXEC_NAME% ./main/main.go
go build -o %EXEC_NAME% ./main.go
if ERRORLEVEL 1 exit /b %ERRORLEVEL% :: fail if error occurred
echo Build complete!
13 changes: 12 additions & 1 deletion main/main.go → main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/UTDNebula/api-tools/parser"
"github.com/UTDNebula/api-tools/scrapers"
"github.com/UTDNebula/api-tools/uploader"
"github.com/UTDNebula/api-tools/utils"
)

func main() {
Expand Down Expand Up @@ -47,6 +48,9 @@ func main() {
upload := flag.Bool("upload", false, "Puts the tool into upload mode.")
replace := flag.Bool("replace", false, "Alongside -upload, specifies that uploaded data should replace existing data rather than being merged.")

// Flags for logging
verbose := flag.Bool("verbose", false, "Enables verbose logging, good for debugging purposes.")

// Parse flags
flag.Parse()

Expand All @@ -66,7 +70,14 @@ func main() {
}

defer logFile.Close()
log.SetOutput(logFile)
// Set logging output destination to a SplitWriter that writes to both the log file and stdout
log.SetOutput(utils.NewSplitWriter(logFile, os.Stdout))
// Do verbose logging if verbose flag specified
if *verbose {
log.SetFlags(log.Ltime | log.Lmicroseconds | log.Lshortfile | utils.Lverbose)
} else {
log.SetFlags(log.Ltime)
}

// Perform actions based on flags
switch {
Expand Down
2 changes: 1 addition & 1 deletion parser/gradeLoader.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func loadGrades(csvDir string) map[string]map[string][]int {
gradeMap := make(map[string]map[string][]int)

if csvDir == "" {
log.Print("No grade data CSV directory specified. Grade data will not be included.\n")
log.Print("No grade data CSV directory specified. Grade data will not be included.")
return gradeMap
}

Expand Down
21 changes: 11 additions & 10 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {
// Load grade data from csv in advance
GradeMap = loadGrades(csvPath)
if len(GradeMap) != 0 {
log.Printf("Loaded grade distributions for %d semesters.\n\n", len(GradeMap))
log.Printf("Loaded grade distributions for %d semesters.", len(GradeMap))
}

// Try to load any existing profile data
Expand All @@ -51,32 +51,32 @@ func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {
// Find paths of all scraped data
paths := utils.GetAllFilesWithExtension(inDir, ".html")
if !skipValidation {
log.Printf("Parsing and validating %d files...\n", len(paths))
log.Printf("Parsing and validating %d files...", len(paths))
} else {
log.Printf("Parsing %d files WITHOUT VALIDATION...\n", len(paths))
log.Printf("Parsing %d files WITHOUT VALIDATION...", len(paths))
}

// Parse all data
for _, path := range paths {
parse(path)
}

log.Printf("\nParsing complete. Created %d courses, %d sections, and %d professors.\n", len(Courses), len(Sections), len(Professors))
log.Printf("\nParsing complete. Created %d courses, %d sections, and %d professors.", len(Courses), len(Sections), len(Professors))

log.Print("\nParsing course requisites...\n")
log.Print("\nParsing course requisites...")

// Initialize matchers at runtime for requisite parsing; this is necessary to avoid circular reference errors with compile-time initialization
initMatchers()

for _, course := range Courses {
ReqParsers[course.Id]()
}
log.Print("Finished parsing course requisites!\n")
log.Print("Finished parsing course requisites!")

if !skipValidation {
log.Print("\nStarting validation stage...\n")
log.Print("\nStarting validation stage...")
validate()
log.Print("\nValidation complete!\n")
log.Print("\nValidation complete!")
}

// Make outDir if it doesn't already exist
Expand All @@ -93,7 +93,8 @@ func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {

// Internal parse function
func parse(path string) {
log.Printf("Parsing %s...\n", path)

utils.VPrintf("Parsing %s...", path)

// Open data file for reading
fptr, err := os.Open(path)
Expand Down Expand Up @@ -156,5 +157,5 @@ func parse(path string) {
// Try to create the course and section based on collected info
courseRef := parseCourse(courseNum, session, rowInfo, classInfo)
parseSection(courseRef, classNum, syllabusURI, session, rowInfo, classInfo)
log.Print("Parsed!\n")
utils.VPrint("Parsed!")
}
6 changes: 3 additions & 3 deletions parser/profileLoader.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ import (
func loadProfiles(inDir string) {
fptr, err := os.Open(fmt.Sprintf("%s/profiles.json", inDir))
if err != nil {
log.Print("Couldn't find/open profiles.json in the input directory. Skipping profile load.\n")
log.Print("Couldn't find/open profiles.json in the input directory. Skipping profile load.")
return
}

decoder := json.NewDecoder(fptr)

log.Print("Beginning profile load.\n")
log.Print("Beginning profile load.")

// Read open bracket
_, err = decoder.Token()
Expand Down Expand Up @@ -46,6 +46,6 @@ func loadProfiles(inDir string) {
panic(err)
}

log.Printf("Loaded %d profiles!\n\n", profileCount)
log.Printf("Loaded %d profiles!", profileCount)
fptr.Close()
}
18 changes: 6 additions & 12 deletions parser/requisiteParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func ORMatcher(group string, subgroups []string) interface{} {
func CourseMinGradeMatcher(group string, subgroups []string) interface{} {
icn, err := findICN(subgroups[1], subgroups[2])
if err != nil {
log.Printf("WARN: %s\n", err)
log.Printf("WARN: %s", err)
return OtherMatcher(group, subgroups)
}
return schema.NewCourseRequirement(icn, subgroups[3])
Expand All @@ -103,7 +103,7 @@ func CourseMinGradeMatcher(group string, subgroups []string) interface{} {
func CourseMatcher(group string, subgroups []string) interface{} {
icn, err := findICN(subgroups[1], subgroups[2])
if err != nil {
log.Printf("WARN: %s\n", err)
log.Printf("WARN: %s", err)
return OtherMatcher(group, subgroups)
}
return schema.NewCourseRequirement(icn, "D")
Expand Down Expand Up @@ -411,7 +411,6 @@ func getReqParser(course *schema.Course, hasEnrollmentReqs bool, enrollmentReqs
if len(parsedChunks) > 0 {
*reqPtr = schema.NewCollectionRequirement("REQUISITES", len(parsedChunks), parsedChunks)
}
log.Printf("\n\n")
}
}
}
Expand Down Expand Up @@ -469,7 +468,6 @@ func joinAdjacentOthers(reqs []interface{}, joinString string) []interface{} {
if temp.Description != "" {
joinedReqs = append(joinedReqs, temp)
}
//log.Printf("JOINEDREQS ARE: %v\n", joinedReqs)
return joinedReqs
}

Expand Down Expand Up @@ -499,22 +497,18 @@ func parseGroup(grp string) interface{} {
if matches != nil {
// If an applicable matcher has been found, return the result of calling its handler
result := matcher.Handler(grp, matches)
log.Printf("'%s' -> %T\n", grp, result)
utils.VPrintf("'%s' -> %T", grp, result)
return result
}
}
// Panic if no matcher was able to be found for a given group -- this means we need to add handling for it!!!
//log.Panicf("NO MATCHER FOUND FOR GROUP '%s'\nSTACK IS: %#v\n", grp, requisiteList)
//log.Printf("NO MATCHER FOR: '%s'\n", grp)
log.Printf("'%s' -> parser.OtherRequirement\n", grp)
//var temp string
//fmt.Scanf("%s", temp)
// If the group couldn't be parsed, give up and make it an OtherRequirement
utils.VPrintf("'%s' -> parser.OtherRequirement", grp)
return *schema.NewOtherRequirement(ungroupText(grp), "")
}

// Outermost function for parsing a chunk of requisite text (potentially containing multiple nested text groups)
func parseChunk(chunk string) interface{} {
log.Printf("\nPARSING CHUNK: '%s'\n", chunk)
utils.VPrintf("\nPARSING CHUNK: '%s'", chunk)
// Extract parenthesized groups from chunk text
parseText, parseGroups := groupParens(chunk)
// Initialize the requisite list and group list
Expand Down
38 changes: 19 additions & 19 deletions parser/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ func validate() {
}
}()

log.Printf("\nValidating courses...\n")
log.Printf("\nValidating courses...")
courseKeys := utils.GetMapKeys(Courses)
for i := 0; i < len(courseKeys)-1; i++ {
course1 := Courses[courseKeys[i]]
// Check for duplicate courses by comparing course_number, subject_prefix, and catalog_year as a compound key
for j := i + 1; j < len(courseKeys); j++ {
course2 := Courses[courseKeys[j]]
if course2.Catalog_year == course1.Catalog_year && course2.Course_number == course1.Course_number && course2.Subject_prefix == course1.Subject_prefix {
log.Printf("Duplicate course found for %s%s!\n", course1.Subject_prefix, course1.Course_number)
log.Printf("Duplicate course found for %s%s!", course1.Subject_prefix, course1.Course_number)
log.Printf("Course 1: %v\n\nCourse 2: %v", course1, course2)
log.Panic("Courses failed to validate!")
}
Expand All @@ -31,21 +31,21 @@ func validate() {
for _, sectionId := range course1.Sections {
section, exists := Sections[sectionId]
if !exists {
log.Printf("Nonexistent section reference found for %s%s!\n", course1.Subject_prefix, course1.Course_number)
log.Printf("Referenced section ID: %s\nCourse ID: %s\n", sectionId, course1.Id)
log.Printf("Nonexistent section reference found for %s%s!", course1.Subject_prefix, course1.Course_number)
log.Printf("Referenced section ID: %s\nCourse ID: %s", sectionId, course1.Id)
log.Panic("Courses failed to validate!")
}
if section.Course_reference != course1.Id {
log.Printf("Inconsistent section reference found for %s%s! The course references the section, but not vice-versa!\n", course1.Subject_prefix, course1.Course_number)
log.Printf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s\n", sectionId, course1.Id, section.Course_reference)
log.Printf("Inconsistent section reference found for %s%s! The course references the section, but not vice-versa!", course1.Subject_prefix, course1.Course_number)
log.Printf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s", sectionId, course1.Id, section.Course_reference)
log.Panic("Courses failed to validate!")
}
}
}
courseKeys = nil
log.Print("No invalid courses!\n\n")
log.Print("No invalid courses!")

log.Print("Validating sections...\n")
log.Print("Validating sections...")
sectionKeys := utils.GetMapKeys(Sections)
for i := 0; i < len(sectionKeys)-1; i++ {
section1 := Sections[sectionKeys[i]]
Expand All @@ -55,7 +55,7 @@ func validate() {
if section2.Section_number == section1.Section_number &&
section2.Course_reference == section1.Course_reference &&
section2.Academic_session == section1.Academic_session {
log.Print("Duplicate section found!\n")
log.Print("Duplicate section found!")
log.Printf("Section 1: %v\n\nSection 2: %v", section1, section2)
log.Panic("Sections failed to validate!")
}
Expand All @@ -64,8 +64,8 @@ func validate() {
for _, profId := range section1.Professors {
professorKey, exists := ProfessorIDMap[profId]
if !exists {
log.Printf("Nonexistent professor reference found for section ID %s!\n", section1.Id)
log.Printf("Referenced professor ID: %s\n", profId)
log.Printf("Nonexistent professor reference found for section ID %s!", section1.Id)
log.Printf("Referenced professor ID: %s", profId)
log.Panic("Sections failed to validate!")
}
profRefsSection := false
Expand All @@ -76,23 +76,23 @@ func validate() {
}
}
if !profRefsSection {
log.Printf("Inconsistent professor reference found for section ID %s! The section references the professor, but not vice-versa!\n", section1.Id)
log.Printf("Referenced professor ID: %s\n", profId)
log.Printf("Inconsistent professor reference found for section ID %s! The section references the professor, but not vice-versa!", section1.Id)
log.Printf("Referenced professor ID: %s", profId)
log.Panic("Sections failed to validate!")
}
}
// Make sure section isn't referencing a nonexistent course
_, exists := CourseIDMap[section1.Course_reference]
if !exists {
log.Printf("Nonexistent course reference found for section ID %s!\n", section1.Id)
log.Printf("Referenced course ID: %s\n", section1.Course_reference)
log.Printf("Nonexistent course reference found for section ID %s!", section1.Id)
log.Printf("Referenced course ID: %s", section1.Course_reference)
log.Panic("Sections failed to validate!")
}
}
sectionKeys = nil
log.Printf("No invalid sections!\n\n")
log.Printf("No invalid sections!")

log.Printf("Validating professors...\n")
log.Printf("Validating professors...")
profKeys := utils.GetMapKeys(Professors)
// Check for duplicate professors by comparing first_name, last_name, and sections as a compound key
for i := 0; i < len(profKeys)-1; i++ {
Expand All @@ -102,11 +102,11 @@ func validate() {
if prof2.First_name == prof1.First_name &&
prof2.Last_name == prof1.Last_name &&
prof2.Profile_uri == prof1.Profile_uri {
log.Printf("Duplicate professor found!\n")
log.Printf("Duplicate professor found!")
log.Printf("Professor 1: %v\n\nProfessor 2: %v", prof1, prof2)
log.Panic("Professors failed to validate!")
}
}
}
log.Printf("No invalid professors!\n\n")
log.Printf("No invalid professors!")
}
20 changes: 12 additions & 8 deletions scrapers/coursebook.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/*
This file contains the code for the coursebook scraper.
*/

package scrapers

import (
Expand All @@ -18,10 +22,10 @@ import (
)

func initChromeDp() (chromedpCtx context.Context, cancelFnc context.CancelFunc) {
log.Printf("Initializing chromedp...\n")
log.Printf("Initializing chromedp...")
allocCtx, cancelFnc := chromedp.NewExecAllocator(context.Background())
chromedpCtx, _ = chromedp.NewContext(allocCtx)
log.Printf("Initialized chromedp!\n")
log.Printf("Initialized chromedp!")
return
}

Expand All @@ -36,7 +40,7 @@ func refreshToken(chromedpCtx context.Context) map[string][]string {
log.Panic("LOGIN_PASSWORD is missing from .env!")
}

log.Printf("Getting new token...\n")
utils.VPrintf("Getting new token...")
_, err := chromedp.RunResponse(chromedpCtx,
chromedp.ActionFunc(func(ctx context.Context) error {
err := network.ClearBrowserCookies().Do(ctx)
Expand All @@ -63,7 +67,7 @@ func refreshToken(chromedpCtx context.Context) map[string][]string {
for i, cookie := range cookies {
cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value)
if cookie.Name == "PTGSESSID" {
fmt.Printf("Got new token: PTGSESSID = %s\n", cookie.Value)
utils.VPrintf("Got new token: PTGSESSID = %s", cookie.Value)
gotToken = true
}
}
Expand Down Expand Up @@ -150,7 +154,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
// String builder to store accumulated course HTML data for both class levels
courseBuilder := strings.Builder{}

log.Printf("Finding sections for course prefix %s...\n", coursePrefix)
log.Printf("Finding sections for course prefix %s...", coursePrefix)

// Get courses for term and prefix, split by grad and undergrad to avoid 300 section cap
for _, clevel := range []string{"clevel_u", "clevel_g"} {
Expand Down Expand Up @@ -178,7 +182,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
for _, matchSet := range smatches {
sectionIDs = append(sectionIDs, matchSet[1])
}
log.Printf("Found %d sections for course prefix %s\n", len(sectionIDs), coursePrefix)
log.Printf("Found %d sections for course prefix %s", len(sectionIDs), coursePrefix)

// Get HTML data for all section IDs
sectionsInCoursePrefix := 0
Expand Down Expand Up @@ -211,7 +215,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
fptr.Close()

// Report success, refresh token periodically
fmt.Printf("Got section: %s\n", id)
utils.VPrintf("Got section: %s", id)
if sectionIndex%30 == 0 && sectionIndex != 0 {
// Ratelimit? What ratelimit?
coursebookHeaders = refreshToken(chromedpCtx)
Expand All @@ -220,7 +224,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
}
sectionsInCoursePrefix++
}
log.Printf("\nFinished scraping course prefix %s. Got %d sections.\n", coursePrefix, sectionsInCoursePrefix)
log.Printf("\nFinished scraping course prefix %s. Got %d sections.", coursePrefix, sectionsInCoursePrefix)
totalSections += sectionsInCoursePrefix
}
log.Printf("\nDone scraping term! Scraped a total of %d sections.", totalSections)
Expand Down
Loading

0 comments on commit 63adbab

Please sign in to comment.