diff --git a/.gitignore b/.gitignore index c53506079205f5bf98431f4e2d42218dc6e64a4a..f5080e0bf3b8a4ce1845a9484d0743c4b9cd10b7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1 @@ -node_modules -/web/dist -/cmd/cheesegull/data +/cheesegull diff --git a/cheesegull.go b/cheesegull.go index 79058077776c2ae70a619e5eef9ae972247a4576..29ec9b8e4271e2b2f483cdef09a89c9b3c5bde49 100644 --- a/cheesegull.go +++ b/cheesegull.go @@ -1,5 +1,64 @@ package main +import ( + "database/sql" + "fmt" + "os" + "strings" + "time" + + "github.com/alecthomas/kingpin" + _ "github.com/go-sql-driver/mysql" + "github.com/osuripple/cheesegull/dbmirror" + "github.com/osuripple/cheesegull/downloader" + "github.com/osuripple/cheesegull/models" + osuapi "github.com/thehowl/go-osuapi" +) + +var ( + osuAPIKey = kingpin.Flag("api-key", "osu! API key").Short('k').Envar("OSU_API_KEY").String() + osuUsername = kingpin.Flag("osu-username", "osu! username (for downloading and fetching whether a beatmap has a video)").Short('u').Envar("OSU_USERNAME").String() + osuPassword = kingpin.Flag("osu-password", "osu! password (for downloading and fetching whether a beatmap has a video)").Short('p').Envar("OSU_PASSWORD").String() + mysqlDSN = kingpin.Flag("mysql-dsn", "DSN of MySQL").Short('m').Default("root@/cheesegull").Envar("MYSQL_DSN").String() +) + +func addTimeParsing(dsn string) string { + sep := "?" + if strings.Contains(dsn, "?") { + sep = "&" + } + dsn += sep + "parseTime=true" + return dsn +} + func main() { + kingpin.Parse() + + // set up osuapi client + c := osuapi.NewClient(*osuAPIKey) + + // set up downloader + d, err := downloader.LogIn(*osuUsername, *osuPassword) + if err != nil { + fmt.Println("Can't log in into osu!:", err) + os.Exit(1) + } + dbmirror.SetHasVideo(d.HasVideo) + + // set up mysql + db, err := sql.Open("mysql", addTimeParsing(*mysqlDSN)) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + // run mysql migrations + err = models.RunMigrations(db) + if err != nil { + fmt.Println("Error running migrations", err) + } + // start running components of cheesegull + go dbmirror.StartSetUpdater(c, db) + dbmirror.DiscoverEvery(c, db, time.Minute*30, time.Second*20) } diff --git a/dbmirror/dbmirror.go b/dbmirror/dbmirror.go new file mode 100644 index 0000000000000000000000000000000000000000..db56897902e8a91f1641ca95983349470d179fe0 --- /dev/null +++ b/dbmirror/dbmirror.go @@ -0,0 +1,146 @@ +// Package dbmirror is a package to create a database which is almost exactly +// the same as osu!'s beatmap database. +package dbmirror + +import ( + "database/sql" + "fmt" + "time" + + "github.com/osuripple/cheesegull/models" + osuapi "github.com/thehowl/go-osuapi" +) + +const ( + // NewBatchEvery is the amount of time that will elapse between one batch + // of requests and another. + NewBatchEvery = time.Minute + // PerBatch is the amount of requests and updates every batch contains. + PerBatch = 80 + // SetUpdaterWorkers is the number of goroutines which should take care of + // new batches. Keep in mind that this will be the number of maximum + // concurrent connections to the osu! API. + SetUpdaterWorkers = PerBatch / 10 +) + +// hasVideo checks whether a beatmap set has a video. +var hasVideo func(set int) (bool, error) + +// SetHasVideo sets the hasVideo function to the one passed. +func SetHasVideo(f func(int) (bool, error)) { + if f == nil { + return + } + hasVideo = f +} + +func createChildrenBeatmaps(bms []osuapi.Beatmap) []models.Beatmap { + cgBms := make([]models.Beatmap, len(bms)) + for idx, bm := range bms { + cgBms[idx] = models.Beatmap{ + ID: bm.BeatmapID, + ParentSetID: bm.BeatmapSetID, + DiffName: bm.DiffName, + FileMD5: bm.FileMD5, + Mode: int(bm.Mode), + BPM: bm.BPM, + AR: float32(bm.ApproachRate), + OD: float32(bm.OverallDifficulty), + CS: float32(bm.CircleSize), + HP: float32(bm.HPDrain), + TotalLength: bm.TotalLength, + HitLength: bm.HitLength, + Playcount: bm.Playcount, + Passcount: bm.Passcount, + MaxCombo: bm.MaxCombo, + DifficultyRating: bm.DifficultyRating, + } + } + return cgBms +} + +func setFromOsuAPIBeatmap(b osuapi.Beatmap) models.Set { + return models.Set{ + ID: b.BeatmapSetID, + RankedStatus: int(b.Approved), + ApprovedDate: time.Time(b.ApprovedDate), + LastUpdate: time.Time(b.LastUpdate), + LastChecked: time.Now(), + Artist: b.Artist, + Title: b.Title, + Creator: b.Creator, + Source: b.Source, + Tags: b.Tags, + Genre: int(b.Genre), + Language: int(b.Language), + Favourites: b.FavouriteCount, + } +} + +func updateSet(c *osuapi.Client, db *sql.DB, set models.Set) error { + bms, err := c.GetBeatmaps(osuapi.GetBeatmapsOpts{ + BeatmapSetID: set.ID, + }) + if err != nil { + return err + } + if len(bms) == 0 { + // set has been deleted from osu!, so we do the same thing + return models.DeleteSet(db, set.ID) + } + + // create the new set based on the information we can obtain from the + // first beatmap's information + var x = bms[0] + updated := !time.Time(x.LastUpdate).Equal(set.LastUpdate) + set = setFromOsuAPIBeatmap(x) + set.ChildrenBeatmaps = createChildrenBeatmaps(bms) + if updated { + // if it has been updated, video might have been added or removed + // so we need to check for it + set.HasVideo, err = hasVideo(x.BeatmapSetID) + if err != nil { + return err + } + } + + return models.CreateSet(db, set) +} + +// By making the buffer the same size of the batch, we can be sure that all +// sets from the previous batch will have completed by the time we finish +// pushing all the beatmaps to the queue. +var setQueue = make(chan models.Set, PerBatch) + +// setUpdater is a function to be run as a goroutine, that receives sets +// from setQueue and brings the information in the database up-to-date for that +// set. +func setUpdater(c *osuapi.Client, db *sql.DB) { + for set := range setQueue { + err := updateSet(c, db, set) + if err != nil { + fmt.Printf("Error while updating set %d: %v\n", set.ID, err) + } + } +} + +// StartSetUpdater does batch updates for the beatmaps in the database, +// employing goroutines to fetch the data from the osu! API and then write it to +// the database. +func StartSetUpdater(c *osuapi.Client, db *sql.DB) { + for i := 0; i < SetUpdaterWorkers; i++ { + go setUpdater(c, db) + } + for { + sets, err := models.FetchSetsForBatchUpdate(db, PerBatch) + if err != nil { + fmt.Println("Error while fetching sets:", err) + time.Sleep(NewBatchEvery) + continue + } + for _, set := range sets { + setQueue <- set + } + time.Sleep(NewBatchEvery) + } +} diff --git a/dbmirror/discover.go b/dbmirror/discover.go new file mode 100644 index 0000000000000000000000000000000000000000..f4419e8c385c5c2a04f8411527e40811e27a8e85 --- /dev/null +++ b/dbmirror/discover.go @@ -0,0 +1,66 @@ +package dbmirror + +import ( + "database/sql" + "fmt" + "time" + + "github.com/osuripple/cheesegull/models" + osuapi "github.com/thehowl/go-osuapi" +) + +// Discover discovers new beatmaps in the osu! database and adds them. +func Discover(c *osuapi.Client, db *sql.DB) error { + id, err := models.BiggestSetID(db) + if err != nil { + return err + } + // failedAttempts is the number of consecutive failed attempts at fetching a + // beatmap (by 'failed', in this case we mean exclusively when a request to + // get_beatmaps returns no beatmaps) + failedAttempts := 0 + for failedAttempts < 4096 { + id++ + bms, err := c.GetBeatmaps(osuapi.GetBeatmapsOpts{ + BeatmapSetID: id, + }) + if err != nil { + return err + } + if len(bms) == 0 { + failedAttempts++ + continue + } + failedAttempts = 0 + + set := setFromOsuAPIBeatmap(bms[0]) + set.ChildrenBeatmaps = createChildrenBeatmaps(bms) + set.HasVideo, err = hasVideo(bms[0].BeatmapSetID) + if err != nil { + return err + } + + err = models.CreateSet(db, set) + if err != nil { + return err + } + } + + return nil +} + +// DiscoverEvery runs Discover and waits for it to finish. If Discover returns +// an error, then it will wait errorWait before running Discover again. If +// Discover doesn't return any error, then it will wait successWait before +// running Discover again. +func DiscoverEvery(c *osuapi.Client, db *sql.DB, successWait, errorWait time.Duration) { + for { + err := Discover(c, db) + if err == nil { + time.Sleep(successWait) + } else { + fmt.Println("An error occurred while discovering beatmaps:", err) + time.Sleep(errorWait) + } + } +} diff --git a/downloader/downloader.go b/downloader/downloader.go index 7cb0f8e0c4cda311e9680645cef8fdf261bb6ba5..f46ed55a134690550d3d25b6828bc647687b1791 100644 --- a/downloader/downloader.go +++ b/downloader/downloader.go @@ -44,24 +44,32 @@ func LogIn(username, password string) (*Client, error) { // osu! website. type Client http.Client -// Download downloads a beatmap from the osu! website. -// First reader is beatmap with video. -// Second reader is beatmap without video. -// If video is not in the beatmap, second reader will be nil and first reader -// will be beatmap without video. -func (c *Client) Download(setID int) (io.ReadCloser, io.ReadCloser, error) { +// HasVideo checks whether a beatmap has a video. +func (c *Client) HasVideo(setID int) (bool, error) { h := (*http.Client)(c) page, err := h.Get(fmt.Sprintf("https://osu.ppy.sh/s/%d", setID)) if err != nil { - return nil, nil, err + return false, err } defer page.Body.Close() - pageData, err := ioutil.ReadAll(page.Body) + body, err := ioutil.ReadAll(page.Body) + if err != nil { + return false, err + } + return bytes.Contains(body, []byte(fmt.Sprintf(`href="/d/%dn"`, setID))), nil +} + +// Download downloads a beatmap from the osu! website. +// First reader is beatmap with video. +// Second reader is beatmap without video. +// If video is not in the beatmap, second reader will be nil and first reader +// will be beatmap without video. +func (c *Client) Download(setID int) (io.ReadCloser, io.ReadCloser, error) { + hasVideo, err := c.HasVideo(setID) if err != nil { return nil, nil, err } - hasVideo := bytes.Contains(pageData, []byte(fmt.Sprintf(`href="/d/%dn"`, setID))) if hasVideo { r1, err := c.getReader(strconv.Itoa(setID)) diff --git a/models/beatmap.go b/models/beatmap.go new file mode 100644 index 0000000000000000000000000000000000000000..aa9e39aa92d8fc386b2efa6e23f907267a576fc0 --- /dev/null +++ b/models/beatmap.go @@ -0,0 +1,59 @@ +package models + +import "database/sql" + +// Beatmap represents a single beatmap (difficulty) on osu!. +type Beatmap struct { + ID int `json:"BeatmapID"` + ParentSetID int + DiffName string + FileMD5 string + Mode int + BPM float64 + AR float32 + OD float32 + CS float32 + HP float32 + TotalLength int + HitLength int + Playcount int + Passcount int + MaxCombo int + DifficultyRating float64 +} + +// CreateBeatmaps adds beatmaps in the database. +func CreateBeatmaps(db *sql.DB, bms ...Beatmap) error { + if len(bms) == 0 { + return nil + } + + q := ` +INSERT INTO beatmaps( + id, parent_set_id, diff_name, mode, bpm, + ar, od, cs, hp, total_length, hit_length, + playcount, passcount, max_combo, difficulty_rating +) +VALUES ` + const valuePlaceholder = `( + ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, + ?, ?, ?, ? + )` + + args := make([]interface{}, 0, 15*4) + for idx, bm := range bms { + if idx != 0 { + q += ", " + } + q += valuePlaceholder + args = append(args, + bm.ID, bm.ParentSetID, bm.DiffName, bm.Mode, bm.BPM, + bm.AR, bm.OD, bm.CS, bm.HP, bm.TotalLength, bm.HitLength, + bm.Playcount, bm.Passcount, bm.MaxCombo, bm.DifficultyRating, + ) + } + + _, err := db.Exec(q, args...) + return err +} diff --git a/models/migrations.go b/models/migrations.go new file mode 100644 index 0000000000000000000000000000000000000000..228567e67955c6584622ff54307088274a6cd40c --- /dev/null +++ b/models/migrations.go @@ -0,0 +1,47 @@ +// THIS FILE HAS BEEN AUTOMATICALLY GENERATED +// To re-generate it, run "go generate" in the models folder. + +package models + +var migrations = [...]string{ + `CREATE TABLE sets( + id INT NOT NULL, + ranked_status TINYINT NOT NULL, + approved_date DATETIME NOT NULL, + last_update DATETIME NOT NULL, + last_checked DATETIME NOT NULL, + artist VARCHAR(1000) NOT NULL, + title VARCHAR(1000) NOT NULL, + creator VARCHAR(1000) NOT NULL, + source VARCHAR(1000) NOT NULL, + tags VARCHAR(1000) NOT NULL, + has_video TINYINT NOT NULL, + genre TINYINT NOT NULL, + language TINYINT NOT NULL, + favourites INT NOT NULL, + set_modes TINYINT NOT NULL, + PRIMARY KEY(id) +); +`, + `CREATE TABLE beatmaps( + id INT NOT NULL, + parent_set_id INT NOT NULL, + diff_name VARCHAR(1000) NOT NULL, + mode INT NOT NULL, + bpm DECIMAL(10, 4) NOT NULL, + ar DECIMAL(4, 2) NOT NULL, + od DECIMAL(4, 2) NOT NULL, + cs DECIMAL(4, 2) NOT NULL, + hp DECIMAL(4, 2) NOT NULL, + total_length INT NOT NULL, + hit_length INT NOT NULL, + playcount INT NOT NULL, + passcount INT NOT NULL, + max_combo INT NOT NULL, + difficulty_rating INT NOT NULL, + PRIMARY KEY(id), + FOREIGN KEY (parent_set_id) REFERENCES sets(id) + ON DELETE CASCADE + ON UPDATE CASCADE +);`, +} diff --git a/models/migrations/0001.sql b/models/migrations/0001.sql new file mode 100644 index 0000000000000000000000000000000000000000..c9801d363ac716c4cb66c7516ef9c8e8cbd9a886 --- /dev/null +++ b/models/migrations/0001.sql @@ -0,0 +1,18 @@ +CREATE TABLE sets( + id INT NOT NULL, + ranked_status TINYINT NOT NULL, + approved_date DATETIME NOT NULL, + last_update DATETIME NOT NULL, + last_checked DATETIME NOT NULL, + artist VARCHAR(1000) NOT NULL, + title VARCHAR(1000) NOT NULL, + creator VARCHAR(1000) NOT NULL, + source VARCHAR(1000) NOT NULL, + tags VARCHAR(1000) NOT NULL, + has_video TINYINT NOT NULL, + genre TINYINT NOT NULL, + language TINYINT NOT NULL, + favourites INT NOT NULL, + set_modes TINYINT NOT NULL, + PRIMARY KEY(id) +); diff --git a/models/migrations/0002.sql b/models/migrations/0002.sql new file mode 100644 index 0000000000000000000000000000000000000000..57c3722a9e15231146c6cc57e5acf77d1df27ddf --- /dev/null +++ b/models/migrations/0002.sql @@ -0,0 +1,21 @@ +CREATE TABLE beatmaps( + id INT NOT NULL, + parent_set_id INT NOT NULL, + diff_name VARCHAR(1000) NOT NULL, + mode INT NOT NULL, + bpm DECIMAL(10, 4) NOT NULL, + ar DECIMAL(4, 2) NOT NULL, + od DECIMAL(4, 2) NOT NULL, + cs DECIMAL(4, 2) NOT NULL, + hp DECIMAL(4, 2) NOT NULL, + total_length INT NOT NULL, + hit_length INT NOT NULL, + playcount INT NOT NULL, + passcount INT NOT NULL, + max_combo INT NOT NULL, + difficulty_rating INT NOT NULL, + PRIMARY KEY(id), + FOREIGN KEY (parent_set_id) REFERENCES sets(id) + ON DELETE CASCADE + ON UPDATE CASCADE +); \ No newline at end of file diff --git a/models/migrations_gen.go b/models/migrations_gen.go new file mode 100644 index 0000000000000000000000000000000000000000..c8bcb80fbb4e96d179ec73da7e7de0acb595e46a --- /dev/null +++ b/models/migrations_gen.go @@ -0,0 +1,60 @@ +// +build ignore + +package main + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "strings" +) + +const fileHeader = `// THIS FILE HAS BEEN AUTOMATICALLY GENERATED +// To re-generate it, run "go generate" in the models folder. + +package models + +var migrations = [...]string{ +` + +func main() { + // ReadDir gets all the files in the directory and then sorts them + // alphabetically - thus we can be sure 0000 will come first and 0001 will + // come afterwards. + files, err := ioutil.ReadDir("migrations") + check(err) + + out, err := os.Create("migrations.go") + check(err) + + _, err = out.WriteString(fileHeader) + check(err) + + for _, file := range files { + if !strings.HasSuffix(file.Name(), ".sql") || file.IsDir() { + continue + } + f, err := os.Open("migrations/" + file.Name()) + check(err) + + out.WriteString("\t`") + _, err = io.Copy(out, f) + check(err) + out.WriteString("`,\n") + + f.Close() + } + + _, err = out.WriteString("}\n") + check(err) + + check(out.Close()) +} + +func check(err error) { + if err != nil { + fmt.Fprintln(os.Stdout, err) + os.Exit(1) + } +} diff --git a/models/models.go b/models/models.go new file mode 100644 index 0000000000000000000000000000000000000000..ef9b0e5b43b429793d09900e1a4e0eb383329265 --- /dev/null +++ b/models/models.go @@ -0,0 +1,51 @@ +// Package models contains everything that is needed to interface to the +// database CheeseGull is using. +package models + +import ( + "database/sql" +) + +//go:generate go run migrations_gen.go + +// RunMigrations brings the database up to date following the migrations. +func RunMigrations(db *sql.DB) error { + var version int + var _b []byte + err := db.QueryRow("SHOW TABLES LIKE 'db_version'").Scan(&_b) + switch err { + case nil: + // fetch version from db + err = db.QueryRow("SELECT version FROM db_version").Scan(&version) + if err != nil { + return err + } + case sql.ErrNoRows: + _, err = db.Exec("CREATE TABLE db_version(version INT NOT NULL)") + if err != nil { + return err + } + _, err = db.Exec("INSERT INTO db_version(version) VALUES ('-1')") + if err != nil { + return err + } + version = -1 + default: + return err + } + + for { + version++ + if version >= len(migrations) { + version-- + db.Exec("UPDATE db_version SET version = ?", version) + return nil + } + + s := migrations[version] + _, err = db.Exec(s) + if err != nil { + return err + } + } +} diff --git a/models/set.go b/models/set.go new file mode 100644 index 0000000000000000000000000000000000000000..0d4522d950a26b8f4634c1bac2f380373b8f99a7 --- /dev/null +++ b/models/set.go @@ -0,0 +1,132 @@ +package models + +import ( + "database/sql" + "fmt" + "time" +) + +// Set represents a set of beatmaps usually sharing the same song. +type Set struct { + ID int `json:"SetID"` + ChildrenBeatmaps []Beatmap + RankedStatus int + ApprovedDate time.Time + LastUpdate time.Time + LastChecked time.Time + Artist string + Title string + Creator string + Source string + Tags string + HasVideo bool + Genre int + Language int + Favourites int +} + +// FetchSetsForBatchUpdate fetches limit sets from the database, sorted by +// LastChecked (asc, older first). Results are further filtered: if the set's +// RankedStatus is 3, 0 or -1 (qualified, pending or WIP), at least 30 minutes +// must have passed from LastChecked. For all other statuses, at least 4 days +// must have passed from LastChecked. +func FetchSetsForBatchUpdate(db *sql.DB, limit int) ([]Set, error) { + n := time.Now() + rows, err := db.Query(` +SELECT + id, ranked_status, approved_date, last_update, last_checked, + artist, title, creator, source, tags, has_video, genre, + language, favourites +FROM sets +WHERE (ranked_status IN (3, 0, -1) AND last_checked <= ?) OR last_checked <= ? +ORDER BY last_checked ASC +LIMIT ?`, + n.Add(-time.Minute*30), + n.Add(-time.Hour*24*4), + limit, + ) + if err != nil { + return nil, err + } + + sets := make([]Set, 0, limit) + for rows.Next() { + var s Set + err = rows.Scan( + &s.ID, &s.RankedStatus, &s.ApprovedDate, &s.LastUpdate, &s.LastChecked, + &s.Artist, &s.Title, &s.Creator, &s.Source, &s.Tags, &s.HasVideo, &s.Genre, + &s.Language, &s.Favourites, + ) + if err != nil { + return nil, err + } + sets = append(sets, s) + } + + return sets, nil +} + +// DeleteSet deletes a set from the database, removing also its children +// beatmaps. +func DeleteSet(db *sql.DB, set int) error { + _, err := db.Exec("DELETE FROM beatmaps WHERE parent_set_id = ?", set) + if err != nil { + return err + } + _, err = db.Exec("DELETE FROM sets WHERE id = ?", set) + return err +} + +// createSetModes will generate the correct value for setModes, which is +// basically a bitwise enum containing the modes that are on a certain set. +func createSetModes(bms []Beatmap) (setModes uint8) { + for _, bm := range bms { + m := bm.Mode + if m < 0 || m >= 4 { + continue + } + setModes |= 1 << uint(m) + } + return setModes +} + +// CreateSet creates (and updates) a beatmap set in the database. +func CreateSet(db *sql.DB, s Set) error { + fmt.Println("CreateSet", s.ID) + // delete existing set, if any. + // This is mostly a lazy way to make sure updates work as well. + err := DeleteSet(db, s.ID) + if err != nil { + return err + } + + _, err = db.Exec(` +INSERT INTO sets( + id, ranked_status, approved_date, last_update, last_checked, + artist, title, creator, source, tags, has_video, genre, + language, favourites, set_modes +) +VALUES ( + ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, ?, + ?, ?, ? +)`, s.ID, s.RankedStatus, s.ApprovedDate, s.LastUpdate, s.LastChecked, + s.Artist, s.Title, s.Creator, s.Source, s.Tags, s.HasVideo, s.Genre, + s.Language, s.Favourites, createSetModes(s.ChildrenBeatmaps)) + if err != nil { + return err + } + + return CreateBeatmaps(db, s.ChildrenBeatmaps...) +} + +// BiggestSetID retrieves the biggest set ID in the sets database. This is used +// by discovery to have a starting point from which to discover new beatmaps. +func BiggestSetID(db *sql.DB) (int, error) { + var i int + err := db.QueryRow("SELECT id FROM sets ORDER BY id DESC LIMIT 1").Scan(&i) + if err == sql.ErrNoRows { + return 0, nil + } + return i, err +}