// go-jobscraper/server.go

package main
import (
"context"
"database/sql"
"errors"
"fmt"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cache"
"github.com/gofiber/template/html/v2"
"github.com/jackc/pgx/v5/pgxpool"
_ "github.com/lib/pq"
"github.com/robfig/cron/v3"
"jobscraper/grabber"
"log"
"os"
"strconv"
"time"
)
// Build metadata printed in the startup banner by main.
//
// Nothing in this file assigns these, so they are empty unless set from
// outside. NOTE(review): presumably injected at link time with
// -ldflags "-X main.Version=... -X main.Build=..." — confirm against the
// build script.
var (
	// Version is the release version shown in the startup banner.
	Version string
	// Build is the build identifier shown in the startup banner.
	Build string
)
// fileName is the path of a local database file.
//
// NOTE(review): it is not referenced by the code visible in this file, which
// talks to PostgreSQL through pgxpool; it looks like a leftover from an
// earlier file-based store — confirm before removing.
const fileName = "./db/jobs.db"
// Sentinel errors for record-level database operations. Compare against them
// with errors.Is.
//
// NOTE(review): none of them is returned by the code visible in this file.
var (
	// ErrDuplicate reports that the record being created already exists.
	ErrDuplicate = errors.New("record already exists")
	// ErrNotExists reports that the requested row does not exist.
	ErrNotExists = errors.New("row not exists")
	// ErrUpdateFailed reports that an update did not succeed.
	ErrUpdateFailed = errors.New("update failed")
	// ErrDeleteFailed reports that a delete did not succeed.
	ErrDeleteFailed = errors.New("delete failed")
)
// SQLConn holds a database/sql connection handle.
//
// NOTE(review): the code visible in this file never constructs or uses it;
// every query goes through a *pgxpool.Pool instead. It may be a leftover.
type SQLConn struct {
	db *sql.DB // underlying database/sql handle; nil in the zero value
}
// JobEntries is a single job posting as read from the jobs table and
// serialised to JSON by the /jobs handlers.
//
// ID is the database row key (column jobs._id), while Id is the string value
// of the jobs.id column. Applied and Read are filled from the
// coalesce(applied.a, 0) and coalesce(read.d, 0) query columns; they are typed
// as any, so they carry whatever Go value the driver produces for those
// columns, and are omitted from the JSON when left nil.
type JobEntries struct {
	ID        int64  `json:"_id"`
	Title     string `json:"title"`
	Site      string `json:"site"`
	Url       string `json:"url"`
	Id        string `json:"id"`
	Summary   string `json:"summary"`
	Company   string `json:"company"`
	Location  string `json:"location"`
	Postdate  string `json:"postdate"`
	Salary    string `json:"salary"`
	Easyapply int64  `json:"easyapply"`
	Timestamp int64  `json:"timestamp"`
	Applied   any    `json:"applied,omitempty"`
	Read      any    `json:"read,omitempty"`
}
// Site is one row of the sites table: a feed that the scraper polls.
type Site struct {
	SID int64  `json:"sid"` // first column of the sites row
	Url string `json:"url"` // feed address passed to grabber.Grab
}
func main() {
log.Printf("GO-JOBSCRAPER v%+v build %+v\n\n", Version, Build)
connStr := os.Getenv("DBCONNECTION")
if connStr == "" {
log.Println("DBCONNECTION not set")
log.Println("Should be something like:")
log.Println("postgresql://user:password@server:5432/database?sslmode=disable")
log.Fatalln("Exiting...")
}
/*db*/
db, err := pgxpool.New(context.Background(), connStr)
if err != nil {
log.Fatal(err)
}
if err != nil {
log.Fatal(err)
} else {
log.Println("connected")
}
defer db.Close()
// url := "https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss"
c := cron.New()
c.AddFunc("CRON_TZ=Europe/London */15 7-21 * * 1-5", func() { JobWorker(db) })
c.AddFunc("CRON_TZ=Europe/London */60 22-23 * * 1-5", func() { JobWorker(db) })
c.AddFunc("CRON_TZ=Europe/London */90 0-6 * * 1-5", func() { JobWorker(db) })
c.AddFunc("CRON_TZ=Europe/London */90 0-23 * * 6,0", func() { JobWorker(db) })
2024-04-26 16:13:07 +00:00
c.Start()
engine := html.New("./dist", ".html")
app := fiber.New(fiber.Config{
Views: engine,
})
// Caching..
app.Use(cache.New(cache.Config{
Next: func(c *fiber.Ctx) bool {
return c.Query("noCache") == "true"
},
Expiration: 2 * time.Minute,
CacheControl: true,
}))
app.Get("/", indexHandler)
port := os.Getenv("PORT")
if port == "" {
port = "3600"
}
app.Static("/", "./dist")
app.Get("/jobs", func(c *fiber.Ctx) error {
return getJobs(c, db)
})
app.Get("/jobs/:id", func(c *fiber.Ctx) error {
return getJobById(c, db)
})
app.Put("/jobs/:id", func(c *fiber.Ctx) error {
return markJobAsReadById(c, db)
})
app.Put("/apply/:id", func(c *fiber.Ctx) error {
return markJobAsAppliedById(c, db)
})
2024-04-26 16:13:07 +00:00
log.Fatalln(app.Listen(fmt.Sprintf(":%v", port)))
}
// indexHandler renders the "index" view with no template bindings.
func indexHandler(c *fiber.Ctx) error {
	const view = "index"
	return c.Render(view, nil)
}
func JobWorker(db *pgxpool.Pool) {
log.Println("JobWorker")
sites, err := AllSites(db)
if err != nil {
log.Fatal(err)
}
log.Printf("%+v\n", sites)
// showstruct.Show(sites)
for _, url := range sites {
entries := grabber.Grab(url.Url)
InsertJobs(db, entries)
time.Sleep(5 * time.Second)
2024-04-26 16:13:07 +00:00
}
/*entries := grabber.Grab("https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss")
InsertJobs(db, entries)*/
}
// AllSites returns every row of the sites table.
//
// The query is SELECT * and each row is scanned into (SID, Url), so the table
// must expose exactly those two columns, in that order.
//
// On any failure it returns a nil slice and an error describing the step that
// failed; it never terminates the process, leaving that decision to the
// caller.
func AllSites(db *pgxpool.Pool) ([]Site, error) {
	log.Println("ALL Sites")

	rows, err := db.Query(context.Background(), "SELECT * from sites")
	if err != nil {
		return nil, fmt.Errorf("querying sites: %w", err)
	}
	defer rows.Close()

	var sites []Site
	for rows.Next() {
		var site Site
		if err := rows.Scan(&site.SID, &site.Url); err != nil {
			return nil, fmt.Errorf("scanning site row: %w", err)
		}
		sites = append(sites, site)
	}
	// Next returning false may mean "no more rows" or "read failed"; Err
	// distinguishes the two.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading sites: %w", err)
	}
	return sites, nil
}
// InsertJobs writes each scraped item to the jobs table, one INSERT per item.
//
// Every row is stored with site "Jobserve", easyapply 0, applied 0 and
// approved 1. All rows of one call share the same timestamp value: the Unix
// time in seconds at the start of the call, passed as a decimal string.
//
// The operation is best effort: a failed insert is logged and the loop moves
// on to the next item. The returned error is always nil.
func InsertJobs(db *pgxpool.Pool, jobs []grabber.RssItem) error {
	const insertJob = `insert into jobs("_id", title, site, url, id, summary, company, "location", postdate, salary,easyapply, applied, approved, "timestamp") VALUES(nextval('jobs__id_seq'::regclass), $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)`

	unixSeconds := strconv.Itoa(int(time.Now().Unix()))
	ctx := context.Background()

	for i := range jobs {
		item := &jobs[i]
		log.Println("Inserting")

		posted := item.Date.Format("2006-01-02 15:04:05")
		if _, err := db.Exec(ctx, insertJob,
			item.Title, "Jobserve", item.URL, item.Id, item.Summary,
			item.Company, item.Location, posted, item.Salary,
			0, 0, 1, unixSeconds,
		); err != nil {
			log.Println(err)
		}
	}
	return nil
}
func getJobs(c *fiber.Ctx, db *pgxpool.Pool) error {
log.Println("GetJobs")
var jobs []JobEntries
rows, err := db.Query(context.Background(), `SELECT jobs._id, jobs.title, jobs.site, jobs.company, jobs.postdate, jobs.timestamp, coalesce(applied.a, 0) as a, coalesce(read.d, 0) as d
2024-04-26 16:13:07 +00:00
FROM jobs
left join applied on applied.aid = jobs._id
left join read on read.rid = jobs._id order by jobs._id desc`)
defer rows.Close()
if err != nil {
log.Fatalln(err)
c.JSON("An error occured")
}
for rows.Next() {
var job JobEntries
if err := rows.Scan(&job.ID, &job.Title, &job.Site, &job.Company, &job.Postdate, &job.Timestamp, &job.Applied, &job.Read); err != nil {
2024-04-26 16:13:07 +00:00
return err
}
jobs = append(jobs, job)
}
if err = rows.Err(); err != nil {
log.Fatal(err)
return c.JSON(nil)
}
return c.JSON(jobs)
}
// getJobById responds with the full record of the job whose _id equals the
// ":id" route parameter, including its applied marker.
//
// Responses:
//   - empty id: the literal body "{}".
//   - id that is not a base-10 integer: 400.
//   - database failure: 500 (logged; the server keeps running).
//   - otherwise: the job as JSON. When no row matches, a zero-valued
//     JobEntries is returned.
func getJobById(c *fiber.Ctx, db *pgxpool.Pool) error {
	log.Println("GetJobById")

	id := c.Params("id")
	log.Printf("-- %+v\n", id)
	if id == "" {
		log.Println("no id supplied...")
		return c.SendString("{}")
	}

	// Reject malformed ids before they reach the database, so client input
	// cannot provoke a query error.
	jobID, err := strconv.ParseInt(id, 10, 64)
	if err != nil {
		return fiber.NewError(fiber.StatusBadRequest, "invalid job id")
	}

	const selectJob = `SELECT jobs._id, jobs.title, jobs.site, jobs.url, jobs.id, jobs.summary, jobs.company, jobs.location, jobs.postdate, jobs.salary, jobs.easyapply, jobs."timestamp", coalesce(applied.a, 0) as a FROM jobs
left join applied on applied.aid = jobs._id WHERE jobs._id = $1`

	rows, err := db.Query(context.Background(), selectJob, jobID)
	if err != nil {
		log.Printf("getJobById: query failed: %v", err)
		return fiber.NewError(fiber.StatusInternalServerError, "could not load job")
	}
	defer rows.Close()

	var entry JobEntries
	// The join can yield several rows for one job; as before, the last row
	// read wins.
	for rows.Next() {
		var job JobEntries
		if err := rows.Scan(&job.ID, &job.Title, &job.Site, &job.Url, &job.Id, &job.Summary, &job.Company, &job.Location, &job.Postdate, &job.Salary, &job.Easyapply, &job.Timestamp, &job.Applied); err != nil {
			log.Printf("getJobById: scan failed: %v", err)
			return fiber.NewError(fiber.StatusInternalServerError, "could not load job")
		}
		entry = job
	}
	// Errors hit while reading only become visible once iteration has ended.
	if err := rows.Err(); err != nil {
		log.Printf("getJobById: reading rows failed: %v", err)
		return fiber.NewError(fiber.StatusInternalServerError, "could not load job")
	}
	return c.JSON(entry)
}
func markJobAsReadById(c *fiber.Ctx, db *pgxpool.Pool) error {
log.Println("markJobasReadById")
id := c.Params("id")
log.Printf("-- %+v\n", id)
t := time.Now()
if id != "" {
log.Printf("Marking entry %v as read", id)
2024-04-26 16:13:07 +00:00
r, err := db.Exec(context.Background(), `INSERT INTO public."read" ("_id", rid, d) VALUES(nextval('read__id_seq'::regclass), $1, $2);`, id, t.Unix())
if err != nil {
log.Printf("An error occured while executing query: %v", err)
}
if r.RowsAffected() != 1 {
return errors.New("No row affected...")
}
log.Println("***")
}
return c.SendStatus(200)
}
// markJobAsAppliedById records that the job named by the ":id" route
// parameter has been applied for, by inserting a row into the applied table
// holding the job id and the current Unix time in seconds.
//
// Responses:
//   - empty id: 200 with nothing written.
//   - id that is not a base-10 integer: 400.
//   - insert failed or did not affect exactly one row: 500 (logged).
//   - otherwise: 200.
func markJobAsAppliedById(c *fiber.Ctx, db *pgxpool.Pool) error {
	log.Println("markJobAsAppliedById")

	id := c.Params("id")
	log.Printf("-- %+v\n", id)
	if id == "" {
		return c.SendStatus(fiber.StatusOK)
	}

	jobID, err := strconv.ParseInt(id, 10, 64)
	if err != nil {
		return fiber.NewError(fiber.StatusBadRequest, "invalid job id")
	}

	log.Printf("Marking entry %v as applied", jobID)
	// NOTE(review): the key for the applied table is drawn from
	// read__id_seq, the same sequence the read table uses. That may be a
	// copy-paste slip (an applied-specific sequence?) — confirm against the
	// schema before changing it.
	tag, err := db.Exec(context.Background(), `INSERT INTO public."applied" ("_id", aid, a) VALUES(nextval('read__id_seq'::regclass), $1, $2);`, jobID, time.Now().Unix())
	if err != nil {
		// Report the real failure instead of falling through to the
		// rows-affected check, which would hide it.
		log.Printf("markJobAsAppliedById: insert failed: %v", err)
		return fiber.NewError(fiber.StatusInternalServerError, "could not mark job as applied")
	}
	if tag.RowsAffected() != 1 {
		return fiber.NewError(fiber.StatusInternalServerError, "no row affected")
	}

	log.Println("***")
	return c.SendStatus(fiber.StatusOK)
}