235 lines
5.7 KiB
Go
235 lines
5.7 KiB
Go
package grabber
|
|
|
|
import (
|
|
"fmt"
|
|
"github.com/shomali11/util/xhashes"
|
|
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"reflect"
|
|
"regexp"
|
|
|
|
"github.com/mmcdole/gofeed"
|
|
)
|
|
|
|
var (
	// Version is the release version string. Nothing in this file assigns
	// it; presumably it is injected at build time (-ldflags) — TODO confirm.
	Version string
	// Build is the build identifier. Nothing in this file assigns it;
	// presumably it is injected at build time (-ldflags) — TODO confirm.
	Build string

	// locationRegex captures the table cell that follows the "Location:"
	// label in an item's HTML summary.
	locationRegex = regexp.MustCompile(`Location:<\/strong><\/td><td width="50"> <\/td><td>(.*?)(?:<\/td>)`)
	// rateRegex captures the table cell that follows the "Rate:" label.
	rateRegex = regexp.MustCompile(`Rate:<\/strong><\/td><td width="50"> <\/td><td>(.*?)<\/td>`)
	// companyRegex captures the table cell that follows the "Advertiser:" label.
	companyRegex = regexp.MustCompile(`Advertiser:<\/strong><\/td><td width="50"> <\/td><td>(.*?)<\/td>`)

	// accept filter
	//
	// A keyword must be followed by a non-word character OR the end of the
	// text. The end-of-text alternative matters: with a bare trailing \W a
	// title such as "contract role: react" could never be accepted.
	// NOTE(review): the pattern is case-sensitive and not anchored on the
	// left (e.g. "go" also matches the tail of "cargo ") — confirm intent.
	acceptRegex = regexp.MustCompile(`(full\s?stack|front\s?end|html|html5|es6|react|angular|knockout|ember|vue|riotjs|css|javascript|typescript|golang|go|sql|node|backbone|git|gulp|jquery|express|£\dk|Data Warehouse Developer|iot|internet of things)(?:\W|$)`)

	// reject filters
	//
	// pattRegex lists free-form phrases that disqualify an item.
	pattRegex = regexp.MustCompile(`(Simply Education|Splunk|Coordinators?|Teachers?|Technical Writers?|Data Analyst|WebLogic|WebSphere|Data Scientist|Change Managers?|T24|Test Analyst|Insight Analyst|application tester|senior tester|Salesforce|QlikView|Navision|Murex|seo|django|drupal|SHAREPOINT|per annum|ServiceNow|Test Lead|User Researcher|Service Management|\(PERM\)|£\d.K|Remedy|ITSM|Symfony|Zend|Full Time|Technical Business Analyst|BUSINESS ANALYST|AUTOMATION TESTER|FIELD TECHNICIAN|websphere administrator|Research Data Scientist)`)
	// engineersRegex rejects "<speciality> Engineer" roles.
	engineersRegex = regexp.MustCompile(`((Support|Devops|Planning|security|Postgresql|network|sccm|test|data|imac|firewall|vmware)\s+Engineer)`)
	// developersRegex rejects "<technology> Developer" roles.
	developersRegex = regexp.MustCompile(`((Big Data|Java Server Side|Java|PHP|Graduate|Access|Oracle ADF|SHAREPOINT|Ruby on Rails|Java Software|IOS|Qlikview|c#|c\+\+|\.net|bi|go lang|Python)+\s+Developer+)`)
	// Disabled: Go's regexp package (RE2 syntax) has no lookahead, so the
	// `(?=...)` below would make MustCompile panic if this were re-enabled.
	// architectsRegex = regexp.MustCompile(`(Java|PHP|Microsoft)+(?:\s)(?=Architect)`)
)
|
|
|
|
func Grab(url string) []RssItem {
|
|
log.Printf("Grabbing: %v", url)
|
|
|
|
// url := "https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss"
|
|
|
|
content, err := readFeed(url)
|
|
if err != nil {
|
|
log.Printf("failed to poll feed <%s>: %v", url, err)
|
|
// continue
|
|
}
|
|
|
|
items, err := extractItems(content)
|
|
if err != nil {
|
|
log.Printf("Failed to extract items from feed %s: %v", url, err)
|
|
// log.Printf("Failed to extract items from feed %s: %v", feed.url, err)
|
|
// continue
|
|
}
|
|
|
|
log.Printf("Length %v\n", len(items))
|
|
|
|
items = rejectItems(items)
|
|
|
|
log.Printf("Length %v\n", len(items))
|
|
|
|
items = acceptItems(items)
|
|
|
|
log.Printf("Length %v\n", len(items))
|
|
|
|
return items
|
|
|
|
}
|
|
|
|
// readFeed performs an HTTP GET on url and returns the response body as a
// string.
//
// An error is returned when the request fails, when the server answers with
// a non-2xx status, or when the body cannot be read; the returned string is
// empty in all of those cases. Underlying errors are wrapped with %w so
// callers can inspect them with errors.Is / errors.As.
//
// NOTE(review): http.Get uses http.DefaultClient, which has no timeout; a
// stalled server blocks this call indefinitely.
func readFeed(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", fmt.Errorf("failed to request feed: %w", err)
	}
	defer resp.Body.Close()

	// An error page is not a feed: refuse it here instead of passing its
	// HTML on to the parser.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("failed to request feed: unexpected status %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read feed: %w", err)
	}

	return string(body), nil
}
|
|
|
|
// showStruct is a debugging aid that prints one line per field of a struct
// to standard output, in declaration order:
//
//	Field Name: <name>, Field Value: <value>
//
// item may be a struct or a (possibly nested) pointer to one. Anything else,
// including nil and nil pointers, prints nothing instead of panicking.
// Unexported fields are printed as well.
func showStruct(item any) {
	val := reflect.ValueOf(item)

	// Follow pointers and interfaces down to the underlying value.
	for val.Kind() == reflect.Pointer || val.Kind() == reflect.Interface {
		if val.IsNil() {
			return
		}
		val = val.Elem()
	}
	// NumField and Field panic on anything that is not a struct; the zero
	// Value produced by a nil item has Kind Invalid and is rejected here too.
	if val.Kind() != reflect.Struct {
		return
	}

	typ := val.Type()
	for i := 0; i < val.NumField(); i++ {
		field := val.Field(i)

		// Interface() panics on unexported fields. fmt can format a
		// reflect.Value directly, so fall back to that when needed.
		var shown any = field
		if field.CanInterface() {
			shown = field.Interface()
		}

		fmt.Printf("Field Name: %s, Field Value: %v\n", typ.Field(i).Name, shown)
	}
}
|
|
|
|
func extractItems(content string) ([]RssItem, error) {
|
|
var items []RssItem
|
|
|
|
fp := gofeed.NewParser()
|
|
|
|
feed, err := fp.ParseString(content)
|
|
if err != nil {
|
|
return items, fmt.Errorf("Failed to parse feed: %v", err)
|
|
}
|
|
|
|
for _, item := range feed.Items {
|
|
|
|
var ri RssItem
|
|
ri.Title = item.Title
|
|
if item.PublishedParsed != nil {
|
|
ri.Date = *item.PublishedParsed
|
|
}
|
|
|
|
if item.GUID != "" {
|
|
// ri.Id = item.GUID
|
|
ri.Id = xhashes.SHA1(item.GUID)
|
|
}
|
|
|
|
if item.Description != "" {
|
|
ri.Summary = item.Description
|
|
}
|
|
ri.URL = item.Link
|
|
|
|
ri = additionalProcessing(ri)
|
|
|
|
items = append(items, ri)
|
|
}
|
|
|
|
return items, nil
|
|
}
|
|
|
|
func additionalProcessing(workItem RssItem) RssItem {
|
|
|
|
if loc := locationRegex.FindStringSubmatch(workItem.Summary); loc != nil {
|
|
|
|
workItem.Location = loc[1]
|
|
}
|
|
|
|
if rate := rateRegex.FindStringSubmatch(workItem.Summary); rate != nil {
|
|
|
|
workItem.Salary = rate[1]
|
|
}
|
|
|
|
if company := companyRegex.FindStringSubmatch(workItem.Summary); company != nil {
|
|
workItem.Company = company[1]
|
|
}
|
|
|
|
return workItem
|
|
|
|
}
|
|
|
|
func acceptItems(jobitems []RssItem) []RssItem {
|
|
|
|
var items []RssItem
|
|
|
|
for _, item := range jobitems {
|
|
|
|
var accept []string
|
|
|
|
acceptable := false
|
|
|
|
if pat := acceptRegex.FindStringSubmatch(item.Title); pat != nil {
|
|
|
|
accept = append(accept, pat[0])
|
|
|
|
acceptable = true
|
|
}
|
|
|
|
if pat := acceptRegex.FindStringSubmatch(item.Summary); pat != nil {
|
|
|
|
accept = append(accept, pat[0])
|
|
acceptable = true
|
|
}
|
|
|
|
log.Printf("%v :: Accept? %v -- %v", item.Title, acceptable, accept)
|
|
|
|
if acceptable == true {
|
|
|
|
items = append(items, item)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return items
|
|
}
|
|
|
|
func rejectItems(jobitems []RssItem) []RssItem {
|
|
|
|
var items []RssItem
|
|
|
|
for _, item := range jobitems {
|
|
|
|
var rejected []string
|
|
|
|
rejectable := false
|
|
|
|
if pat := pattRegex.FindStringSubmatch(item.Title); pat != nil {
|
|
rejected = append(rejected, pat[0])
|
|
rejectable = true
|
|
}
|
|
|
|
if pat := pattRegex.FindStringSubmatch(item.Summary); pat != nil {
|
|
rejected = append(rejected, pat[0])
|
|
rejectable = true
|
|
}
|
|
|
|
if pat := engineersRegex.FindStringSubmatch(item.Title); pat != nil {
|
|
rejected = append(rejected, pat[0])
|
|
rejectable = true
|
|
}
|
|
|
|
if pat := engineersRegex.FindStringSubmatch(item.Summary); pat != nil {
|
|
rejected = append(rejected, pat[0])
|
|
rejectable = true
|
|
}
|
|
|
|
if pat := developersRegex.FindStringSubmatch(item.Title); pat != nil {
|
|
rejected = append(rejected, pat[0])
|
|
rejectable = true
|
|
}
|
|
|
|
if pat := developersRegex.FindStringSubmatch(item.Summary); pat != nil {
|
|
rejected = append(rejected, pat[0])
|
|
rejectable = true
|
|
}
|
|
|
|
log.Printf("%v :: Reject? %v -- %v", item.Title, rejectable, rejected)
|
|
|
|
if rejectable == false {
|
|
|
|
items = append(items, item)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return items
|
|
}
|