This commit is contained in:
Martin Donnelly 2024-04-26 17:13:07 +01:00
commit be500fde33
17 changed files with 1105 additions and 0 deletions

181
.gitignore vendored Normal file
View File

@ -0,0 +1,181 @@
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### Go template
# If you prefer the allow list template instead of the deny list, see community template:
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
#
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
# Go workspace file
go.work
### GoLand template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

BIN
db/jobs.db Normal file

Binary file not shown.

6
dist/build/bundle.css vendored Normal file

File diff suppressed because one or more lines are too long

18
dist/build/bundle.css.map vendored Normal file

File diff suppressed because one or more lines are too long

1
dist/build/bundle.js vendored Normal file

File diff suppressed because one or more lines are too long

BIN
dist/favicon.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

25
dist/gfx/star.svg vendored Normal file
View File

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg viewBox="0 0 282.3 270.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<defs>
<linearGradient id="a" gradientUnits="userSpaceOnUse" x1="338.9" x2="322.8" y1="292.1" y2="203.1">
<stop offset="0" stop-color="#e6d82f"/>
<stop offset="1" stop-color="#faf26f"/>
</linearGradient>
<linearGradient id="b" gradientUnits="userSpaceOnUse" x1="391.9" x2="341.2" y1="310.6" y2="310.6">
<stop offset="0" stop-color="#d2c308"/>
<stop offset="1" stop-color="#e8da34"/>
</linearGradient>
</defs>
<g fill-rule="evenodd" transform="translate(-198.9 -145.7)">
<path d="m340.1 292.8l-.4-140.5-32.16 98.6z" fill="url(#a)"/>
<path d="m340.2 152.4l31.45 97.64-31.04 42.7z" fill="#d2c308"/>
<path d="m341.1 292.4l133.8-42.39-103.6-.01z" fill="#faf26f"/>
<path d="m341 293.1l50.85 17.34 83.1-60.32z" fill="#a29910"/>
<path d="m289.1 310.6l50.32-17.12-134.2-43.5z" fill="#d2c308"/>
<path d="m339.6 293.8l-82.67 116.1 32.22-99.1z" fill="#faf26f"/>
<path d="m341.1 293.6l50.78 16.82 31.72 97.69z" fill="url(#b)"/>
<path d="m340.2 348.6l83.08 59.65-82.56-114.4z" fill="#a29910"/>
<path d="m306.9 251.2l33.56 42.72-135.6-43.9z" fill="#faf26f"/>
<path d="m340.2 348.6l-82.9 59.3 83.03-114.5z" fill="#a29910"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB

0
dist/global.css vendored Normal file
View File

18
dist/index.html vendored Normal file
View File

@ -0,0 +1,18 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset='utf-8'>
<meta name='viewport' content='width=device-width,initial-scale=1'>
<title>Jobs Server</title>
<link rel='icon' type='image/png' href='/favicon.png'>
<link rel='stylesheet' href='/global.css'>
<link rel='stylesheet' href='/build/bundle.css'>
<script defer src='/build/bundle.js'></script>
</head>
<body>
</body>
</html>

16
feeds.md Normal file
View File

@ -0,0 +1,16 @@
```text
https://www.jobserve.com/MySearch/ED1708BF42EF3513.rss
https://www.jobserve.com/MySearch/6EFA569DF8008ED5.rss
https://www.jobserve.com/MySearch/ABB2BB88FFA9ECF5.rss
https://www.jobserve.com/MySearch/2BBB5238D7B0A92B.rss
https://www.jobserve.com/MySearch/7DC15EBEFCADFABC.rss
https://www.jobserve.com/MySearch/E84953EAAD79B7BB.rss
https://www.jobserve.com/MySearch/E84953EAAD79B7BB.rss
https://www.jobserve.com/MySearch/3ACE55BA06CD0783.rss
https://www.jobserve.com/MySearch/BAEBF3BDF82B8FEF.rss
https://www.jobserve.com/MySearch/70DBC913965F9F95.rss
```

44
go.mod Normal file
View File

@ -0,0 +1,44 @@
module jobscraper
go 1.22.0
require (
github.com/gofiber/fiber/v2 v2.52.4
github.com/gofiber/template/html/v2 v2.1.1
github.com/jackc/pgx/v5 v5.5.5
github.com/lib/pq v1.10.9
github.com/mmcdole/gofeed v1.3.0
github.com/robfig/cron/v3 v3.0.0
github.com/shomali11/util v0.0.0-20220717175126-f0771b70947f
)
require (
github.com/PuerkitoBio/goquery v1.8.0 // indirect
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/gofiber/template v1.8.3 // indirect
github.com/gofiber/utils v1.1.0 // indirect
github.com/google/uuid v1.5.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/tinylib/msgp v1.1.8 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasthttp v1.51.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.15.0 // indirect
golang.org/x/text v0.14.0 // indirect
)

123
go.sum Normal file
View File

@ -0,0 +1,123 @@
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM=
github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc=
github.com/gofiber/template v1.8.3/go.mod h1:bs/2n0pSNPOkRa5VJ8zTIvedcI/lEYxzV3+YPXdBvq8=
github.com/gofiber/template/html/v2 v2.1.1 h1:QEy3O3EBkvwDthy5bXVGUseOyO6ldJoiDxlF4+MJiV8=
github.com/gofiber/template/html/v2 v2.1.1/go.mod h1:2G0GHHOUx70C1LDncoBpe4T6maQbNa4x1CVNFW0wju0=
github.com/gofiber/utils v1.1.0 h1:vdEBpn7AzIUJRhe+CiTOJdUcTg4Q9RK+pEa0KPbLdrM=
github.com/gofiber/utils v1.1.0/go.mod h1:poZpsnhBykfnY1Mc0KeEa6mSHrS3dV0+oBWyeQmb2e0=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
github.com/jackc/pgx/v5 v5.5.5/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM=
github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mmcdole/gofeed v1.3.0 h1:5yn+HeqlcvjMeAI4gu6T+crm7d0anY85+M+v6fIFNG4=
github.com/mmcdole/gofeed v1.3.0/go.mod h1:9TGv2LcJhdXePDzxiuMnukhV2/zb6VtnZt1mS+SjkLE=
github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 h1:Zr92CAlFhy2gL+V1F+EyIuzbQNbSgP4xhTODZtrXUtk=
github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/robfig/cron/v3 v3.0.0 h1:kQ6Cb7aHOHTSzNVNEhmp8EcWKLb4CbiMW9h9VyIhO4E=
github.com/robfig/cron/v3 v3.0.0/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/shomali11/parallelizer v0.0.0-20220717173222-a6776fbf40a9/go.mod h1:QsLM53l8gzX0sQbOjVir85bzOUucuJEF8JgE39wD7w0=
github.com/shomali11/util v0.0.0-20220717175126-f0771b70947f h1:OM0LVaVycWC+/j5Qra7USyCg2sc+shg3KwygAA+pYvA=
github.com/shomali11/util v0.0.0-20220717175126-f0771b70947f/go.mod h1:9POpw/crb6YrseaYBOwraL0lAYy0aOW79eU3bvMxgbM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA=
github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -0,0 +1,15 @@
package grabber
import "time"
// RssItem is one job posting built from a single feed entry.
// extractItems fills the feed-derived fields and additionalProcessing fills
// Company, Location and Salary by matching regular expressions against Summary.
type RssItem struct {
// Title is the feed entry's title.
Title string
// URL is the feed entry's link.
URL string
// Date is the entry's parsed publication time; it stays the zero time when
// the feed supplies none.
Date time.Time
// Link is not assigned by extractItems.
// NOTE(review): looks unused next to URL — confirm before relying on it.
Link string
// Summary is the feed entry's description text.
Summary string
// Id is xhashes.SHA1 of the entry's GUID; empty when the entry has no GUID.
Id string
// Company is the first capture of companyRegex in Summary, if it matched.
Company string
// Location is the first capture of locationRegex in Summary, if it matched.
Location string
// Salary is the first capture of rateRegex in Summary, if it matched.
Salary string
}

234
grabber/grabber.go Normal file
View File

@ -0,0 +1,234 @@
package grabber
import (
	"fmt"
	"io"
	"log"
	"net/http"
	"reflect"
	"regexp"
	"time"

	"github.com/mmcdole/gofeed"
	"github.com/shomali11/util/xhashes"
)
// Package-level build metadata and the compiled patterns used to enrich and
// filter feed items. All patterns are compiled once at package initialisation;
// none carries a case-insensitivity flag, so every alternative matches with
// exactly the letter case written here.
var (
// Version and Build are not read by any function in this file.
// NOTE(review): presumably set at link time — confirm against the build script.
Version string
Build string
// locationRegex, rateRegex and companyRegex each capture (group 1) the text
// of the table cell that follows a "Location:", "Rate:" or "Advertiser:"
// label in an item's HTML summary.
locationRegex = regexp.MustCompile(`Location:<\/strong><\/td><td width="50">&nbsp;<\/td><td>(.*?)(?:<\/td>)`)
rateRegex = regexp.MustCompile(`Rate:<\/strong><\/td><td width="50">&nbsp;<\/td><td>(.*?)<\/td>`)
companyRegex = regexp.MustCompile(`Advertiser:<\/strong><\/td><td width="50">&nbsp;<\/td><td>(.*?)<\/td>`)
// Accept filter: acceptItems keeps an item when its title or summary contains
// one of these keywords immediately followed by a non-word character (\W).
acceptRegex = regexp.MustCompile(`(full\s?stack|front\s?end|html|html5|es6|react|angular|knockout|ember|vue|riotjs|css|javascript|typescript|golang|go|sql|node|backbone|git|gulp|jquery|express|£\dk|Data Warehouse Developer|iot|internet of things)\W`)
// Reject filters: rejectItems drops an item when its title or summary matches
// any of the three patterns below.
// pattRegex lists unwanted employers, roles, technologies and contract terms.
pattRegex = regexp.MustCompile(`(Simply Education|Splunk|Coordinators?|Teachers?|Technical Writers?|Data Analyst|WebLogic|WebSphere|Data Scientist|Change Managers?|T24|Test Analyst|Insight Analyst|application tester|senior tester|Salesforce|QlikView|Navision|Murex|seo|django|drupal|SHAREPOINT|per annum|ServiceNow|Test Lead|User Researcher|Service Management|\(PERM\)|£\d.K|Remedy|ITSM|Symfony|Zend|Full Time|Technical Business Analyst|BUSINESS ANALYST|AUTOMATION TESTER|FIELD TECHNICIAN|websphere administrator|Research Data Scientist)`)
// engineersRegex matches "<kind> Engineer" for the listed kinds.
engineersRegex = regexp.MustCompile(`((Support|Devops|Planning|security|Postgresql|network|sccm|test|data|imac|firewall|vmware)\s+Engineer)`)
// developersRegex matches "<kind> Developer" for the listed kinds; the
// trailing "+" applies only to the final "r" of "Developer".
developersRegex = regexp.MustCompile(`((Big Data|Java Server Side|Java|PHP|Graduate|Access|Oracle ADF|SHAREPOINT|Ruby on Rails|Java Software|IOS|Qlikview|c#|c\+\+|\.net|bi|go lang|Python)+\s+Developer+)`)
// Disabled: the pattern below uses a lookahead "(?=...)", which Go's regexp
// syntax does not support, so MustCompile would panic if it were enabled.
// architectsRegex = regexp.MustCompile(`(Java|PHP|Microsoft)+(?:\s)(?=Architect)`)
)
// Grab downloads the RSS feed at url, converts its entries into RssItems and
// returns the ones that survive the reject filters and then match the accept
// filter. The item count is logged before and after each filtering stage.
//
// Failures are logged rather than returned: when the feed cannot be fetched
// or parsed, Grab returns nil so the caller simply has nothing to store.
func Grab(url string) []RssItem {
	log.Printf("Grabbing: %v", url)

	content, err := readFeed(url)
	if err != nil {
		log.Printf("failed to poll feed <%s>: %v", url, err)
		// Nothing was downloaded, so there is nothing to parse or filter.
		return nil
	}

	items, err := extractItems(content)
	if err != nil {
		log.Printf("Failed to extract items from feed %s: %v", url, err)
		return nil
	}

	log.Printf("Length %v\n", len(items))
	items = rejectItems(items)
	log.Printf("Length %v\n", len(items))
	items = acceptItems(items)
	log.Printf("Length %v\n", len(items))
	return items
}
// feedClient is the HTTP client shared by every feed download. The timeout
// bounds the whole request (connect, headers and body) so a stalled server
// cannot block a caller indefinitely.
var feedClient = &http.Client{Timeout: 30 * time.Second}

// readFeed fetches url with an HTTP GET and returns the response body as a
// string.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status (so an error page is never handed to the feed parser), or
// when the body cannot be read. Underlying errors are wrapped with %w and can
// be inspected with errors.Is / errors.As.
func readFeed(url string) (string, error) {
	resp, err := feedClient.Get(url)
	if err != nil {
		return "", fmt.Errorf("failed to request feed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("failed to request feed: unexpected status %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read feed: %w", err)
	}
	return string(body), nil
}
// showStruct prints one "Field Name: <name>, Field Value: <value>" line to
// standard output for every field of the struct held in item.
//
// Pointers (and interfaces) are followed to the value they refer to. A nil
// item, a nil pointer, or a value that is not a struct prints nothing instead
// of panicking. Unexported fields are printed as well.
func showStruct(item any) {
	val := reflect.ValueOf(item)
	for val.Kind() == reflect.Pointer || val.Kind() == reflect.Interface {
		if val.IsNil() {
			return
		}
		val = val.Elem()
	}
	if val.Kind() != reflect.Struct {
		return
	}

	typ := val.Type()
	for i := 0; i < val.NumField(); i++ {
		field := val.Field(i)
		// Interface() panics on unexported fields; fmt can still format the
		// reflect.Value itself, printing the value it holds.
		var value any = field
		if field.CanInterface() {
			value = field.Interface()
		}
		fmt.Printf("Field Name: %s, Field Value: %v\n", typ.Field(i).Name, value)
	}
}
// extractItems parses content as a feed and converts every entry into an
// RssItem: title, link and description are copied, the publication time is
// copied when the parser produced one, and a non-empty GUID is hashed with
// xhashes.SHA1 to form the item's Id. Each item is then passed through
// additionalProcessing before being collected.
//
// A parse failure yields a nil slice and an error.
func extractItems(content string) ([]RssItem, error) {
	parsed, err := gofeed.NewParser().ParseString(content)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse feed: %v", err)
	}

	var extracted []RssItem
	for _, entry := range parsed.Items {
		rss := RssItem{
			Title:   entry.Title,
			URL:     entry.Link,
			Summary: entry.Description,
		}
		if entry.PublishedParsed != nil {
			rss.Date = *entry.PublishedParsed
		}
		if entry.GUID != "" {
			rss.Id = xhashes.SHA1(entry.GUID)
		}
		extracted = append(extracted, additionalProcessing(rss))
	}
	return extracted, nil
}
// additionalProcessing returns a copy of workItem whose Location, Salary and
// Company are taken from the first capture group of locationRegex, rateRegex
// and companyRegex respectively, each applied to workItem.Summary. A field is
// left as it was when its pattern does not match.
func additionalProcessing(workItem RssItem) RssItem {
	// workItem is already a private copy, so its fields can be written
	// through pointers without affecting the caller's value.
	extractions := []struct {
		pattern *regexp.Regexp
		target  *string
	}{
		{locationRegex, &workItem.Location},
		{rateRegex, &workItem.Salary},
		{companyRegex, &workItem.Company},
	}

	for _, ex := range extractions {
		if groups := ex.pattern.FindStringSubmatch(workItem.Summary); groups != nil {
			*ex.target = groups[1]
		}
	}
	return workItem
}
// acceptItems returns the items whose title or summary matches acceptRegex,
// in their original order. For every item it logs the title, the decision and
// the matched text (title match first, then summary match).
func acceptItems(jobitems []RssItem) []RssItem {
	var kept []RssItem
	for _, job := range jobitems {
		var hits []string
		for _, text := range [...]string{job.Title, job.Summary} {
			if m := acceptRegex.FindStringSubmatch(text); m != nil {
				hits = append(hits, m[0])
			}
		}

		matched := len(hits) > 0
		log.Printf("%v :: Accept? %v -- %v", job.Title, matched, hits)
		if !matched {
			continue
		}
		kept = append(kept, job)
	}
	return kept
}
// rejectItems returns the items that match none of the reject filters
// (pattRegex, engineersRegex, developersRegex), in their original order.
// Each filter is tried against the title and then the summary. For every item
// it logs the title, the decision and all matched text in that order.
func rejectItems(jobitems []RssItem) []RssItem {
	filters := [...]*regexp.Regexp{pattRegex, engineersRegex, developersRegex}

	var kept []RssItem
	for _, job := range jobitems {
		var hits []string
		for _, filter := range filters {
			for _, text := range [...]string{job.Title, job.Summary} {
				if m := filter.FindStringSubmatch(text); m != nil {
					hits = append(hits, m[0])
				}
			}
		}

		flagged := len(hits) > 0
		log.Printf("%v :: Reject? %v -- %v", job.Title, flagged, hits)
		if flagged {
			continue
		}
		kept = append(kept, job)
	}
	return kept
}

89
notes.md Normal file
View File

@ -0,0 +1,89 @@
# Notes
- Test Link https://www.jobserve.com/MySearch/F3A56475D5FD4966.rss
LocationRegex
```regexp
[Ll]ocation:(?:<\/span>)?\n*(.*?)&nbsp;
Location:<\/strong><\/td><td width="50">&nbsp;<\/td><td>(.*?)<\/td>
```
RateRegex
```goregexp
[Rr]ate:(?:<\/span>)?\n*(.*?)&nbsp;
Rate:<\/strong><\/td><td width="50">&nbsp;<\/td><td>(.*?)<\/td>
```
Company Regex
```goregexp
Advertiser:<\/strong><\/td><td width="50">&nbsp;<\/td><td>(.*?)<\/td>
```
Location regex
```goregexp
Location:<\/strong><\/td><td width="50">&nbsp;<\/td><td>(.*?)<\/td>
```
Image regex
```goregexp
src="https:\/\/(.+?)"
```
```sqlite
-- jobs definition
CREATE TABLE "jobs" (
"_id" INTEGER NOT NULL UNIQUE,
"title" TEXT,
"site" TEXT,
"url" TEXT,
"id" TEXT UNIQUE,
"summary" TEXT,
"company" TEXT,
"location" TEXT,
"postdate" TEXT,
"salary" TEXT,
"easyapply" INTEGER,
"timestamp" INTEGER,
PRIMARY KEY("_id" AUTOINCREMENT)
);
```
```postgresql
CREATE TABLE jobs (
_id SERIAL PRIMARY KEY,
title VARCHAR,
site VARCHAR,
url VARCHAR,
id VARCHAR UNIQUE,
summary TEXT,
company VARCHAR,
location VARCHAR,
postdate VARCHAR,
salary VARCHAR,
easyapply INTEGER,
"timestamp" INTEGER
);
```

317
server.go Normal file
View File

@ -0,0 +1,317 @@
package main
import (
"context"
"database/sql"
"errors"
"fmt"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cache"
"github.com/gofiber/template/html/v2"
"github.com/jackc/pgx/v5/pgxpool"
_ "github.com/lib/pq"
"github.com/robfig/cron/v3"
"jobscraper/grabber"
"log"
"os"
"strconv"
"time"
)
// Version and Build identify the running binary; main prints both at startup.
// NOTE(review): nothing in this file assigns them — presumably injected at
// link time (e.g. -ldflags "-X main.Version=..."); confirm against the build.
var (
Version string
Build string
)
// fileName is the relative path of a database file under ./db.
// NOTE(review): no code in this file reads it; main connects through the
// DBCONNECTION environment variable instead — possibly a leftover, confirm.
const fileName = "./db/jobs.db"
// Sentinel errors for record-level storage failures.
// NOTE(review): none of these is returned or compared anywhere in this file —
// confirm whether other files in the package use them.
var (
// ErrDuplicate reports that a record already exists.
ErrDuplicate = errors.New("record already exists")
// ErrNotExists reports that a row was not found.
ErrNotExists = errors.New("row not exists")
// ErrUpdateFailed reports that an update did not succeed.
ErrUpdateFailed = errors.New("update failed")
// ErrDeleteFailed reports that a delete did not succeed.
ErrDeleteFailed = errors.New("delete failed")
)
// SQLConn wraps a database/sql handle.
// NOTE(review): the handlers in this file take a *pgxpool.Pool and never use
// this type — confirm whether it is still needed.
type SQLConn struct {
db *sql.DB
}
// JobEntries is one row of the jobs table as returned to API clients. The
// json tags give the key used for each field in responses. getJobs fills only
// a subset of the fields; getJobById fills all except Read.
type JobEntries struct {
// ID is the jobs._id column.
ID int64 `json:"_id"`
Title string `json:"title"`
Site string `json:"site"`
Url string `json:"url"`
// Id is the jobs.id column; InsertJobs stores the grabbed item's Id there.
Id string `json:"id"`
Summary string `json:"summary"`
Company string `json:"company"`
Location string `json:"location"`
Postdate string `json:"postdate"`
Salary string `json:"salary"`
Easyapply int64 `json:"easyapply"`
Timestamp int64 `json:"timestamp"`
// Applied receives coalesce(applied.a, 0) from the joined applied table;
// it is left out of the JSON when nil.
Applied any `json:"applied,omitempty"`
// Read receives coalesce(read.d, 0) from the joined read table (getJobs
// only); it is left out of the JSON when nil.
Read any `json:"read,omitempty"`
}
// Site is one row of the sites table: a feed that JobWorker passes to
// grabber.Grab. AllSites scans the first two columns of "SELECT * from sites"
// into SID and Url, in that order.
type Site struct {
SID int64 `json:"sid"`
Url string `json:"url"`
}
// main wires the service together: it reads the PostgreSQL connection string
// from DBCONNECTION, creates the connection pool, schedules JobWorker to run
// every 15 minutes, and serves the web UI and JSON API on PORT (default 3600).
//
// It exits with a fatal log message when DBCONNECTION is missing, the pool
// cannot be created, the worker cannot be scheduled, or the HTTP listener
// stops.
func main() {
	log.Printf("GO-JOBSCRAPER v%+v build %+v\n\n", Version, Build)

	connStr := os.Getenv("DBCONNECTION")
	if connStr == "" {
		log.Println("DBCONNECTION not set")
		log.Println("Should be something like:")
		log.Println("postgresql://user:password@server:5432/database?sslmode=disable")
		log.Fatalln("Exiting...")
	}

	db, err := pgxpool.New(context.Background(), connStr)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Creating the pool does not prove the database is reachable, so only
	// report "connected" after a successful round trip. A failed ping is
	// logged but not fatal: the database may come up after this process.
	if err := db.Ping(context.Background()); err != nil {
		log.Printf("database not reachable: %v", err)
	} else {
		log.Println("connected")
	}

	scheduler := cron.New()
	if _, err := scheduler.AddFunc("*/15 * * * *", func() { JobWorker(db) }); err != nil {
		log.Fatalf("scheduling job worker: %v", err)
	}
	scheduler.Start()

	engine := html.New("./dist", ".html")
	app := fiber.New(fiber.Config{
		Views: engine,
	})

	// Cache responses for two minutes unless the request carries
	// ?noCache=true.
	app.Use(cache.New(cache.Config{
		Next: func(c *fiber.Ctx) bool {
			return c.Query("noCache") == "true"
		},
		Expiration:   2 * time.Minute,
		CacheControl: true,
	}))

	app.Get("/", indexHandler)

	port := os.Getenv("PORT")
	if port == "" {
		port = "3600"
	}

	app.Static("/", "./dist")

	app.Get("/jobs", func(c *fiber.Ctx) error {
		return getJobs(c, db)
	})
	app.Get("/jobs/:id", func(c *fiber.Ctx) error {
		return getJobById(c, db)
	})
	app.Put("/jobs/:id", func(c *fiber.Ctx) error {
		return markJobAsReadById(c, db)
	})

	log.Fatalln(app.Listen(fmt.Sprintf(":%v", port)))
}
// indexHandler renders the "index" view with no template data. main registers
// it for GET / on an app whose view engine loads ".html" templates from ./dist.
func indexHandler(c *fiber.Ctx) error {
return c.Render("index", nil)
}
// JobWorker performs one scrape cycle: it loads every site from the database,
// grabs and filters each site's feed, and inserts the resulting jobs.
//
// It runs from the cron scheduler inside the web server process, so a failure
// to load the sites is logged and the cycle is abandoned instead of
// terminating the program; the next scheduled run will try again.
func JobWorker(db *pgxpool.Pool) {
	log.Println("JobWorker")

	sites, err := AllSites(db)
	if err != nil {
		log.Printf("JobWorker: loading sites: %v", err)
		return
	}
	log.Printf("%+v\n", sites)

	for _, site := range sites {
		entries := grabber.Grab(site.Url)
		if err := InsertJobs(db, entries); err != nil {
			log.Printf("JobWorker: inserting jobs from %s: %v", site.Url, err)
		}
	}
}
// AllSites returns every row of the sites table. The first two columns of
// each row are scanned into Site.SID and Site.Url, in that order.
//
// Any query, scan or iteration failure is returned (wrapped) together with a
// nil slice; the function never terminates the process itself, so callers
// decide how to react.
func AllSites(db *pgxpool.Pool) ([]Site, error) {
	log.Println("ALL Sites")

	rows, err := db.Query(context.Background(), "SELECT * from sites")
	if err != nil {
		return nil, fmt.Errorf("querying sites: %w", err)
	}
	// Only schedule the close once the query is known to have succeeded.
	defer rows.Close()

	var sites []Site
	for rows.Next() {
		var site Site
		if err := rows.Scan(&site.SID, &site.Url); err != nil {
			return nil, fmt.Errorf("scanning site row: %w", err)
		}
		sites = append(sites, site)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading sites: %w", err)
	}
	return sites, nil
}
// InsertJobs writes each grabbed job to the jobs table, one INSERT per job,
// tagging every row with the site name "Jobserve" and the Unix time at which
// the call started. A failed insert is logged and the loop moves on to the
// next job; the function always returns nil.
func InsertJobs(db *pgxpool.Pool, jobs []grabber.RssItem) error {
	const insertJob = `insert into jobs("_id", title, site, url, id, summary, company, "location", postdate, salary,easyapply, applied, approved, "timestamp") VALUES(nextval('jobs__id_seq'::regclass), $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)`

	// One timestamp for the whole batch, passed as a decimal string.
	stamp := strconv.Itoa(int(time.Now().Unix()))

	for i := range jobs {
		job := jobs[i]
		log.Println("Inserting")
		if _, err := db.Exec(context.Background(), insertJob,
			job.Title, "Jobserve", job.URL, job.Id, job.Summary, job.Company, job.Location,
			job.Date.Format("2006-01-02 15:04:05"), job.Salary, 0, 0, 1, stamp,
		); err != nil {
			log.Println(err)
		}
	}
	return nil
}
// getJobs responds with a JSON array of all jobs, newest _id first. Each
// element carries the job's _id, title, site, company and timestamp plus the
// applied and read markers from the joined applied and read tables (0 when a
// job has no such row).
//
// Database failures are logged and answered with HTTP 500; they never
// terminate the server.
func getJobs(c *fiber.Ctx, db *pgxpool.Pool) error {
	log.Println("GetJobs")

	rows, err := db.Query(context.Background(), `SELECT jobs._id, jobs.title, jobs.site, jobs.company, jobs.timestamp, coalesce(applied.a, 0) as a, coalesce(read.d, 0) as d
FROM jobs
left join applied on applied.aid = jobs._id
left join read on read.rid = jobs._id order by jobs._id desc`)
	if err != nil {
		log.Printf("getJobs: query failed: %v", err)
		return fiber.ErrInternalServerError
	}
	defer rows.Close()

	var jobs []JobEntries
	for rows.Next() {
		var job JobEntries
		if err := rows.Scan(&job.ID, &job.Title, &job.Site, &job.Company, &job.Timestamp, &job.Applied, &job.Read); err != nil {
			log.Printf("getJobs: scan failed: %v", err)
			return fiber.ErrInternalServerError
		}
		jobs = append(jobs, job)
	}
	if err := rows.Err(); err != nil {
		log.Printf("getJobs: reading rows failed: %v", err)
		return fiber.ErrInternalServerError
	}
	return c.JSON(jobs)
}
// getJobById responds with the full JSON record of the job whose _id equals
// the ":id" route parameter, including its applied marker (0 when the job has
// no row in the applied table).
//
// An empty id yields the literal body "{}". A non-numeric id yields HTTP 400.
// When no job matches, a zero-valued record is returned, as before. Database
// failures are logged and answered with HTTP 500 instead of terminating the
// server.
func getJobById(c *fiber.Ctx, db *pgxpool.Pool) error {
	log.Println("GetJobById")

	id := c.Params("id")
	log.Printf("-- %+v\n", id)
	if id == "" {
		log.Println("no id supplied...")
		return c.SendString("{}")
	}
	// _id is an integer column; reject anything else before it reaches the
	// database.
	if _, err := strconv.ParseInt(id, 10, 64); err != nil {
		log.Printf("getJobById: invalid id %q", id)
		return fiber.ErrBadRequest
	}

	rows, err := db.Query(context.Background(), `SELECT jobs._id, jobs.title, jobs.site, jobs.url, jobs.id, jobs.summary, jobs.company, jobs.location, jobs.postdate, jobs.salary, jobs.easyapply, jobs."timestamp", coalesce(applied.a, 0) as a FROM jobs
left join applied on applied.aid = jobs._id WHERE jobs._id = $1`, id)
	if err != nil {
		log.Printf("getJobById: query failed: %v", err)
		return fiber.ErrInternalServerError
	}
	defer rows.Close()

	var entry JobEntries
	for rows.Next() {
		var job JobEntries
		if err := rows.Scan(&job.ID, &job.Title, &job.Site, &job.Url, &job.Id, &job.Summary, &job.Company, &job.Location, &job.Postdate, &job.Salary, &job.Easyapply, &job.Timestamp, &job.Applied); err != nil {
			log.Printf("getJobById: scan failed: %v", err)
			return fiber.ErrInternalServerError
		}
		entry = job
	}
	// Iteration errors are only available once Next has returned false.
	if err := rows.Err(); err != nil {
		log.Printf("getJobById: reading rows failed: %v", err)
		return fiber.ErrInternalServerError
	}
	return c.JSON(entry)
}
// markJobAsReadById records that the job named by the ":id" route parameter
// has been read by inserting a row (job id, current Unix time) into the read
// table, then responds with HTTP 200.
//
// An empty id is a no-op that still answers 200. A failed insert is logged
// and answered with HTTP 500; an insert that affects anything other than
// exactly one row returns the "No row affected..." error.
func markJobAsReadById(c *fiber.Ctx, db *pgxpool.Pool) error {
	log.Println("markJobasReadById")

	id := c.Params("id")
	log.Printf("-- %+v\n", id)
	if id == "" {
		return c.SendStatus(fiber.StatusOK)
	}

	log.Printf("Marking entry %v as read", id)
	result, err := db.Exec(context.Background(), `INSERT INTO public."read" ("_id", rid, d) VALUES(nextval('read__id_seq'::regclass), $1, $2);`, id, time.Now().Unix())
	if err != nil {
		// Stop here: the command tag is meaningless after a failed Exec.
		log.Printf("An error occured while executing query: %v", err)
		return fiber.ErrInternalServerError
	}
	if result.RowsAffected() != 1 {
		return errors.New("No row affected...")
	}
	log.Println("***")
	return c.SendStatus(fiber.StatusOK)
}

18
showstruct/showstruct.go Normal file
View File

@ -0,0 +1,18 @@
package showstruct
import (
"fmt"
"reflect"
)
// Show prints one "Field : <name>, Value: <value>" line to standard output
// for every field of the struct held in item.
//
// Pointers (and interfaces) are followed to the value they refer to. A nil
// item, a nil pointer, or a value that is not a struct prints nothing instead
// of panicking. Unexported fields are printed as well.
func Show(item any) {
	val := reflect.ValueOf(item)
	for val.Kind() == reflect.Pointer || val.Kind() == reflect.Interface {
		if val.IsNil() {
			return
		}
		val = val.Elem()
	}
	if val.Kind() != reflect.Struct {
		return
	}

	typ := val.Type()
	for i := 0; i < val.NumField(); i++ {
		field := val.Field(i)
		// Interface() panics on unexported fields; fmt can still format the
		// reflect.Value itself, printing the value it holds.
		var value any = field
		if field.CanInterface() {
			value = field.Interface()
		}
		fmt.Printf("Field : %s, Value: %v\n", typ.Field(i).Name, value)
	}
}
}