Compare commits

20 Commits

Author SHA1 Message Date
fb4d072b15 remove first element rather than last from sent list 2024-06-20 15:35:47 -05:00
97da0d8b93 switch to crc32 method for hashing, add cache file 2024-05-26 04:45:34 -05:00
dc772fb588 add cache file 2024-05-26 04:45:04 -05:00
4dde548163 add crc library 2024-05-26 04:44:54 -05:00
35b38f61fe check for double sends 2024-05-26 04:03:01 -05:00
a52c86738c fix sending url 2024-05-26 03:45:41 -05:00
e1b69913a4 remove smtp authentication 2024-05-26 03:44:14 -05:00
0509d53e25 fix to not use example config by default 2024-05-26 03:36:59 -05:00
67449fa7ac fix time.sleep from seconds to minutes 2024-05-26 03:35:30 -05:00
fcb17394c6 add debug messages 2024-05-26 03:22:40 -05:00
f9e19fb597 fix stdout flushing 2024-05-26 03:17:10 -05:00
4135f4960e flush stdout 2024-05-26 03:15:28 -05:00
7ab0c83638 Fix dockerfile entrypoint 2024-05-26 03:03:51 -05:00
81e03e3013 fix type conversions 2024-05-26 02:58:51 -05:00
2eddb9a4a1 update gitignore 2024-05-26 02:54:07 -05:00
af6d5b08f6 Update README 2024-05-26 02:53:48 -05:00
a1f10c7655 update example to better reflect usage 2024-05-26 02:53:20 -05:00
4e9e778054 update Dockerfile for python 2024-05-26 02:52:59 -05:00
e739960035 create python version 2024-05-26 02:52:43 -05:00
b29387f9a0 cleanup golang files 2024-05-26 02:52:17 -05:00
9 changed files with 343 additions and 249 deletions

4
.gitignore vendored
View File

@@ -1,2 +1,4 @@
config.ini config.ini
.devcontainer/ .devcontainer/
.github/
cache.txt

View File

@@ -1,13 +1,10 @@
# syntax=docker/dockerfile:1 # syntax=docker/dockerfile:1
FROM golang:latest AS build FROM python:3.12-alpine
WORKDIR /app WORKDIR /app
COPY . . COPY . .
RUN go mod download
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o ./reddit_notify
FROM alpine:latest RUN pip install -r requirements.txt
WORKDIR /app
COPY --from=build /app/reddit_notify /app/reddit_notify ENTRYPOINT ["python", "/app/main.py"]
RUN chmod +x ./reddit_notify
CMD [ "./reddit_notify" ]

View File

@@ -2,4 +2,77 @@
An app that periodically checks a subreddit for a keyword and will send out an SMTP message if found An app that periodically checks a subreddit for a keyword and will send out an SMTP message if found
# Config # Config
Configuration is done via a `config.ini` file within the same location of the application. Check the example on how to properly set up the `config.ini` file. Configuration is done via a `config.ini` file within the same location of the application. Check the example on how to properly set up the `config.ini` file. You can also pass in a command line argument to specify the location of the config file:
```
python main.py --config ~/config.ini
```
## App
The app section defines running configuration to parse through.
### Subreddit
The subreddit of your choosing:
```
subreddit = hardwareswap
```
The configuration does allow for multiple subreddits to be parsed through by adding a comma between the subreddits:
```
subreddit = hardwareswap, mechmarket
```
### Interval
The interval, in minutes, between requests to Reddit. This example requests data from Reddit every 5 minutes:
```
interval = 5
```
### Keyword
The keywords to be matched:
```
keyword = holy pandas
```
The configuration does allow for multiple keywords to be parsed through by adding a comma between the keywords:
```
keyword = holy pandas, boba u4t
```
## SMTP
The SMTP section defines the configuration for sending to the SMTP server.
### SMTP Server
The location of the SMTP server:
```
smtp_server = localhost
```
### SMTP Port
The port used for SMTP (at this current time it does not support TLS):
```
smtp_port = 25
```
### SMTP Username
The username used to authenticate to the SMTP server:
```
smtp_username = username
```
### SMTP Password
The password used to authenticate to the SMTP server:
```
smtp_password = password
```
### SMTP To
The e-mail address you want to send the SMTP message to:
```
smtp_to = example_to@example.com
```
### SMTP From
The e-mail address you want to send the SMTP message from:
```
smtp_from = example_from@example.com
```

View File

@@ -1,15 +1,12 @@
[app] [app]
# Subreddit to parse # Subreddit to parse
subreddit = rabbits subreddit = mechmarket
# Interval (in minutes) to check subreddit # Interval (in minutes) to check subreddit
interval = 5 interval = 5
# Keyword to search for within title and text # Additional keyword search
keyword = cute keyword = holy pandas
# Additional keyword search, split by commas
keyword = pretty, bun
# SMTP information # SMTP information
[smtp] [smtp]

10
go.mod
View File

@@ -1,10 +0,0 @@
module reddit_notify
go 1.20
require (
github.com/buger/jsonparser v1.1.1 // indirect
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/mail.v2 v2.3.1 // indirect
)

8
go.sum
View File

@@ -1,8 +0,0 @@
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc h1:2gGKlE2+asNV9m7xrywl36YYNnBG5ZQ0r/BOOxqPpmk=
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/mail.v2 v2.3.1 h1:WYFn/oANrAGP2C0dcV6/pbkPzv8yGzqTjPmTeO7qoXk=
gopkg.in/mail.v2 v2.3.1/go.mod h1:htwXN1Qh09vZJ1NVKxQqHPBaCBbzKhp5GzuJEA4VJWw=

215
main.go
View File

@@ -1,215 +0,0 @@
package main
import (
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"time"
"github.com/buger/jsonparser"
"gopkg.in/ini.v1"
gomail "gopkg.in/mail.v2"
)
// Runtime configuration. main assigns every value from config.ini before
// calling loop; loop, validateAlert and sendAlert only read them.
var (
// subreddit is interpolated into the Reddit listing URL. The smtp* values are
// the mail server host, the recipient and sender addresses, and the
// credentials handed to the SMTP dialer.
subreddit, smtpServer, smtpTo, smtpFrom, smtpUsername, smtpPassword string
// keywords holds the values returned by ValueWithShadows for the "keyword"
// key of the [app] section.
keywords []string
// interval is the polling period in minutes; smtpPort is the SMTP server port.
interval, smtpPort int
)
// main loads config.ini, copies every required setting into the package-level
// configuration variables (echoing each one as it is read) and then hands
// control to the polling loop. A file that cannot be loaded, or a missing key,
// terminates the program through quitConfigParseError.
func main() {
	cfg, err := ini.ShadowLoad("config.ini")
	if err != nil {
		quitConfigParseError(err.Error())
	}

	// required returns the named key, aborting when the section lacks it.
	required := func(section, name string) *ini.Key {
		sec := cfg.Section(section)
		if !sec.HasKey(name) {
			quitConfigParseError("Missing '" + name + "'")
		}
		return sec.Key(name)
	}

	// [app] settings.
	subreddit = required("app", "subreddit").String()
	printConfig("subreddit", subreddit)

	// A value that does not parse as an integer falls back to 5 minutes.
	interval = required("app", "interval").MustInt(5)
	printConfig("interval", strconv.Itoa(interval))

	keywords = required("app", "keyword").ValueWithShadows()
	for _, kw := range keywords {
		printConfig("keyword", kw)
	}

	// [smtp] settings.
	smtpServer = required("smtp", "smtp_server").String()
	printConfig("smtp_server", smtpServer)

	// A value that does not parse as an integer falls back to port 25.
	smtpPort = required("smtp", "smtp_port").MustInt(25)
	printConfig("smtp_port", strconv.Itoa(smtpPort))

	smtpUsername = required("smtp", "smtp_username").String()
	printConfig("smtp_username", smtpUsername)

	smtpPassword = required("smtp", "smtp_password").String()
	// The secret itself is never echoed.
	printConfig("smtp_password", "<redacted>")

	smtpTo = required("smtp", "smtp_to").String()
	printConfig("smtp_to", smtpTo)

	smtpFrom = required("smtp", "smtp_from").String()
	printConfig("smtp_from", smtpFrom)

	loop()
}
// loop polls the configured subreddit's "new" listing forever, sleeping for
// the configured interval (in minutes) between polls. A failed poll is logged
// and retried on the next tick instead of crashing the process.
func loop() {
	listingURL := "https://www.reddit.com/r/" + subreddit + "/new/.json"

	// A timeout keeps one stalled connection from blocking the loop forever.
	client := &http.Client{Timeout: 30 * time.Second}

	req, err := http.NewRequest("GET", listingURL, nil)
	if err != nil {
		// Without a valid request there is nothing to poll; exit rather than
		// dereference a nil request below.
		fmt.Println(err)
		os.Exit(1)
	}
	// Set a header otherwise the request will be blocked.
	req.Header.Set("User-Agent", "Golang_Reddit_Notif/1.0")

	for {
		body, err := fetchListing(client, req)
		if err != nil {
			fmt.Println(err)
		} else {
			scanListing(body)
		}
		time.Sleep(time.Duration(interval) * time.Minute)
	}
}

// fetchListing performs one GET of the listing and returns the raw JSON body.
func fetchListing(client *http.Client, req *http.Request) ([]byte, error) {
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("requesting %s: unexpected status %s", req.URL, resp.Status)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading response from %s: %w", req.URL, err)
	}
	return body, nil
}

// scanListing checks every post in a listing body against each configured
// keyword entry and passes matches to validateAlert.
func scanListing(body []byte) {
	count, err := jsonparser.GetInt(body, "data", "dist")
	if err != nil {
		fmt.Println(err)
		return
	}

	for i := 0; i < int(count); i++ {
		idx := "[" + strconv.Itoa(i) + "]"
		// Lookup errors are deliberately ignored: a missing field yields an
		// empty string, which simply never matches a keyword.
		title, _ := jsonparser.GetString(body, "data", "children", idx, "data", "title")
		text, _ := jsonparser.GetString(body, "data", "children", idx, "data", "selftext")
		lowerTitle := strings.ToLower(title)
		lowerText := strings.ToLower(text)

		for _, keys := range keywords {
			lowerKeys := strings.ToLower(keys)
			if !compareToKeywords(lowerTitle, lowerKeys) && !compareToKeywords(lowerText, lowerKeys) {
				continue
			}
			// The remaining fields are only needed once a keyword matched.
			url, _ := jsonparser.GetString(body, "data", "children", idx, "data", "url")
			timestamp, _ := jsonparser.GetFloat(body, "data", "children", idx, "data", "created_utc")
			validateAlert(title, text, url, int64(timestamp), keys)
		}
	}
}
// printConfig echoes one loaded configuration entry to standard output.
func printConfig(key string, value string) {
	label := fmt.Sprintf("Loaded %s: ", key)
	fmt.Println(label, value)
}
// quitConfigParseError prints a configuration error to standard output and
// terminates the process with exit status 1.
func quitConfigParseError(msg string) {
	const prefix = "Error parsing config.ini: "
	fmt.Println(prefix, msg)
	os.Exit(1)
}
// compareToKeywords reports whether text contains every comma-separated term
// in keyword. Terms are trimmed of surrounding whitespace before matching.
// Matching is case-sensitive, so callers lower-case both arguments first.
//
// A blank term (for example from an empty keyword or a trailing comma) is
// contained in any text and therefore never causes a mismatch.
func compareToKeywords(text string, keyword string) bool {
	for _, term := range strings.Split(keyword, ",") {
		// One missing term is enough to reject; the previous implementation
		// let the last term alone decide the result.
		if !strings.Contains(text, strings.TrimSpace(term)) {
			return false
		}
	}
	return true
}
// validateAlert forwards a match to sendAlert only when the post's creation
// timestamp (Unix seconds) is newer than one polling interval ago; older posts
// are dropped without sending anything.
func validateAlert(title string, text string, url string, timestamp int64, keyword string) {
	window := time.Duration(interval) * time.Minute
	cutoff := time.Now().Add(-window).Unix()
	if timestamp <= cutoff {
		return
	}
	sendAlert(title, text, url, keyword)
}
// sendAlert e-mails a notification about a matching post using the configured
// SMTP settings. The subject carries the post title; the body lists the
// matched keyword, the title, the post text and the post URL. A delivery
// failure is printed and otherwise ignored so that polling continues.
func sendAlert(title string, text string, url string, keyword string) {
	// Build the message.
	m := gomail.NewMessage()
	m.SetHeader("From", smtpFrom)
	m.SetHeader("To", smtpTo)
	m.SetHeader("Subject", "Reddit Notify: Found match ("+title+")")
	// Include the URL so the recipient can open the post; it was previously
	// accepted as a parameter but never written into the message.
	m.SetBody("text/plain", "Keyword: "+keyword+"\n\n\n"+title+"\n\n\n"+text+"\n\n\nURL: "+url)

	// Send via gomail.
	d := gomail.NewDialer(smtpServer, smtpPort, smtpUsername, smtpPassword)
	if err := d.DialAndSend(m); err != nil {
		fmt.Println(err)
	}
}

250
main.py Normal file
View File

@@ -0,0 +1,250 @@
import argparse
import configparser
import sys
import time
import requests
import json
import datetime
import smtplib
import os
import crcmod
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
# Command-line interface: both file locations are optional and default to the
# relative paths config.ini and cache.txt.
parser = argparse.ArgumentParser(description='Reddit notify on keywords')
parser.add_argument(
    "--config",
    type=str,
    default='config.ini',
    help="location of config file",
)
parser.add_argument(
    "--cache",
    type=str,
    default='cache.txt',
    help="location of cache file",
)
args = parser.parse_args()

# Shared parser instance that get_config() fills from the config file
config_parser = configparser.ConfigParser()

# Headers sent with every request to Reddit
http_headers = {'User-Agent': 'Python_Reddit_Notif_Dtam/1.0'}

# De-duplication state: the CRC-32 hash function, the path of the on-disk
# cache (set by setup_cache) and the hashes of URLs already alerted on
crc32_func = crcmod.predefined.mkPredefinedCrcFun('crc-32')
cache_file = ""
sent = []

# Newline held in a name so it can be used inside f-string expressions
nl = '\n'
def config_error(msg):
    """Report a configuration problem and terminate with exit status 1.

    Args:
        msg: Description of what is wrong with the config file.
    """
    print_and_flush('Error parsing config file: {}'.format(msg))
    sys.exit(1)
def print_and_flush(msg):
    """Print msg to stdout and flush immediately so buffered output is not delayed.

    Args:
        msg: Value to print; a trailing newline is appended by print().
    """
    print(msg, flush=True)
def sent_previously(url):
    """Tell whether an alert for url was already recorded, recording it if not.

    URLs are tracked by the decimal string of their CRC-32 hash. When the
    hash is new it is appended to the module-level ``sent`` list, the list is
    trimmed to its 100 most recent entries (oldest dropped first) and the
    whole list is rewritten to ``cache_file``.

    Args:
        url: URL (or permalink) identifying the post.

    Returns:
        True if the hash was already in ``sent``, False if it was just added.
    """
    digest = str(crc32_func(url.encode('utf-8')))
    if digest in sent:
        return True

    sent.append(digest)
    # Prune from the front so the newest hashes survive
    overflow = len(sent) - 100
    if overflow > 0:
        del sent[:overflow]
    # Persist the current list as a comma-separated line
    with open(cache_file, "w") as file:
        file.write(', '.join(map(str, sent)))
    return False
def _require_option(section, option):
    """Return the stripped, non-empty value of a mandatory option or exit."""
    try:
        if config_parser.has_option(section, option):
            value = config_parser.get(section, option).strip()
            if value:
                return value
    except configparser.Error as e:
        # e.g. a bare '%' in the value trips configparser's interpolation
        config_error(f"Invalid '{option}': {e}")
    config_error(f"Missing '{option}'")


def _require_int_option(section, option):
    """Return a mandatory option converted to int, or exit if not numeric."""
    raw = _require_option(section, option)
    try:
        return int(raw)
    except ValueError:
        config_error(f"'{option}' must be an integer, got '{raw}'")


def get_config(filename):
    """Load and validate the settings file.

    Every option below is mandatory and must be non-empty; any problem is
    reported on stdout and ends the process with exit status 1.

    Args:
        filename: Path of the INI file to read.

    Returns:
        dict with the keys subreddit, interval (int, minutes), keyword,
        smtp_server, smtp_port (int), smtp_username, smtp_password, smtp_to
        and smtp_from. String values are stripped of surrounding whitespace.
    """
    try:
        config_parser.read(filename)
        # read() silently skips files it cannot open, so an empty section
        # list is the only sign that nothing usable was loaded
        if len(config_parser.sections()) == 0:
            raise configparser.Error(
                f"'{filename}': file not found or contains no sections")
    except configparser.Error as e:
        print_and_flush(f'Error reading config file {e}')
        sys.exit(1)

    # Key order is kept stable because the dict is echoed at startup
    config = {}
    config['subreddit'] = _require_option('app', 'subreddit')
    config['interval'] = _require_int_option('app', 'interval')
    if config['interval'] <= 0:
        # Zero would poll Reddit in a tight loop; negative makes sleep() raise
        config_error("'interval' must be a positive number of minutes")
    config['keyword'] = _require_option('app', 'keyword')

    # SMTP options
    config['smtp_server'] = _require_option('smtp', 'smtp_server')
    config['smtp_port'] = _require_int_option('smtp', 'smtp_port')
    config['smtp_username'] = _require_option('smtp', 'smtp_username')
    config['smtp_password'] = _require_option('smtp', 'smtp_password')
    config['smtp_to'] = _require_option('smtp', 'smtp_to')
    config['smtp_from'] = _require_option('smtp', 'smtp_from')
    return config
def setup_cache(filename):
    """Point the module at a cache file and load any hashes it already holds.

    Sets the module-level ``cache_file``. If the file exists, its
    comma-separated contents replace the module-level ``sent`` list (blank
    tokens are dropped); otherwise an empty file is created.

    Args:
        filename: Path of the cache file.
    """
    global cache_file, sent
    cache_file = filename

    if not os.path.exists(cache_file):
        # Touch an empty cache file
        with open(cache_file, "w"):
            pass
        return

    with open(cache_file, "r") as file:
        content = file.read()
    stripped = (token.strip() for token in content.split(','))
    sent = [token for token in stripped if token != ""]
    print_and_flush(f'Cache file found at: {filename}')
def _fetch_posts(subreddit):
    """Return the post dicts of a subreddit's newest listing, or [] on failure.

    Network errors, non-200 responses and undecodable bodies are logged and
    reported as an empty list so that the caller's polling loop keeps running.
    """
    try:
        # The timeout stops a stalled connection from hanging the loop
        resp = requests.get(f'https://www.reddit.com/r/{subreddit}/new.json',
                            headers=http_headers, timeout=30)
    except requests.exceptions.RequestException as e:
        print_and_flush(f'Request error for r/{subreddit}: {e}')
        return []
    if resp.status_code != 200:
        print_and_flush(f'Unexpected status {resp.status_code} for r/{subreddit}')
        return []
    try:
        full_body = json.loads(resp.text)
    except ValueError as e:
        print_and_flush(f'Invalid JSON from r/{subreddit}: {e}')
        return []
    # Walk the children list itself instead of trusting the 'dist' counter
    children = (full_body.get('data') or {}).get('children') or []
    return [child.get('data') or {} for child in children]


def check_reddit(config):
    """Poll every configured subreddit forever and alert on keyword matches.

    Each pass fetches the newest posts of every subreddit, checks the title
    and text of each post against every keyword (case-insensitively) and
    calls send_alert for each hit. After a full pass it sleeps for the
    configured interval. This function never returns.

    Args:
        config: Settings dict from get_config(); reads 'subreddit' and
            'keyword' (comma-separated strings) and 'interval' (minutes).
    """
    # Blank entries (e.g. from a trailing comma) are dropped: an empty
    # keyword is a substring of every post and would alert on everything
    subreddits = [name.strip() for name in config.get('subreddit').split(',')
                  if name.strip()]
    keywords = {keyword.strip().lower() for keyword in config.get('keyword').split(',')
                if keyword.strip()}

    while True:
        for subreddit in subreddits:
            # Debug message
            current_time = datetime.datetime.now()
            print_and_flush(f'Starting search at: {current_time.strftime("%Y-%m-%d %H:%M:%S")}')

            for post in _fetch_posts(subreddit):
                title = post.get('title') or ''
                text = post.get('selftext') or ''
                title_lower = title.lower()
                text_lower = text.lower()
                for keyword in keywords:
                    if keyword in title_lower or keyword in text_lower:
                        send_alert(config, title, text, post.get('permalink'),
                                   post.get('created_utc'), keyword)
        time.sleep(config.get('interval') * 60)
def send_alert(config, title, text, url, timestamp, keyword):
    """E-mail a notification for a matching post unless one was already sent.

    The post is first checked (and recorded) through sent_previously(); a
    repeat returns immediately. Otherwise a plain-text message is built and
    handed to the configured SMTP server. Any exception while sending is
    logged and swallowed.

    Args:
        config: Settings dict; reads smtp_from, smtp_to, smtp_server, smtp_port.
        title: Post title, used in the subject and body.
        text: Post text.
        url: Path appended to https://www.reddit.com to form the link.
        timestamp: Creation time passed to datetime.fromtimestamp().
        keyword: The keyword that matched.
    """
    if sent_previously(url):
        return

    sender = config.get('smtp_from')
    recipient = config.get('smtp_to')
    posted_at = datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")

    # Debug message
    print_and_flush(f'Found match: {title} at {posted_at}')

    # Assemble the message
    message = MIMEMultipart()
    message["From"] = sender
    message["To"] = recipient
    message["Subject"] = f'Reddit Notify: Found Match ({title})'
    body = ''.join([
        f'Keyword: {keyword}{nl}{nl}{nl}',
        f'{title}{nl}{nl}{nl}',
        f'{text}{nl}{nl}{nl}',
        f'URL: https://www.reddit.com{url}{nl}',
        f'Time: {posted_at}',
    ])
    message.attach(MIMEText(body, "plain"))

    # Deliver it
    try:
        with smtplib.SMTP(config.get('smtp_server'), config.get('smtp_port')) as server:
            server.sendmail(sender, recipient, message.as_string())
    except Exception as e:
        print_and_flush(f'SMTP Send Error: {e}')
if __name__ == '__main__':
    # Load and validate the settings
    config = get_config(args.config)
    # Echo the effective settings, masking the SMTP password so the secret
    # does not end up in stdout or container logs
    printable = {key: ('<redacted>' if key == 'smtp_password' else value)
                 for key, value in config.items()}
    print_and_flush(f'Current config file: {printable}')
    # Load or create the cache of already-sent alerts
    setup_cache(args.cache)
    # Main loop, check reddit (never returns)
    check_reddit(config)

8
requirements.txt Normal file
View File

@@ -0,0 +1,8 @@
certifi==2024.2.2
charset-normalizer==3.3.2
crcmod==1.7
idna==3.7
requests==2.32.2
setuptools==69.0.2
urllib3==2.2.1
wheel==0.42.0