Initial commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
subreddits
|
||||
go.sum
|
9
README.md
Normal file
9
README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
# reimager for Reddit
|
||||
|
||||
Download images from reddit.
|
||||
|
||||
```bash
|
||||
go mod tidy
|
||||
|
||||
go run main.go
|
||||
```
|
8
go.mod
Normal file
8
go.mod
Normal file
@@ -0,0 +1,8 @@
|
||||
module github.com/cwpearson/reddit-images
|
||||
|
||||
go 1.23.0
|
||||
|
||||
require (
|
||||
github.com/gabriel-vasile/mimetype v1.4.7 // indirect
|
||||
golang.org/x/net v0.31.0 // indirect
|
||||
)
|
34
main.go
Normal file
34
main.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/cwpearson/reddit-images/rate_limit"
|
||||
"github.com/cwpearson/reddit-images/reddit"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
subreddits := []string{
|
||||
"pics",
|
||||
"oldschoolcool",
|
||||
"thewaywewere",
|
||||
"MilitaryPorn",
|
||||
"EarthPorn",
|
||||
}
|
||||
|
||||
rl := rate_limit.NewRateLimit()
|
||||
|
||||
for {
|
||||
for _, subreddit := range subreddits {
|
||||
r := reddit.NewReddit(rl, subreddit)
|
||||
r.Get()
|
||||
}
|
||||
|
||||
when := time.Now().Add(time.Minute * time.Duration(30))
|
||||
log.Println("sleep until", when)
|
||||
time.Sleep(time.Until(when))
|
||||
}
|
||||
|
||||
}
|
124
rate_limit/rate_limit.go
Normal file
124
rate_limit/rate_limit.go
Normal file
@@ -0,0 +1,124 @@
|
||||
package rate_limit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RateLimit tracks Reddit's X-Ratelimit-* response headers and is used
// to throttle outgoing requests (see Get, which sleeps when remaining
// hits zero).
type RateLimit struct {
	client    *http.Client // shared HTTP client for all requests
	used      int          // requests consumed this window (X-Ratelimit-Used)
	remaining int          // requests left this window (X-Ratelimit-Remaining)
	reset     time.Time    // when the window resets (derived from X-Ratelimit-Reset)
}
|
||||
|
||||
func getFutureTime(seconds int) time.Time {
|
||||
return time.Now().Add(time.Duration(seconds) * time.Second)
|
||||
}
|
||||
|
||||
func sleepUntil(when time.Time) {
|
||||
now := time.Now()
|
||||
if now.After(when) {
|
||||
return
|
||||
}
|
||||
time.Sleep(when.Sub(now))
|
||||
}
|
||||
|
||||
func NewRateLimit() *RateLimit {
|
||||
return &RateLimit{
|
||||
client: &http.Client{},
|
||||
remaining: 100,
|
||||
reset: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
func (rl *RateLimit) UpdateUsed(used string) {
|
||||
val, err := strconv.ParseFloat(used, 64)
|
||||
if err == nil {
|
||||
rl.used = int(val)
|
||||
log.Println("used ->", rl.used)
|
||||
}
|
||||
}
|
||||
|
||||
func (rl *RateLimit) UpdateRemaining(used string) {
|
||||
val, err := strconv.ParseFloat(used, 64)
|
||||
if err == nil {
|
||||
rl.remaining = int(val)
|
||||
log.Println("remaining ->", rl.remaining)
|
||||
}
|
||||
}
|
||||
|
||||
func (rl *RateLimit) UpdateReset(used string) {
|
||||
val, err := strconv.ParseFloat(used, 64)
|
||||
if err == nil {
|
||||
maybe := getFutureTime(int(val))
|
||||
if rl.reset.Before(maybe) {
|
||||
rl.reset = maybe
|
||||
log.Println("reset ->", rl.reset)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get makes an HTTP GET request to the specified URL while respecting rate
// limits: if the previous response said the quota was exhausted, it sleeps
// until the advertised reset time before sending. accept, when non-empty, is
// sent as the Accept request header. On success the full response body is
// returned. Note: not safe for concurrent use (unsynchronized counter state).
func (rl *RateLimit) Get(url string, accept string) ([]byte, error) {
	// Block before sending if the last response said we were out of quota.
	if rl.remaining <= 0 {
		log.Println("no requests remaining, sleep until", rl.reset)
		sleepUntil(rl.reset)
	}

	// Create new request
	log.Println("GET", url)
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to make request: %w", err)
	}

	// Add any required headers. A User-Agent is always sent; Accept only
	// when the caller asked for one.
	req.Header.Add("User-Agent", "linux:reddit-images:0.1")
	if accept != "" {
		req.Header.Add("Accept", accept)
	}

	// Make the request
	resp, err := rl.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to make request: %w", err)
	}
	defer resp.Body.Close()

	// parse rate limit: record the server-reported state so the next call
	// can throttle itself before sending.
	used := resp.Header.Get("X-Ratelimit-Used")
	remaining := resp.Header.Get("X-Ratelimit-Remaining")
	reset := resp.Header.Get("X-Ratelimit-Reset")
	rl.UpdateUsed(used)
	rl.UpdateRemaining(remaining)
	rl.UpdateReset(reset)

	if resp.StatusCode == http.StatusTooManyRequests {
		// The server throttled us regardless of what the headers said;
		// force the next call to wait.
		rl.remaining = 0

		// if the reset time is before now, just pick a while in the future for the next retry
		if rl.reset.Before(time.Now()) {
			rl.UpdateReset("450")
		}
		return nil, fmt.Errorf("request failed with 429")
	}

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("request failed with status: %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response body: %w", err)
	}

	return body, nil
}
|
54
reddit/gallery.go
Normal file
54
reddit/gallery.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package reddit
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/cwpearson/reddit-images/rate_limit"
|
||||
)
|
||||
|
||||
// GListing is the top-level element of a gallery post's .json response.
type GListing struct {
	Data GListingData `json:"data"`
}

// GListingData holds the listing's posts.
type GListingData struct {
	Children []GListingDataChild `json:"children"`
}

// GListingDataChild wraps a single post in the listing.
type GListingDataChild struct {
	Data GListingDataChildData `json:"data"`
}

// GListingDataChildData carries the per-image metadata map of a
// gallery post, keyed by image id.
type GListingDataChildData struct {
	MediaMetadata map[string]Metadata `json:"media_metadata"`
}

// Metadata describes one image in a gallery: its id and its MIME type
// (the "m" field, e.g. "image/jpg").
type Metadata struct {
	Id       string `json:"id"`
	Mimetype string `json:"m"`
}
|
||||
|
||||
func GalleryImageMetadata(rl *rate_limit.RateLimit, url string) ([]Metadata, error) {
|
||||
jsonUrl := fmt.Sprintf("%s.json?raw_json=1", url)
|
||||
log.Printf("gallery url: %s -> %s", url, jsonUrl)
|
||||
|
||||
content, err := rl.Get(jsonUrl, "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var data []GListing
|
||||
err = json.Unmarshal(content, &data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := []Metadata{}
|
||||
|
||||
for _, val := range data[0].Data.Children[0].Data.MediaMetadata {
|
||||
res = append(res, val)
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
249
reddit/reddit.go
Normal file
249
reddit/reddit.go
Normal file
@@ -0,0 +1,249 @@
|
||||
package reddit
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cwpearson/reddit-images/rate_limit"
|
||||
"github.com/gabriel-vasile/mimetype"
|
||||
)
|
||||
|
||||
// Response represents the outer JSON structure of a subreddit listing.
type Response struct {
	Data ListingData `json:"data"`
}

// ListingData represents the data field containing children; After is
// the pagination cursor for fetching the next page.
type ListingData struct {
	After    string  `json:"after"`
	Children []Child `json:"children"`
}

// Child represents each item in the children array. Kind is "t3" for
// link posts (the only kind Next keeps).
type Child struct {
	Kind string    `json:"kind"`
	Data ChildData `json:"data"`
}

// ChildData represents the nested data in each child (a single post).
type ChildData struct {
	Title               string  `json:"title"`
	Author              string  `json:"author"`
	URLOverriddenByDest string  `json:"url_overridden_by_dest"`
	URL                 string  `json:"url"`
	Created             float64 `json:"created"` // creation time; truncated to int64 when building filenames
	Id                  string  `json:"id"`
}
|
||||
|
||||
// Reddit downloads images from a single subreddit through a shared
// rate limiter.
type Reddit struct {
	subreddit string                // subreddit name, e.g. "pics"
	retries   int                   // attempts per listing fetch in Next
	rl        *rate_limit.RateLimit // shared limiter for all HTTP calls
}
|
||||
|
||||
func NewReddit(rl *rate_limit.RateLimit, subreddit string) *Reddit {
|
||||
return &Reddit{
|
||||
subreddit: subreddit,
|
||||
retries: 3,
|
||||
rl: rl,
|
||||
}
|
||||
}
|
||||
|
||||
// returns children, after, error
|
||||
func (r *Reddit) Next(after string) ([]ChildData, string, error) {
|
||||
|
||||
baseURL := fmt.Sprintf("https://reddit.com/r/%s/hot.json", r.subreddit)
|
||||
|
||||
u, err := url.Parse(baseURL)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// Create query parameters
|
||||
params := url.Values{}
|
||||
params.Add("raw_json", "1")
|
||||
params.Add("limit", "100")
|
||||
if after != "" {
|
||||
params.Add("after", after)
|
||||
}
|
||||
|
||||
// Add the query parameters to the URL
|
||||
u.RawQuery = params.Encode()
|
||||
|
||||
var body []byte
|
||||
for try := 0; try < r.retries; try++ {
|
||||
body, err = r.rl.Get(u.String(), "")
|
||||
if err != nil {
|
||||
fmt.Printf("Error getting subreddit: %v\n", err)
|
||||
body = nil
|
||||
time.Sleep(time.Second * time.Duration(5))
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if body == nil {
|
||||
return nil, "", fmt.Errorf("retries exceeded")
|
||||
}
|
||||
response := Response{}
|
||||
err = json.Unmarshal(body, &response)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
res := []ChildData{}
|
||||
for _, c := range response.Data.Children {
|
||||
if c.Kind == "t3" { // link
|
||||
res = append(res, c.Data)
|
||||
}
|
||||
}
|
||||
return res, response.Data.After, nil
|
||||
}
|
||||
|
||||
func getImage(rl *rate_limit.RateLimit, url, outDir, stem string) error {
|
||||
contents, err := rl.Get(url, "image/*")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mtype := mimetype.Detect(contents)
|
||||
name := stem + mtype.Extension()
|
||||
outPath := filepath.Join(outDir, name)
|
||||
log.Println("write", outPath)
|
||||
return os.WriteFile(outPath, contents, 0644)
|
||||
}
|
||||
|
||||
// Get downloads the first page of the subreddit's hot listing into
// subreddits/<name>/, skipping files already on disk (matched by name
// stem, ignoring extension). Gallery posts are expanded into their
// individual images. Errors are logged and the affected post skipped;
// Get itself never returns an error.
func (r *Reddit) Get() {
	var children []ChildData
	var err error

	outDir := filepath.Join("subreddits", r.subreddit)
	err = os.MkdirAll(outDir, 0755)
	if err != nil && !os.IsExist(err) {
		log.Println("ERROR: couldn't create out directory", outDir)
		return
	}

	// load existing names (extension stripped) so already-downloaded
	// images are not fetched again
	existing := map[string]struct{}{}
	entries, err := os.ReadDir(outDir)
	if err != nil {
		log.Println("ERROR: couldn't read directory", outDir)
		return
	}
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		filename := entry.Name()
		nameWithoutSuffix := strings.TrimSuffix(filename, filepath.Ext(filename))
		existing[nameWithoutSuffix] = struct{}{}
	}

	// first page only; the pagination cursor is deliberately discarded
	children, _, err = r.Next("")
	if err != nil {
		log.Println("ERROR: Next error:", err)
		return
	}

	for _, child := range children {
		log.Println("Title:", child.Title)
		// the filename stem embeds a sanitized, truncated title
		// NOTE(review): byte-slicing can split a multi-byte rune; any
		// non-ASCII remnant is dropped by sanitizeFilename below.
		shortTitle := child.Title
		if len(shortTitle) > 32 {
			shortTitle = shortTitle[0:32]
		}
		shortTitle = sanitizeFilename(shortTitle)

		if strings.Contains(child.URLOverriddenByDest, "www.reddit.com/gallery") {

			// gallery post: resolve every image it contains
			metas, err := GalleryImageMetadata(r.rl, child.URLOverriddenByDest)
			if err != nil {
				log.Println("ERROR: Gallery handling error:", err)
				continue
			}

			log.Println("Gallery metas:", metas)

			for mi, meta := range metas {
				// "image/png" -> direct i.redd.it URL with ".png"
				parts := strings.Split(meta.Mimetype, "/")
				if len(parts) == 2 {
					imgUrl := fmt.Sprintf("https://i.redd.it/%s.%s", meta.Id, parts[1])

					// stem: <created>_<title>_<index>_<imageId>
					stem := fmt.Sprintf("%d_%s_%d_%s", int64(child.Created), shortTitle, mi, meta.Id)

					if _, ok := existing[stem]; ok {
						log.Println(stem, "already downloaded")
						continue
					}

					err := getImage(r.rl, imgUrl, outDir, stem)
					if err != nil {
						log.Println("ERROR: getImage:", err)
						continue
					}
					existing[stem] = struct{}{}
				}
			}

			continue
		} else {

			// single-image post: stem is <created>_<title>_<postId>
			stem := fmt.Sprintf("%d_%s_%s", int64(child.Created), shortTitle, child.Id)

			if _, ok := existing[stem]; ok {
				log.Println(stem, "already downloaded")
				continue
			}

			err := getImage(r.rl, child.URLOverriddenByDest, outDir, stem)
			if err != nil {
				log.Println("ERROR: getImage:", err)
				continue
			}
			existing[stem] = struct{}{}
		}
	}
}
|
||||
|
||||
// sanitizeFilename makes input safe to use as a file name: it strips
// path separators and other filesystem-special characters, turns
// spaces into hyphens, drops non-ASCII runes, trims surrounding
// whitespace, and falls back to "unnamed_file" when nothing remains.
func sanitizeFilename(input string) string {
	var b strings.Builder
	for _, r := range input {
		switch r {
		case '/', '\\', ':', '*', '?', '"', '<', '>', '|', ',', ';', '\x00':
			// drop problematic characters entirely
		case ' ':
			b.WriteByte('-') // spaces become hyphens
		default:
			if r < 128 { // keep only ASCII
				b.WriteRune(r)
			}
		}
	}

	cleaned := strings.TrimSpace(b.String())
	if cleaned == "" {
		return "unnamed_file"
	}
	return cleaned
}
|
Reference in New Issue
Block a user