250 lines
5.5 KiB
Go
250 lines
5.5 KiB
Go
package reddit
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"net/url"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/cwpearson/reddit-images/rate_limit"
|
|
"github.com/gabriel-vasile/mimetype"
|
|
)
|
|
|
|
// JSON model for a subreddit listing reply, declared outermost first.
type (
	// Response is the top-level object of a listing reply; everything of
	// interest sits under its "data" key.
	Response struct {
		Data ListingData `json:"data"`
	}

	// ListingData is the payload under "data": the pagination cursor and the
	// entries of the current page.
	ListingData struct {
		// After is the value of the "after" key.
		After string `json:"after"`
		// Children holds one element per entry of the "children" array.
		Children []Child `json:"children"`
	}

	// Child is a single element of the "children" array: a kind tag plus the
	// entry's own data object.
	Child struct {
		Kind string    `json:"kind"`
		Data ChildData `json:"data"`
	}

	// ChildData is the subset of an entry's "data" object that this package
	// decodes.
	ChildData struct {
		Title               string `json:"title"`
		Author              string `json:"author"`
		URLOverriddenByDest string `json:"url_overridden_by_dest"`
		URL                 string `json:"url"`
		// Created is decoded as a float; presumably a Unix timestamp in
		// seconds - confirm against the API.
		Created float64 `json:"created"`
		Id      string  `json:"id"`
	}
)
|
|
|
|
type Reddit struct {
|
|
subreddit string
|
|
retries int
|
|
rl *rate_limit.RateLimit
|
|
}
|
|
|
|
func NewReddit(rl *rate_limit.RateLimit, subreddit string) *Reddit {
|
|
return &Reddit{
|
|
subreddit: subreddit,
|
|
retries: 3,
|
|
rl: rl,
|
|
}
|
|
}
|
|
|
|
// returns children, after, error
|
|
func (r *Reddit) Next(after string) ([]ChildData, string, error) {
|
|
|
|
baseURL := fmt.Sprintf("https://reddit.com/r/%s/hot.json", r.subreddit)
|
|
|
|
u, err := url.Parse(baseURL)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
// Create query parameters
|
|
params := url.Values{}
|
|
params.Add("raw_json", "1")
|
|
params.Add("limit", "100")
|
|
if after != "" {
|
|
params.Add("after", after)
|
|
}
|
|
|
|
// Add the query parameters to the URL
|
|
u.RawQuery = params.Encode()
|
|
|
|
var body []byte
|
|
for try := 0; try < r.retries; try++ {
|
|
body, err = r.rl.Get(u.String(), "")
|
|
if err != nil {
|
|
fmt.Printf("Error getting subreddit: %v\n", err)
|
|
body = nil
|
|
time.Sleep(time.Second * time.Duration(5))
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
if body == nil {
|
|
return nil, "", fmt.Errorf("retries exceeded")
|
|
}
|
|
response := Response{}
|
|
err = json.Unmarshal(body, &response)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
|
|
res := []ChildData{}
|
|
for _, c := range response.Data.Children {
|
|
if c.Kind == "t3" { // link
|
|
res = append(res, c.Data)
|
|
}
|
|
}
|
|
return res, response.Data.After, nil
|
|
}
|
|
|
|
func getImage(rl *rate_limit.RateLimit, url, outDir, stem string) error {
|
|
contents, err := rl.Get(url, "image/*")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
mtype := mimetype.Detect(contents)
|
|
name := stem + mtype.Extension()
|
|
outPath := filepath.Join(outDir, name)
|
|
log.Println("write", outPath)
|
|
return os.WriteFile(outPath, contents, 0644)
|
|
}
|
|
|
|
func (r *Reddit) Get(outDir string) {
|
|
var children []ChildData
|
|
var err error
|
|
|
|
outDir = filepath.Join(outDir, r.subreddit)
|
|
err = os.MkdirAll(outDir, 0755)
|
|
if err != nil && !os.IsExist(err) {
|
|
log.Println("ERROR: couldn't create out directory", outDir)
|
|
return
|
|
}
|
|
|
|
// load existing names
|
|
existing := map[string]struct{}{}
|
|
entries, err := os.ReadDir(outDir)
|
|
if err != nil {
|
|
log.Println("ERROR: couldn't read directory", outDir)
|
|
return
|
|
}
|
|
for _, entry := range entries {
|
|
if entry.IsDir() {
|
|
continue
|
|
}
|
|
filename := entry.Name()
|
|
nameWithoutSuffix := strings.TrimSuffix(filename, filepath.Ext(filename))
|
|
existing[nameWithoutSuffix] = struct{}{}
|
|
}
|
|
|
|
children, _, err = r.Next("")
|
|
if err != nil {
|
|
log.Println("ERROR: Next error:", err)
|
|
return
|
|
}
|
|
|
|
for _, child := range children {
|
|
log.Println("Title:", child.Title)
|
|
shortTitle := child.Title
|
|
if len(shortTitle) > 32 {
|
|
shortTitle = shortTitle[0:32]
|
|
}
|
|
shortTitle = sanitizeFilename(shortTitle)
|
|
|
|
if strings.Contains(child.URLOverriddenByDest, "www.reddit.com/gallery") {
|
|
|
|
metas, err := GalleryImageMetadata(r.rl, child.URLOverriddenByDest)
|
|
if err != nil {
|
|
log.Println("ERROR: Gallery handling error:", err)
|
|
continue
|
|
}
|
|
|
|
log.Println("Gallery metas:", metas)
|
|
|
|
for mi, meta := range metas {
|
|
parts := strings.Split(meta.Mimetype, "/")
|
|
if len(parts) == 2 {
|
|
imgUrl := fmt.Sprintf("https://i.redd.it/%s.%s", meta.Id, parts[1])
|
|
|
|
stem := fmt.Sprintf("%d_%s_%s", int64(child.Created), shortTitle, meta.Id)
|
|
|
|
if _, ok := existing[stem]; ok {
|
|
log.Println(stem, "already downloaded")
|
|
continue
|
|
}
|
|
|
|
err := getImage(r.rl, imgUrl, outDir, stem)
|
|
if err != nil {
|
|
log.Println("ERROR: getImage:", err)
|
|
continue
|
|
}
|
|
existing[stem] = struct{}{}
|
|
}
|
|
}
|
|
|
|
continue
|
|
} else {
|
|
|
|
stem := fmt.Sprintf("%d_%s_%s", int64(child.Created), shortTitle, child.Id)
|
|
|
|
if _, ok := existing[stem]; ok {
|
|
log.Println(stem, "already downloaded")
|
|
continue
|
|
}
|
|
|
|
err := getImage(r.rl, child.URLOverriddenByDest, outDir, stem)
|
|
if err != nil {
|
|
log.Println("ERROR: getImage:", err)
|
|
continue
|
|
}
|
|
existing[stem] = struct{}{}
|
|
}
|
|
}
|
|
}
|
|
|
|
// sanitizeFilename turns input into a string usable as part of a file name.
//
// In one pass it replaces every space with a hyphen and drops the characters
// / \ : * ? " < > | , ; the NUL byte, and anything outside ASCII. Leading and
// trailing whitespace that survives (tabs, newlines) is then trimmed. When
// nothing is left the placeholder "unnamed_file" is returned.
func sanitizeFilename(input string) string {
	const dropped = "/\\:*?\"<>|,;\x00"

	mapped := strings.Map(func(ch rune) rune {
		switch {
		case ch == ' ':
			return '-'
		case ch >= 128, strings.ContainsRune(dropped, ch):
			// A negative result makes strings.Map omit the character.
			return -1
		}
		return ch
	}, input)

	if trimmed := strings.TrimSpace(mapped); trimmed != "" {
		return trimmed
	}
	return "unnamed_file"
}
|