2 Commits

Author SHA1 Message Date
Doc
b2c1723313 updated readme 2025-10-03 19:38:12 +02:00
Doc
acaccb8cdd refactor: Improve 4chan downloader with better error handling and resource management
- Replace log.Fatal() with proper error returns for better control flow
- Add type safety with dedicated structs instead of interface{} maps
- Implement proper HTTP client with timeout configuration
- Fix goroutine closure variable capture issues
- Add input validation for URL format and board extraction
- Ensure destination directory exists before writing files
- Improve error messages with context and proper error wrapping
- Add resource cleanup with defer statements for HTTP responses
- Separate concerns into dedicated functions for better organization
- Use filepath.Join for cross-platform path handling
- Add comprehensive logging throughout the application

This refactoring makes the downloader more robust, maintainable, and production-ready while preserving all original functionality.
2025-10-03 19:33:16 +02:00
2 changed files with 185 additions and 99 deletions

View File

@@ -1,25 +1,38 @@
# scrapychan # scrapychan
## Description ## Description
scrapychan is a go rewrite of a original [scraperchan](https://git.protron.dev/Doc/scraperchan) that i wrote in Python. There was no reason to rewrite the original program but atleast it is faster and i learned some go. scrapychan is a Go rewrite of the original [scraperchan](https://git.protron.dev/Doc/scraperchan) that I wrote in Python. There was no reason to rewrite the original program, but at least it is faster and I learned some Go.
## Install ## Install
### Build from source
### Build from Source
#### Dependencies #### Dependencies
You just need to have [go](https://go.dev/doc/install) installed You just need to have [Go](https://go.dev/doc/install) installed.
#### Build #### Build
``` ```bash
git clone https://git.protron.dev/Doc/scrapychan git clone https://git.protron.dev/Doc/scrapychan
cd scrapychan cd scrapychan
go build go build
go install go install
``` ```
### Prebuild binaries ### Pre-built Binaries
or use the binaries provided in the [release section](https://git.protron.dev/Doc/Scrapychan/releases) Alternatively, use the binaries provided in the [release section](https://git.protron.dev/Doc/Scrapychan/releases)
## How to use ## Example Usage
``` ```bash
scrapychan -u=<Thread URL> -o=<Destinationpath of media> -v=<true or false to enable verbose logging> # Download images from a thread to the current directory with verbose logging
scrapychan -u="https://boards.4chan.org/b/thread/123456789" -v=true
# Download images from a thread to a specific directory
scrapychan -u="https://boards.4chan.org/g/thread/987654321" -o="/home/user/4chan_images"
``` ```
## Features
- Fast concurrent downloading of images and videos
- Support for 4chan's API
- Verbose logging option
- Cross-platform compatibility
- Error handling and recovery
- Automatic directory creation

241
main.go
View File

@@ -8,130 +8,203 @@ import (
"log" "log"
"net/http" "net/http"
"os" "os"
"strconv" "path/filepath"
"strings" "strings"
"sync" "sync"
"time"
) )
func writeDataToDisk(dest *string, board string, verlog *bool, post map[string]interface{}, cdnresbody []byte) {
// Save the mediadata to file // Global HTTP client with timeout
err := os.WriteFile(*dest + "/" + board + "-" + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string), cdnresbody, 0664 ) var httpClient = &http.Client{
if err != nil { Timeout: 30 * time.Second,
log.Fatal(err)
} else if (*verlog) {
log.Println("Successfully wrote image/video data to disk")
}
} }
func getPostData(post map[string]interface{}, board string, verlog *bool) []byte { // PostData represents a 4chan post
// Check if post contains media (Video or Image) type PostData struct {
if post["ext"] != nil { No float64 `json:"no"`
cdnurlstr := "https://i.4cdn.org/" + board + "/" + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string) Tim float64 `json:"tim"`
Ext string `json:"ext"`
}
// Requesting the media from CDN // ThreadData represents the JSON structure from 4chan API
cdnres, err := http.Get(cdnurlstr) type ThreadData struct {
Posts []PostData `json:"posts"`
}
func writeDataToDisk(dest string, board string, verlog bool, post PostData, cdnresbody []byte) error {
if cdnresbody == nil {
return fmt.Errorf("no data to write")
}
filename := fmt.Sprintf("%s-%d%s", board, int64(post.Tim), post.Ext)
filepath := filepath.Join(dest, filename)
err := os.WriteFile(filepath, cdnresbody, 0664)
if err != nil { if err != nil {
log.Fatal(err) return fmt.Errorf("failed to write file %s: %w", filepath, err)
} }
// Check if respons was valid if verlog {
if cdnres.StatusCode > 299 { log.Printf("Successfully wrote image/video data to disk: %s", filename)
log.Fatalf("Response failed with status code: %d and\n", cdnres.StatusCode)
} else if (*verlog) {
log.Println("Got image/video " + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string) + " data")
}
// Read data form respons
cdnresbody, err := io.ReadAll(cdnres.Body)
cdnres.Body.Close()
if err != nil {
log.Fatal(err)
} else if (*verlog) {
log.Println("Successfully got data from responds body")
}
return cdnresbody
} else if (*verlog) {
log.Println("Post " + strconv.Itoa(int(post["no"].(float64))) + " didn't include a image or video")
} }
return nil return nil
} }
// getPostData downloads the media attached to a single post from the CDN.
//
// It returns (nil, nil) when the post has no file extension, which this code
// treats as "no image or video attached". Otherwise it requests
// https://i.4cdn.org/<board>/<tim><ext> through httpClient and returns the raw
// response body. An error is returned when the request fails, when the status
// code is above 299, or when the body cannot be read. When verlog is true,
// progress messages are written through the log package.
func getPostData(post PostData, board string, verlog bool) ([]byte, error) {
// An empty extension means there is nothing to download for this post.
if post.Ext == "" {
if verlog {
log.Printf("Post %d didn't include an image or video", int64(post.No))
}
return nil, nil
}
// NOTE(review): the leading `func main () {` on the next line looks like
// residue from the removed column of the diff view, not part of this function.
// Tim and No are float64 fields; they are converted to int64 so %d prints them
// as plain integers.
func main () { cdnurlstr := fmt.Sprintf("https://i.4cdn.org/%s/%d%s", board, int64(post.Tim), post.Ext)
cdnres, err := httpClient.Get(cdnurlstr)
if err != nil {
return nil, fmt.Errorf("failed to fetch media from %s: %w", cdnurlstr, err)
}
// Close the body on every return path below.
defer cdnres.Body.Close()
// Any status above 299 is reported as a failure.
if cdnres.StatusCode > 299 {
return nil, fmt.Errorf("response failed with status code: %d for %s", cdnres.StatusCode, cdnurlstr)
}
if verlog {
log.Printf("Got image/video %d%s data", int64(post.Tim), post.Ext)
}
// The whole file is buffered in memory before being handed back to the caller.
cdnresbody, err := io.ReadAll(cdnres.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response body: %w", err)
}
if verlog {
log.Println("Successfully got data from response body")
}
return cdnresbody, nil
}
// validateURL performs a lightweight, offline sanity check on the thread URL
// supplied on the command line.
//
// It returns an error when the URL is empty, when it does not start with
// "http://" or "https://", or when it lacks a non-empty host or a non-empty
// first path segment (the position the board name is read from). It returns
// nil otherwise.
func validateURL(url string) error {
	if url == "" {
		return fmt.Errorf("no thread URL provided")
	}
	if !strings.HasPrefix(url, "https://") && !strings.HasPrefix(url, "http://") {
		return fmt.Errorf("invalid URL format")
	}
	// "scheme://host/board/..." splits into ["scheme:", "", "host", "board", ...],
	// so index 2 is the host and index 3 is the first path segment. Rejecting
	// empty values here catches inputs such as "https://host/" that would
	// otherwise pass validation and only fail later during board extraction.
	parts := strings.Split(url, "/")
	if len(parts) < 4 || parts[2] == "" || parts[3] == "" {
		return fmt.Errorf("invalid thread URL format")
	}
	return nil
}
// extractBoard returns the fourth "/"-separated field of url, which for a
// URL shaped like "https://host/<board>/..." is the board name. It returns
// the empty string when url has fewer than four fields.
func extractBoard(url string) string {
	segments := strings.Split(url, "/")
	if len(segments) < 4 {
		return ""
	}
	return segments[3]
}
// fetchThreadData downloads and decodes the JSON description of a thread.
//
// url is the thread URL as supplied by the user. The API endpoint is derived
// by appending ".json" to it after removing any "#..." fragment and trailing
// slashes, so that URLs copied from a browser (for example ending in
// "#p123456" or "/") still resolve to the JSON document instead of the HTML
// page or a 404.
//
// The request is sent through httpClient. An error is returned when the
// request fails, when the status code is above 299, when the body cannot be
// read, or when the body is not valid JSON for ThreadData. When verlog is
// true, progress messages are written through the log package.
func fetchThreadData(url string, verlog bool) (*ThreadData, error) {
	// A fragment is never sent to the server, so leaving it in place would
	// make the ".json" suffix part of the fragment and request the HTML page.
	threadURL := url
	if i := strings.IndexByte(threadURL, '#'); i >= 0 {
		threadURL = threadURL[:i]
	}
	// A trailing slash would otherwise produce ".../.json".
	threadURL = strings.TrimRight(threadURL, "/")
	apiURL := threadURL + ".json"

	res, err := httpClient.Get(apiURL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch thread data from %s: %w", apiURL, err)
	}
	// Close the body on every return path below.
	defer res.Body.Close()

	// Any status above 299 is reported as a failure.
	if res.StatusCode > 299 {
		return nil, fmt.Errorf("API response failed with status code: %d", res.StatusCode)
	}
	if verlog {
		log.Println("Got thread data")
	}

	resbody, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read API response body: %w", err)
	}
	if verlog {
		log.Println("Got body of API response")
	}

	var jdata ThreadData
	if err := json.Unmarshal(resbody, &jdata); err != nil {
		return nil, fmt.Errorf("failed to unmarshal API response: %w", err)
	}
	if verlog {
		log.Println("Unmarshaled API response body")
	}
	return &jdata, nil
}
func main() {
// Setting up command flags // Setting up command flags
wdpath, _ := os.Getwd(); wdpath, err := os.Getwd()
if err != nil {
log.Fatalf("Failed to get working directory: %v", err)
}
url := flag.String("u", "", "The url of the 4chan thread") url := flag.String("u", "", "The url of the 4chan thread")
dest := flag.String("o", wdpath, "Target dir of the content")
dest := flag.String("o", wdpath, "Target dir of the conntent")
verlog := flag.Bool("v", false, "Set logging to verbose") verlog := flag.Bool("v", false, "Set logging to verbose")
flag.Parse() flag.Parse()
// Check if flags are valid // Validate flags
if *url == "" { if err := validateURL(*url); err != nil {
fmt.Println("no thread URL provided") log.Fatalf("Invalid URL: %v", err)
fmt.Println("use the -u=<Thread URL> flag to provid URL")
os.Exit(1)
} }
// Getting the boardname // Getting the boardname
board := strings.Split(*url, "/")[3] board := extractBoard(*url)
if board == "" {
log.Fatal("Failed to extract board name from URL")
}
// Get thread info from API // Fetch thread data
res, err := http.Get(*url + ".json") jdata, err := fetchThreadData(*url, *verlog)
if err != nil { if err != nil {
log.Fatal(err) log.Fatalf("Failed to fetch thread data: %v", err)
} else {
log.Println("Got thread data")
} }
// Check if API response is valid // Ensure destination directory exists
if res.StatusCode > 299 { if err := os.MkdirAll(*dest, 0755); err != nil {
log.Fatalf("Response failed with status code: %d and\n", res.StatusCode) log.Fatalf("Failed to create destination directory: %v", err)
} else if (*verlog) {
log.Println("API response was ok")
} }
//Geting the data from the response
resbody, err := io.ReadAll(res.Body)
res.Body.Close()
if err != nil {
log.Fatal(err)
} else if (*verlog) {
log.Println("Got body of API response")
}
// Var to save the JSON data
var jdata map[string]interface{}
//Unmarshaling the API JSON respons
if err := json.Unmarshal(resbody, &jdata); err != nil {
log.Fatalln(err)
} else if (*verlog) {
log.Println("Unmarsheled API responsebody")
}
var wg sync.WaitGroup var wg sync.WaitGroup
// Iterating the posts from JSON data // Iterating the posts from JSON data
for _, v := range jdata["posts"].([]interface{}) { for _, post := range jdata.Posts {
post := v.(map[string]interface{})
wg.Add(1) wg.Add(1)
go func() { go func(post PostData) {
defer wg.Done() defer wg.Done()
if postdata := getPostData(post, board, verlog); postdata != nil {
writeDataToDisk(dest, board, verlog, post, postdata) if postdata, err := getPostData(post, board, *verlog); err != nil {
log.Printf("Error processing post %d: %v", int64(post.No), err)
return
} else if postdata != nil {
if err := writeDataToDisk(*dest, board, *verlog, post, postdata); err != nil {
log.Printf("Error writing post %d to disk: %v", int64(post.No), err)
} }
}() }
}(post)
} }
wg.Wait() wg.Wait()
log.Println("DONE!!!") log.Println("DONE!!!")
} }