refactor: Improve 4chan downloader with better error handling and resource management

- Replace log.Fatal() with proper error returns for better control flow
- Add type safety with dedicated structs instead of interface{} maps
- Implement proper HTTP client with timeout configuration
- Fix goroutine closure variable capture issues
- Add input validation for URL format and board extraction
- Ensure destination directory exists before writing files
- Improve error messages with context and proper error wrapping
- Add resource cleanup with defer statements for HTTP responses
- Separate concerns into dedicated functions for better organization
- Use filepath.Join for cross-platform path handling
- Add comprehensive logging throughout the application

This refactoring makes the downloader more robust, maintainable, and production-ready while preserving all original functionality.
This commit is contained in:
Doc
2025-10-03 19:33:16 +02:00
parent 07224f9258
commit acaccb8cdd

253
main.go
View File

@@ -8,130 +8,203 @@ import (
"log"
"net/http"
"os"
"strconv"
"path/filepath"
"strings"
"sync"
"time"
)
func writeDataToDisk(dest *string, board string, verlog *bool, post map[string]interface{}, cdnresbody []byte) {
// Save the mediadata to file
err := os.WriteFile(*dest + "/" + board + "-" + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string), cdnresbody, 0664 )
if err != nil {
log.Fatal(err)
} else if (*verlog) {
log.Println("Successfully wrote image/video data to disk")
}
// httpClient is the single HTTP client shared by the thread-metadata and
// media fetches. Its Timeout caps every request at 30 seconds so that a
// stalled connection cannot block a download forever.
var httpClient = &http.Client{Timeout: 30 * time.Second}
func getPostData(post map[string]interface{}, board string, verlog *bool) []byte {
// Check if post contains media (Video or Image)
if post["ext"] != nil {
cdnurlstr := "https://i.4cdn.org/" + board + "/" + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string)
// Requesting the media from CDN
cdnres, err := http.Get(cdnurlstr)
if err != nil {
log.Fatal(err)
}
// Check if respons was valid
if cdnres.StatusCode > 299 {
log.Fatalf("Response failed with status code: %d and\n", cdnres.StatusCode)
} else if (*verlog) {
log.Println("Got image/video " + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string) + " data")
}
// Read data form respons
cdnresbody, err := io.ReadAll(cdnres.Body)
cdnres.Body.Close()
if err != nil {
log.Fatal(err)
} else if (*verlog) {
log.Println("Successfully got data from responds body")
}
return cdnresbody
// PostData holds the fields of a single post, as decoded from the thread
// JSON, that the downloader actually uses. Numeric fields are decoded as
// float64 and converted to int64 by the code that formats them.
type PostData struct {
	// No is the post number; it identifies the post in log and error messages.
	No float64 `json:"no"`
	// Tim is the numeric name under which the post's media is requested
	// from the CDN and saved to disk.
	Tim float64 `json:"tim"`
	// Ext is appended verbatim after Tim to form the media file name.
	// It is empty when the post carries no image or video.
	Ext string `json:"ext"`
}
} else if (*verlog) {
log.Println("Post " + strconv.Itoa(int(post["no"].(float64))) + " didn't include a image or video")
// ThreadData mirrors the top-level object of the thread JSON returned by
// the 4chan API. Only the "posts" array is decoded; any other keys in the
// response are ignored.
type ThreadData struct {
	// Posts lists the thread's posts in the order they appear in the JSON.
	Posts []PostData `json:"posts"`
}
// writeDataToDisk saves the downloaded media of post into the directory dest.
//
// The file is named "<board>-<tim><ext>", where tim is post.Tim truncated to
// an integer, and is written with mode 0664. When verlog is true a
// confirmation line is logged after a successful write.
//
// An error is returned when cdnresbody is empty (nil or zero-length), when
// the generated file name contains a path component, or when the write
// itself fails. The destination directory is not created here.
func writeDataToDisk(dest string, board string, verlog bool, post PostData, cdnresbody []byte) error {
	// len covers both a nil slice and an empty, non-nil one, so a zero-byte
	// response never produces an empty file.
	if len(cdnresbody) == 0 {
		return fmt.Errorf("no data to write")
	}

	filename := fmt.Sprintf("%s-%d%s", board, int64(post.Tim), post.Ext)
	// post.Ext is taken from a remote response; refuse names that would let
	// it steer the write into another directory.
	if filename != filepath.Base(filename) {
		return fmt.Errorf("invalid file name %q", filename)
	}

	// Named target (not filepath) so the path/filepath package stays usable.
	target := filepath.Join(dest, filename)
	if err := os.WriteFile(target, cdnresbody, 0664); err != nil {
		return fmt.Errorf("failed to write file %s: %w", target, err)
	}

	if verlog {
		log.Printf("Successfully wrote image/video data to disk: %s", filename)
	}
	return nil
}
func main () {
// Setting up command flags
wdpath, _ := os.Getwd();
url := flag.String("u", "", "The url of the 4chan thread")
dest := flag.String("o", wdpath, "Target dir of the conntent")
verlog := flag.Bool("v", false, "Set logging to verbose")
flag.Parse()
// Check if flags are valid
if *url == "" {
fmt.Println("no thread URL provided")
fmt.Println("use the -u=<Thread URL> flag to provid URL")
os.Exit(1)
func getPostData(post PostData, board string, verlog bool) ([]byte, error) {
if post.Ext == "" {
if verlog {
log.Printf("Post %d didn't include an image or video", int64(post.No))
}
return nil, nil
}
// Getting the boardname
board := strings.Split(*url, "/")[3]
cdnurlstr := fmt.Sprintf("https://i.4cdn.org/%s/%d%s", board, int64(post.Tim), post.Ext)
// Get thread info from API
res, err := http.Get(*url + ".json")
cdnres, err := httpClient.Get(cdnurlstr)
if err != nil {
log.Fatal(err)
} else {
return nil, fmt.Errorf("failed to fetch media from %s: %w", cdnurlstr, err)
}
defer cdnres.Body.Close()
if cdnres.StatusCode > 299 {
return nil, fmt.Errorf("response failed with status code: %d for %s", cdnres.StatusCode, cdnurlstr)
}
if verlog {
log.Printf("Got image/video %d%s data", int64(post.Tim), post.Ext)
}
cdnresbody, err := io.ReadAll(cdnres.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response body: %w", err)
}
if verlog {
log.Println("Successfully got data from response body")
}
return cdnresbody, nil
}
// validateURL checks that url has the minimal shape of a thread URL.
//
// It returns an error when url is empty, when it does not start with
// "http://" or "https://", or when splitting it on "/" does not yield a
// non-empty host (element 2) followed by a non-empty board name
// (element 3). It returns nil otherwise. The URL is not contacted.
func validateURL(url string) error {
	if url == "" {
		return fmt.Errorf("no thread URL provided")
	}

	if !strings.HasPrefix(url, "https://") && !strings.HasPrefix(url, "http://") {
		return fmt.Errorf("invalid URL format")
	}

	// For "scheme://host/board/..." the split gives
	// ["scheme:", "", "host", "board", ...]. Checking the elements, not just
	// the count, rejects inputs such as "https://host/" or "https:///board"
	// whose host or board segment is empty.
	parts := strings.Split(url, "/")
	if len(parts) < 4 || parts[2] == "" || parts[3] == "" {
		return fmt.Errorf("invalid thread URL format")
	}

	return nil
}
// extractBoard returns the fourth "/"-separated element of url, which for
// a URL of the form "scheme://host/board/..." is the board name. It
// returns the empty string when url has fewer than four elements.
func extractBoard(url string) string {
	// Nothing past the fourth element is needed, so the remainder of the
	// URL is left unsplit in a fifth element.
	segments := strings.SplitN(url, "/", 5)
	if len(segments) < 4 {
		return ""
	}
	return segments[3]
}
func fetchThreadData(url string, verlog bool) (*ThreadData, error) {
apiURL := url + ".json"
res, err := httpClient.Get(apiURL)
if err != nil {
return nil, fmt.Errorf("failed to fetch thread data from %s: %w", apiURL, err)
}
defer res.Body.Close()
if res.StatusCode > 299 {
return nil, fmt.Errorf("API response failed with status code: %d", res.StatusCode)
}
if verlog {
log.Println("Got thread data")
}
// Check if API response is valid
if res.StatusCode > 299 {
log.Fatalf("Response failed with status code: %d and\n", res.StatusCode)
} else if (*verlog) {
log.Println("API response was ok")
}
// Getting the data from the response
resbody, err := io.ReadAll(res.Body)
res.Body.Close()
if err != nil {
log.Fatal(err)
} else if (*verlog) {
return nil, fmt.Errorf("failed to read API response body: %w", err)
}
if verlog {
log.Println("Got body of API response")
}
// Var to save the JSON data
var jdata map[string]interface{}
// Unmarshaling the API JSON response
var jdata ThreadData
if err := json.Unmarshal(resbody, &jdata); err != nil {
log.Fatalln(err)
} else if (*verlog) {
log.Println("Unmarsheled API responsebody")
return nil, fmt.Errorf("failed to unmarshal API response: %w", err)
}
if verlog {
log.Println("Unmarshaled API response body")
}
return &jdata, nil
}
func main() {
// Setting up command flags
wdpath, err := os.Getwd()
if err != nil {
log.Fatalf("Failed to get working directory: %v", err)
}
url := flag.String("u", "", "The url of the 4chan thread")
dest := flag.String("o", wdpath, "Target dir of the content")
verlog := flag.Bool("v", false, "Set logging to verbose")
flag.Parse()
// Validate flags
if err := validateURL(*url); err != nil {
log.Fatalf("Invalid URL: %v", err)
}
// Getting the boardname
board := extractBoard(*url)
if board == "" {
log.Fatal("Failed to extract board name from URL")
}
// Fetch thread data
jdata, err := fetchThreadData(*url, *verlog)
if err != nil {
log.Fatalf("Failed to fetch thread data: %v", err)
}
// Ensure destination directory exists
if err := os.MkdirAll(*dest, 0755); err != nil {
log.Fatalf("Failed to create destination directory: %v", err)
}
var wg sync.WaitGroup
// Iterating the posts from JSON data
for _, v := range jdata["posts"].([]interface{}) {
post := v.(map[string]interface{})
for _, post := range jdata.Posts {
wg.Add(1)
go func() {
go func(post PostData) {
defer wg.Done()
if postdata := getPostData(post, board, verlog); postdata != nil {
writeDataToDisk(dest, board, verlog, post, postdata)
if postdata, err := getPostData(post, board, *verlog); err != nil {
log.Printf("Error processing post %d: %v", int64(post.No), err)
return
} else if postdata != nil {
if err := writeDataToDisk(*dest, board, *verlog, post, postdata); err != nil {
log.Printf("Error writing post %d to disk: %v", int64(post.No), err)
}
}
}()
}(post)
}
wg.Wait()
log.Println("DONE!!!")
}