Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b2c1723313 | |||
| acaccb8cdd |
31
README.md
31
README.md
@@ -1,25 +1,38 @@
|
||||
# scrapychan
|
||||
|
||||
## Description
|
||||
scrapychan is a go rewrite of a original [scraperchan](https://git.protron.dev/Doc/scraperchan) that i wrote in Python. There was no reason to rewrite the original program but atleast it is faster and i learned some go.
|
||||
scrapychan is a Go rewrite of the original [scraperchan](https://git.protron.dev/Doc/scraperchan) that I wrote in Python. There was no reason to rewrite the original program, but at least it is faster and I learned some Go.
|
||||
|
||||
## Install
|
||||
### Build from source
|
||||
|
||||
### Build from Source
|
||||
#### Dependencies
|
||||
You just need to have [go](https://go.dev/doc/install) installed
|
||||
You just need to have [Go](https://go.dev/doc/install) installed.
|
||||
|
||||
#### Build
|
||||
```
|
||||
```bash
|
||||
git clone https://git.protron.dev/Doc/scrapychan
|
||||
cd scrapychan
|
||||
go build
|
||||
go install
|
||||
```
|
||||
|
||||
### Prebuild binaries
|
||||
or use the binaries provided in the [release section](https://git.protron.dev/Doc/Scrapychan/releases)
|
||||
### Pre-built Binaries
|
||||
Alternatively, use the binaries provided in the [release section](https://git.protron.dev/Doc/Scrapychan/releases).
|
||||
|
||||
## How to use
|
||||
```
|
||||
scrapychan -u=<Thread URL> -o=<Destination path of media> -v=<true or false to enable verbose logging>
```

|
||||
## Example Usage
|
||||
```bash
|
||||
# Download images from a thread to current directory with verbose logging
|
||||
scrapychan -u="https://boards.4chan.org/b/thread/123456789" -v=true
|
||||
|
||||
# Download images from a thread to specific directory
|
||||
scrapychan -u="https://boards.4chan.org/g/thread/987654321" -o="/home/user/4chan_images"
|
||||
```
|
||||
|
||||
## Features
|
||||
- Fast concurrent downloading of images and videos
|
||||
- Support for 4chan's API
|
||||
- Verbose logging option
|
||||
- Cross-platform compatibility
|
||||
- Error handling and recovery
|
||||
- Automatic directory creation
|
||||
253
main.go
253
main.go
@@ -8,130 +8,203 @@ import (
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
func writeDataToDisk(dest *string, board string, verlog *bool, post map[string]interface{}, cdnresbody []byte) {
|
||||
// Save the mediadata to file
|
||||
err := os.WriteFile(*dest + "/" + board + "-" + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string), cdnresbody, 0664 )
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
} else if (*verlog) {
|
||||
log.Println("Successfully wrote image/video data to disk")
|
||||
}
|
||||
|
||||
// httpClient is the package-level HTTP client. Its 30-second Timeout puts an
// upper bound on every request made through it, so a stalled connection
// cannot block a download indefinitely.
var httpClient = &http.Client{Timeout: 30 * time.Second}
|
||||
|
||||
func getPostData(post map[string]interface{}, board string, verlog *bool) []byte {
|
||||
// Check if post contains media (Video or Image)
|
||||
if post["ext"] != nil {
|
||||
cdnurlstr := "https://i.4cdn.org/" + board + "/" + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string)
|
||||
|
||||
// Requesting the media from CDN
|
||||
cdnres, err := http.Get(cdnurlstr)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Check if respons was valid
|
||||
if cdnres.StatusCode > 299 {
|
||||
log.Fatalf("Response failed with status code: %d and\n", cdnres.StatusCode)
|
||||
} else if (*verlog) {
|
||||
log.Println("Got image/video " + strconv.Itoa(int(post["tim"].(float64))) + post["ext"].(string) + " data")
|
||||
}
|
||||
|
||||
// Read data form respons
|
||||
cdnresbody, err := io.ReadAll(cdnres.Body)
|
||||
cdnres.Body.Close()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
} else if (*verlog) {
|
||||
log.Println("Successfully got data from responds body")
|
||||
}
|
||||
return cdnresbody
|
||||
// PostData holds the fields of a single post in a 4chan thread JSON document
// that this program reads. All other keys of a post are ignored on decode.
type PostData struct {
	// No is decoded from the "no" key; it identifies the post in log messages.
	No float64 `json:"no"`
	// Tim is decoded from the "tim" key; it is the numeric part of the media
	// file name.
	Tim float64 `json:"tim"`
	// Ext is decoded from the "ext" key; it stays empty when the post carries
	// no image or video.
	Ext string `json:"ext"`
}
|
||||
|
||||
} else if (*verlog) {
|
||||
log.Println("Post " + strconv.Itoa(int(post["no"].(float64))) + " didn't include a image or video")
|
||||
// ThreadData represents the JSON structure from 4chan API
|
||||
type ThreadData struct {
|
||||
Posts []PostData `json:"posts"`
|
||||
}
|
||||
|
||||
func writeDataToDisk(dest string, board string, verlog bool, post PostData, cdnresbody []byte) error {
|
||||
if cdnresbody == nil {
|
||||
return fmt.Errorf("no data to write")
|
||||
}
|
||||
|
||||
filename := fmt.Sprintf("%s-%d%s", board, int64(post.Tim), post.Ext)
|
||||
filepath := filepath.Join(dest, filename)
|
||||
|
||||
err := os.WriteFile(filepath, cdnresbody, 0664)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write file %s: %w", filepath, err)
|
||||
}
|
||||
|
||||
if verlog {
|
||||
log.Printf("Successfully wrote image/video data to disk: %s", filename)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
func main () {
|
||||
// Setting up command flags
|
||||
wdpath, _ := os.Getwd();
|
||||
|
||||
url := flag.String("u", "", "The url of the 4chan thread")
|
||||
|
||||
dest := flag.String("o", wdpath, "Target dir of the conntent")
|
||||
|
||||
verlog := flag.Bool("v", false, "Set logging to verbose")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
// Check if flags are valid
|
||||
if *url == "" {
|
||||
fmt.Println("no thread URL provided")
|
||||
fmt.Println("use the -u=<Thread URL> flag to provid URL")
|
||||
os.Exit(1)
|
||||
func getPostData(post PostData, board string, verlog bool) ([]byte, error) {
|
||||
if post.Ext == "" {
|
||||
if verlog {
|
||||
log.Printf("Post %d didn't include an image or video", int64(post.No))
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Getting the boardname
|
||||
board := strings.Split(*url, "/")[3]
|
||||
cdnurlstr := fmt.Sprintf("https://i.4cdn.org/%s/%d%s", board, int64(post.Tim), post.Ext)
|
||||
|
||||
// Get thread info from API
|
||||
res, err := http.Get(*url + ".json")
|
||||
cdnres, err := httpClient.Get(cdnurlstr)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
} else {
|
||||
return nil, fmt.Errorf("failed to fetch media from %s: %w", cdnurlstr, err)
|
||||
}
|
||||
defer cdnres.Body.Close()
|
||||
|
||||
if cdnres.StatusCode > 299 {
|
||||
return nil, fmt.Errorf("response failed with status code: %d for %s", cdnres.StatusCode, cdnurlstr)
|
||||
}
|
||||
|
||||
if verlog {
|
||||
log.Printf("Got image/video %d%s data", int64(post.Tim), post.Ext)
|
||||
}
|
||||
|
||||
cdnresbody, err := io.ReadAll(cdnres.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
if verlog {
|
||||
log.Println("Successfully got data from response body")
|
||||
}
|
||||
|
||||
return cdnresbody, nil
|
||||
}
|
||||
|
||||
// validateURL performs a basic sanity check on the thread URL given on the
// command line. The URL is only inspected as text; nothing is fetched.
//
// It returns an error when url is empty, when it does not start with
// "http://" or "https://", or when the host or the first path segment (the
// board name) is missing or empty. It returns nil otherwise.
func validateURL(url string) error {
	if url == "" {
		return fmt.Errorf("no thread URL provided")
	}

	if !strings.HasPrefix(url, "https://") && !strings.HasPrefix(url, "http://") {
		return fmt.Errorf("invalid URL format")
	}

	// Splitting "scheme://host/board/..." on "/" yields
	// ["scheme:", "", "host", "board", ...]: index 2 is the host and index 3
	// the board. Checking only the count would accept "https://host/", whose
	// board segment is empty.
	parts := strings.Split(url, "/")
	if len(parts) < 4 || parts[2] == "" || parts[3] == "" {
		return fmt.Errorf("invalid thread URL format")
	}

	return nil
}
|
||||
|
||||
// extractBoard returns the board name from a thread URL: the fourth field
// when url is split on "/", i.e. the first path segment of
// "scheme://host/board/...". It returns "" when url has fewer than four
// fields.
func extractBoard(url string) string {
	const boardIndex = 3

	// Anything past the board field is left unsplit; only the field at
	// boardIndex is needed.
	fields := strings.SplitN(url, "/", boardIndex+2)
	if len(fields) <= boardIndex {
		return ""
	}
	return fields[boardIndex]
}
|
||||
|
||||
func fetchThreadData(url string, verlog bool) (*ThreadData, error) {
|
||||
apiURL := url + ".json"
|
||||
|
||||
res, err := httpClient.Get(apiURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch thread data from %s: %w", apiURL, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode > 299 {
|
||||
return nil, fmt.Errorf("API response failed with status code: %d", res.StatusCode)
|
||||
}
|
||||
|
||||
if verlog {
|
||||
log.Println("Got thread data")
|
||||
}
|
||||
|
||||
// Check if API response is valid
|
||||
if res.StatusCode > 299 {
|
||||
log.Fatalf("Response failed with status code: %d and\n", res.StatusCode)
|
||||
} else if (*verlog) {
|
||||
log.Println("API response was ok")
|
||||
}
|
||||
|
||||
//Geting the data from the response
|
||||
resbody, err := io.ReadAll(res.Body)
|
||||
res.Body.Close()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
} else if (*verlog) {
|
||||
return nil, fmt.Errorf("failed to read API response body: %w", err)
|
||||
}
|
||||
|
||||
if verlog {
|
||||
log.Println("Got body of API response")
|
||||
}
|
||||
|
||||
// Var to save the JSON data
|
||||
var jdata map[string]interface{}
|
||||
|
||||
//Unmarshaling the API JSON respons
|
||||
var jdata ThreadData
|
||||
if err := json.Unmarshal(resbody, &jdata); err != nil {
|
||||
log.Fatalln(err)
|
||||
} else if (*verlog) {
|
||||
log.Println("Unmarsheled API responsebody")
|
||||
return nil, fmt.Errorf("failed to unmarshal API response: %w", err)
|
||||
}
|
||||
|
||||
if verlog {
|
||||
log.Println("Unmarshaled API response body")
|
||||
}
|
||||
|
||||
return &jdata, nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Setting up command flags
|
||||
wdpath, err := os.Getwd()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to get working directory: %v", err)
|
||||
}
|
||||
|
||||
url := flag.String("u", "", "The url of the 4chan thread")
|
||||
dest := flag.String("o", wdpath, "Target dir of the content")
|
||||
verlog := flag.Bool("v", false, "Set logging to verbose")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
// Validate flags
|
||||
if err := validateURL(*url); err != nil {
|
||||
log.Fatalf("Invalid URL: %v", err)
|
||||
}
|
||||
|
||||
// Getting the boardname
|
||||
board := extractBoard(*url)
|
||||
if board == "" {
|
||||
log.Fatal("Failed to extract board name from URL")
|
||||
}
|
||||
|
||||
// Fetch thread data
|
||||
jdata, err := fetchThreadData(*url, *verlog)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to fetch thread data: %v", err)
|
||||
}
|
||||
|
||||
// Ensure destination directory exists
|
||||
if err := os.MkdirAll(*dest, 0755); err != nil {
|
||||
log.Fatalf("Failed to create destination directory: %v", err)
|
||||
}
|
||||
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// Iterating the posts from JSON data
|
||||
for _, v := range jdata["posts"].([]interface{}) {
|
||||
post := v.(map[string]interface{})
|
||||
|
||||
for _, post := range jdata.Posts {
|
||||
wg.Add(1)
|
||||
|
||||
go func() {
|
||||
|
||||
go func(post PostData) {
|
||||
defer wg.Done()
|
||||
if postdata := getPostData(post, board, verlog); postdata != nil {
|
||||
writeDataToDisk(dest, board, verlog, post, postdata)
|
||||
|
||||
if postdata, err := getPostData(post, board, *verlog); err != nil {
|
||||
log.Printf("Error processing post %d: %v", int64(post.No), err)
|
||||
return
|
||||
} else if postdata != nil {
|
||||
if err := writeDataToDisk(*dest, board, *verlog, post, postdata); err != nil {
|
||||
log.Printf("Error writing post %d to disk: %v", int64(post.No), err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}(post)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
log.Println("DONE!!!")
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user