Compare commits

..

7 Commits

Author SHA1 Message Date
Doc
73a9d7addd README.md aktualisiert 2023-12-26 17:37:54 +01:00
Doc
b4c61ce848 Added dependency instructions to readme 2023-12-17 11:38:28 +01:00
Doc
a7d8ef8b21 Removed unnecessary imports 2023-12-17 11:34:02 +01:00
Doc
68560d63cd Improved console output with more information 2023-12-17 11:31:39 +01:00
Doc
4dbcafa877 Added board name to filename 2023-12-17 11:24:51 +01:00
Doc
36865b6d16 Code cleanup 2023-12-17 11:23:21 +01:00
Doc
622d96324a Added support for webm videos 2023-12-17 11:21:18 +01:00
4 changed files with 34 additions and 25 deletions

18
README.md Normal file
View File

@@ -0,0 +1,18 @@
# Scraperchan
A simple program to scrape 4Chan Threads for Images and Videos
Install dependencies:
```shell
git clone https://git.protron.dev/Doc/scraperchan.git
cd scraperchan
pip install -r requirements.txt
```
Usage:
```shell
python scraperchan -u <Thread URL> -o <Local target directory>
```

View File

@@ -1,9 +0,0 @@
# Scraperchan
A simple program to scrape 4Chan Threads for Images (videos aren't supported yet)
usage:
```shell
python scraperchan -u <Thread URL> -o <Local target directory>
```

View File

@@ -1,4 +1,3 @@
requests requests
argparse argparse
Pillow Pillow
ffmpeg-python

View File

@@ -1,4 +1,4 @@
import requests, json, time, argparse, sys, os, ffmpeg import requests, json, time, argparse, sys, os
from PIL import Image from PIL import Image
from io import BytesIO from io import BytesIO
@@ -12,7 +12,7 @@ args = parser.parse_args()
r = requests.get(args.u + ".json") r = requests.get(args.u + ".json")
print("Got main request") print("Got main request from " + args.u + ".json" )
rjson = r.json() rjson = r.json()
@@ -22,7 +22,7 @@ for i in rjson['posts']:
if "ext" in i: if "ext" in i:
ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext']) ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext'])
print("Got post request") print("Got post request for: " + board + "/" + str(i['tim']) + i['ext'])
bstream = BytesIO(ir.content) bstream = BytesIO(ir.content)
@@ -30,23 +30,24 @@ for i in rjson['posts']:
ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext']) ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext'])
im = Image.open(bstream) im = Image.open(bstream)
print("Converted bytes to image") print("Converted bytes to image")
im.save(args.o + "/" + str(i['tim']) + i['ext']) im.save(args.o + "/" + board + "-" + str(i['tim']) + i['ext'])
print("Saved image") print("Saved image: " + board + "-" + str(i['tim']) + i['ext'])
except KeyboardInterrupt: except KeyboardInterrupt:
print('Closing') print('Closing')
try: try:
sys.exit(130) sys.exit(130)
except SystemExit: except SystemExit:
os._exit(130) os._exit(130)
except: except:
print("Cant convert data to image maybe not an image format")
print("NO:" + str(i['no']))
"""
try: try:
ffmpeg.input(bstream).output(args.o + "/" + str(i['tim']) + ".mp4").run() with open(args.o + "/" + board + str(i['tim']) + i['ext'], 'wb') as f:
print("Saved video") f.write(bstream.getbuffer())
except: f.close()
print("Cant convert to Video") print("Saved video: " + board + "-" + str(i['tim']) + i['ext'])
""" except Exception as err:
print("Cant convert data to image or video maybe not an image or video format")
print("NO: " + str(i['no']))
print(err)
time.sleep(1)
print("Done!!") print("Done!!")