Compare commits

..

7 Commits

Author SHA1 Message Date
Doc
73a9d7addd README.md aktualisiert 2023-12-26 17:37:54 +01:00
Doc
b4c61ce848 Added dependency instructions to readme 2023-12-17 11:38:28 +01:00
Doc
a7d8ef8b21 Removed unnecessary imports 2023-12-17 11:34:02 +01:00
Doc
68560d63cd Improved console output with more information 2023-12-17 11:31:39 +01:00
Doc
4dbcafa877 Added board name to filename 2023-12-17 11:24:51 +01:00
Doc
36865b6d16 Code cleanup 2023-12-17 11:23:21 +01:00
Doc
622d96324a Added support for webm videos 2023-12-17 11:21:18 +01:00
4 changed files with 34 additions and 25 deletions

18
README.md Normal file
View File

@@ -0,0 +1,18 @@
# Scraperchan
A simple program to scrape 4Chan Threads for Images and Videos
Install dependencies:
```shell
git clone https://git.protron.dev/Doc/scraperchan.git
cd scraperchan
pip install -r requirements.txt
```
Usage:
```shell
python scraperchan -u <Thread URL> -o <Local target directory>
```

View File

@@ -1,9 +0,0 @@
# Scraperchan
A simple program to scrape 4Chan Threads for Images (videos aren't supported yet)
usage:
```shell
python scraperchan -u <Thread URL> -o <Local target directory>
```

View File

@@ -1,4 +1,3 @@
requests
argparse
Pillow
ffmpeg-python

View File

@@ -1,4 +1,4 @@
import requests, json, time, argparse, sys, os, ffmpeg
import requests, json, time, argparse, sys, os
from PIL import Image
from io import BytesIO
@@ -12,7 +12,7 @@ args = parser.parse_args()
r = requests.get(args.u + ".json")
print("Got main request")
print("Got main request from " + args.u + ".json" )
rjson = r.json()
@@ -22,7 +22,7 @@ for i in rjson['posts']:
if "ext" in i:
ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext'])
print("Got post request")
print("Got post request for: " + board + "/" + str(i['tim']) + i['ext'])
bstream = BytesIO(ir.content)
@@ -30,23 +30,24 @@ for i in rjson['posts']:
ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext'])
im = Image.open(bstream)
print("Converted bytes to image")
im.save(args.o + "/" + str(i['tim']) + i['ext'])
print("Saved image")
im.save(args.o + "/" + board + "-" + str(i['tim']) + i['ext'])
print("Saved image: " + board + "-" + str(i['tim']) + i['ext'])
except KeyboardInterrupt:
print('Closing')
try:
sys.exit(130)
except SystemExit:
os._exit(130)
except:
print("Cant convert data to image maybe not an image format")
print("NO:" + str(i['no']))
"""
except:
try:
ffmpeg.input(bstream).output(args.o + "/" + str(i['tim']) + ".mp4").run()
print("Saved video")
except:
print("Cant convert to Video")
"""
with open(args.o + "/" + board + str(i['tim']) + i['ext'], 'wb') as f:
f.write(bstream.getbuffer())
f.close()
print("Saved video: " + board + "-" + str(i['tim']) + i['ext'])
except Exception as err:
print("Cant convert data to image or video maybe not an image or video format")
print("NO: " + str(i['no']))
print(err)
time.sleep(1)
print("Done!!")