Compare commits
7 Commits
b1c84dec24
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 73a9d7addd | |||
| b4c61ce848 | |||
| a7d8ef8b21 | |||
| 68560d63cd | |||
| 4dbcafa877 | |||
| 36865b6d16 | |||
| 622d96324a |
18
README.md
Normal file
18
README.md
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Scraperchan
|
||||||
|
A simple program to scrape 4Chan Threads for Images and Videos
|
||||||
|
|
||||||
|
Install dependencies:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
git clone https://git.protron.dev/Doc/scraperchan.git
|
||||||
|
|
||||||
|
cd scraperchan
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python scraperchan -u <Thread URL> -o <Local target directory>
|
||||||
|
```
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
# Scraperchan
|
|
||||||
A simple program to scrape 4Chan Threads for Images (videos aren't supported yet)
|
|
||||||
|
|
||||||
usage:
|
|
||||||
|
|
||||||
```shell
|
|
||||||
python scraperchan -u <Thread URL> -o <Local target directory>
|
|
||||||
|
|
||||||
```
|
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
requests
|
requests
|
||||||
argparse
|
argparse
|
||||||
Pillow
|
Pillow
|
||||||
ffmpeg-python
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import requests, json, time, argparse, sys, os, ffmpeg
|
import requests, json, time, argparse, sys, os
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
@@ -12,7 +12,7 @@ args = parser.parse_args()
|
|||||||
|
|
||||||
r = requests.get(args.u + ".json")
|
r = requests.get(args.u + ".json")
|
||||||
|
|
||||||
print("Got main request")
|
print("Got main request from " + args.u + ".json" )
|
||||||
|
|
||||||
rjson = r.json()
|
rjson = r.json()
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ for i in rjson['posts']:
|
|||||||
if "ext" in i:
|
if "ext" in i:
|
||||||
ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext'])
|
ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext'])
|
||||||
|
|
||||||
print("Got post request")
|
print("Got post request for: " + board + "/" + str(i['tim']) + i['ext'])
|
||||||
|
|
||||||
bstream = BytesIO(ir.content)
|
bstream = BytesIO(ir.content)
|
||||||
|
|
||||||
@@ -30,23 +30,24 @@ for i in rjson['posts']:
|
|||||||
ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext'])
|
ir = requests.get("https://i.4cdn.org/" + board + "/" + str(i['tim']) + i['ext'])
|
||||||
im = Image.open(bstream)
|
im = Image.open(bstream)
|
||||||
print("Converted bytes to image")
|
print("Converted bytes to image")
|
||||||
im.save(args.o + "/" + str(i['tim']) + i['ext'])
|
im.save(args.o + "/" + board + "-" + str(i['tim']) + i['ext'])
|
||||||
print("Saved image")
|
print("Saved image: " + board + "-" + str(i['tim']) + i['ext'])
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print('Closing')
|
print('Closing')
|
||||||
try:
|
try:
|
||||||
sys.exit(130)
|
sys.exit(130)
|
||||||
except SystemExit:
|
except SystemExit:
|
||||||
os._exit(130)
|
os._exit(130)
|
||||||
except:
|
except:
|
||||||
print("Cant convert data to image maybe not an image format")
|
|
||||||
print("NO:" + str(i['no']))
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
ffmpeg.input(bstream).output(args.o + "/" + str(i['tim']) + ".mp4").run()
|
with open(args.o + "/" + board + str(i['tim']) + i['ext'], 'wb') as f:
|
||||||
print("Saved video")
|
f.write(bstream.getbuffer())
|
||||||
except:
|
f.close()
|
||||||
print("Cant convert to Video")
|
print("Saved video: " + board + "-" + str(i['tim']) + i['ext'])
|
||||||
"""
|
except Exception as err:
|
||||||
|
print("Cant convert data to image or video maybe not an image or video format")
|
||||||
|
print("NO: " + str(i['no']))
|
||||||
|
print(err)
|
||||||
|
|
||||||
|
time.sleep(1)
|
||||||
print("Done!!")
|
print("Done!!")
|
||||||
|
|||||||
Reference in New Issue
Block a user