Add files

This commit is contained in:
BBaoVanC 2022-05-21 16:12:25 -05:00
parent 9625c7f66a
commit 5169aa101a
3 changed files with 147 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
.env
data/

79
docker-compose.yml Normal file
View File

@ -0,0 +1,79 @@
version: "3.8"
services:
archivebox:
image: archivebox/archivebox:master
command: server --quick-init 0.0.0.0:8000
ports:
- 127.0.0.1:90:8000
environment:
- ALLOWED_HOSTS=*
- MEDIA_MAX_SIZE=750m
- SEARCH_BACKEND_ENGINE=sonic # uncomment these if you enable sonic below
- SEARCH_BACKEND_HOST_NAME=sonic
- SEARCH_BACKEND_PASSWORD
volumes:
- ./data:/data
# - ./archivebox:/app/archivebox # for developers working on archivebox
# To run the Sonic full-text search backend, first download the config file to sonic.cfg
# curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
# after starting, backfill any existing Snapshots into the index: docker-compose run archivebox update --index-only
sonic:
image: valeriansaliou/sonic:v1.3.0
# expose:
# - 1491
environment:
- SEARCH_BACKEND_PASSWORD
volumes:
- ./sonic.cfg:/etc/sonic.cfg:ro
- ./data/sonic:/var/lib/sonic/store
### Optional Addons: tweak these examples as needed for your specific use case
# Example: Run scheduled imports in a docker instead of using cron on the
# host machine, add tasks and see more info with archivebox schedule --help
# scheduler:
# image: archivebox/archivebox:latest
# command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNAME/feed/all'
# environment:
# - USE_COLOR=True
# - SHOW_PROGRESS=False
# volumes:
# - ./data:/data
# Example: Put Nginx in front of the ArchiveBox server for SSL termination
# nginx:
# image: nginx:alpine
# ports:
# - 443:443
# - 80:80
# volumes:
# - ./etc/nginx/nginx.conf:/etc/nginx/nginx.conf
# - ./data:/var/www
# Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
# wireguard:
# image: linuxserver/wireguard
# network_mode: 'service:archivebox'
# cap_add:
# - NET_ADMIN
# - SYS_MODULE
# sysctls:
# - net.ipv4.conf.all.rp_filter=2
# - net.ipv4.conf.all.src_valid_mark=1
# volumes:
# - /lib/modules:/lib/modules
# - ./wireguard.conf:/config/wg0.conf:ro
# Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
# pywb:
# image: webrecorder/pywb:latest
# entrypoint: /bin/sh 'wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback --proxy;'
# environment:
# - INIT_COLLECTION=archivebox
# ports:
# - 8080:8080
# volumes:
# ./data:/archivebox
# ./data/wayback:/webarchive

66
sonic.cfg Normal file
View File

@ -0,0 +1,66 @@
# Sonic
# Fast, lightweight and schema-less search backend
# Configuration file
# Example: https://github.com/valeriansaliou/sonic/blob/master/config.cfg
[server]
log_level = "warn"
[channel]
inet = "0.0.0.0:1491"
tcp_timeout = 300
auth_password = "${env.SEARCH_BACKEND_PASSWORD}"
[channel.search]
query_limit_default = 65535
query_limit_maximum = 65535
query_alternates_try = 10
suggest_limit_default = 5
suggest_limit_maximum = 20
[store]
[store.kv]
path = "/var/lib/sonic/store/kv/"
retain_word_objects = 100000
[store.kv.pool]
inactive_after = 1800
[store.kv.database]
flush_after = 900
compress = true
parallelism = 2
max_files = 100
max_compactions = 1
max_flushes = 1
write_buffer = 16384
write_ahead_log = true
[store.fst]
path = "/var/lib/sonic/store/fst/"
[store.fst.pool]
inactive_after = 300
[store.fst.graph]
consolidate_after = 180
max_size = 2048
max_words = 250000