Initial commit
This commit is contained in:
167
cleanup_and_deploy.py
Normal file
167
cleanup_and_deploy.py
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Full cleanup and deploy of the wget scrape:
1. Delete WordPress ?p=..., ?replytocom=... and wp-json leftovers
2. Rename assets: strip ?ver=... from the file name
3. Update the HTML/CSS references to the renamed assets
4. Rewrite absolute site URLs to relative paths
5. Sync to /var/www/correctvloerverwarming2/
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
# Root of the local wget mirror; the cleanup steps walk this tree and modify it in place.
SRC_DIR = Path("/home/anisy/projects/websites/correctvloerverwarming/correctvloerverwarming.nl")
|
||||
# Webroot that sync_to_webroot() removes and recreates as a copy of SRC_DIR.
DST_DIR = Path("/var/www/correctvloerverwarming2")
|
||||
|
||||
# ─── Stap 1: Verwijder WordPress preview rommel ────────────────────────────────
|
||||
def delete_wp_junk(src_dir=None):
    """Delete WordPress leftovers from the scraped tree.

    An entry is junk when its own name (not its full path) contains one of:
    ``?p=<digits>`` (post previews), ``?replytocom=`` (comment-reply pages)
    or ``wp-json`` (the JSON API, useless for a static site). Junk files are
    unlinked and junk directories are removed recursively. The number of
    removed entries is printed.

    Args:
        src_dir: Root of the tree to clean. Defaults to the module-level
            ``SRC_DIR`` when omitted.
    """
    root = SRC_DIR if src_dir is None else Path(src_dir)
    # One compiled alternation instead of three separate searches per entry.
    junk_re = re.compile(r"\?p=\d+|\?replytocom=|wp-json")
    deleted = 0

    # Snapshot the listing first: the tree is mutated while we walk it.
    for path in list(root.rglob("*")):
        if not junk_re.search(path.name):
            continue
        if path.is_symlink() or path.is_file():
            # Symlinks are unlinked, never followed: shutil.rmtree refuses
            # to operate on a symlink and would abort the whole run.
            path.unlink()
        elif path.is_dir():
            shutil.rmtree(path)
        else:
            # Already gone, e.g. it lived inside a junk directory removed above.
            continue
        deleted += 1

    print(f" Verwijderd: {deleted} WordPress junk bestanden/folders")
|
||||
|
||||
# ─── Stap 2: Hernoem assets (verwijder ?ver=... / versie suffixen) ─────────────
|
||||
def rename_versioned_assets(src_dir=None):
    """Strip the ``?...`` query suffix from the file names of scraped assets.

    Only files whose name contains ``?`` and whose part before the first
    ``?`` ends in a known asset extension are touched; pages such as
    ``index.html?p=3`` are left alone. If the clean name already exists in
    the same directory, the versioned copy is treated as a duplicate and
    deleted; otherwise the file is renamed. The number of files actually
    renamed (duplicates excluded) is printed.

    Args:
        src_dir: Root of the tree to process. Defaults to the module-level
            ``SRC_DIR`` when omitted.
    """
    root = SRC_DIR if src_dir is None else Path(src_dir)
    asset_exts = frozenset({
        '.js', '.css', '.woff', '.woff2', '.ttf', '.eot', '.svg', '.png',
        '.jpg', '.jpeg', '.webp', '.gif', '.ico',
    })
    renamed = 0

    # Snapshot the listing first: files are renamed/deleted while we walk.
    for path in list(root.rglob("*")):
        if not path.is_file() or '?' not in path.name:
            continue

        # Keep only the part before the first '?'.
        clean_name = path.name.partition('?')[0]
        if Path(clean_name).suffix.lower() not in asset_exts:
            continue

        target = path.parent / clean_name
        if target.exists():
            # Same asset already present under its clean name: drop the copy.
            path.unlink()
        else:
            path.rename(target)
            renamed += 1

    print(f" Hernoemd: {renamed} asset bestanden")
|
||||
|
||||
# ─── Stap 3: Fix referenties in HTML en CSS ────────────────────────────────────
|
||||
def _strip_asset_queries(paths, pattern, label):
    """Apply *pattern* to every file in *paths*; return how many files changed.

    Each file is read as UTF-8 (undecodable bytes are dropped), every match of
    *pattern* is replaced by its first group, and the file is written back
    only when its text changed. I/O errors are reported per file using
    *label* and do not stop the run.
    """
    changed = 0
    for path in paths:
        try:
            original = path.read_text(encoding="utf-8", errors="ignore")
            updated = pattern.sub(r"\1", original)
            if updated != original:
                path.write_text(updated, encoding="utf-8")
                changed += 1
        except OSError as e:
            print(f" Fout {label} {path.name}: {e}")
    return changed


def fix_references(src_dir=None):
    """Remove cache-busting query strings from asset references in HTML and CSS.

    In every ``*.html`` and ``*.css`` file under the root, a reference such as
    ``style.css?ver=5.8`` or ``style.css%3Fver=5.8`` is reduced to
    ``style.css`` so that it points at the asset files renamed by
    ``rename_versioned_assets``. The number of rewritten files per type is
    printed.

    Args:
        src_dir: Root of the tree to process. Defaults to the module-level
            ``SRC_DIR`` when omitted.
    """
    root = SRC_DIR if src_dir is None else Path(src_dir)
    asset_ext = r"(\.(?:js|css|woff2?|ttf|eot|svg|png|jpe?g|webp|gif|ico))"

    # The query runs up to the closing quote / bracket / whitespace. '&' is
    # deliberately NOT a terminator: stopping there would leave the tail of a
    # multi-parameter query ("style.css&#038;x=2") glued to the file name.
    html_re = re.compile(asset_ext + r"(?:%3F|\?)[^\"'\s>)]+", re.IGNORECASE)
    # CSS accepts the percent-encoded '?' as well, consistent with HTML;
    # ';' additionally ends an unquoted reference.
    css_re = re.compile(asset_ext + r"(?:%3F|\?)[^\"')\s;]+", re.IGNORECASE)

    html_count = _strip_asset_queries(root.rglob("*.html"), html_re, "HTML")
    css_count = _strip_asset_queries(root.rglob("*.css"), css_re, "CSS")

    print(f" HTML bijgewerkt: {html_count} bestanden")
    print(f" CSS bijgewerkt: {css_count} bestanden")
|
||||
|
||||
# ─── Stap 4: Fix absolute URLs → relatieve paden ──────────────────────────────
|
||||
def fix_absolute_urls(src_dir=None):
    """Rewrite absolute links to the scraped site into relative paths.

    In every ``*.html`` file under the root, ``href``, ``src`` and ``action``
    attributes (single- or double-quoted) that start with one of the site's
    http/https, www/non-www origins followed by ``/`` have that origin
    replaced by a relative prefix: ``./`` for files directly in the root,
    otherwise one ``../`` per directory level below the root. Files are only
    written back when something changed; the number of changed files is
    printed.

    Args:
        src_dir: Root of the tree to process. Defaults to the module-level
            ``SRC_DIR`` when omitted.
    """
    root = SRC_DIR if src_dir is None else Path(src_dir)
    domains = [
        "https://www.correctvloerverwarming.nl",
        "http://www.correctvloerverwarming.nl",
        "https://correctvloerverwarming.nl",
        "http://correctvloerverwarming.nl",
    ]
    # One pass per file instead of one str.replace per attribute/quote/domain.
    # Group 1 = attribute name, group 2 = the opening quote character.
    domain_alt = "|".join(re.escape(domain) for domain in domains)
    link_re = re.compile(rf"""(href|src|action)=(["'])(?:{domain_alt})/""")

    count = 0
    for html_path in root.rglob("*.html"):
        try:
            original = html_path.read_text(encoding="utf-8", errors="ignore")
            # Number of directories between the root and this file.
            depth = len(html_path.relative_to(root).parts) - 1
            prefix = "../" * depth if depth > 0 else "./"
            # prefix consists of '.' and '/' only, so it is safe to embed
            # in a regex replacement template.
            updated = link_re.sub(rf"\g<1>=\g<2>{prefix}", original)
            if updated != original:
                html_path.write_text(updated, encoding="utf-8")
                count += 1
        except OSError as e:
            print(f" Fout URL fix {html_path.name}: {e}")

    print(f" Absolute URLs gefixed: {count} HTML bestanden")
|
||||
|
||||
# ─── Stap 5: Sync naar webroot ─────────────────────────────────────────────────
|
||||
def sync_to_webroot(src_dir=None, dst_dir=None):
    """Replace the webroot with a fresh copy of the cleaned scrape.

    An existing destination is deleted, the source tree is copied over, and
    ``chown -R www-data:www-data`` plus ``chmod -R 755`` are run on the
    result. A failing chown/chmod is reported but does not abort the sync.
    Finally the number of copied files is printed.

    NOTE: the old webroot is removed before the copy starts, so the
    destination does not exist while the copy runs, and a failed copy leaves
    it missing or incomplete.

    Args:
        src_dir: Tree to publish. Defaults to the module-level ``SRC_DIR``.
        dst_dir: Webroot to (re)create. Defaults to the module-level
            ``DST_DIR``.
    """
    # Local import keeps this block self-contained; subprocess is stdlib.
    import subprocess

    source = SRC_DIR if src_dir is None else Path(src_dir)
    target = DST_DIR if dst_dir is None else Path(dst_dir)

    if target.exists():
        shutil.rmtree(target)
    shutil.copytree(source, target)

    # Set ownership and permissions. Argument lists (no shell) keep paths
    # with spaces or shell metacharacters intact, and the exit status is
    # checked instead of being silently discarded.
    permission_cmds = (
        ["chown", "-R", "www-data:www-data", str(target)],
        ["chmod", "-R", "755", str(target)],
    )
    for cmd in permission_cmds:
        try:
            exit_code = subprocess.run(cmd, check=False).returncode
        except OSError as e:
            print(f" Waarschuwing: {cmd[0]} kon niet worden gestart: {e}")
            continue
        if exit_code != 0:
            print(f" Waarschuwing: {cmd[0]} mislukt (exit code {exit_code})")

    total = sum(1 for entry in target.rglob("*") if entry.is_file())
    print(f" {total} bestanden gesynchroniseerd naar {target}")
|
||||
|
||||
# ─── Hoofdprogramma ────────────────────────────────────────────────────────────
|
||||
if __name__ == "__main__":
    # Pipeline in execution order: (progress banner, step to run).
    # The order matters: junk is removed and assets are renamed before the
    # references are rewritten, and the sync to the webroot comes last.
    pipeline = (
        ("\n[1/5] WordPress junk verwijderen...", delete_wp_junk),
        ("\n[2/5] Asset bestanden hernoemen...", rename_versioned_assets),
        ("\n[3/5] Asset referenties in HTML/CSS fixen...", fix_references),
        ("\n[4/5] Absolute URLs → relatieve paden...", fix_absolute_urls),
        ("\n[5/5] Sync naar webroot...", sync_to_webroot),
    )
    for banner, run_step in pipeline:
        print(banner)
        run_step()

    print("\nKlaar! Site bereikbaar op https://correctvloerverwarming2.youztech.nl")
|
||||
Reference in New Issue
Block a user