Initial commit

This commit is contained in:
Ubuntu
2026-03-01 13:23:48 +00:00
commit b66c738e0b
260 changed files with 58871 additions and 0 deletions

167
cleanup_and_deploy.py Normal file
View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
"""
Full cleanup and deploy of the wget scrape:
1. Delete WordPress ?p=..., ?replytocom=... and wp-json junk
2. Rename assets: strip ?ver=... from the file name
3. Update HTML/CSS references to the renamed assets
4. Rewrite absolute site URLs to relative paths
5. Sync to /var/www/correctvloerverwarming2/
"""
import os
import re
import shutil
from pathlib import Path
# Root of the wget mirror; the cleanup steps below modify this tree in place.
SRC_DIR = Path("/home/anisy/projects/websites/correctvloerverwarming/correctvloerverwarming.nl")
# Webroot that sync_to_webroot() deletes and replaces with a copy of SRC_DIR.
DST_DIR = Path("/var/www/correctvloerverwarming2")
# ─── Stap 1: Verwijder WordPress preview rommel ────────────────────────────────
def delete_wp_junk(src_dir=None):
    """Delete WordPress leftovers from the scraped tree.

    Every file or directory whose *name* matches one of the junk patterns
    (``?p=<digits>`` post previews, ``?replytocom=`` comment-reply pages,
    anything containing ``wp-json``) is removed.

    Args:
        src_dir: Root of the tree to clean. Defaults to the module-level
            ``SRC_DIR`` when omitted.

    Returns:
        The number of files/directories that were removed.
    """
    root = SRC_DIR if src_dir is None else Path(src_dir)
    junk = re.compile(
        r"\?p=\d+"          # WordPress post previews
        r"|\?replytocom="   # comment reply pages
        r"|wp-json"         # WordPress JSON API (useless for a static site)
    )

    deleted = 0
    # Snapshot the listing first: the tree is mutated while we walk it.
    for entry in list(root.rglob("*")):
        if not junk.search(entry.name):
            continue
        if entry.is_symlink() or entry.is_file():
            # Symlinks are unlinked, never followed: shutil.rmtree() refuses
            # to operate on a symlink and would abort the whole run.
            entry.unlink()
            deleted += 1
        elif entry.is_dir():
            shutil.rmtree(entry)
            deleted += 1
        # Entries inside an already-removed directory no longer exist and
        # fall through both checks, so they are not counted twice.
    print(f" Verwijderd: {deleted} WordPress junk bestanden/folders")
    return deleted
# ─── Stap 2: Hernoem assets (verwijder ?ver=... / versie suffixen) ─────────────
def rename_versioned_assets(src_dir=None):
    """Strip the ``?query`` suffix from scraped asset file names.

    A file such as ``style.css?ver=6.4`` is renamed to ``style.css``. Only
    files whose clean name ends in a known asset extension are touched, so
    pages like ``index.html?p=3`` are left alone. When the clean name already
    exists, the versioned copy is treated as a duplicate and deleted.

    Args:
        src_dir: Root of the tree to process. Defaults to the module-level
            ``SRC_DIR`` when omitted.

    Returns:
        The number of files actually renamed (deleted duplicates are not
        counted).
    """
    root = SRC_DIR if src_dir is None else Path(src_dir)
    asset_exts = frozenset({
        '.js', '.css', '.woff', '.woff2', '.ttf', '.eot', '.svg', '.png',
        '.jpg', '.jpeg', '.webp', '.gif', '.ico',
    })

    renamed = 0
    # Snapshot the listing first: files are renamed/removed during the walk.
    for path in list(root.rglob("*")):
        if not path.is_file():
            continue
        # Everything before the first '?' is the clean file name.
        clean_name, sep, _query = path.name.partition("?")
        if not sep:
            continue
        # Only rename assets, not HTML pages carrying ?p=... and the like.
        if Path(clean_name).suffix.lower() not in asset_exts:
            continue
        target = path.with_name(clean_name)
        if target.exists():
            path.unlink()  # duplicate: drop the versioned copy
        else:
            path.rename(target)
            renamed += 1
    print(f" Hernoemd: {renamed} asset bestanden")
    return renamed
# ─── Stap 3: Fix referenties in HTML en CSS ────────────────────────────────────
def fix_references(src_dir=None):
    """Strip query strings from asset references in HTML and CSS files.

    References such as ``style.css?ver=6.4`` (or the link-converted form
    ``style.css%3Fver=6.4``) are rewritten to ``style.css`` so they point at
    the renamed files on disk. Files are only rewritten when their content
    actually changes.

    Args:
        src_dir: Root of the tree to process. Defaults to the module-level
            ``SRC_DIR`` when omitted.

    Returns:
        A ``(html_count, css_count)`` tuple with the number of rewritten
        files of each kind.
    """
    root = SRC_DIR if src_dir is None else Path(src_dir)
    asset_suffix = r"(\.(?:js|css|woff2?|ttf|eot|svg|png|jpg|jpeg|webp|gif|ico))"

    # HTML: the query runs up to the closing quote, whitespace or bracket.
    # '&' is deliberately part of the query so "?ver=1&#038;x=2" is removed
    # as a whole instead of leaving "&#038;x=2" glued to the file name, and
    # '<' ends it so prose such as "logo.png?</p>" cannot swallow a tag.
    html_pattern = re.compile(
        asset_suffix + r'(?:%3F|\?)[^"\'\s<>)]+', re.IGNORECASE
    )
    # CSS: the query ends at a quote, ')' of url(...), whitespace or ';'.
    css_pattern = re.compile(
        asset_suffix + r'\?[^"\')\s;]+', re.IGNORECASE
    )

    html_count = _strip_asset_queries(root, "*.html", html_pattern, "HTML")
    css_count = _strip_asset_queries(root, "*.css", css_pattern, "CSS")
    print(f" HTML bijgewerkt: {html_count} bestanden")
    print(f" CSS bijgewerkt: {css_count} bestanden")
    return html_count, css_count


def _strip_asset_queries(root, glob, pattern, label):
    """Apply *pattern* (keeping group 1) to each file matching *glob*; return the rewrite count."""
    updated = 0
    for path in root.rglob(glob):
        if not path.is_file():
            continue  # e.g. a directory that happens to be named "x.html"
        try:
            # surrogateescape round-trips bytes that are not valid UTF-8, so
            # a legacy-encoded page is not silently truncated on write-back.
            original = path.read_text(encoding="utf-8", errors="surrogateescape")
            content = pattern.sub(r"\1", original)
            if content != original:
                path.write_text(content, encoding="utf-8", errors="surrogateescape")
                updated += 1
        except OSError as e:
            # Best effort: report the file and carry on with the rest.
            print(f" Fout {label} {path.name}: {e}")
    return updated
# ─── Stap 4: Fix absolute URLs → relatieve paden ──────────────────────────────
def fix_absolute_urls(src_dir=None):
    """Rewrite absolute links to the scraped site into relative paths.

    In every ``*.html`` file, ``href``/``src``/``action`` attributes (single
    or double quoted) that start with one of the site's own origins are
    rewritten to a path relative to that file: ``./`` for files in the root,
    one ``../`` per directory level otherwise. A bare origin without a
    trailing slash (``href="https://correctvloerverwarming.nl"``) is treated
    as a link to the site root.

    Args:
        src_dir: Root of the tree to process. Defaults to the module-level
            ``SRC_DIR`` when omitted.

    Returns:
        The number of HTML files that were rewritten.
    """
    root = SRC_DIR if src_dir is None else Path(src_dir)
    domains = [
        "https://www.correctvloerverwarming.nl",
        "http://www.correctvloerverwarming.nl",
        "https://correctvloerverwarming.nl",
        "http://correctvloerverwarming.nl",
    ]
    # group 1 = attribute name + '=' + opening quote, group 2 = the quote.
    # The origin must be followed by '/' or directly by the closing quote, so
    # look-alike hosts ("correctvloerverwarming.nl.example.com") never match.
    absolute = re.compile(
        r"""((?:href|src|action)=(["']))(?:"""
        + "|".join(re.escape(domain) for domain in domains)
        + r""")(?:/|(?=\2))"""
    )

    count = 0
    for html_path in root.rglob("*.html"):
        if not html_path.is_file():
            continue  # e.g. a directory that happens to be named "x.html"
        try:
            # surrogateescape round-trips bytes that are not valid UTF-8, so
            # a legacy-encoded page is not silently truncated on write-back.
            original = html_path.read_text(encoding="utf-8", errors="surrogateescape")
            depth = len(html_path.relative_to(root).parts) - 1
            prefix = "../" * depth if depth > 0 else "./"
            # prefix only contains '.' and '/', so it is safe in a template.
            content = absolute.sub(r"\g<1>" + prefix, original)
            if content != original:
                html_path.write_text(content, encoding="utf-8", errors="surrogateescape")
                count += 1
        except OSError as e:
            # Best effort: report the file and carry on with the rest.
            print(f" Fout URL fix {html_path.name}: {e}")
    print(f" Absolute URLs gefixed: {count} HTML bestanden")
    return count
# ─── Stap 5: Sync naar webroot ─────────────────────────────────────────────────
def sync_to_webroot(src_dir=None, dst_dir=None):
    """Replace the webroot with a fresh copy of the cleaned source tree.

    The destination is removed and re-created from the source, then
    ownership is set to ``www-data:www-data`` and the mode to ``755``
    (recursively). Permission changes are best effort: a failure is reported
    but does not abort the deploy.

    Args:
        src_dir: Tree to publish. Defaults to the module-level ``SRC_DIR``.
        dst_dir: Webroot to replace. Defaults to the module-level ``DST_DIR``.

    Returns:
        The number of files present in the destination after the copy.

    Raises:
        FileNotFoundError: If the source directory does not exist. This is
            checked *before* the destination is touched, so a bad source
            path can no longer wipe the live site.
        ValueError: If source and destination are the same directory.
    """
    import subprocess  # local import: only this step needs it

    src = SRC_DIR if src_dir is None else Path(src_dir)
    dst = DST_DIR if dst_dir is None else Path(dst_dir)

    if not src.is_dir():
        raise FileNotFoundError(f"Bronmap bestaat niet: {src}")
    if src.resolve() == dst.resolve():
        raise ValueError(f"Bron en doel zijn dezelfde map: {src}")

    if dst.exists():
        shutil.rmtree(dst)
    shutil.copytree(src, dst)

    # Set ownership and permissions. Argument lists (no shell) keep paths
    # with spaces or shell metacharacters from being misinterpreted.
    commands = (
        ["chown", "-R", "www-data:www-data", str(dst)],
        ["chmod", "-R", "755", str(dst)],
    )
    for command in commands:
        try:
            result = subprocess.run(command, check=False)
        except OSError as e:
            print(f" Waarschuwing: {command[0]} kon niet worden uitgevoerd: {e}")
            continue
        if result.returncode != 0:
            print(f" Waarschuwing: {command[0]} gaf exit code {result.returncode}")

    total = sum(1 for path in dst.rglob("*") if path.is_file())
    print(f" {total} bestanden gesynchroniseerd naar {dst}")
    return total
# ─── Hoofdprogramma ────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Pipeline in execution order: (progress banner, step to run).
    pipeline = (
        ("\n[1/5] WordPress junk verwijderen...", delete_wp_junk),
        ("\n[2/5] Asset bestanden hernoemen...", rename_versioned_assets),
        ("\n[3/5] Asset referenties in HTML/CSS fixen...", fix_references),
        ("\n[4/5] Absolute URLs → relatieve paden...", fix_absolute_urls),
        ("\n[5/5] Sync naar webroot...", sync_to_webroot),
    )
    for banner, step in pipeline:
        print(banner)
        step()
    print("\nKlaar! Site bereikbaar op https://correctvloerverwarming2.youztech.nl")