ported vk scraper logic into lib

This commit is contained in:
Miguel Ramalho
2022-06-17 19:15:20 +02:00
parent b3c7ac8e5d
commit 965057619f
32 changed files with 1346 additions and 610 deletions

View File

@@ -1,175 +0,0 @@
"""
Run this script once after first creating your project from this template repo to personalize
it for your own project.
This script is interactive and will prompt you for various inputs.
"""
from pathlib import Path
from typing import Generator, List, Tuple
import click
from click_help_colors import HelpColorsCommand
from rich import print
from rich.markdown import Markdown
from rich.prompt import Confirm
from rich.syntax import Syntax
from rich.traceback import install
# Enable rich tracebacks (including local variables) and hide click's own frames.
install(show_locals=True, suppress=[click])

# Repository root: the parent of the directory that contains this script.
REPO_BASE = (Path(__file__).parent / "..").resolve()

# Template-only files that main() deletes during personalization.
FILES_TO_REMOVE = {
    REPO_BASE / ".github" / "workflows" / "setup.yml",
    REPO_BASE / "setup-requirements.txt",
    REPO_BASE / "scripts" / "personalize.py",
}

# Paths that iterfiles() skips outright (neither yielded nor descended into).
PATHS_TO_IGNORE = {
    REPO_BASE / "README.md",
    REPO_BASE / ".git",
    REPO_BASE / "docs" / "source" / "_static" / "favicon.ico",
}

# Non-blank, non-comment entries of the repository's .gitignore, stripped of
# surrounding whitespace. The file is read inside a `with` block so the handle
# is closed deterministically instead of being left for the garbage collector.
with (REPO_BASE / ".gitignore").open() as _gitignore_file:
    GITIGNORE_LIST = [
        line.strip()
        for line in _gitignore_file
        if line.strip() and not line.startswith("#")
    ]

# Placeholder strings from the template that main() swaps for project-specific values.
REPO_NAME_TO_REPLACE = "python-package-template"
BASE_URL_TO_REPLACE = "https://github.com/allenai/python-package-template"
# Interactive entry point: confirms the chosen names, removes template-only
# files, rewrites placeholder strings across the repository, renames the
# package directory and resets the README. With --dry-run every step is only
# printed. (Kept as a comment rather than a docstring so that the generated
# --help output is unchanged.)
@click.command(
    cls=HelpColorsCommand,
    help_options_color="green",
    help_headers_color="yellow",
    context_settings={"max_content_width": 115},
)
@click.option(
    "--github-org",
    prompt="GitHub organization or user (e.g. 'allenai')",
    help="The name of your GitHub organization or user.",
)
@click.option(
    "--github-repo",
    prompt="GitHub repository (e.g. 'python-package-template')",
    help="The name of your GitHub repository.",
)
@click.option(
    "--package-name",
    prompt="Python package name (e.g. 'my-package')",
    help="The name of your Python package.",
)
@click.option(
    "-y",
    "--yes",
    is_flag=True,
    help="Run the script without prompting for a confirmation.",
    default=False,
)
@click.option(
    "--dry-run",
    is_flag=True,
    hidden=True,
    default=False,
)
def main(
    github_org: str, github_repo: str, package_name: str, yes: bool = False, dry_run: bool = False
):
    # Derive both spellings of the package name: dashes for the distribution
    # name, underscores for the importable directory.
    package_dir_name = package_name.replace("-", "_")
    package_actual_name = package_name.replace("_", "-")
    repo_url = f"https://github.com/{github_org}/{github_repo}"

    # Show the derived values and ask for confirmation unless --yes was given.
    print(f"Repository URL set to: [link={repo_url}]{repo_url}[/]")
    print(f"Package name set to: [cyan]{package_actual_name}[/]")
    if not (yes or Confirm.ask("Is this correct?")):
        raise click.ClickException("Aborted, please run script again")

    # Drop the files that only make sense inside the template itself.
    for obsolete in FILES_TO_REMOVE:
        assert obsolete.is_file(), obsolete
        if dry_run:
            print(f"Removing {obsolete}")
        else:
            obsolete.unlink()

    # Substitute template placeholders in every remaining file. The full URL
    # is listed before the bare repository name it contains.
    replacements = [
        (BASE_URL_TO_REPLACE, repo_url),
        (REPO_NAME_TO_REPLACE, github_repo),
        ("my-package", package_actual_name),
        ("my_package", package_dir_name),
    ]
    if dry_run:
        for placeholder, value in replacements:
            print(f"Replacing '{placeholder}' with '{value}'")
    for candidate in iterfiles(REPO_BASE):
        personalize_file(candidate, dry_run, replacements)

    # Move the 'my_package' directory to its personalized name.
    if dry_run:
        print(f"Renaming 'my_package' directory to '{package_dir_name}'")
    else:
        (REPO_BASE / "my_package").replace(REPO_BASE / package_dir_name)

    # Overwrite the README with a single heading line.
    readme_contents = f"""# {package_actual_name}\n"""
    if dry_run:
        print("Replacing README.md contents with:\n", Markdown(readme_contents))
    else:
        with open(REPO_BASE / "README.md", "w+t") as readme_file:
            readme_file.write(readme_contents)

    print(
        "[green]\N{check mark} Success![/] You can now install your package locally in development mode with:\n",
        Syntax("pip install -e '.[dev]'", "bash"),
    )
def iterfiles(dir: Path) -> Generator[Path, None, None]:
    """Recursively yield the files under *dir* that should be personalized.

    Entries listed in ``PATHS_TO_IGNORE`` are skipped, as are entries whose
    path relative to ``REPO_BASE`` matches any pattern in ``GITIGNORE_LIST``.
    A skipped directory is not descended into.
    """
    assert dir.is_dir()
    for entry in dir.iterdir():
        if entry in PATHS_TO_IGNORE:
            continue
        # Stops at the first matching .gitignore pattern.
        if any(
            entry.relative_to(REPO_BASE).match(pattern) for pattern in GITIGNORE_LIST
        ):
            continue
        if entry.is_dir():
            yield from iterfiles(entry)
        else:
            yield entry
def personalize_file(path: Path, dry_run: bool, replacements: List[Tuple[str, str]]):
    """Apply each ``(old, new)`` replacement to the text of the file at *path*.

    Replacements are applied in order, each one to the result of the previous
    one. The file is rewritten only if at least one ``old`` string was found;
    when *dry_run* is true the pending update is printed instead of written.
    """
    with path.open("r+t") as handle:
        text = handle.read()

    touched = False
    for needle, substitute in replacements:
        if needle in text:
            touched = True
            text = text.replace(needle, substitute)

    if not touched:
        return
    if dry_run:
        print(f"Updating {path}")
        return
    with path.open("w+t") as handle:
        handle.write(text)
# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -1,7 +1,7 @@
from datetime import datetime
from pathlib import Path
from my_package.version import VERSION
from vk_url_scraper.version import VERSION
def main():
@@ -27,7 +27,7 @@ def main():
lines.insert(insert_index, "\n")
lines.insert(
insert_index + 1,
f"## [v{VERSION}](https://github.com/allenai/python-package-template/releases/tag/v{VERSION}) - "
f"## [v{VERSION}](https://github.com/bellingcat/vk-url-scraper/releases/tag/v{VERSION}) - "
f"{datetime.now().strftime('%Y-%m-%d')}\n",
)

View File

@@ -2,7 +2,7 @@
set -e
TAG=$(python -c 'from my_package.version import VERSION; print("v" + VERSION)')
TAG=$(python -c 'from vk_url_scraper.version import VERSION; print("v" + VERSION)')
read -p "Creating new release for $TAG. Do you want to continue? [Y/n] " prompt