Added a parallel.py

main
CSDUMMI 2022-04-30 16:16:31 +02:00
parent 2dc0432529
commit 8c87d47526
6 changed files with 104 additions and 0 deletions


@@ -0,0 +1,3 @@
name: Dependency Funding Tool
dependencies:
    "https://codeberg.org/developers/maintain-website-tool/raw/branch/main/dependencies.yaml": 0.21

requirements.txt 100644 (+0)

src/main.py 100644 (+18)

@@ -0,0 +1,18 @@
#!/usr/bin/env python3
import argparse

# trace() and crawl() are expected to register their subcommands on the
# parser; trace lives in src/trace.py, and crawl presumably belongs with
# crawl_urls in src/net.py (neither import is present in this commit).
def main():
    parser = argparse.ArgumentParser(prog="dependency-funding-tool")
    # Print usage when no subcommand is given.
    parser.set_defaults(func=lambda args: parser.print_usage())
    subparsers = parser.add_subparsers()
    trace(subparsers)
    crawl(subparsers)
    args = parser.parse_args()
    args.func(args)

if __name__ == "__main__":
    main()
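
For context, a subcommand registered this way would presumably look like the sketch below. It is hypothetical: trace() in src/trace.py currently takes the parsed args directly rather than the subparsers object, and run_trace is an assumed handler; the option names, however, mirror the attributes trace(args) reads.

def register_trace(subparsers):
    # Hypothetical registration helper; option names mirror what
    # trace(args) reads in src/trace.py.
    p = subparsers.add_parser("trace", help="trace dependency funding weights")
    p.add_argument("--config")
    p.add_argument("--index")
    p.add_argument("--start-projects")
    p.add_argument("--processes", type=int, default=6)
    p.set_defaults(func=run_trace)  # run_trace is a hypothetical handler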

src/net.py 100644 (+13)

@@ -0,0 +1,13 @@
import multiprocessing

def crawl_urls(locations, fn, processes=6):
    # Worker loop: meant to pull URLs from `unchecked`, apply fn, and
    # record results; left unfinished in this commit (see execute_process
    # in src/parallel.py for the completed version of this loop).
    def crawler(args):
        unchecked, result = args
    with multiprocessing.Manager() as manager:
        unchecked = manager.dict({location: None for location in locations})
        result = manager.dict()
        with multiprocessing.Pool(processes) as pool:
            # Note: pool.map cannot pickle a nested function, so this call
            # fails at runtime; parallel.py uses a module-level worker instead.
            pool.map(crawler, [(unchecked, result) for _ in range(processes)])
        return result
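
How crawl_urls is presumably meant to be called once finished (a sketch; fetch_page is hypothetical and follows the (result, further_urls) contract that execute_process in src/parallel.py expects):

import requests

def fetch_page(url):
    # Hypothetical fn: return the fetched body plus further URLs to crawl
    # (link extraction omitted in this sketch).
    return requests.get(url).text, []

pages = crawl_urls(["https://example.org"], fetch_page, processes=2)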

src/parallel.py 100644 (+29)

@@ -0,0 +1,29 @@
"""
Execute a process in parallel where individual runs
may spawn more runs.
"""
def parallel(fn, start, processes=6):
with multiprocessing.Manager() as manager:
results = manager.dict()
unchecked = manager.dict({key: None for key in start })
with multiprocessing.Pool(processes) as pool:
pool.map(execute_process, [(results, unchecked, fn) for _ in range(processes)])
return dict(results)
def execute_process(args):
results, unchecked, fn = args
while len(unchecked) != 0:
key, _ = unchecked.popitem()
try:
results[key], unchecked_ = fn(key, results=results, unchecked=unchecked)
for k in unchecked_:
if k not in results:
unchecked[k] = None
except Exception as e:
continue
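
A minimal usage sketch of the above, assuming parallel() is importable (the count_deps worker and URL are hypothetical; note that execute_process passes results= and unchecked= keyword arguments to fn, so the worker must accept them):

def count_deps(url, results=None, unchecked=None):
    # Hypothetical worker: each key maps to a value plus further keys to visit.
    children = [url + "/a", url + "/b"] if url.count("/") < 3 else []
    return len(children), children

if __name__ == "__main__":
    # {'https://example.org': 2, 'https://example.org/a': 0, 'https://example.org/b': 0}
    totals = parallel(count_deps, ["https://example.org"], processes=2)
    print(totals)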

src/trace.py 100644 (+41)

@@ -0,0 +1,41 @@
from .io import load_cfg, load_index
from .parallel import parallel
import requests

# load_start_projects is called below but neither defined nor imported in
# this commit; presumably it belongs in src/io.py alongside load_cfg/load_index.

def trace(args):
    cfg = load_cfg(args.config)
    index = load_index(args.index)
    start_projects = load_start_projects(args.start_projects)
    projects = parallel(fetch_project, start_projects, processes=args.processes)
    for project in start_projects:
        dependencies = dependencies_between(project, projects, processes=args.processes)

def dependencies_between(start_project, projects, processes=6):
    dependencies = {}
    for project in projects:
        paths = paths_between(start_project, project, projects, processes=processes)
        # Average, over all paths, of the product of the weights along each
        # path; note this divides by zero when no path exists.
        dependencies[project] = sum(product(path) for path in paths) / len(paths)
    return dependencies

def paths_between(start, end, projects, processes=6):
    # Stub: the actual path search is not implemented in this commit.
    paths = []
    if start == end:
        return paths
    else:
        return (start,)

def product(xs):
    value = 1
    for x in xs:
        value *= x
    return value

def fetch_project(url, results=None, unchecked=None):
    # execute_process in src/parallel.py calls fn with results= and
    # unchecked= keyword arguments; they are unused here but must be accepted.
    project = requests.get(url).json()
    return project, project["dependencies"].keys()
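
To illustrate the intended scoring (hypothetical numbers, since paths_between is still a stub): with two dependency paths from a start project to some project carrying edge weights (0.5, 0.21) and (0.3,), dependencies_between averages the per-path products:

paths = [(0.5, 0.21), (0.3,)]
score = sum(product(p) for p in paths) / len(paths)  # (0.105 + 0.3) / 2 = 0.2025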