Added a parallel.py
rodzic
2dc0432529
commit
8c87d47526
|
@ -0,0 +1,3 @@
|
|||
name: Dependency Funding Tool
|
||||
dependencies:
|
||||
"https://codeberg.org/developers/maintain-website-tool/raw/branch/main/dependencies.yaml" : 0.21
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
|
||||
|
||||
def main():
    """Entry point: build the CLI parser and dispatch to the chosen subcommand."""
    parser = argparse.ArgumentParser(prog="dependency-funding-tool")
    # Default action: with no subcommand selected, just print usage.
    # BUG FIX: was `parser.set-defaults(...)`, a SyntaxError.
    parser.set_defaults(func=lambda args: parser.print_usage())

    subparsers = parser.add_subparsers()

    # NOTE(review): `trace` and `crawl` are not imported in this file —
    # presumably `from .trace import trace` / `from .crawl import crawl`
    # are missing; confirm against the package layout.
    trace(subparsers)
    crawl(subparsers)

    # BUG FIX: was `parser.parser_args()`, an AttributeError.
    args = parser.parse_args()
    args.func(args)
|
||||
|
||||
# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,13 @@
|
|||
def crawl_urls(locations, fn, processes=6):
    """Crawl *locations* in parallel with *processes* worker processes.

    *fn* is applied to each location and is assumed to return
    ``(value, discovered_locations)`` — TODO confirm this contract against
    the intended callers; the original worker body was an unfinished stub.
    Newly discovered locations are queued until the frontier is empty.

    Returns a plain dict mapping each crawled location to its value.

    BUG FIXES vs. original: `processes` was undefined (now a parameter),
    `multiprocessing` was never imported, the nested worker function could
    not be pickled for `Pool.map`, and a manager-proxy dict was returned
    after the manager shut down (proxies are invalid outside the context).
    """
    import multiprocessing  # local import: this module has no top-level imports

    with multiprocessing.Manager() as manager:
        # Shared work queue (dict used as a set) and shared result store.
        unchecked = manager.dict({location: None for location in locations})
        result = manager.dict()

        with multiprocessing.Pool(processes) as pool:
            # Module-level worker: nested functions cannot be pickled.
            pool.map(_crawl_worker, [(unchecked, result, fn) for _ in range(processes)])

        # Materialize before the manager exits — proxies die with the context.
        return dict(result)


def _crawl_worker(args):
    """Pool worker: pop pending locations, apply fn, queue discoveries."""
    unchecked, result, fn = args
    while unchecked:
        try:
            key, _ = unchecked.popitem()
        except KeyError:
            # Another worker drained the queue between the check and the pop.
            break
        try:
            value, discovered = fn(key)
        except Exception:
            # Best-effort crawl: a failing location is skipped, not fatal.
            continue
        result[key] = value
        for new_key in discovered:
            if new_key not in result:
                unchecked[new_key] = None
|
|
@ -0,0 +1,29 @@
|
|||
"""
|
||||
Execute a process in parallel where individual runs
|
||||
may spawn more runs.
|
||||
"""
|
||||
|
||||
def parallel(fn, start, processes=6):
    """Run *fn* over the keys in *start* with a pool of worker processes.

    ``fn(key, results=..., unchecked=...)`` must return ``(value, new_keys)``;
    new keys are added to the shared work queue until it drains, so
    individual runs may spawn more runs. *fn* must be picklable (a
    module-level function) because it is shipped to pool workers.

    Returns a plain ``dict`` of key -> value.

    BUG FIX vs. original: `multiprocessing` was never imported anywhere in
    this module.
    """
    import multiprocessing  # local import: this module has no top-level imports

    with multiprocessing.Manager() as manager:
        results = manager.dict()
        # Dict used as a set: keys are the pending work items.
        unchecked = manager.dict({key: None for key in start})

        with multiprocessing.Pool(processes) as pool:
            pool.map(execute_process, [(results, unchecked, fn) for _ in range(processes)])

        # Copy before the manager shuts down — proxies are invalid afterwards.
        return dict(results)
|
||||
|
||||
def execute_process(args):
    """Pool worker: drain the shared *unchecked* queue, recording results.

    *args* is a ``(results, unchecked, fn)`` tuple (``Pool.map`` passes a
    single object per task). ``fn(key, results=..., unchecked=...)`` must
    return ``(value, new_keys)``; unseen keys from *new_keys* are queued
    for another pass.

    BUG FIX vs. original: `unchecked.popitem()` could raise KeyError when
    another worker emptied the queue between the loop test and the pop —
    now handled explicitly.
    """
    results, unchecked, fn = args

    while unchecked:
        try:
            key, _ = unchecked.popitem()
        except KeyError:
            # Lost the race: another worker drained the queue first.
            break

        try:
            results[key], new_keys = fn(key, results=results, unchecked=unchecked)

            for new_key in new_keys:
                if new_key not in results:
                    unchecked[new_key] = None
        except Exception:
            # Best-effort: a key whose fn call fails is skipped, not fatal.
            continue
|
|
@ -0,0 +1,41 @@
|
|||
from .io import load_cfg, load_index
|
||||
from .parallel import parallel
|
||||
import requests
|
||||
|
||||
def trace(args):
    """Handle the `trace` subcommand: fetch projects, then score dependencies.

    NOTE(review): `load_start_projects` is neither imported nor defined in
    this module — confirm where it should come from. Also, `dependencies`
    is recomputed and discarded on every loop iteration; presumably the
    results should be collected or reported — verify intended behavior.
    """
    # cfg/index are loaded but not used below — TODO confirm they are needed.
    cfg = load_cfg(args.config)
    index = load_index(args.index)

    start_projects = load_start_projects(args.start_projects)

    # Fetch every reachable project in parallel, seeded with the start set.
    projects = parallel(fetch_project, start_projects, processes=args.processes)

    for project in start_projects:
        dependencies = dependencies_between(project, projects, processes=args.processes)
|
||||
|
||||
def dependencies_between(start_project, projects, processes=6):
    """Score how strongly *start_project* depends on each project in *projects*.

    For each project, averages ``product(path)`` over every dependency path
    found between the two projects. Returns ``{project: score}``.

    BUG FIXES vs. original: the mapping was built but never returned, and
    an empty path list (which `paths_between` yields when the two projects
    are equal) caused a ZeroDivisionError.
    """
    dependencies = {}
    for project in projects:
        paths = paths_between(start_project, project, projects, processes=processes)

        if paths:
            dependencies[project] = sum(product(path) for path in paths) / len(paths)
        else:
            # No connecting path: score 0.0 instead of dividing by zero.
            # NOTE(review): confirm 0.0 is the intended score for start==end.
            dependencies[project] = 0.0
    return dependencies
|
||||
|
||||
def paths_between(start, end, projects, processes = 6):
    """Enumerate dependency paths from *start* to *end*.

    NOTE(review): this looks unfinished — `paths`, `projects` and
    `processes` are unused, and the two branches return different types
    (an empty list vs. a one-element tuple ``(start,)``). Callers that
    expect a list of paths will misbehave on the tuple branch; the
    intended path-search algorithm needs to be confirmed and implemented.
    """
    paths = []

    if start == end:
        return paths
    else:
        return start,
|
||||
|
||||
|
||||
def product(xs):
    """Return the product of every element of *xs* (1 for an empty iterable)."""
    accumulator = 1
    for factor in xs:
        accumulator = accumulator * factor
    return accumulator
|
||||
|
||||
def fetch_project(url):
    """Fetch a project's metadata document from *url*.

    Returns ``(project, dependency_urls)`` so `parallel` can record the
    project and queue its dependency URLs for fetching. Assumes the JSON
    body has a "dependencies" mapping keyed by URL — TODO confirm schema.

    BUG FIX vs. original: `requests.get` has no default timeout and could
    hang a worker forever; a timeout is now set.
    """
    project = requests.get(url, timeout=30).json()

    return project, project["dependencies"].keys()
|
Ładowanie…
Reference in New Issue