Faster bulk import

pull/68/head
Konstantin Gründger 2017-10-06 21:38:08 +02:00
rodzic 728beeb334
commit ce76a6bb2e
1 zmienionych plików z 17 dodań i 15 usunięć

Wyświetl plik

@@ -1,4 +1,5 @@
import os import os
import re
from manager import Manager from manager import Manager
from ogn.commands.dbutils import session from ogn.commands.dbutils import session
@@ -9,18 +10,19 @@ from ogn.utils import open_file
manager = Manager() manager = Manager()
PATTERN = '^.+\.txt\_(\d{4}\-\d{2}\-\d{2})(\.gz)?$'
@manager.command @manager.command
def convert_logfile(path, logfile='main.log', loglevel='INFO'): def convert_logfile(path, logfile='main.log', loglevel='INFO'):
"""Convert ogn logfiles to csv logfiles (one for aircraft beacons and one for receiver beacons) <arg: path>. Logfile name: blablabla.txt_YYYY-MM-DD.""" """Convert ogn logfiles to csv logfiles (one for aircraft beacons and one for receiver beacons) <arg: path>. Logfile name: blablabla.txt_YYYY-MM-DD."""
if os.path.isfile(path): if os.path.isfile(path):
print("Reading file: {}".format(path)) head, tail = os.path.split(path)
convert(path) convert(tail, path=head)
print("Finished") print("Finished")
elif os.path.isdir(path): elif os.path.isdir(path):
for filename in os.listdir(path): for filename in os.listdir(path):
print("Reading file: {}".format(filename))
convert(filename, path=path) convert(filename, path=path)
print("Finished") print("Finished")
else: else:
@@ -28,15 +30,25 @@ def convert_logfile(path, logfile='main.log', loglevel='INFO'):
def convert(sourcefile, path=''): def convert(sourcefile, path=''):
import re
import csv import csv
import gzip import gzip
import datetime import datetime
match = re.search('^.+\.txt\_(\d{4}\-\d{2}\-\d{2})(\.gz)?$', sourcefile) match = re.search(PATTERN, sourcefile)
if match: if match:
reference_date_string = match.group(1) reference_date_string = match.group(1)
reference_date = datetime.datetime.strptime(reference_date_string, "%Y-%m-%d") reference_date = datetime.datetime.strptime(reference_date_string, "%Y-%m-%d")
aircraft_beacon_filename = os.path.join(path, 'aircraft_beacons.csv_' + reference_date_string + '.gz')
receiver_beacon_filename = os.path.join(path, 'receiver_beacons.csv_' + reference_date_string + '.gz')
if not os.path.exists(aircraft_beacon_filename) and not os.path.exists(receiver_beacon_filename):
print("Reading file: {}".format(sourcefile))
fout_ab = gzip.open(aircraft_beacon_filename, 'wt')
fout_rb = gzip.open(receiver_beacon_filename, 'wt')
else:
print("Output files for file {} already exists. Skipping".format(sourcefile))
return
else: else:
print("filename '{}' does not match pattern. Skipping".format(sourcefile)) print("filename '{}' does not match pattern. Skipping".format(sourcefile))
return return
@@ -49,16 +61,6 @@ def convert(sourcefile, path=''):
total += 1 total += 1
fin.seek(0) fin.seek(0)
aircraft_beacon_filename = os.path.join(path, 'aircraft_beacons.csv_' + reference_date_string + '.gz')
receiver_beacon_filename = os.path.join(path, 'receiver_beacons.csv_' + reference_date_string + '.gz')
if not os.path.exists(aircraft_beacon_filename) and not os.path.exists(receiver_beacon_filename):
fout_ab = gzip.open(aircraft_beacon_filename, 'wt')
fout_rb = gzip.open(receiver_beacon_filename, 'wt')
else:
print("Output files already exists. Skipping")
return
aircraft_beacons = list() aircraft_beacons = list()
receiver_beacons = list() receiver_beacons = list()