First version of migration script

This commit is contained in:
Simon Wüllhorst 2015-12-22 22:59:50 +01:00
commit 88b136f0a3
4 changed files with 344 additions and 0 deletions

39
geocode.py Normal file
View file

@ -0,0 +1,39 @@
#!/usr/bin/python
# -*- coding: utf-8 -
#import time
from geopy.geocoders import Nominatim
from blitzdb import Document, FileBackend
class GeoAssign(Document):
    """Document type under which Geocode stores reverse-geocoding results.

    Geocode saves instances carrying the keys 'lat', 'lon' and 'payload'.
    """
class Geocode:
    """Reverse-geocode coordinates via Nominatim with an optional disk cache.

    When caching is enabled, results are kept as GeoAssign documents in a
    blitzdb FileBackend located at './geo-cache'.
    """

    def __init__(self, geocoderCache=True, printStatus=False):
        """Set up the geocoder.

        geocoderCache -- if true, open the './geo-cache' backend and use it
                         to look up and store results.
        printStatus   -- if true, print a message for every cache miss.
        """
        self.printStatus = printStatus
        self.geocoderCache = geocoderCache
        if self.geocoderCache:
            self.db = FileBackend('./geo-cache')

    def getGeo(self, lon, lat):
        """Return the address document for the given coordinates.

        Note the argument order: longitude first, latitude second.

        Returns a GeoAssign with the keys 'lat', 'lon', 'payload' (the
        'address' part of the Nominatim answer) and 'cached' (True when the
        result came from the local cache, False after a fresh lookup), or
        None when Nominatim has no address for the coordinates.
        """
        if self.geocoderCache:
            # Keep the try body limited to the lookup that can actually miss.
            try:
                nodeObj = self.db.get(GeoAssign, {'lat': lat, 'lon': lon})
            except GeoAssign.DoesNotExist:
                pass
            else:
                nodeObj['cached'] = True
                return nodeObj

        if self.printStatus:
            print('lon: '+str(lon)+', lat: '+str(lat)+' not in cache - start lookup at Nominatim-API')
        geolocator = Nominatim()
        location = geolocator.reverse([lat, lon], timeout=20)
        # reverse() may yield no result object at all, or one without an
        # address (i.e. the coordinates are incorrect).
        if location is None or 'address' not in location.raw:
            return None

        nodeObj = GeoAssign({
            'lat': lat,
            'lon': lon,
            'payload': location.raw['address'],
        })
        # self.db only exists when caching is enabled; persisting
        # unconditionally raised AttributeError for geocoderCache=False.
        if self.geocoderCache:
            self.db.save(nodeObj)
            self.db.commit()
        # Set after saving so the flag is not written to the cache.
        nodeObj['cached'] = False
        return nodeObj

107
graph.py Normal file
View file

@ -0,0 +1,107 @@
#!/usr/bin/python
# -*- coding: utf-8 -
#Imports:
import urllib
import json
from pprint import pprint
from node import Node
from geocode import Geocode
class Graph:
    """Mesh graph built from the parsed nodes.json and graph.json data.

    self.nodes_list maps node ids to Node objects. Construction parses the
    nodes, wires up the links, computes each node's distance to the VPN and
    finally lets nodes without coordinates inherit them from neighbours.
    """

    # Address prefix that marks the public IPv6 address of a node.
    PUBLIC_PREFIX = '2a03'

    def __init__(self, nodesData, graphData):
        """Build the graph.

        nodesData -- parsed nodes.json (dict with a 'nodes' mapping).
        graphData -- parsed graph.json (dict with 'batadv' -> 'nodes'/'links').
        """
        self.coder = Geocode(geocoderCache=True, printStatus=True)
        self.data = graphData
        self.nodes = nodesData
        self.nodes_list = {}
        self.parseNodes()
        self.parseLinks()
        self.calculateStepsToVpn()
        self.findMissingGeo()

    def parseNodes(self):
        """Create one Node per entry of nodes.json and index it by node id."""
        for node_id in self.nodes['nodes']:
            lat, lon = self.getGeo(node_id)
            self.nodes_list[node_id] = Node(
                node_id,
                ipv6=self.getPublicAddress(node_id),
                hostname=self.getHostname(node_id),
                isOnline=self.getOnlineState(node_id),
                lat=lat,
                lon=lon,
                coder=self.coder,
            )

    def parseLinks(self):
        """Turn the batadv link list into mesh links and VPN markers.

        'source' and 'target' of a link are indexes into the batadv node
        list. A link whose two ends both carry a 'node_id' is a mesh link;
        anything else is treated as a VPN link.
        """
        link_nodes = self.data['batadv']['nodes']
        for link in self.data['batadv']['links']:
            source = link_nodes[link['source']]
            target = link_nodes[link['target']]
            if 'node_id' in source and 'node_id' in target:
                self.setLinkBetween(source['node_id'], target['node_id'])
            else:
                self.setVpnLink(link['source'], link['target'])

    def setLinkBetween(self, src, dst, stateOnline=True, lastSeen=None):
        """Register a bidirectional link between the node ids src and dst.

        Links with an empty id, or with an id that has no Node in
        self.nodes_list, are skipped instead of raising KeyError.
        """
        if not (src and dst):
            return
        if src not in self.nodes_list or dst not in self.nodes_list:
            return
        self.nodes_list[src].links[dst] = {
            'node': self.nodes_list[dst],
            'state_online': stateOnline,
            'last_seen': lastSeen,
        }
        self.nodes_list[dst].links[src] = {
            'node': self.nodes_list[src],
            'state_online': stateOnline,
            'last_seen': lastSeen,
        }

    def setVpnLink(self, src, dst):
        """Mark the mesh end of a VPN link as directly connected (level 0).

        src and dst are indexes into the batadv node list. Exactly one end
        has to carry a 'node_id'; links where both or neither end has one
        are ignored (the latter used to raise KeyError).
        """
        batadv_nodes = self.data['batadv']['nodes']
        src_entry = batadv_nodes[src]
        dst_entry = batadv_nodes[dst]
        src_is_mesh = 'node_id' in src_entry
        dst_is_mesh = 'node_id' in dst_entry
        if src_is_mesh == dst_is_mesh:
            return
        mesh_id = (src_entry if src_is_mesh else dst_entry)['node_id']
        if mesh_id and mesh_id in self.nodes_list:
            self.nodes_list[mesh_id].stepsToVpn = 0

    def calculateStepsToVpn(self):
        """Let every node compute its hop distance to the VPN."""
        for node in self.nodes_list.values():
            node.calculateStepsToVpn()

    def findMissingGeo(self):
        """Let every node without coordinates look for them at its neighbours."""
        for node in self.nodes_list.values():
            node.findMissingGeo()

    def getAllLevelXNodes(self, level, online=True):
        """Return {node_id: Node} for all nodes whose stepsToVpn equals level.

        With online=True (default) only nodes flagged online are returned.
        """
        return dict(
            (node_id, node)
            for node_id, node in self.nodes_list.items()
            if (not online or node.isOnline) and node.stepsToVpn == level
        )

    def getHostname(self, node_id):
        """Return the hostname stored in the node's nodeinfo."""
        return self.nodes['nodes'][node_id]['nodeinfo']['hostname']

    def getGeo(self, node_id):
        """Return (latitude, longitude) of a node, or (None, None) if unset."""
        location = self.nodes['nodes'][node_id]['nodeinfo'].get('location', {})
        if 'latitude' in location and 'longitude' in location:
            return location['latitude'], location['longitude']
        return None, None

    def getPublicAddress(self, node_id):
        """Return the first address starting with PUBLIC_PREFIX, else None.

        None is also returned for unknown node ids and for nodes without
        network/address information.
        """
        if node_id not in self.nodes['nodes']:
            return None
        network = self.nodes['nodes'][node_id]['nodeinfo'].get('network', {})
        for address in network.get('addresses', []):
            if address.startswith(self.PUBLIC_PREFIX):
                return address
        return None

    def getOnlineState(self, node_id):
        """Return the 'online' flag of the node."""
        return self.nodes['nodes'][node_id]['flags']['online']

    def getNodeCloudsIn(self, region):
        """Collect the node clouds reachable from level-0 nodes in a region.

        region is the list of filter dicts understood by Node.isInRegion.
        Returns {node_id: Node} and prints the number of nodes found.
        """
        results = {}
        for node in self.getAllLevelXNodes(0).values():
            if node.geodata is not None and node.isOnline == True:
                if node.isInRegion(region):
                    results.update(node.getNodeCloud({}))
        # Single pre-formatted argument: prints the same text whether print
        # is a statement (Python 2) or a function.
        print("Result: %s %s" % (len(results), region))
        return results

    def maxDepth(self):
        """Return the highest stepsToVpn of all nodes plus one (at least 1)."""
        return max([0] + [node.stepsToVpn for node in self.nodes_list.values()]) + 1

104
node.py Normal file
View file

@ -0,0 +1,104 @@
#!/usr/bin/python
# -*- coding: utf-8 -
from geocode import Geocode
class Node(object):
    """A mesh node: identity, links to neighbours, VPN distance and location.

    self.links maps neighbour ids to dicts that hold the neighbour under the
    key 'node'. stepsToVpn is -1 while unknown and 0 for nodes with a direct
    VPN link. Assigning self.geo triggers a reverse-geocoding lookup whose
    address payload ends up in self.geodata.
    """

    def __init__(self, nodeid, ipv6=None, hostname=None, isOnline=False, lastSeen=None, lat=None, lon=None, coder=None):
        """Create a node; a Geocode instance is created if no coder is given.

        If both lat and lon are given, self.geo is set right away, which
        performs a geocoding lookup through the coder.
        """
        self.coder = coder
        if self.coder is None:
            self.coder = Geocode(geocoderCache=True, printStatus=True)
        self.links = {}
        self.nodeid = nodeid
        self.ipv6 = ipv6
        self.hostname = hostname
        self.stepsToVpn = -1
        self.isOnline = isOnline
        self.lastSeen = lastSeen
        self._geo = None
        self.geodata = None
        if lat is not None and lon is not None:
            self.geo = {
                'lat': lat,
                'lon': lon,
            }

    def addLink(self, nodeid, node):
        """Store node under nodeid in self.links unless that id is present.

        NOTE(review): this stores the node itself, whereas the traversal
        methods below expect link entries of the form {'node': <Node>} --
        confirm whether this method is still meant to be used.
        """
        if nodeid not in self.links:
            self.links[nodeid] = node
        else:
            print("link still exists")

    def calculateStepsToVpn(self, trace=None):
        """Compute, store and return the hop distance to the nearest VPN node.

        A breadth-first search over the links looks for the closest node
        whose stepsToVpn is 0. Node ids listed in trace are excluded from
        the search. If no such node is reachable, stepsToVpn is left
        unchanged (-1 when it was never set).

        The former recursive search let an unreachable neighbour (-1)
        replace an already found distance, so the result depended on the
        iteration order of the links, and it visited every simple path.
        """
        if self.stepsToVpn == 0:
            return self.stepsToVpn
        visited = set(trace) if trace else set()
        visited.add(self.nodeid)
        frontier = [self]
        depth = 0
        while frontier:
            depth += 1
            next_frontier = []
            for node in frontier:
                for nodeid, link in node.links.items():
                    if nodeid in visited:
                        continue
                    visited.add(nodeid)
                    neighbour = link['node']
                    if neighbour.stepsToVpn == 0:
                        # First hit of a level-wise search is the shortest.
                        self.stepsToVpn = depth
                        return self.stepsToVpn
                    next_frontier.append(neighbour)
            frontier = next_frontier
        return self.stepsToVpn

    def findMissingGeo(self, trace=None):
        """Return this node's coordinates, borrowing a neighbour's if unset.

        If self.geo is unset, the neighbours are searched recursively
        (skipping ids already in trace); the first coordinates found are
        copied into self.geo. Returns the coordinates dict or None.
        """
        if self.geo is not None:
            return self.geo
        own_trace = list(trace) if trace else []
        own_trace.append(self.nodeid)
        geo = None
        for nodeid, link in self.links.items():
            if nodeid in own_trace:
                continue
            geo = link['node'].findMissingGeo(own_trace)
            if geo is not None:
                # Copy so that both nodes do not share one dict.
                self.geo = geo.copy()
                break
        return geo

    def getNodeCloud(self, nodes=None):
        """Return {nodeid: Node} of this node and everything linked to it.

        nodes -- dict to extend; a new one is created per call when omitted
                 (a shared default dict used to leak entries between calls).
        """
        if nodes is None:
            nodes = {}
        nodes[self.nodeid] = self
        for nodeid, link in self.links.items():
            if nodeid not in nodes:
                nodes = link['node'].getNodeCloud(nodes)
        return nodes

    def isInRegion(self, regions):
        """Tell whether the node's geodata matches any of the given regions.

        regions is a list of dicts. The dicts are OR-ed; inside one dict all
        key/value pairs must match the geodata (AND). An empty dict never
        matches. Returns False when the node has no geodata.
        """
        if self.geodata is None:
            return False
        for region in regions:
            if region and all(
                key in self.geodata and self.geodata[key] == value
                for key, value in region.items()
            ):
                return True
        return False

    @property
    def geo(self):
        """Coordinates dict with the keys 'lat' and 'lon', or None."""
        return self._geo

    @geo.setter
    def geo(self, value):
        self._geo = value
        self.__get_geodata__()

    def __get_geodata__(self):
        """Refresh self.geodata from the coder for the current coordinates."""
        # time is not imported at the top of this file.
        import time

        if self.geo is None:
            return
        result = self.coder.getGeo(self.geo['lon'], self.geo['lat'])
        if result:
            self.geodata = result['payload']
            if result['cached'] == False:
                # Pause after a real lookup -- presumably to stay within
                # the rate limit of the geocoding service.
                time.sleep(1)
        else:
            self.geodata = None

94
node_hierarchy.py Executable file
View file

@ -0,0 +1,94 @@
#!/usr/bin/python
# -*- coding: utf-8 -
#Imports:
import json
from graph import Graph
class NodeHierarchy:
    """Write one 'geo $switch' config file per VPN distance level.

    The constructor loads nodes.json and graph.json, builds the Graph and
    writes the files immediately: one set named 'all' when no targets are
    given, otherwise one set per target containing the node clouds found in
    that target's regions.
    """

    def __init__(self, nodesFile, graphFile, dataPath='./', printStatus=False, targets=None):
        """Load the data and write all config files.

        nodesFile, graphFile -- local paths or http(s) URLs of the JSON data.
        dataPath    -- directory the config files are written to.
        printStatus -- if true, print which file is opened or downloaded.
        targets     -- optional dict {name: regions}; regions is passed to
                       Graph.getNodeCloudsIn.
        """
        self.printStatus = printStatus
        self.targets = targets
        self.nodesData = self.__getFile__(nodesFile)
        self.graphData = self.__getFile__(graphFile)
        self.dataPath = dataPath
        self.graph = Graph(self.nodesData, self.graphData)
        if self.targets is None:
            self.writeConfigFiles(self.graph.nodes_list, "all")
        else:
            for name, regions in self.targets.items():
                self.writeConfigFiles(self.graph.getNodeCloudsIn(regions), name)

    def __getFile__(self, nodesFile):
        """Return the parsed JSON content of a local file or http(s) URL."""
        if nodesFile.startswith(('https://', 'http://')):
            if self.printStatus:
                print("Download node.json from URL: " + nodesFile)
            # urllib is not imported at the top of this file; import the
            # opener here in a way that works on Python 2 and 3.
            try:
                from urllib.request import urlopen
            except ImportError:
                from urllib import urlopen
            resource = urlopen(nodesFile)
        else:
            if self.printStatus:
                print("Open node.json file: " + nodesFile)
            resource = open(nodesFile, 'rb')
        # Close the handle even if reading or parsing fails.
        try:
            return json.loads(resource.read())
        finally:
            resource.close()

    def writeConfigFiles(self, nodes_level, name):
        """Write '<dataPath>/<name>_node_level<i>' for every level i.

        nodes_level -- dict {node_id: node}; a node is listed in the file of
                       level i if its stepsToVpn equals i and it has both an
                       ipv6 address and a hostname.
        name        -- file name prefix.

        The output directory is created if it does not exist yet.
        """
        import os

        if self.dataPath and not os.path.isdir(self.dataPath):
            os.makedirs(self.dataPath)
        for level in range(self.maxDepth(nodes_level)):
            content = u'geo $switch {\n\tdefault\t0;'
            for node in nodes_level.values():
                if node.stepsToVpn == level and node.ipv6 and node.hostname:
                    content += u'\n\t' + node.ipv6 + u'\t1;\t #' + node.hostname
            content += u'\n}'
            path = self.dataPath + '/' + name + '_node_level' + str(level)
            # Binary mode: the content is encoded explicitly.
            with open(path, 'wb') as f:
                f.write(content.encode('utf8'))

    def maxDepth(self, nodes):
        """Return the highest stepsToVpn among nodes plus one (at least 1)."""
        return max([0] + [node.stepsToVpn for node in nodes.values()]) + 1
# Output targets: each key becomes the file name prefix of one set of config
# files, each value is the list of region filters handed to
# Graph.getNodeCloudsIn for that target. Every filter maps a geodata key to
# the value it has to have.
targets = {
    'muenster': [
        {'city': u'Münster'},
        {'county': u'Münster'},
    ],
    'kreis_warendorf': [
        {'county': u'Kreis Warendorf'},
    ],
    'kreis_coesfeld': [
        {'county': u'Kreis Coesfeld'},
    ],
    'kreis_steinfurt_west': [
        # NOTE(review): '48565' looks like a postal code rather than a town
        # name -- confirm that this filter is intended.
        {'town': u'48565'},
        {'village': u'Wettringen'},
        {'town': u'Ochtrup'},
        {'village': u'Metelen'},
        {'town': u'Horstmar'},
        {'village': u'Laer'},
        {'village': u'Nordwalde'},
        {'village': u'Altenberge'},
    ],
    'kreis_steinfurt_ost': [
        {'town': u'Emsdetten'},
        {'town': u'Neuenkirchen'},
        {'town': u'Rheine'},
        {'town': u'Greven'},
        {'village': u'Ladbergen'},
        {'town': u'Lengerich'},
        {'town': u'Tecklenburg'},
        {'village': u'Lienen'},
    ],
}

# Constructing the hierarchy loads the data and writes all config files.
ds = NodeHierarchy(
    nodesFile='nodes.json',
    graphFile='graph.json',
    printStatus=True,
    dataPath='./results/',
    targets=targets
)