WikiProject Belgium/De Lijndata
WikiProject Belgium +/- | ||
Conventions | ||
Subprojects |
I'm going to describe the process of converting the data from De Lijn, public transport company for Flanders.
Preparation
All source code is licensed GPL v3. Hopefully it's useful as inspiration for others.
Software requirements
- Python v3
- PostgreSQL v9.x
- PostGIS v2.x
To repeat the steps below, it will be necessary to make a request to De Lijn for getting access to their data. The login for their FTP goes into credentials.txt. Username on the first line, password on the second line. The script expects it in the same directory the script is run from.
Creation of the database and adding data to it
The data consists of a zip file which is updated regularly. It needs to be converted to UTF-8 from latin1.
It is, of course, possible to use Filezilla, WinSCP or even wget to do this, but I wrote a Python script which automates downloading (after checking it is necessary), unzipping and recoding:
#!/bin/python
# -*- coding: utf-8 -*-
import os, sys, re, zipfile, ftplib
import argparse
zipre = re.compile('\d\d\d\d-\d\d-\d\d\.zip')
parser = argparse.ArgumentParser(description='Fetch data from FTP server of De Lijn, unzip it and recode to UTF-8')
parser.add_argument('--skipdownload', '-d', action='store_true',
help="Don't contact the FTP server, work with the most recent local file")
parser.add_argument('--dontcallsuccessor', '-s', action='store_true',
help="don't call NewDBfromCSV.py when done")
args = parser.parse_args()
""" Fetch the latest zip file from the ftp site of De Lijn """
class Callback(object):
'''This prints a nice progress status on the command line'''
def __init__(self, totalsize, fp):
self.totalsize = totalsize
self.fp = fp
self.received = 0
def __call__(self, data):
self.fp.write(data)
self.received += len(data)
print('\r%i%% complete' % (100.0*self.received/self.totalsize), end='\r')
if not(args.skipdownload):
print ('Reading credentials from "credentials.txt"')
with open("credentials.txt") as credentials:
username, password = credentials.readlines()
#print (username, password)
print ("Opening connection to FTP site of De Lijn")
ftp=ftplib.FTP(host='poseidon.delijn.be', user=username, passwd=password)
print ("CD to current")
ftp.cwd('current')
print ("Get name of file")
fn = ftp.nlst()[0]
size = ftp.size(fn)
if not(fn in os.listdir()):
# Only download if a newer file is available
print (fn + " found, downloading latest version of De Lijndata")
with open(fn, 'wb') as fh:
w = Callback(size, fh)
#ftp.set_pasv(0)
ftp.retrbinary('RETR %s' % fn, w, 32768)
ftp.quit()
else:
print('Latest version already present, nothing to do')
sys.exit()
""" Unzip the latest file we have available in the current directory """
files = os.listdir()
zipfn=''
for file in files:
if re.match(zipre, file):
if file > zipfn:
zipfn = file
zfile = zipfile.ZipFile(zipfn)
print(); print(); print("Found " + zipfn)
for name in zfile.namelist():
"""Recode csv-file with textual content to UTF-8 """
(dirname, filename) = os.path.split(name)
print("Decompressing " + filename)
fd = open(name,"wb")
fd.write(zfile.read(name).decode('latin-1').replace('\r','').replace('"','').encode('utf-8'))
fd.close()
if not(args.dontcallsuccessor): import NewDBfromCSV
Populate database
Now that we have unpacked the zip file to several csv files, it's time to put them into a PostGIS database. I created a stored procedure to take care of the conversion between Lambert72 and WGS84.
If you put the following in 'NewDBfromCSV.py', it will get started at the end of the previous script automatically if needed.
import postgresql
# -*- coding: utf-8 -*-
db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')
print("Granting user rights")
createDB = db.execute("""
GRANT ALL ON DATABASE "DL" TO "Jo";
GRANT ALL ON ALL TABLES IN SCHEMA public TO "Jo";
GRANT ALL ON TABLE spatial_ref_sys TO "Jo";
""")
print("Creating tables")
createDB = db.execute("""
--CREATE TABLESPACE pg_dl
-- OWNER postgres
-- LOCATION E'C:\\Data\\OSM\\PostGIS_Tablespace';
DROP TABLE IF EXISTS stops CASCADE;
--DROP EXTENSION postgis CASCADE;
--CREATE EXTENSION postgis;
DROP TABLE IF EXISTS places;
CREATE TABLE IF NOT EXISTS places ( placeid int NOT NULL PRIMARY KEY, placeidentifier text, placedescription text )
WITH ( OIDS=FALSE );
ALTER TABLE places OWNER TO postgres;
DROP TABLE IF EXISTS calendar;
CREATE TABLE calendar ( vscid int NOT NULL PRIMARY KEY, vsid bigint, vscdate date, vscday text )
WITH ( OIDS=FALSE );
ALTER TABLE calendar OWNER TO postgres;
DROP TABLE IF EXISTS routes;
CREATE TABLE routes
( routeid int NOT NULL PRIMARY KEY, routeidentifier text, routedescription text, routepublicidentifier text, routeversion text, routeservicetype text, routeservicemode text )
WITH ( OIDS=FALSE );
ALTER TABLE routes OWNER TO postgres;
DROP TABLE IF EXISTS trips;
CREATE TABLE trips
( tripid bigint NOT NULL PRIMARY KEY, routeid int, vscid int, tripnoteidentifier text, tripnotetext text, tripstart text, tripend text, tripshiftstart integer, tripshiftend integer, tripnoteidentifier2 text, tripnotetext2 text, placeidstart bigint, placeidend bigint, naturalkey text )
WITH ( OIDS=FALSE );
ALTER TABLE trips OWNER TO postgres;
DROP TABLE IF EXISTS segments;
CREATE TABLE segments
( segmentid bigint NOT NULL PRIMARY KEY, tripid bigint, stopid int, segmentsequence int, segmentstart text, segmentend text, segmentshiftstart integer, segmentshiftend integer )
WITH ( OIDS=FALSE );
ALTER TABLE segments OWNER TO postgres;
""")
routescount = db.prepare('SELECT COUNT(*) FROM routes;')
segmentscount = db.prepare('SELECT COUNT(*) FROM segments;')
tripscount = db.prepare('SELECT COUNT(*) FROM trips;')
print("Copying data into tables")
createDB = db.execute("""
COPY places FROM 'C:/Data/De Lijn/places.csv' DELIMITERS ';' CSV HEADER;
COPY calendar FROM 'C:/Data/De Lijn/calendar.csv' DELIMITERS ';' CSV HEADER;
COPY routes FROM 'C:/Data/De Lijn/routes.csv' DELIMITERS ';' CSV HEADER;
COPY trips FROM 'C:/Data/De Lijn/trips.csv' DELIMITERS ';' CSV HEADER;
COPY segments FROM 'C:/Data/De Lijn/segments.csv' DELIMITERS ';' CSV HEADER;
""")
print("Creating tables for stops")
createDB = db.execute("""
DROP TABLE IF EXISTS stops;
DROP TABLE IF EXISTS stops_DL;
DROP TABLE IF EXISTS stops_OSM;
CREATE TABLE stops
( stopid INT NOT NULL PRIMARY KEY, stopidentifier INT, description text, street text, municipality text, parentmunicipality text, x INT, y INT, stopisaccessible BOOLEAN, stopispublic BOOLEAN )
WITH ( OIDS=FALSE );
ALTER TABLE stops OWNER TO postgres;
COPY stops FROM 'C:/Data/De Lijn/stops.csv' DELIMITERS ';' CSV HEADER;
ALTER TABLE stops
ADD COLUMN lat DOUBLE PRECISION,
ADD COLUMN lon DOUBLE PRECISION,
ADD COLUMN route_ref text,
--ADD COLUMN geomDL geometry,
--ADD COLUMN geomOSM geometry,
ADD COLUMN OSM_name text,
ADD COLUMN OSM_city text,
ADD COLUMN OSM_street text,
ADD COLUMN OSM_operator text,
ADD COLUMN OSM_route_ref text,
ADD COLUMN OSM_source text,
ADD COLUMN OSM_node_ID text,
ADD COLUMN OSM_last_modified_by_user text,
ADD COLUMN OSM_last_modified_timestamp TIMESTAMP,
ADD COLUMN OSM_zone text,
ADD COLUMN zoneid integer;
SELECT AddGeometryColumn ('public','stops','geomdl',4326,'POINT',2);
SELECT AddGeometryColumn ('public','stops','geomosm',4326,'POINT',2);
CREATE TABLE stops_DL
( stopsPK int NOT NULL PRIMARY KEY,
last_change_timestamp timestamp,
stopidentifier int, -- this corresponds to ref in OSM
description text, -- this corresponds to name in OSM
street text,
municipality text,
parentmunicipality text,
stopisaccessible boolean,
stopispublic boolean,
route_ref text, -- this is calculated
geomDL geometry)
WITH ( OIDS=FALSE );
ALTER TABLE stops OWNER TO postgres;
CREATE TABLE stops_OSM
( stopsPK int NOT NULL PRIMARY KEY,
last_change_timestamp timestamp,
ref text, -- this corresponds to stopidentifier
name text,
operator text,
route_ref text,
zone text,
source text,
node_ID text, -- over time a stop may have used different nodes
version int,
last_modified_by text,
geomOSM geometry)
WITH ( OIDS=FALSE );
ALTER TABLE stops OWNER TO postgres;
""")
print("Create index on routes (routeidentifier), routes (routeversion), trips(routeid) and on segments (tripid)")
createDB = db.execute("""
CREATE INDEX ix_routeidentifier ON routes (routeidentifier);
CREATE INDEX ix_routeversion ON routes (routeversion);
CREATE INDEX ix_tripsrouteid ON trips(routeid);
CREATE INDEX ix_segmentstripid ON segments (tripid);
""")
print("Vacuum analyze to gather statistics for efficient use of indexes")
createDB = db.execute("""
VACUUM ANALYZE --VERBOSE;
""")
print(routescount(), tripscount(), segmentscount())
print("Remove older route versions")
createDB = db.execute("""
WITH currentversions AS (SELECT rte1.routeid FROM routes rte1
WHERE rte1.routeversion = (SELECT MAX(rte2.routeversion)
FROM routes rte2
JOIN trips ON trips.routeid=rte2.routeid -- we want the highest version in the routes table for which there are actual trips
WHERE rte1.routeidentifier=rte2.routeidentifier))
DELETE FROM routes rte
WHERE rte.routeid NOT IN (SELECT routeid from currentversions);
""")
print("Remove trips for older route versions")
createDB = db.execute("""
DELETE FROM trips trp
WHERE NOT EXISTS
(SELECT trp2.routeid FROM trips trp2
JOIN routes rte ON trp2.routeid=rte.routeid
AND trp.routeid=rte.routeid);
""")
print("Remove segments for older route versions")
createDB = db.execute("""
DELETE FROM segments sgt
WHERE NOT EXISTS (SELECT sgt2.segmentid FROM segments sgt2
JOIN trips trp ON sgt2.tripid=trp.tripid
AND sgt2.segmentid=sgt.segmentid);
""")
print("Creating indexes")
createDB = db.execute("""
CREATE INDEX ix_stopidentifier ON stops (stopidentifier);
CREATE INDEX ix_OSM_node_ID ON stops (OSM_node_ID);
CREATE INDEX ix_description ON stops (description);
CREATE INDEX ix_routepublicidentifier ON routes (routepublicidentifier);
CREATE INDEX ix_segmentstopid ON segments (stopid);
""")
print("Vacuum analyze to gather statistics for efficient use of indexes")
createDB = db.execute("""
VACUUM ANALYZE --VERBOSE;
""")
print(routescount(), tripscount(), segmentscount())
print("Creating stored procedures")
createDB = db.execute("""
CREATE OR REPLACE FUNCTION AllLinesPassingAtaStop(stopidentifierparameter int) RETURNS text AS $BODY$
DECLARE outlines text :='';
l record;
line text;
BEGIN
FOR l IN SELECT distinct(lpad(rte.routepublicidentifier, 5, '0'))
FROM public.trips trp
JOIN public.routes rte ON rte.routeid=trp.routeid AND
rte.routepublicidentifier NOT LIKE 'F%'
JOIN public.segments seg ON seg.tripid=trp.tripid
JOIN public.stops stp ON seg.stopid=stp.stopid
WHERE
stp.stopidentifier = stopidentifierparameter
ORDER BY lpad(rte.routepublicidentifier, 5, '0')
LOOP
line := l;
line := trim(leading '(' FROM line);
IF line = '00000'
THEN line:= '0';
ELSE line := trim(leading '0' FROM line);
END IF;
line := trim(trailing ')' FROM line);
outlines := outlines || ';' || line;
END LOOP;
RETURN trim(both ';' FROM outlines);
END $BODY$
LANGUAGE plpgsql VOLATILE COST 100;
ALTER FUNCTION AllLinesPassingAtaStop(int) OWNER TO postgres;
GRANT EXECUTE ON FUNCTION AllLinesPassingAtaStop(int) TO public;
CREATE OR REPLACE FUNCTION filloutlines() RETURNS void AS $BODY$
DECLARE
l record;
res text;
b text;
a int4;
coords geometry;
vlat double precision;
vlon double precision;
BEGIN
DROP INDEX IF EXISTS ix_geomDL;
FOR l IN SELECT stopid, stopidentifier, x, y FROM stops
LOOP res := AllLinesPassingAtaStop(l.stopidentifier);
coords := st_transform(st_setSRID(st_Point(l.x, l.y), 31370),4326);
vlat := st_y(coords);
vlon := st_x(coords);
UPDATE stops
SET route_ref=res,
geomDL = coords,
lat = vlat,
lon = vlon
WHERE stops.stopid=l.stopid;
RAISE NOTICE '% set to %',l.stopidentifier, res;
END LOOP;
END
$BODY$ LANGUAGE plpgsql VOLATILE COST 11;
ALTER FUNCTION filloutlines() OWNER TO postgres;
GRANT EXECUTE ON FUNCTION filloutlines() TO public;
""")
print("Converting from Lambert72 and adding route_ref to stops table")
filloutlines = db.proc('filloutlines()')
print(filloutlines())
# -- COPY stops (stopid, stopidentifier, description, street, municipality, parentmunicipality, lat, lon, route_ref) TO 'C:/Data/De Lijn/De Lijnstops.csv' DELIMITERS '#' CSV;
print("Creating index on description and spatial column containing coordinates from De Lijn")
createDB = db.execute("""
CREATE INDEX ix_geomDL ON stops USING gist(geomDL);
""")
createDB = db.execute("""
DROP TABLE IF EXISTS zones;
CREATE TABLE zones
(
zoneid serial NOT NULL,
zone text,
geomzone geometry,
CONSTRAINT zones_pkey PRIMARY KEY (zoneid)
)
WITH (
OIDS=FALSE
);
--SELECT AddGeometryColumn ('public','zones','geomzone',4326,'POLYGON',2);
CREATE INDEX ix_geomzone ON zones USING gist(geomzone);
ALTER TABLE zones
OWNER TO postgres;
GRANT ALL ON TABLE zones TO postgres;
GRANT ALL ON TABLE zones TO "Jo";
""")
print("Vacuum analyze to gather statistics for efficient use of indexes")
createDB = db.execute("""
VACUUM ANALYZE --VERBOSE;
""")
print("Creating a stored procedure for later use")
createDB = db.execute("""
CREATE OR REPLACE FUNCTION AllTripsForARouteVerbose(routeidentifierparam text) RETURNS table(tripid int,routeidentifier text,routedescription text,tripstart text,start text,terminus text) AS $BODY$
BEGIN
RETURN QUERY
SELECT DISTINCT
trp.tripid,
rte.routeidentifier,
rte.routedescription,
trp.tripstart,
(SELECT
st.description
FROM
public.stops st
JOIN public.segments seg1 ON seg1.stopid = st.stopid AND seg1.tripid = trp.tripid
WHERE
seg1.segmentsequence = (SELECT MIN(seg2.segmentsequence) FROM public.segments seg2 WHERE seg2.tripid = trp.tripid)) AS Start,
(SELECT
st.description
FROM
public.stops st
JOIN public.segments seg1 ON seg1.stopid = st.stopid AND seg1.tripid = trp.tripid
WHERE
seg1.segmentsequence = (SELECT MAX(seg2.segmentsequence) FROM public.segments seg2 WHERE seg2.tripid = trp.tripid)) AS Terminus
FROM public.trips trp
JOIN public.routes rte ON rte.routeid=trp.routeid
JOIN public.segments seg ON seg.tripid=trp.tripid
JOIN public.stops stp ON seg.stopid=stp.stopid
WHERE
rte.routeidentifier = routeidentifierparam
ORDER BY
trp.tripstart ASC;
END; $BODY$
LANGUAGE plpgsql VOLATILE COST 100;
ALTER FUNCTION AllTripsForARouteVerbose(text) OWNER TO postgres;
""")
import IntegrateStopsFromOSM
IntegrateStopsFromOSM.main('C:/Data/OSM/Overpass API queries/PT.osm')
#import UpdateZonesFromPDF
#UpdateZonesFromPDF.main('C:/Data/De Lijn/zones.txt')
import DeLijnData_in_Postgis_2_OSM
DeLijnData_in_Postgis_2_OSM.main()
import CreateWikiReport
CreateWikiReport.main()
Download relevant data from Openstreetmap with Overpass API
Download all bus stops and route relations in Flanders. recurse up and down to fetch all related stops, route relations and ways for the itineraries. Be aware that this is a hefty query even for the Overpass API. It returns 90MB of data and when run at the wrong time, it occasionally fails.
area["name:nl"~"Vlaams-Brabant|Antwerpen|..st-Vlaanderen"] -> .VBAntw;
area["alt_name:nl"="Belgisch Limburg"] -> .Limb;
(
node(area.VBAntw);
node(area.Limb);
) -> .allnodesinArea;
(
rel(bn.allnodesinArea)
["type"="route"]
["route"~"bus|tram"]
->.routes;
.routes << ->.route_masters;
(
node.allnodesinArea
["highway"="bus_stop"]
->.stops;
node.allnodesinArea
["amenity"="bus_station"]
->.busstations;
node.allnodesinArea
["railway"="tram_stop"]
->.tstops;
node.allnodesinArea
["public_transport"]
->.platforms;
node
["highway"="bus_stop"]
["operator"~"De Lijn|TEC|STIB|MIVB"];
node
["railway"="tram_stop"]
["operator"~"De Lijn|TEC|STIB|MIVB"];
node
["public_transport"="platform"]
["operator"~"De Lijn|TEC|STIB|MIVB"];
) -> .stopsPlatformsAndStopPositions;
(
way(bn.allnodesinArea)
["highway"="bus_guideway"];
way(bn.allnodesinArea)
["railway"="tram"];
) ->.tracks;
way(bn.allnodesinArea)
["amenity"="shelter"]
["shelter_type"="public_transport"]
->.shelters;
node(w.shelters);
node.allnodesinArea
["amenity"="shelter"]
["shelter_type"="public_transport"]
->.shelternodes;
node(r.routes);
way(bn)->.parentways1;
node(w.parentways1);
rel(bw.parentways1);
way(r.routes);
node(w);
way(bn.stopsPlatformsAndStopPositions);
rel(bn.stopsPlatformsAndStopPositions)
["type"="public_transport"]
-> .stopAttributes;
node(r.stopAttributes);
way(r.stopAttributes);
node(w);
node(w.tracks);
);
out meta;
This can be called as follows: Overpass recently added the possibility to download a zipped file. Browsers unzip this automatically, at the command prompt we still need to do this ourselves.
PT.cmd:
cd "C:\OSM\Overpass API queries\"
"C:\Program Files (x86)\GnuWin32\bin\wget.exe" --header="accept-encoding: gzip" -O "PT.osm.zip" --post-file="PT.overpass" "http://overpass-api.de/api/interpreter"
"C:\Program Files\7-Zip\7z.exe" e -y "PT.osm.zip" PT.osm
pause
Add data downloaded from OSM to this database
At the moment only stops which have a ref tag in OSM are added.
import postgresql, xml.sax, re, sys
# -*- coding: utf-8 -*-
sixdigitsRE= re.compile(r'\d\d\d\d\d\d')
db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')
nodeexists = db.prepare("""SELECT stopidentifier FROM stops
WHERE OSM_node_ID = $1;""")
refexists = db.prepare("""SELECT OSM_node_ID FROM stops
WHERE stopidentifier = $1;""")
purgeOSMdata = db.prepare("""UPDATE stops SET OSM_node_ID = NULL,
OSM_name = NULL,
OSM_city = NULL,
OSM_street = NULL,
OSM_operator = NULL,
OSM_route_ref = NULL,
OSM_source = NULL,
OSM_last_modified_by_user = NULL,
OSM_last_modified_timestamp = NULL,
OSM_zone = NULL
WHERE
OSM_node_ID IS NOT NULL;""")
updateOSMdata = db.prepare("""UPDATE stops SET OSM_node_ID = $1,
OSM_name = $2,
OSM_city = $3,
OSM_street = $4,
OSM_operator = $5,
OSM_route_ref = $6,
OSM_source = $7,
OSM_last_modified_by_user = $8,
OSM_last_modified_timestamp = $9::TEXT::TIMESTAMP,
OSM_zone = $10,
geomOSM = ST_SetSRID(ST_MakePoint($11::TEXT::DOUBLE PRECISION, $12::TEXT::DOUBLE PRECISION),4326)
WHERE
stopidentifier = $13::TEXT::INTEGER;""")
class OSMContentHandler(xml.sax.ContentHandler):
def __init__(self):
xml.sax.ContentHandler.__init__(self)
self.nodeattributes = {}
self.tags = {}
print("Purge OSM data from database first")
purgeOSMdata()
print("Updating database")
def startElement(self, tagname, attrs):
if tagname == "tag":
self.tags[attrs.getValue("k")] = attrs.getValue("v")
elif tagname == "node":
self.nodeattributes = attrs
self.tags = {}
def endElement(self, tagname):
name = operator = ref = route_ref = source = zone = addr_city = addr_street = user = timestamp = None
#updatequery = "UPDATE stops SET "
if tagname == "node":
if 'highway' in self.tags and self.tags['highway'] in ['bus_stop', 'bus_station'] or 'railway' in self.tags and self.tags['railway'] in ['tram_stop', 'station'] or 'public_transport' in self.tags and self.tags['public_transport'] in ['platform']:
#if self.nodeattributes["id"][0]=='-': print('Negative node id, upload your data first, then save in JOSM'); sys.exit()
if 'action' in self.nodeattributes and self.nodeattributes["action"]== 'delete': return
# First try to find out which record to update
# Do we have a ref?
if 'ref' in self.tags and self.tags['ref']!='noref':
if re.match(sixdigitsRE, self.tags['ref']):
ref = self.tags['ref']
refalreadyexists = refexists(int(ref))[0][0]
#print (ref + ' ' + str(refalreadyexists))
if refalreadyexists:
print (str(refalreadyexists) + ' ' + ref + ' is used twice, better fix this')
#if nodeexists(str(self.nodeattributes["id"])): print(self.nodeattributes["id"] + " already exists in the database"); pass
if 'name' in self.tags:
name = self.tags['name'].replace("'","''") # updatequery += "OSM_name = '" + self.tags['name'].replace("'","''") + "',"
if 'operator' in self.tags:
operator = self.tags['operator'] # updatequery += "OSM_operator = '" + self.tags['operator'] + "',"
if 'route_ref' in self.tags:
route_ref = self.tags['route_ref'] # updatequery += "OSM_route_ref = '" + self.tags['route_ref'] + "',"
if 'source' in self.tags:
source = self.tags['source'] # updatequery += "OSM_source = '" + self.tags['source'] + "',"
if 'addr:city' in self.tags:
addr_city = self.tags['addr:city'].replace("'","''") # updatequery += "OSM_city = '" + self.tags['addr:city'].replace("'","''") + "',"
if 'addr:street' in self.tags:
addr_street = self.tags['addr:street'].replace("'","''") # updatequery += "OSM_street = '" + self.tags['addr:street'].replace("'","''") + "',"
if 'zone' in self.tags:
zone = self.tags['zone'] # updatequery += "OSM_zone = '" + self.tags['zone'] + "',"
if 'user' in self.nodeattributes:
user = self.nodeattributes["user"]
#updatequery += "OSM_last_modified_by_user = '" + self.nodeattributes["user"] + "',"
if 'timestamp' in self.nodeattributes:
timestamp = self.nodeattributes["timestamp"]
#updatequery += "OSM_last_modified_timestamp = '" + self.nodeattributes["timestamp"] + "',"
#csvfile.write(self.nodeattributes['id'] + '#' + ref + '#' + zone + '#' + self.nodeattributes['lat'] + '#' + self.nodeattributes['lon'] + '#' + name + '#' + addr_city + '#' + addr_street + '#' + operator + '#' + route_ref + '#' + source + '\n')
if True: # updatequery[-1] == ',':
if ref: # and refexists(int(ref)):
updateOSMdata(self.nodeattributes["id"], name, addr_city, addr_street, operator, route_ref, source, user, timestamp, zone, self.nodeattributes['lon'], self.nodeattributes['lat'], ref)
'''
updatequery += """geomOSM = ST_SetSRID(ST_MakePoint(%s, %s),4326),
OSM_node_ID = '%s'
WHERE stopidentifier = %s;
""" % (self.nodeattributes['lon'], self.nodeattributes['lat'],
self.nodeattributes["id"],
ref)
#print (updatequery)
'''
#db.execute(updatequery)
#quit()
def main(sourceFileName):
source = open(sourceFileName, encoding='utf-8')
xml.sax.parse(source, OSMContentHandler())
print("Database updated with stops from " + sourceFileName)
if __name__ == "__main__":
main('C:/Data/OSM/Overpass API queries/PT.osm')
Output in useful formats
Creation of an OSM file with all the stops
Now we have all the relevant data in our database, we can start creating some output.
To reuse code, I created a 'library' with some functions:
#!/bin/python
# -*- coding: utf-8 -*-
import re
leaveCityNameAnywayRE=re.compile(r"(?ix)((^|\s)(station|dorp|markt|kerk|centrum)(\s|$))") # case insensitive search for terms surrounded by whitespace
citynamesToOmit = [
(r"Gent",""), # !Sas-van-Gent
(r"Antwerpen",""),
(r"Sint-Agatha-Berchem",""),
(r"Berchem",""), # !Sint-Agatha-Berchem
(r"Burcht",""),
(r"Borsbeek",""),
(r"Borgerhout",""),
(r"Deurne",""), # !Deurne (bij Diest)
(r"Hoboken",""),
(r"Ekeren",""),
(r"Merksem",""),
(r"Mortsel",""),
(r"Wilrijk",""),
(r"Zwijndrecht",""),
(r"Brussel",""),
(r"Anderlecht",""),
(r"Elsene",""),
(r"Etterbeek",""),
(r"Evere",""),
(r"Ganshoren",""),
(r"Jette",""),
(r"Koekelberg",""),
(r"Oudergem",""),
(r"Schaarbeek",""),
(r"Sint-Gillis",""),
(r"Sint-Jans-Molenbeek",""),
(r"Sint-Joost-ten-Node",""),
(r"Sint-Lambrechts-Woluwe",""),
(r"Sint-Pieters-Woluwe",""),
(r"Ukkel",""),
(r"Vorst",""), # !Vorst (bij Veerle) !Vorst-Laakdal
(r"Watermaal-Bosvoorde",""),
(r"Sint-Job-in-'t Goor","Sint-Job-in-'t-Goor "),
(r"SPV","Sint-Pieters-Voeren "),
(r"SMV","Sint-Martens-Voeren "),
(r"SGV","'s Gravenvoeren "),
(r"Mol Gompel ","Gompel "),
(r'Petegem A/D Leie','Petegem-aan-de-Leie'),
(r"Liège",""),
(r"Moeskroen",""),
(r"Ronse/Renaix",""),
(r"Ellezelles / Elzele",""),
(r"Bever/Bievene",""),
]
commonabbreviations=[
(500,'0',r"'T","'t"),
(500,'0',r'(?u)T Zand',"'t Zand"),
(500,'0',r"'S","'s"),
(500,'0',r"(?ui)D'\s*","d'"),
(500,'0',r"(?ui)l'\s*","l'"),
(500,'0',r"Aux","aux"),
(500,'0',r"Ij","IJ"),
(500,'0',r'Premetrostation ',''),
(500,'0',r' (afgeschaft)',''),
(500,'1',r'(?ui)\bKol\.*Veteranen','Koloniale Veteranen - Vétérans Coloniaux'),
(500,'1',r"(?ui)\bMarg\.*v\.*Oostenrijk","Margaretha van Oostenrijk - Marguerite d'Autriche"),
(500,'0',r'Sas Van Gent (Nl)','Sas van Gent'),
(500,'0',r'(?u)^Mo\s','Moelingen '),
(500,'0',r'(?ui)\bkerk','Kerk'),
(500,'0',r'(?ui)\bPl(\.|\b)','Plaats'), # could be Plein, as well
(500,'0',r'(?u)str(\.|$)','straat'),
(500,'0',r'O. Dendermondsestwg','Ouden Dendermondsesteenweg'),
(500,'0',r'(?u)stwg\.*','steenweg'),
(500,'0',r'(?u)(St(wg|w|g)\.*)','Steenweg'),
(500,'0',r'(?u)\bperron\b','Perron'),
(500,'0',r'Prov.','Provinciaal'),
(500,'0',r'Recreatiecentr.','Recreatiecentrum'),
(500,'0',r'(?u)\Bbn\b','baan'),
(500,'0',r'(?u)centr\.','centrum'),
(500,'0',r'(?ui)\bDom\.','Domein'),
(500,'0',r'(?ui)Krpt\.*','Kruispunt'),
#(500,'0',r'Keiheuvel Gt Kamping','Keiheuvel Gt Camping'),
(500,'0',r'(?u)\bBurg\.\s*','Burgemeester '),
(500,'0',r'Bos En Lommer','Bos en Lommer'),
(500,'1',r'Roesbrugge Dr. Gheysen','Roesbrugge Dokter Joseph Gheysen'),
(500,'0',r'(?u)\bDr\.','Dokter '),
(500,'0',r'(?ui)Vti','VTI'),
(500,'0',r'(?ui)\bVko','VKO'),
(500,'1',r'Turnhout Sint-Pieterinstituut','Turnhout Sint-Pietersinstituut'),
(500,'0',r'(?u)\s*Inst\.\s',' Instituut '),
(500,'0',r'(?u)Electr\.','Electrische'),
(500,'0',r'Ecole','École'),
(500,'0',r"(?u)\bEtat\b","État"),
(500,'0',r'Kon.Atheneum','Koninklijk Atheneum'),
(500,'0',r'(?u)^(.*)A(\.*[Zz]\.*?)\b(.*)$',r'\1Algemeen Ziekenhuis\3'),
(500,'0',r'(?ui)Ocmw','OCMW'),
#(r'(?ui)\bO\.C\.M\.W\.*','O.C.M.W.'),
(400,'0',r'OPZ','Openbaar Psychiatrisch Ziekenhuis'),
(500,'0',r'(?ui)(P\.?Z\.?)','Psychiatrisch Ziekenhuis'),
(500,'0',r'(?ui)Ger\.*\s*Centrum','Geriatrisch Centrum'),
(500,'0',r'(?ui)\bF\.*\s*Adriaenssens','Frans Adriaenssens'),
(500,'0',r"(?ui)\bW\.*\s*Alexander","Willem Alexander"),
(500,'0',r"(?ui)\bChem\.*\s*d\s*Angoussart","Chemin d Angoussart"),
(500,'0',r"(?ui)\bE\.*\s*Beauduin","Emile Beauduin"),
(500,'0',r'De Becker Remypl.','de Becker Remyplein'),
(500,'0',r"(?ui)\bP\.*\s*Benoit","Peter Benoit"),
(500,'0',r"(?ui)\bBesch\.*Werkplaats","Beschermde Werkplaats"),
(500,'0',r"(?ui)\bA\.*Blieck","Albert Blieck"),
(500,'0',r'K. Boom','Karel Boom'),
(500,'0',r'(?ui)L\.*p\.*\sBoon','Louis Paul Boon'),
(500,'0',r'Hospitaal J. Bracops','Joseph Bracops'),
(500,'0',r"(?ui)\bBrouw\.*\s*Lorier","Brouwerij Lorier"),
(500,'0',r'Brouw.','Brouwerij'),
(500,'0',r"(?ui)\bE\.*Carels","Emile Carels"),
(500,'0',r"(?ui)\bTh\.*\s*V\.*\s*Cauwenberghs","Theofiel Van Cauwenberghs"),
(500,'0',r'E. Claes','Ernest Claes'),
(500,'0',r'(?ui)\bP\.\sCop','Pastoor Cop'),
(500,'0',r'(?ui)\bH\.*\s*Conscience','Hendrik Conscience'),
(500,'0',r'Pr. Daens','Priester Daens'),
(500,'0',r"(?ui)\bGer\.*\s*Centrum\s*P\.*\s*Damiaan","Geriatrisch Centrum Pater Damiaan"),
(500,'0',r'(?ui)\bP\.\s?Damiaan','Pater Damiaan'),
(500,'0',r"(?ui)\bJ\.*\s*De\s*Block","Jozef De Block"),
(500,'0',r"(?ui)\bF\.*\s*De\s*Laet","Frans De Laet"),
(500,'0',r'A.Demanez','Albert Demanez'),
(500,'0',r"(?ui)\bJ\.*\s*De\s*Doncker","Jan De Doncker"),
(500,'0',r'(?ui)\bJ\.?\s*Dooghe','Julius Dooghe'),
(500,'0',r'Jette De Greef','Guillaume De Greef'),
(500,'0',r'(?ui)\bG\.*\s*De Kleermaekers','Guillaume De Kleermaekers'),
(500,'0',r'(?ui)\bJ\.*\s*B\.*\s*Delcorte','Jules Delcorde'),
(500,'0',r"(?ui)\bF\.*\s*Van\s*Dun","Frans van Dun"),
(400,'0',r"(?ui)\bJ\.*P\.*\s*David","Jan Baptist David"),
(500,'0',r"(?ui)\bJ\.*\s*David","Jan David"),
(500,'0',r'C.Everaart',' Cornelis Everaart'),
(500,'1',r'Meerle Groot Eysel','Meerle Groot Eyssel'),
(500,'0',r"(?ui)\bJ\.*\s*B\.*\s*Francis","Jan Baptist Francis"),
(500,'0',r"(?ui)\bM\.*\s*Gandhi","Mahatma Gandhi"),
(500,'0',r'(?ui)\bGebr\.','Gebroeders'),
(500,'0',r"(?ui)\bW\.*\s*Geets","Willem Geets"),
(500,'0',r'(?ui)\bG\.?\s*Gezelle','Guido Gezelle'),
(500,'0',r"(?ui)\bN\.*Gombert","Nikolaas Gombert"),
(500,'0',r"(?ui)\bG\.*\s*Le\s*Grelle","Gerard Le Grelle"),
(500,'0',r'Ned. Grens','Nederlandse Grens'),
(500,'0',r'(?ui)\bLod\.*\s*Guns','Louis Guns'),
(500,'0',r'(?ui)P\.*\s*De\s*Halleux','Paul De Halleux'),
(500,'0',r'(?ui)P\.*\s*Hens','Pater Hens'),
(500,'0',r"(?ui)\bW\.*\s*Herreyns","Willem Herreyns"),
(500,'0',r"La Houppe/d'Hoppe","La Houppe/D'Hoppe"),
(500,'0',r"(?ui)D'Ieteren","D'Ieteren"),
(500,'0',r'(?ui)\bH\.?\s*Familie','Heilige Familie'),
(500,'0',r'(?ui)\bH\.*\s*Hart','Heilig Hart'),
(500,'0',r'O.L.V.-ten-Steen','Onze-Lieve-Vrouw-ten-Steen'),
(500,'0',r'O.L.V.Middelareskerk','Onze-Lieve-Vrouw-Middelareskerk'),
(500,'0',r'(?ui)M\.\s*Middelares','Maria Middelares'),
(500,'0',r"(?ui)\bM\.*\s*Middelares\s*-\s*Maalte","Maria Middelares - Maalte"),
(500,'0',r'Brugge O.L.V. Kerk','Brugge Onze-Lieve-Vrouwekerk'),
(500,'0',r'(?ui)O\.*L\.*V\.*\s*College','Onze-Lieve-Vrouwcollege'),
(500,'0',r'(?ui)O\.*L\.*V\.*\s*Dreef','Onze-Lieve-Vrouwdreef'),
(500,'0',r'(?ui)O\.*L\.*V\.*\s*Plein','Onze-Lieve-Vrouwplein'),
(500,'0',r'(?ui)O\.*L\.*V\.*\s*Straat','Onze-Lieve-Vrouwstraat'),
(500,'0',r'(?ui)Onze\sLieve\sVrouw\sInstituut','Onze-Lieve-Vrouwinstituut'),
(500,'0',r'(?ui)O\.*L\.*V\.*-*Ziekenhuis','Onze-Lieve-Vrouwziekenhuis'),
(500,'0',r'(?ui)O\.*L\.*V\.*(\s|-)*K(?P<kerkkapel>erk|apel)',r'Onze-Lieve-Vrouwk\2'),
(500,'0',r'(?ui)(\bO(nze)?\.*\s*-*L(ieve)?\.*\s*-*V(\.|rouw)*)-*','Onze-Lieve-Vrouw-'),
(500,'0',r'(?ui)St\.*-Siméon','Saint-Siméon'),
(500,'0',r'(?u)St\.*-\b','Sint-'),
(500,'0',r'(?ui)\bK(on)*\.?\s*Astrid','Koningin Astrid'),
(500,'0',r'(?ui)\bK(on)*\.?\s*Elisabeth','Koningin Elisabeth'),
(500,'0',r'(?ui)\bJ\.?\s*Charlotte','Josephine Charlotte'),
(500,'0',r'(?ui)Prinses\s*J\.*\s*Charlotte','Prinses Josephine Charlotte'),
(500,'0',r'(?ui)Jachtl\.','Jachtlaan'),
(500,'0',r'Jambede Bois','Jambe de Bois'),
(500,'0',r"J. Jennes","Jozef Jennes"),
(500,'0',r' Jh ',' Jeugdherberg '),
(500,'0',r"(?ui)\bV\.*\s*Kegels","Victor Kegels"),
(500,'0',r"(?ui)\bG\.*De\s*Kleermaekers","Guillaume De Kleermaekers"),
(500,'0',r"(?ui)\bG\.*\s*Lambert","Guillaume Lambert"),
(500,'0',r'E. Lauder','Estée Lauder'),
(500,'0',r'Lokeren T Lavertje',"Lokeren 't Lavertje"),
(500,'0',r'(?ui)K\.*\s*De\s*Lens','Karel Lodewijk de Lens'),
(500,'0',r'(?ui)\bG\.*\s*Le\s*Grelle','Gerard Le Grelle'),
(500,'0',r'O.Leuvense Baan','Oude Leuvense Baan'),
(500,'0',r'(?ui)B.\s*Leysen','Bert Leysen'),
(500,'0',r'Marie Joseplein','Marie-Joséplein'),
(500,'0',r"(?ui)\bJ\.*de\s*Meeus","Graaf Joseph De Meeus"),
(500,'0',r"Mesanges","Mésanges"),
(500,'0',r"(?ui)\bC\.*Meunier","Constantin Meunier"),
(500,'0',r"(?ui)\bL\.*\s*Mortelmans","Lodewijk Mortelmans"),
(500,'0',r"(?ui)\bJ\.*\s*Moretus","Jules Moretus"),
(500,'0',r"(?ui)\bE\.*\s*Moyaerts","Emile Moyaerts"),
(500,'0',r'(?ui)H\.*Moeremans','Henri Moeremans'),
(500,'0',r'(?ui)I\.*Meyskens','Isidoor Meyskens'),
(500,'0',r"(?ui)\bL\.*Nantier","Leopold Nantier"),
(500,'0',r"(?ui)\bGebr\.*\s*Naudts","Gebroeders Naudts"),
(500,'0',r"(?ui)\bP\.*Nollekens","Pieter Nollekens"),
(500,'0',r'(?ui)\bnr\.*\s(?P<number>\d+)',r' nummer \1 '),
(500,'0',r'(?ui)\bnr\.*\s*(?P<naar>\w+)',r' naar \1 '),
(500,'0',r"(?ui)\bMarg\.*v\.*Oostenrijk","Margaretha van Oostenrijk - Marguerite d'Autriche"),
(500,'0',r'Opgeeistenlaan','Opgeëistenlaan'),
(500,'0',r'(?ui)\bJ\.*\s*Palfijn','Jan Palfijn'),
(500,'1',r'Dessel Perestraat','Dessel Peresstraat'),
(500,'1',r'Ukkel Pijnbomenstraat','Ukkel Pijnbomenweg'),
(500,'0',r"(?ui)\bLt\.*\s*Philippart","Luitenant Philippart"),
(500,'0',r"(?ui)\bJ\.*\s*Posenaer","Jozef Posenaer"),
(500,'0',r"(?ui)\bAug\.*Plovie","August Plovie"),
(500,'0',r'(?u)\bReg\.','Regiment'),
(500,'0',r'(?ui)\bJ\.*\s*Reusens','Jozef Reusens'),
(500,'0',r"(?ui)\bT\.*\s*Reyn","Theofiel Reyn"),
(500,'0',r'(?ui)\bRingl\.','Ringlaan'),
(500,'0',r'K. Roeland','Klokke Roeland'),
(500,'0',r"(?ui)\bA\.*\s*Rodenbach","Albrecht Rodenbach"),
(500,'0',r'(?ui)\bF\.*\s*Roosevelt','Franklin Roosevelt'),
(500,'0',r"(?ui)\bL\.*Ruelens","Leopold Ruelens"),
(500,'0',r'(?ui)J\.*\s*.*?M\.*\s*Sabbe','Julius en Maurits Sabbe'),
(500,'0',r'(?ui)\bM\.?\s*Sabbe','Maurits Sabbe'),
(500,'0',r'(?ui)\bH\.*\s*Schoofs','Hendrik Schoofs'),
(500,'0',r'(?ui)\bR\.*\s*Schuman',' Robert Schuman'),
(500,'0',r'(?ui)Serg\.*\s*','Sergeant '),
(500,'0',r'Sint-Pieters Station','Sint-Pietersstation'),
(500,'0',r"(?ui)\bE\.*Sohie","Edgard Sohie"),
(500,'0',r'(?ui)\bE\.*\s*Soudan','Eugène Soudan'),
(500,'0',r"(?ui)\bP\.*\s*H\.*\s*Spaak","Paul Henri Spaak"),
(500,'0',r"(?ui)\bH\.*\s*Stassen","Henri Stassen"),
(500,'0',r"(?ui)\bJ\.*Stas","Jan Stas"),
(500,'0',r'T Steppeke',"'t Steppeke"),
(500,'0',r"(?ui)\bE\.*\s*Steurs","Eduard Steurs"),
(500,'0',r"(?ui)\bG\.*\s*Stijnen","Gerardus Stijnen"),
(500,'0',r"(?ui)\bJ\.*\s*Stobbaerts","Jan Stobbaerts"),
(500,'0',r'Stijn Streuvelstraat','Stijn Streuvelsstraat'),
(500,'0',r'(?u)S\.*\s*Streuvels','Stijn Streuvels'),
(500,'0',r'(?ui)E\.*\s*Thieffry','Edmond Thieffry'),
(500,'0',r'(?ui)Transf\.','Transformatorstation'),
(500,'0',r'T. Tuts','Theo Tuts'),
(500,'0',r'C.V.D. Bussche',' Camiel Van den Bussche'),
(500,'0',r'(?ui)E\.*Vander\s*Steenenstr\.*','Emile Vandersteenen'),
(500,'0',r'(?ui)J\.*Vanderstraetenstr\.*','Jan Vanderstraeten'),
(500,'0',r'(?ui)D\.*\s*Vander\s*Vaeren','Désiré Vander Vaeren'),
(500,'0',r'E. Van Der Velde','Emiel Van Der Velde'),
(500,'0',r'Vandewielelaan','Gomar Vandewielelaan'),
(500,'0',r'(?ui)\bK\.?\s*V\.?\s*Mander','Karel Van Mander'),
(500,'0',r'(?ui)\bJ\.*\s*Van\s*Rijswijck','Jan Van Rijswijck'),
(500,'0',r'(?ui)A\.*\s*Vesalius','Andreas Vesalius'),
(500,'0',r'(?ui)\bR\.*\s*Veremanss*','Renaat Veremanss'),
(500,'0',r"(?ui)\bF\.*\s*Verbiest","Ferdinand Verbiest"),
(500,'0',r'(?ui)Fr\.*\s*De\s*Vriendt','Frans De Vriendt'),
(500,'0',r'Eglise Saint-Walburge','Église Sainte-Walburge'),
(500,'0',r'Wilsele Wijgm.stwg.','Wilsele Wijgmaalsesteenweg'),
#(r'(?ui)\bWijgm\.','Wijgmaal'),
(500,'0',r'J. De Wilde','Jean De Wilde'),
(500,'0',r"(?ui)\bK\.*\s*De\s*Wint","Karel De Wint"),
(500,'0',r'(?ui)\bW\.*\s*Wood','William Wood'),
(500,'0',r'Industriezone Bl Toren','Industriezone Blauwe Toren'),
(500,'0',r"(?ui)\bA\.*Blieck","Albert Blieck"),
(500,'0',r"(?ui)\bTh\.*Van\s*Loo","Thomas Van Loo"),
(500,'0',r"(?ui)\bAug\.*Plovie","August Plovie"),
(500,'0',r"(?ui)\bHotel\s*De\s*Ville","Hôtel De Ville"),
(500,'0',r"(?ui)\bVti\s","VTI"),
#(r"(?ui)\bPark\s*De\s*Rode\s*Poort","Park de Rode Poort"),
#r"(?ui)\bBaron\s*De\s*Serret","Baron de Serret"),
# (r"(?ui)\bCoq\s*Anglais","anglais"),
(500,'0',r"(?ui)\bA\.*\s*Rodenbach","Albrecht Rodenbach"),
(500,'0',r"Fort Van Beieren","Fort van Beieren"),
(500,'0',r"(?ui)\bBurg\.*\s*Capelle","Burgemeester Capelle"),
(500,'0',r"(?ui)\bG\.*\s*Gezelle","Guido Gezelle"),
(500,'0',r"(?ui)\bD'Alcantara","d'Alcantara"),
(500,'0',r"(?ui)\b/\s*Vloesberg\s*La\s*Houppe/D'Hoppe","/ Vloesberg La Houppe/D'Hoppe"),
(500,'0',r"(?ui)\bPC\s*St-Jan-Baptist","PC St-Jan-Baptist"),
(500,'0',r"(?ui)\bT'Jonge","'t Jonge"),
(500,'0',r"(?ui)\bInst\.*\s*Scheppers","Instituut Scheppers"),
(500,'0',r"(?ui)\bL\.*P\.*\s*Boon","L.P. Boon"),
(500,'0',r"(?ui)\bCC\b","Cultureel Centrum"),
(500,'0',r"(?ui)\bKon\.*\s*Tech\.*\s*Atheneum\b","Koninklijk Technisch Atheneum"),
(500,'0',r"(?ui)\bOcmw\s*/\s*Koevliet","OCMW / Koevliet"),
(500,'0',r"(?ui)\bBurg\.*Van\s*Ackerwijk","Burgemeester Van Ackerwijk"),
(500,'0',r"(?ui)\bC\.*\s*Verhavert","Cypriaan Verhavert"),
(500,'0',r"(?ui)\bSt-Kornelis","Sint-Kornelis"),
(500,'0',r"(?ui)\bV\.*\s*Kegels","Victor Kegels"),
(500,'0',r"(?ui)\bF\.*\s*De\s*Laet","Frans De Laet"),
(500,'0',r"(?ui)\bd'\s*Hand","d'Hand"),
(500,'0',r"(?ui)\bJ\.*\s*De\s*Doncker","Jan De Doncker"),
(500,'0',r"(?ui)\bBesch\.*Werkplaats","Beschermde Werkplaats"),
(500,'0',r"(?ui)\bE\.*\s*Vandervelde","Emile Vandervelde"),
(500,'0',r"(?ui)\bJ\.*\s*De\s*Block","Jozef De Block"),
(500,'0',r"(?ui)\bH\.*\s*Vos","Herman Vos"),
(500,'0',r"(?ui)\bE\.*\s*Vandervelde","Emile Vandervelde"),
(500,'0',r"(?ui)\bJ\.*\s*Stobbaerts","Jan Stobbaerts"),
(500,'0',r"(?ui)\bJ\.*\s*David","Jan David"),
(500,'0',r"(?ui)\b\s*Dorp"," Dorp"),
(500,'0',r"(?ui)\bKon\.*\s*Astrid","Kon. Astrid"),
(500,'0',r"(?ui)\bJ\.*\s*B\.*\s*Francis","Jan Baptist Francis"),
(500,'0',r"(?ui)\bKon\.*\s*Elisabeth","Koningin Elisabeth"),
(500,'0',r"(?ui)\bLt\.*\s*Philippart","Luitenant Philippart"),
(500,'0',r"(?ui)\bO\.*\s*Van\s*Kesbeeck","Oscar Van Kesbeeck"),
(500,'0',r"(?ui)\bR\.*\s*Verbeeck","René Verbeeck"),
(500,'0',r"(?ui)\bE\.*Sohie","Edgard Sohie"),
(500,'0',r"(?ui)\bW\.*\s*Geets","Willem Geets"),
(500,'0',r"(?ui)\bM\.*\s*Gandhi","Mahatma Gandhi"),
(500,'0',r"(?ui)\bJ\.*de\s*Meeus","Graaf Joseph De Meeus"),
(500,'0',r"Stwg.Tervuren /Stwg.Vilvoorde","Steenweg Tervuren/Steenweg Vilvoorde"),
(500,'0',r"(?ui)\bJ\.*F\.*\s*Willems","Jan Frans Willems"),
(500,'0',r"(?ui)\bStg\.*op\s*Brussel/Albert","Steenweg op Brussel/Albert"),
(500,'0',r"(?ui)\bTh\.*\s*V\.*\s*Cauwenberghs","Theofiel Van Cauwenberghs"),
(500,'0',r"(?ui)\bJ\.*\s*Jennes","Jozef Jennes"),
(500,'0',r"(?ui)\bL\.*Nantier","Leopold Nantier"),
(500,'0',r"(?ui)\bJ\.*\s*Van\s*Geel","Jan Frans van Geel"),
(500,'0',r"(?ui)\bJachtl\.*/Duisburgsestg","Jachtlaan/Duisburgsesteenweg"),
(500,'0',r"(?ui)\bCl\.*\s*Vanophem","Clement Vanophem"),
(500,'0',r"(?ui)\bP\.*\s*Benoit","Peter Benoit"),
(500,'0',r"(?ui)\bE\.*Carels","Emile Carels"),
(500,'0',r"(?ui)\bP\.*\s*Benoit","Peter Benoit"),
(500,'0',r"(?ui)\bChem\.*\s*d\s*Angoussart","Chemin d Angoussart"),
(500,'0',r"(?ui)\bVan\s*Den\s*Nest","Van den Nest"),
(500,'0',r"(?ui)\bRue\s*Th\.*Piat","Rue Théophile Piat"),
(500,'0',r"(?ui)\bAv\.*\s","Avenue "),
(500,'0',r"(?ui)\bSt-Bernardus","Sint-Bernardus"),
(500,'0',r"(?ui)\bP\.*Nollekens","Pieter Nollekens"),
(500,'0',r"(?ui)\bL\.*Wouters","Louis Wouters"),
(500,'0',r"(?ui)\bPnstr\.*/Leuvensebn","Pleinstraat/Leuvensebaan"),
#(r"(?ui)\bIndustr\.*\s*Interleuven","Industr. Interleuven"),
(500,'0',r"(?ui)\bL\.*Ruelens","Leopold Ruelens"),
(500,'0',r"(?ui)\bKerk\s*(St-Guibertus)","Kerk (Sint-Guibertus)"),
(500,'0',r"(?ui)\bC\.*\s*Vissenaeken","Cornelius Vissenaekens"),
#(r"(?ui)\bP\.*\s*Van\s*Lommel","P. van Lommel"),
(500,'0',r"(?ui)\bKne\s*Vleminckxstr\.*","Kleine Vleminckxstraat"),
(500,'0',r"(?ui)\bE\.*\s*Vandervelde","Emile Vandervelde"),
(500,'0',r"(?ui)\bE\.*\s*Van\s*Der\s*Velde","Emiel Van Der Velde"),
(500,'0',r"(?ui)\bJ\.*\s*Van\s*Geel","Jan Frans van Geel"),
(500,'0',r'(?ui)\bJ\.?\s*Van Hoof','Jef Van Hoof'),
(500,'0',r"(?ui)\bO\.*\s*Van\s*Kesbeeck","Oscar Van Kesbeeck"),
(500,'0',r"(?ui)\bTh\.*Van\s*Loo","Thomas Van Loo"),
(500,'0',r"(?ui)\bBurg\.*\s*Van\s*Nueten","Burgemeester Van Nueten"),
(500,'0',r"(?ui)\bCl\.*\s*Vanophem","Clement Vanophem"),
(500,'0',r"(?ui)\bL\.*\s*Van\s*Bercken","Lodewijk Van Bercken"),
(500,'0',r"(?ui)\bJ\.*\s*Van\s*Rijswijck","Jan Van Rijswijck"),
(500,'0',r"(?ui)\bK\.*\s*Van\s*De\s*Woestijne","Karel Van de Woestijne"),
(500,'0',r"(?ui)\bC\.*\s*Verhavert","Cypriaan Verhavert"),
(500,'0',r"(?ui)\bR\.*\s*Verbeeck","René Verbeeck"),
(500,'0',r"(?ui)\bKol\.*Veteranensquare","Koloniale Veteranen - Vétérans Coloniaux"),
(500,'0',r"(?ui)\bC\.*\s*Vissenaeken","Cornelius Vissenaekens"),
(500,'0',r"(?ui)\bPl\.*\s*Vondel","Plaats Vondel"),
(500,'0',r"(?ui)\bH\.*\s*Vos","Herman Vos"),
(500,'0',r"(?ui)J\.*\s*De\s*Wilde","Jean De Wilde"),
(500,'0',r"(?ui)\bJ\.*F\.*\s*Willems","Jan Frans Willems"),
(500,'0',r"(?ui)\bL\.*Wouters","Louis Wouters"),
#(r"(?ui)\bTC\s*Laakdal","TC Laakdal"),
#(r"(?ui)\bEXC\s*Vorst","EXC Vorst"),
(500,'0',r"(?ui)\bE\.*\s*Moyaerts","Emile Moyaerts"),
(500,'0',r"(?ui)\bE\.*\s*Beauduin","Emile Beauduin"),
(500,'0',r"(?ui)\bProvinciaal\s*Natuurcentrum","Bosbiologisch Centrum"),
(500,'0',r'Leuven Naamse Poort','Leuven Naamsepoort'),
(500,'0',r'Leuven Tiense Poort','Leuven Tiensepoort'),
(500,'0',r'Leuven Tervuurse Poort','Leuven Tervuursepoort'),
(999,'0',r'(?i)\s+',' '),
#(r'(?ui)\b',''),
#(r'',''),
]
def nameconversion(identifier,zone):
name=identifier
for order, final, short,long in commonabbreviations:
#print(zone + ' ' + name + ' ' + short + ' ' + long)
origname=name
if short[0:2]=='(?':
name=re.sub(short,long,name)
else:
name=name.replace(short,long)
if final and origname!=name: continue
#if 'rouw' in name: print (name)
#if 'Sabbe' in name: print(zone + ' ' + name)
if not(leaveCityNameAnywayRE.search(name)) and not(zone in ['80','68']) and not('Sas van Gent' in name):
#zone 80: Vorst bij Veerle, zone 68: Deurne bij Diest, zone 31: Sas van Gent
for city, replacement in citynamesToOmit:
if city in name:
name = name.replace(city+' ',replacement).strip()
continue
return name
def xmlsafe(name):
return name.replace('&','&').replace("'","'")
def urlsafe(name):
return xmlsafe(name).replace(' ','%20')
We start with an XML file which can be read by JOSM. The resulting file should not be uploaded directly to the server. Each and every stop needs to be vetted and double checked and dragged to a suitable position before uploading.
#!/bin/python
# -*- coding: utf-8 -*-
import postgresql, re, delijnosmlib
db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')
DeLijnStops = db.prepare("""SELECT geomdl,
stopidentifier,
description,
street,
municipality,
route_ref,
OSM_zone,
OSM_name,
OSM_node_ID,
round(ST_X(geomdl)::numeric, 6) AS lon,
round(ST_Y(geomdl)::numeric, 6) AS lat
FROM stops
WHERE description !~* 'dummy|afgeschaft'
ORDER BY geomdl;""")
def main():
with open('C:/Data/De Lijn/Haltes De Lijndata.osm', mode='w', encoding='utf-8') as osmfile:
osmfile.write("<?xml version='1.0' encoding='UTF-8'?>\n")
osmfile.write('<osm version="0.6" upload="no" generator="Python script"> \n')
stopslist = DeLijnStops()
identifiers = {}
for row in stopslist:
description = str(row['description'])
osmname = str(row['osm_name'])
name=delijnosmlib.nameconversion((description),str(row['osm_zone']))
if not(name in identifiers) and osmname != name and (' ' in osmname) and (' ' in name) and (str(row['osm_zone']) not in ('01','20')) and not ('erron' in name) and not ('oeren' in name) and not ('Dokter' in name) and not (' nummer' in name) and not (' naar ' in name) and not ('École' in name) and not ('steenweg' in name) and not ('Goor' in name) and not ('str.' in name) and not ('Ziekenhuis' in name) and not ('Koninklijk' in name) and not ('Ernest Claes' in name) and not ('Heilig' in name) and not ('Lieve-Vrouw' in name) and not ('Siméon' in name) and not ('Streuvels' in name) and not ('Moelingen' in name) and not ("'t" in name) and not ("'s" in name):
identifiers[name]=''
#print (description + ' -> ' + osmname + ' -> ' + name)
print(r''' (r"(?ui)\b''' + description.split(' ',1)[1].replace(' ','\s*').replace('.','\.*').replace('straat','').replace('laan','').replace('plein','').replace('lei','') + '''","''' + osmname.split(' ',1)[1].replace("''","'").replace('straat','').replace('laan','').replace('plein','').replace('lei','') + '''"),''')
name=delijnosmlib.xmlsafe(name)
ref=str(row['stopidentifier'])
street = city = city2 = route_ref = ''
if row['street']: street=delijnosmlib.xmlsafe(str(row['street']))
if row['municipality']: city=delijnosmlib.xmlsafe(str(row['municipality']))
lat=str(row['lat'])
lon=str(row['lon'])
if row['route_ref']: route_ref=delijnosmlib.xmlsafe(row['route_ref'])
osmfile.write(" <node id='-" + ref + "' visible='true' lat='" + lat + "' lon='" + lon + "' timestamp='2011-03-09T00:36:24Z' >" + '\n')
osmfile.write(' <tag k="highway" v="bus_stop" />' + "\n")
if not '"' in name:
osmfile.write(' <tag k="name" v="' + name + '" />' + "\n")
else:
osmfile.write(" <tag k='name' v='" + name + "' />" + '\n')
osmfile.write(' <tag k="ref" v="' + ref +'" />' + "\n")
if street or city:
osmfile.write(' <tag k="created_by" v="' + street + ' ' + city + '" />' + "\n")
#print( row)
if row['osm_zone']:
osmfile.write(' <tag k="zone" v="' + row['osm_zone'] + '" />' + "\n")
if not(row['osm_node_id']):
osmfile.write(' <tag k="odbl" v="' + 'new' + '" />' + "\n")
osmfile.write(' <tag k="operator" v="De Lijn" />' + "\n")
if route_ref:
osmfile.write(' <tag k="route_ref" v="' + route_ref + '" />' + "\n")
osmfile.write(' </node>' + "\n")
osmfile.write('</osm>')
if __name__ == "__main__":
main()
Feedback to De Lijn about stops which are more than a certain distance from what is in their DB
To create the report which can be found on this page:
#!/bin/python
# -*- coding: utf-8 -*-
import postgresql, datetime, delijnosmlib, re
from urllib.parse import urlencode
feestbusRE=re.compile(r'''(F\d+)''')
feestbusREsub=re.compile(r'''(?:;)?F\d+(;)?''')
ignorevandeRE=re.compile(r'''(?u)\s((?:[Vv]an)*\s*(?:[Oo]p)*\s*(?:[Dd]e(r|n)*)*)\s''')
db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')
locationdiffers = db.prepare("""SELECT round(ST_Distance_Sphere(geomdl, geomosm)),
stopidentifier,
description,
osm_name,
street,
osm_street,
osm_city,
route_ref,
osm_route_ref,
osm_zone,
osm_node_id,
osm_last_modified_by_user,
now() - osm_last_modified_timestamp AS dayswithoutchanges,
round(ST_X(ST_Centroid(ST_ShortestLine(geomdl, geomosm)))::numeric, 3) AS x,
round(ST_Y(ST_Centroid(ST_ShortestLine(geomdl, geomosm)))::numeric, 3) AS y,
round(ST_X(geomosm)::numeric, 3) AS lon,
round(ST_Y(geomosm)::numeric, 3) AS lat
FROM stops
WHERE ST_Distance_Sphere(geomdl, geomosm) > 35.0
ORDER BY ST_Distance_Sphere(geomdl, geomosm) DESC;""")
otherdifferences = db.prepare("""SELECT stopidentifier,
description,
osm_name,
street,
osm_street,
osm_city,
route_ref,
osm_route_ref,
osm_zone,
osm_node_id,
osm_last_modified_by_user,
now() - osm_last_modified_timestamp AS dayswithoutchanges,
round(ST_X(geomosm)::numeric, 3) AS lon,
round(ST_Y(geomosm)::numeric, 3) AS lat
FROM stops
WHERE description != osm_name OR route_ref != osm_route_ref -- OR description LIKE '%.%'
ORDER BY osm_zone DESC, stopidentifier;""")
def main():
targetFileName = 'C:/Data/De Lijn/WikiReport.txt'
with open(targetFileName, mode='w', encoding='utf-8') as wikifile:
wikifile.write('''
Instructions on how this list was created can be found here:
http://wiki.openstreetmap.org/w/index.php?title=WikiProject_Belgium/De_Lijndata#Feedback_to_De_Lijn_about_stops_which_are_more_than_a_certain_distance_from_what_is_in_their_DB
==Stops which differ in location==
{| class="wikitable" align="left" style="margin:0 0 2em 2em;"
|-
|+De Lijn Haltes
|-
!Afstand
!Nummer
!Haltenaam
!osm_name
!Straat
!osm_straat
!osm_stad
!Bediende lijnen
!osm_route_ref
!osm_zone
''')
stopslist = locationdiffers()
i=1
while i< len(stopslist):
j=i+1
#print(len(stopslist[j:]))
while j< len(stopslist[j:])+i:
#print (stopslist[i]['osm_name'], stopslist[j], stopslist[j]['osm_name'])
if stopslist[i]['description'] and stopslist[j]['description'] and stopslist[i]['description'][:16]==stopslist[j]['description'][:16]:
print('flipping for ' + stopslist[j]['osm_name'] + ' ' + str(i) + ' ' + str(j))
stopslist.insert(i+1, stopslist[j])
del stopslist[j+1]
i += 1
break
j += 1
i += 1
for row in stopslist:
wikifile.write('|-\r\n')
josmRClink = '' ;tags2add = '&addtags='
print (row['dayswithoutchanges'])
if not(row['osm_last_modified_by_user']=='Polyglot') or row['dayswithoutchanges'] > datetime.timedelta(30):
if row['route_ref'] != row['osm_route_ref']:
tags2add += 'route_ref=' + str(row['route_ref']) + '|'
if row['description'] != row['osm_name']:
#print ('test' + str(row['description']))
#for c in str(row['description']):
# print(repr(c), ord(c))
tags2add += "name=" + str(row['description']).replace(' ','%20') + '|'
if row['osm_street']: tags2add += "addr:street=|"
if row['osm_city']: tags2add += "addr:city=|"
josmRClink = '[http://localhost:8111/load_and_zoom?left=' + str(float(row['lon']) - 0.01) + '&right=' + str(float(row['lon']) + 0.01) + '&top=' + str(float(row['lat']) + 0.005) + '&bottom=' + str(float(row['lat']) - 0.005) + '&select=node' + str(row['osm_node_id']) + tags2add + ' ' + str(row['stopidentifier']) + ']'
else:
josmRClink = str(row['stopidentifier'])
wikifile.write('|align="right" | [http://tools.geofabrik.de/mc/?mt0=mapnik&mt1=googlemap&lon=' + str(row['x']) + '&lat=' + str(row['y']) + '&zoom=18 ' + str(int(row['round'])) + ']m||align="right" |' + josmRClink + '||align="right" | ' + str(row['description']) + '||align="right" | ' + str(row['osm_name']) + '||align="right" | ' + str(row['street']) + '||align="right" | ' + str(row['osm_street']) + '||align="right" | ' + str(row['osm_city']) + '||align="right" | ' + str(row['route_ref']) + '||align="right" | ' + str(row['osm_route_ref']) + '||align="right" | ' + str(row['osm_zone']) + '\r\n')
stopslist = otherdifferences()
print ('query ready')
# i=1
# while i< len(stopslist):
# j=i+1
# #print(len(stopslist[j:]))
# while j< len(stopslist[j:])+i:
# #print (stopslist[i]['osm_name'], stopslist[j], stopslist[j]['osm_name'])
# if stopslist[i]['description'] and stopslist[j]['description'] and stopslist[i]['description'][:16]==stopslist[j]['description'][:16]:
# #print('flipping for ' + stopslist[j]['osm_name'] + ' ' + str(i) + ' ' + str(j))
# stopslist.insert(i+1, stopslist[j])
# del stopslist[j+1]
# i += 1
# break
# j += 1
# i += 1
print ('rows sorted')
wikifile.write('''|-\r\n|}\r\n{| class="wikitable" align="left" style="margin:0 0 2em 2em;"
|-
|+De Lijn Haltes
|-
!Nummer
!Haltenaam<br/>osm_name
!osm_zone
!Bediende lijnen<br/>!osm_route_ref
''')
identifiers = {}
for row in stopslist:
josmRClink = '' ;tags2add = '&addtags='; route_ref=''
#print (row['dayswithoutchanges'])
description = delijnosmlib.nameconversion(str(row['description']),'')
name= str(row['osm_name']).replace("''","'")
#if '.' in str(row['description']):
if not(name in identifiers) and str(row['description']) != name and (' ' in str(row['description'])) and '.' in str(row['description']) and not('.' in name) and not ('erron' in name) and not ('oeren' in name) and not ('Dokter' in name) and not (' nummer' in name) and not (' naar ' in name) and not ('École' in name) and not ('steenweg' in name) and not ('Goor' in name) and not ('str.' in name) and not ('Ziekenhuis' in name) and not ('Koninklijk' in name) and not ('Ernest Claes' in name) and not ('Heilig' in name) and not ('Lieve-Vrouw' in name) and not ('Siméon' in name) and not ('Streuvels' in name) and not ('Moelingen' in name) and not ("'t" in name) and not ("'s" in name):
identifiers[name]=''
print(r''' (r"(?ui)\b''' + str(row['description']).split(' ',1)[1].replace(' ','\s*').replace('.','\.*').replace('straat','').replace('laan','').replace('plein','').replace('lei','') + '''","''' + name.replace("''","'").replace('straat','').replace('laan','').replace('plein','').replace('lei','') + '''"),''')
name_different = route_ref_different = False
if row['route_ref']:
route_ref=feestbusREsub.sub(' ',str(row['osm_route_ref']),10).strip()
#if 'erron 5' in name: print (name + ' ' + route_ref + ' ' + str(row['route_ref']))
if str(row['route_ref'])!= route_ref: route_ref_different = True
#if 'erron 5' in name: print (route_ref_different)
if ignorevandeRE.sub(' ',name,0)!=ignorevandeRE.sub(' ',description,0): name_different = True
if name_different: #or route_ref_different:
tags2add += 'route_ref=' + str(row['route_ref']) + '|'
tags2add += "name=" + delijnosmlib.urlsafe(description) + '|'
if row['osm_street']: tags2add += "addr:street=|"
if row['osm_city']: tags2add += "addr:city=|"
tags2add += "addr:country=|"
tags2add += "addr:postcode=|"
tags2add += "source=|"
josmRClink = '[http://localhost:8111/load_and_zoom?left=' + str(round(float(row['lon']) - 0.0025,3)) + '&right=' + str(round(float(row['lon']) + 0.0025,3)) + '&top=' + str(round(float(row['lat']) + 0.0025,3)) + '&bottom=' + str(round(float(row['lat']) - 0.0025,3)) + '&select=node' + str(row['osm_node_id']) + tags2add + ' ' + str(row['stopidentifier']) + ']'
wikifile.write('|-\r\n')
wikifile.write('|align="right" | ' + josmRClink + '||align="right" | ' + str(row['description']) + '<br/>' + name + '<br/>' + description + '||align="right" | ' + str(row['osm_zone']) + '||align="right" | ' + str(row['osm_route_ref']) + '<br/>' + route_ref + '<br/>' + str(row['route_ref']) + '\r\n')
wikifile.write('|-\r\n|}\r\n')
if __name__ == "__main__":
main()
The script was extended to create a report on stops for which the names or the route_ref differ between what was calculated and what is on Openstreetmap.
Work with the data
Creation of a route relation containing all the stops in the correct order
Adding stops is all very well, but they're only a building block of the routes those buses follow. Before it was very time consuming to create those routes. When all the stops and how they are related are in a database, it becomes possible to extract them in sequence.
#!/bin/python
# -*- coding: utf-8 -*-
import postgresql, random, re
from urllib.parse import urlencode
removePerronRE=re.compile(r"""(?xiu)
(?P<name>[\s*\S]+?)
(?P<perron>\s*perron\s*\d*)?
$
""") # case insensitive search removing Perron #
db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')
routeidentifiersQUERY = db.prepare(""" SELECT DISTINCT
rte.routeidentifier, rte.routedescription, rte.routepublicidentifier, rte.routeversion, rte.routeid
FROM public.routes rte
WHERE
rte.routepublicidentifier = $1
ORDER BY
rte.routeidentifier;""")
tripids = db.prepare(""" SELECT DISTINCT
trp.tripid,
rte.routeservicetype AS type,
rte.routeservicemode AS bustram,
rte.routedescription AS routedescription,
(SELECT
st.description
FROM
public.stops st
JOIN public.segments seg1 ON seg1.stopid = st.stopid AND seg1.tripid = trp.tripid
WHERE
seg1.segmentsequence = (SELECT MIN(seg2.segmentsequence) FROM public.segments seg2 WHERE seg2.tripid = trp.tripid)) AS fromstop,
(SELECT
st.description
FROM
public.stops st
JOIN public.segments seg1 ON seg1.stopid = st.stopid AND seg1.tripid = trp.tripid
WHERE
seg1.segmentsequence = (SELECT MAX(seg2.segmentsequence) FROM public.segments seg2 WHERE seg2.tripid = trp.tripid)) AS tostop
FROM public.trips trp
JOIN public.routes rte ON rte.routeid=trp.routeid
JOIN public.segments seg ON seg.tripid=trp.tripid
JOIN public.stops stp ON seg.stopid=stp.stopid
WHERE
rte.routeidentifier = $1;""")
nodeIDsofStops = db.prepare(""" SELECT DISTINCT
stp.osm_node_id,
stp.description,
stp.stopidentifier,
trp.tripstart,
seg.segmentsequence
FROM public.trips trp
JOIN public.routes rte ON rte.routeid=trp.routeid
JOIN public.segments seg ON seg.tripid=trp.tripid
JOIN public.stops stp ON seg.stopid=stp.stopid
AND stp.description !~* 'dummy|afgeschaft'
WHERE
trp.tripid = $1
ORDER BY
trp.tripstart ASC,
seg.segmentsequence ASC;""")
def main():
pub_ID=input('Enter line number(s) you want to create OSM route relations for: ')
if ',' in pub_ID:
for id in pub_ID.split(','):
print(id)
processRoute(id,'*')
else:
processRoute(pub_ID,'3214')
def processRoute(id,fn):
routeidentifiers = routeidentifiersQUERY(id)
if len(routeidentifiers)<2:
print(routeidentifiers)
print("Auto selecting: %s %s Version %s (%s)", routeidentifiers[0][2], routeidentifiers[0][1], routeidentifiers[0][3], routeidentifiers[0][4])
line=routeidentifiers[0][0]
else:
for i,route in enumerate(routeidentifiers):
print(i+1,route[0], route[1])
selected=input('Select a line: ')
line=routeidentifiers[int(selected)-1][0]
if fn == '*': fn = line
targetFileName = 'C:/Data/De Lijn/RoutesFor' + fn + '.osm'
distinctroutes = {}
print ("Calculating stop lists for: " + line)
with open(targetFileName, mode='w', encoding='utf-8') as osmroutesfile:
#print(tripids.string)
tripslist = tripids(line)
#print(tripslist)
stopnames = {}
for row in tripslist:
#print(row)
stops_as_string = ','
stopslist = nodeIDsofStops(row['tripid'])
for stop in stopslist:
#print(stop)
stopnames[stop[0]] = stop[1]
#print(stops_as_string)
#print(stop[0], stops_as_string.split(',')[-1], stop[1])
if stop[0]:
if stop[0] != stops_as_string.split(',')[-2]:
stops_as_string += stop[0] + ','
else:
stops_as_string += '"' + stop[1] + '",'
stops_as_string = stops_as_string[1:-1]
notfound=True
for sequence in distinctroutes.keys():
notfound=True
if len(stops_as_string)<len(sequence) and stops_as_string in sequence: notfound=False; break
if len(sequence)<len(stops_as_string) and sequence in stops_as_string:
del distinctroutes[sequence]
break
if notfound: distinctroutes[stops_as_string] = [row['fromstop'],row['tostop'],row['type'],row['bustram']]
#print(distinctrouteslist)
osmroutesfile.write("<?xml version='1.0' encoding='UTF-8'?>\r")
osmroutesfile.write("<osm version='0.6' upload='true' generator='Python'>\r")
i=1; routeslist = []
for stopssequence in distinctroutes:
fromstop,tostop,type,bustram = distinctroutes[stopssequence]
madeUpId = str(random.randint(100000, 900000))
routeslist.append(madeUpId)
osmroutesfile.write("<relation id='-" + madeUpId + "' timestamp='2013-02-13T03:23:07Z' visible='true' version='1'>\r")
print('\n' + str(i) + " " + id + " " + fromstop + " - " + tostop)
for osmstopID in stopssequence.split(','):
#print ('osmstopID: ' + osmstopID)
if osmstopID[0] == '"':
osmroutesfile.write(" <member type='node' ref='" + stopssequence.split(',')[0] + "' role='" + osmstopID + "'/>\r")
print(' ' + osmstopID + ' MISSING!!!!!!!!!!!!!')
else:
osmroutesfile.write(" <member type='node' ref='" + osmstopID + "' role='platform'/>\r")
print(' ' + stopnames[osmstopID])
osmroutesfile.write(''' <tag k="type" v="route" />\r''')
osmroutesfile.write(''' <tag k="odbl" v="tttttt" />\r''')
print (bustram)
if int(bustram)==1:
osmroutesfile.write(''' <tag k="route" v="tram" />\r''')
else:
osmroutesfile.write(''' <tag k="route" v="bus" />\r''')
#print(fromstop, tostop)
#print(re.search(removePerronRE,tostop).group(1))
#print(re.search(removePerronRE,fromstop).group(1))
osmroutesfile.write(''' <tag k="name" v="De Lijn ''' + id + ''' ''' + re.search(removePerronRE,fromstop).group('name') + ''' - ''' + re.search(removePerronRE,tostop).group('name') +'''" />\r''')
osmroutesfile.write(''' <tag k="ref" v="''' + id + '''" />\r''')
#osmroutesfile.write(''' <tag k="ref:De_Lijn" v="''' + line + '''" />\r''')
osmroutesfile.write(''' <tag k="from" v="''' + fromstop + '''" />\r''')
osmroutesfile.write(''' <tag k="to" v="''' + tostop + '''" />\r''')
osmroutesfile.write(''' <tag k="operator" v="De Lijn" />\r''')
servicetypes = ['regular','express','school','special','special','belbus']
servicetypesOSM=['', 'express','school','', '', 'on_demand']
#servicetype=servicetypes[int(type)]
if servicetypesOSM[int(type)]:
osmroutesfile.write(''' <tag k="bus" v="''' + servicetypesOSM[int(type)] + '''" />\r''')
osmroutesfile.write('''</relation>\r\r''')
i+=1
osmroutesfile.write("<relation id='-" + str(random.randint(100000, 900000)) + "' timestamp='2013-02-13T03:23:07Z' visible='true' version='1'>\r")
osmroutesfile.write(''' <tag k="type" v="route_master" />\r''')
try:
if int(bustram)==1:
osmroutesfile.write(''' <tag k="route_master" v="tram" />\r''')
else:
osmroutesfile.write(''' <tag k="route_master" v="bus" />\r''')
except NameError:
pass
osmroutesfile.write(''' <tag k="name" v="''' + row['routedescription'] +'''" />\r''')
osmroutesfile.write(''' <tag k="ref" v="''' + id + '''" />\r''')
osmroutesfile.write(''' <tag k="ref:De_Lijn" v="''' + line + '''" />\r''')
osmroutesfile.write(''' <tag k="operator" v="De Lijn" />\r''')
try:
if servicetypesOSM[int(type)]:
osmroutesfile.write(''' <tag k="bus" v="''' + servicetypesOSM[int(type)] + '''" />\r''')
except NameError:
pass
for routeId in routeslist:
osmroutesfile.write(" <member type='relation' ref='-" + routeId + "' role=''/>\r")
osmroutesfile.write('''</relation>\r\r''')
osmroutesfile.write("</osm>\r")
if __name__ == "__main__":
main()
Adding the ways nearest to the stops in above route relations automatically
Having correct sequences of stops is a tremendous help, but having all the ways next to those stops, is even better.
To run the following script, you need to add the scripting plugin to JOSM and install Jython.
#!/bin/jython
'''
FindWaysBelongingToRoutesStartingFromStops.jy
- Given a list of stops, find all ways belonging to the route
This code is released under the GNU General
Public License v2 or later.
The GPL v3 is accessible here:
http://www.gnu.org/licenses/gpl.html
The GPL v2 is accessible here:
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
It comes with no warranty whatsoever.
'''
from javax.swing import JOptionPane
from org.openstreetmap.josm import Main
import org.openstreetmap.josm.command as Command
import org.openstreetmap.josm.data.osm.Node as Node
import org.openstreetmap.josm.data.osm.Way as Way
import org.openstreetmap.josm.data.osm.Relation as Relation
import org.openstreetmap.josm.data.Bounds as Bounds
import org.openstreetmap.josm.data.osm.visitor.BoundingXYVisitor as BoundingXYVisitor
import org.openstreetmap.josm.data.osm.TagCollection as TagCollection
import org.openstreetmap.josm.data.osm.DataSet as DataSet
import org.openstreetmap.josm.data.osm.RelationMember as RelationMember
import org.openstreetmap.josm.gui.dialogs.relation.DownloadRelationMemberTask as DownloadRelationMemberTask
import org.openstreetmap.josm.actions.DownloadReferrersAction as DownloadReferrersAction
import re, time
import codecs
dummyRelation = Relation(); dummyWay = Way()
sideEffects = {
'addWayToRoute': None,
'createStopAreaRelations': None,
}
logVerbosity = 50
'''
10: only report problems that require attention
20: report on collection
30: report on network nodes
40: report on which routes are being checked
50: report everything
'''
def getMapView():
if Main.main and Main.main.map:
return Main.main.map.mapView
else:
return None
def findConnectingWay(way1,way2):
if way1.get('junction')=='roundabout':
endnodesway1=way1.getNodes()
else:
endnodesway1 = [way1.getNode(0),way1.getNode(len(way1.getNodes())-1)]
if way2.get('junction')=='roundabout':
endnodesway2=way2.getNodes()
else:
endnodesway2 = [way2.getNode(0),way2.getNode(len(way2.getNodes())-1)]
for endnode in endnodesway1:
#print dir(endnode)
parentways=endnode.getReferrers()
for parentway in parentways:
if parentway.getType()==dummyWay.getType():
if parentway.get('junction')=='roundabout':
endnodeInParentWays=parentway.getNodes()
else:
endnodeInParentWays = [parentway.getNode(0),parentway.getNode(len(parentway.getNodes())-1)]
for endnodeInParentWay in endnodeInParentWays:
if endnodeInParentWay in endnodesway2:
return parentway
return None
def checkPTroute(route, aDownloadWasNeeded):
if aDownloadWasNeeded:
return None, False, ''
print
waymemberslist = []
modified = False
#print dir(mv)
for member in route.getMembers():
"""Algorithm:
Is the node a member of a public_transport=stop_area?
Grab way from stop_area
Search near to the node for "highway -highway=bus_stop inview type:way -closed"
If one more than one highways are found: Is one of them member of another route=bus relation?
Also search for "public_transport=stop_position type:node"
If found: use the parent way
"""
if member.isNode():
#print dir(mv)
node = member.getNode()
print node.get('name')
found = False
for parentRelationOfNode in node.getReferrers():
if found: break
if parentRelationOfNode.getType() == dummyRelation.getType():
if parentRelationOfNode.get('type') and parentRelationOfNode.get('type') in ('public_transport'):
if parentRelationOfNode.get('public_transport') in ('stop_area','stop_position'):
for member in parentRelationOfNode.getMembers(): # now we are sure it's the correct kind of relation, drill down to find parent way of stop_position node
if found: break
if member.isNode():
stopPositionNodeCandidate=member.getNode()
if stopPositionNodeCandidate.get('public_transport') in ['stop_position']:
for parentWayCandidate in stopPositionNodeCandidate.getReferrers():
if parentWayCandidate.getType() == dummyWay.getType():
print 'connected through stop_area: '
print parentWayCandidate.getKeys()
waymemberslist.append(parentWayCandidate); found = True; break
if not(found):
# We couldn't determine the way by means of the stop_area relation
bboxCalculator = BoundingXYVisitor()
bboxCalculator.computeBoundingBox([node])
bboxCalculator.enlargeBoundingBox()
if bboxCalculator.getBounds():
mv.recalculateCenterScale(bboxCalculator)
#mv.zoomTo(node.getEastNorth())
ignorelist = [node]
stopPosition = Node()
for i in range(1,20):
candidates = mv.getAllNearest(mv.getPoint(node),ignorelist,Way().wayPredicate)
if candidates:
print len(candidates)
#print candidates
nodecandidates = mv.getAllNearest(mv.getPoint(node),[],Node().nodePredicate)
for candidate in nodecandidates:
# is there a stop_position node in the candidates?
if candidate.get('public_transport') in ['stop_position']:
stopPosition = candidate
ignorelist.append(candidate)
break
for candidate in candidates:
if candidate.get('highway') in ['primary', 'secondary', 'tertiary', 'unclassified', 'residential', 'service', 'living_street', 'trunk']:
#print candidate
#print candidate.getNode(0)
#print stopPosition
if not(member==route.getMember(0)) and candidate.getNode(0)==stopPosition:
continue # there is probably a better candidate which has this way as its end node, instead of as the starting node
else:
waymemberslist.append(candidate)
print 'using '
print candidate.getKeys()
found = True; break
else:
ignorelist.append(candidate)
print 'ignoring '
print candidate.getKeys()
if found: break
bboxCalculator.enlargeBoundingBox() # zoom out a bit and try again
if bboxCalculator.getBounds():
mv.recalculateCenterScale(bboxCalculator)
if not(found):
print 'Found no suitable candidate way for this stop'
else:
# We found a way and added it to the relation, but is this way connected to the previous way we found?
if len(waymemberslist) > 2:
notConnected=True
if waymemberslist[-1].get('junction')=='roundabout':
endnodeslatest=waymemberslist[-1].getNodes()
else:
endnodeslatest = [waymemberslist[-1].getNode(0),waymemberslist[-1].getNode(len(waymemberslist[-1].getNodes())-1)]
if waymemberslist[-2].get('junction')=='roundabout':
endnodesprevious=waymemberslist[-2].getNodes()
else:
endnodesprevious = [waymemberslist[-2].getNode(len(waymemberslist[-2].getNodes())-1),waymemberslist[-2].getNode(0)]
for endnodelatest in endnodeslatest:
if endnodelatest in endnodesprevious:
notConnected=False; break
if notConnected:
connectingWay=findConnectingWay(waymemberslist[-2],waymemberslist[-1])
if connectingWay:
waymemberslist.insert(-1,connectingWay)
elif False:
# Let's look for a relation containing both ways in the proper order
for parentrelation in waymemberslist[-2].getReferrers():
notThereYet = True
#print parentrelation
if parentrelation.getType() == dummyRelation.getType():
memberwaysOfParentRelation=[]
for member in parentrelation.getMembers():
if member.isWay(): memberwaysOfParentRelation.append(member.getWay())
#print membersOfParentRelation
#print waymemberslist[-1] in memberwaysOfParentRelation
if waymemberslist[-1] in memberwaysOfParentRelation and memberwaysOfParentRelation.index(waymemberslist[-1])>memberwaysOfParentRelation.index(waymemberslist[-2]):
notThereYet = True
for way in memberwaysOfParentRelation:
#print way
#print waymemberslist[-1]
#print waymemberslist[-2]
if notThereYet:
if way == waymemberslist[-2]:
notThereYet=False; continue
else:
if way == waymemberslist[-1]:
break
else:
waymemberslist.insert(-2,way)
if not(notThereYet): break
else:
print 'ALREADY CONNECTED TO PREVIOUS WAY !!!!!!!!!!'
#mv.zoomPrevious()
#mv.repaint()
#time.sleep(1)
#print
#print 'node:', node
#print 'candidates:', candidates
#print 'waymemberslist:', waymemberslist
#waymemberslist.extend(nodememberslist)
i=0; newRelation = Relation(route); commandsList = []; previousway=None
for way in waymemberslist:
newMember = RelationMember(str(i+1),way)
if not way==previousway: #not(newMember in newRelation.getMembers()):
newRelation.addMember(i, newMember)
i+=1; previousway=way
modified = True
#print dir(node)
#bboxCalculator = BoundingXYVisitor()
#bboxCalculator.computeBoundingBox([node])
#print bboxCalculator
#bboxCalculator.enlargeBoundingBox()
#if bboxCalculator.getBounds():
# mv.recalculateCenterScale(bboxCalculator)
#mv.zoomTo(node.getEastNorth())
#candidates = mv.getNearestNodes(mv.getPoint(node),node.nodePredicate)
if modified:
commandsList.append(Command.ChangeCommand(route, newRelation))
Main.main.undoRedo.add(Command.SequenceCommand("Adding ways directly adjacent to stop nodes", commandsList))
commandsList = []
modified = False
aDownloadWasNeeded = False
'''
Since Downloading referrers or missing members happens asynchronously in a separate worker thread
the script can run in three modes
1. No downloads allowed/offline run; output mentions that data was incomplete in its reports.
2. Download run; When incomplete items are encountered, they are scheduled to be downloaded. From then on, no more quality checks are performed on the data.
All hierarchies are still checked, looking for more incomplete data for which more downloads need to be scheduled.
3. Normal run; All data is available and proper reporting can be performed.
'''
dummy_way = Way()
dummy_relation = Relation()
mv = getMapView()
if mv and mv.editLayer and mv.editLayer.data:
selectedRelations = mv.editLayer.data.getSelectedRelations()
if not(selectedRelations):
JOptionPane.showMessageDialog(Main.parent, "Please select a route relation")
else:
for relation in selectedRelations:
if logVerbosity> 49: print relation
if relation.hasIncompleteMembers():
if 'downloadIncompleteMembers' in sideEffects:
aDownloadWasNeeded = True
print 'Downloading referrers for ', str(relation.get('name')), ' ', str(relation.get('note'))
DownloadRelationMemberTask.run(DownloadRelationMemberTask(relation, relation.getIncompleteMembers(), mv.editLayer ))
continue
else:
JOptionPane.showMessageDialog(Main.parent, 'Please download all incomplete member of the relations first')
exit()
relationType = relation.get('type')
if relationType == 'route':
checkPTroute(relation, aDownloadWasNeeded)
if aDownloadWasNeeded:
JOptionPane.showMessageDialog(Main.parent, 'There was incomplete data and downloading mode was initiated,\nNo further quality checks were performed.\nPlease run the script again when all downloads have completed')