OSMNX Basics
- Related topcoder competition
In [13]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
In [10]:
from __future__ import print_function
from __future__ import division
# from __future__ import unicode_literals
# from future.utils import raise_with_traceback
In [109]:
import os,sys
import re
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from pprint import pprint
from pathlib import Path
import pdb
# osmnx library
import networkx as nx
import osmnx as ox
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors
ox.config(use_cache=True, log_console=True)
ox.__version__
# outputting helpers
from output_helpers import *
# preprocess helpers by CosmiQ
import apls, apls_tools, create_spacenet_masks, graphTools
In [ ]:
this_nb_path = os.getcwd()
print("this nb path: ", this_nb_path)
if this_nb_path not in sys.path:
sys.path.insert(0, this_nb_path)
sys.path
In [77]:
rt2idx, idx2rt = rt2idx_and_idx2rt()
print(rt2idx)
Load data path¶
Here is the structure of the spacenet dataset folder
In [34]:
print_tree(data_dir, max_nfiles=1)
In [35]:
def get_imgId(img_path):
"""
Extract img ID from the path to the img file.
For example, if a path is /home/hayley/Data/Vector/spacenetroads_AOI_2_Vegas_img1323.geojson
this function returns 1323
Args:
- img_path (str or pathlib.Path): str or path object to the file
Returns:
- int for the image ID
"""
if isinstance(img_path, Path):
img_path = str(img_path)
fname = img_path.split('_')[-1]
return int(re.findall(r'\d+', fname)[0])
In [36]:
vegas_root = Path("/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/")
vegas_rgb = vegas_root/"RGB-PanSharpen"
vegas_vector = vegas_root/"geojson/spacenetroads"
k_root = Path("/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_5_Khartoum_Roads_Sample/")
k_rgb = k_root/"RGB-PanSharpen"
k_vector = k_root/"geojson/spacenetroads"
In [37]:
vegas_vector_fnames = [str(x) for x in vegas_vector.iterdir() if x.is_file() and x.suffix == '.geojson']
vegas_rgb_fnames = [str(x) for x in vegas_rgb.iterdir() if x.is_file() and x.suffix == '.tif']
# Sort them to have correct matching pairs
vegas_vector_fnames = sorted(vegas_vector_fnames, key=lambda x: get_imgId(x))
vegas_rgb_fnames = sorted(vegas_rgb_fnames, key=lambda x: get_imgId(x))
k_vector_fnames = [str(x) for x in k_vector.iterdir() if x.is_file() and x.suffix == '.geojson']
k_rgb_fnames = [str(x) for x in k_rgb.iterdir() if x.is_file() and x.suffix == '.tif']
k_vector_fnames = sorted(k_vector_fnames, key=lambda x: get_imgId(x))
k_rgb_fnames = sorted(k_rgb_fnames, key=lambda x: get_imgId(x))
print("number of vector files:", len(vegas_vector_fnames))
print("number of rgb files:", len(vegas_rgb_fnames))
print("number of vector files:", len(k_vector_fnames))
print("number of rgb files:", len(k_rgb_fnames))
In [38]:
# reload(apls_tools)
apls_tools.convert_to_8Bit(str(k_rgb_fnames[0]), './temp.tif')
In [39]:
import cv2
img = cv2.imread('temp.tif', -1)
img.dtype
Out[39]:
In [40]:
img.shape
Out[40]:
In [49]:
plt.imshow(img)
Out[49]:
In [42]:
img16 = cv2.imread(str(k_rgb_fnames[0]),-1)
img16.dtype == 'uint16'
Out[42]:
In [44]:
def show_in_8bits(fnames, axes=None, ncols=3, figsize=(20,20), verbose=False):
"""
show raster image in 8 bits. It silently performs 16->8bit conversion
if the input is 16 bits by calling `convert_to_8bits` in apls_tools.py
"""
if not isinstance(fnames, list):
raise ValueError("input must be a list")
from math import ceil
nrows = max(1, int(ceil(len(fnames)/ncols)))
if axes is None: f,axes = plt.subplots(nrows,ncols,figsize=figsize)
axes = axes.flatten()
for i,fname in enumerate(fnames):
img = cv2.imread(fname,-1)
if img.dtype == 'uint16':
if verbose:
print("{} is 16bits. We'll convert it to 8bits".format(fname))
out_name = './temp_{}.tif'.format(i)
apls_tools.convert_to_8Bit(fname, out_name)
img = cv2.imread(out_name,-1)
assert(img.dtype != 'uint16')
axes[i].imshow(img)
axes[i].set_title("_".join(fname.split('_')[-2:]))
# Delete any empty axes
for j in range(len(fnames),len(axes)):
f.delaxes(axes[j])
return f,axes[:len(fnames)]
In [45]:
show_in_8bits(k_rgb_fnames, ncols=4);
Create road mask¶
In [78]:
f = gpd.read_file(vegas_vector_fnames[1])
In [82]:
print(vegas_vector_fnames[1].split('/')[-1])
print(len(f))
print(f['road_type'].unique())
In [61]:
# f.columns
roadset = gpd.read_file(vegas_vector_fnames[1])
print(len(roadset))
roadset2 = roadset.iloc[:3, :]
In [66]:
roadset.to_file('~/big.geojson', driver='GeoJSON')
In [68]:
def gdf2geojson(gdf, out_name):
gdf.to_file(out_name, driver='GeoJSON')
In [69]:
gdf2geojson(roadset.copy(), 'big2.geojson')
In [60]:
roadset2.to_file('~/temp.geojson', driver='GeoJSON')
def get_width(gdf,
rt_col="road_type",
ln_col="lane_number"):
"""
Computes the width of a road (each row) based on the "road_type" and "lane_number" values
Args:
- gdf (geopandas.DataFrame): must have "road_type" and "lane_number
- rt_col (str): column name for the road type
- ln_col (str): column name for the number of lanes
Returns:
- widths (geopands.DataFrame): same length as gdf, each row encodes the width of the correpsonding row of gdf
"""
widths = gdf[rt_col] gdf[ln_col]
OSMNX library introduction¶
In [102]:
gpd_road = gpd.read_file(vegas_vector_fnames[1])
In [ ]:
city = ox.gdf_from_place("Berkeley, California")
ox.project_gdf(
In [ ]:
g = geo[0]
In [47]:
# get the boundary polygon for manhattan, save it as a shapefile, project it to UTM, and plot it
city = ox.gdf_from_place('Manhattan Island, New York, New York, USA')
ox.save_gdf_shapefile(city)
city = ox.project_gdf(city)
fig, ax = ox.plot_shape(city, figsize=(3,3))
In [48]:
# loc = (15.6030862, 32.5262226)
loc = (15.5190322, 32.494794) #kHARTOUM IMG8 center coordinate
g = ox.graph_from_point(loc, distance=200, distance_type='bbox', network_type='all_private')
g = ox.project_graph(g)
In [46]:
fig, ax = ox.plot_graph(g, node_size=10, node_color='g', fig_height=8)
In [84]:
# Get a networkx graph object from given address within the distnace
## The returned object's crs is lat-lon (ie, unprojected)
g = ox.graph_from_address('424 w. pico blvd, Los Angeles, CA',
distance = 500, distance_type='network',
network_type='walk')
In [86]:
## If we keep using crs=lat-lon:
gdf_nodes, gdf_edges = ox.graph_to_gdfs(g)
gdf_nodes.crs
#epsg:4326 is longlat
Out[86]:
In [89]:
gdf_edges.highway.unique()
Out[89]:
In [94]:
g.graph['crs'] #original (unprojected) crs: epsg:4236, aka. lat-lon crs
Out[94]:
In [92]:
g_projected = ox.project_graph(g) #project latlon to utm (default)
In [95]:
print(g_projected.graph['crs'])
In [98]:
ox.plot_graph(g_projected)
Out[98]:
In [230]:
gdf = ox.graph_to_gdfs(g_projected)
g_projected['buffer'] = g_projected.buffer(2)
fig, ax = ox.plot_graph(g2_projected, node_size=30)
In [102]:
nc = ['r' if 'highway' in d else 'b' for n,d in g2.nodes(data=True)]
In [122]:
print(g2.edges(data=True)[:3])
In [120]:
# ox.plot_graph(g2_projected, node_size=30, node_color=nc, );
In [116]:
temp
In [132]:
gdf_n, gdf_e = gdf2
print("gdf_n coloumns:", sorted(list(gdf_n.columns)))
print("gdf_e coloumns:", sorted(list(gdf_e.columns)))
gdf_n[:5]
In [160]:
# gdf_n.set_index('osmid', inplace=True)
# gdf_e.set_index('osmid', inplace=True)
gdf_n[:5]
Out[160]:
In [162]:
gdf_e[:5]
Out[162]:
Color network edges according to the road type¶
In [181]:
road_types = gdf_e.highway.unique()
print(road_types)
rtype2idx = {rtype:i for i,rtype in enumerate(road_types)}
print(rtype2idx)
colors = np.array(['r','b','g','k','y'])
print(colors)
In [193]:
# color edges by road type
# for name, group in gdf_e.groupby('highway'):
# print(name,len(group))
ec = [rtype2idx[r] for r in gdf_e['highway']]
ec = colors[ec]
gdf_e['road_type'] = ec
In [194]:
print(ec[:10])
In [184]:
print(gdf_e['geometry'][0])
In [186]:
gdf_n[gdf_n.index == 1842523671]
Out[186]:
In [195]:
ax = gdf_e.plot(color=ec, figsize=(20,20))
gdf_n.plot(ax=ax, color='y')
Out[195]:
In [323]:
def color_edge_by_type(G, ax=None, colors=None, figsize=(20,20)):
"""
Args:
- G (nx.graph or ox.graph) with nodes and edges
Draw a graph network with nodes and edges with the edges colored by its road_type
Returns:
- ax (plt.Axes)
"""
if ax is None:
f,ax = plt.subplots(figsize=figsize)
gdf_n, gdf_e = ox.graph_to_gdfs(G, nodes=True, edges=True)
road_types = gdf_e.highway.unique()
road2idx = {r:i for i,r in enumerate(road_types)}
rindices = [road2idx[r] for r in gdf_e['highway']]
# pdb.set_trace()
gdf_e['road_type'] = rindices
# colors
if colors is None:
colors = ('b', 'g', 'r', 'c', 'm', 'y', 'k', 'w') #todo: linspace and then map to cmap
colors = np.array(colors)
# Create color<->road_type textbar
used_colors = [road2idx[r] for r in road_types]
text = ''
for i,r in enumerate(road_types):
text += "".join([colors[road2idx[r]], ": ", r])
if i < len(road_types)-1:
text += '\n'
print(text)
gdf_e.plot(ax=ax,color=colors[rindices])
gdf_n.plot(ax=ax,color='k')
# ax.annotate(text, xy=(0.8, 0.8), xycoords='axes fraction')
# Better to use Anchored textbook
from matplotlib.offsetbox import AnchoredText
textbox = AnchoredText(text, loc='upper right', prop=dict(size='x-large'),frameon=True)
ax.add_artist(textbox)
return ax
In [324]:
color_edge_by_type(g2);
Create a buffer around a graph vector¶
In [202]:
gdf_e_small = gdf_e[:10].copy()
In [242]:
original = ox.project_gdf(gdf_e_small)
buffed = original.buffer(5)
original.plot()
buffed.plot()
Out[242]:
Let's try to create a buffer with different widths depending on the road_type
In [244]:
gdf_e_small.head()
Out[244]:
In [256]:
gdf_sample = gdf_e_small.copy()
gdf_sample['geometry'] #Series object
Out[256]:
In [ ]:
gdf_sample['buffed'] = gdf_sample['geometry'].buffer(1)
#Instead we want to create apply a buffer (input must be pd.Dataframe)
#with the width as a function of the road-type (and number of lanes)
#check unique values in the column `highway` and `lanes`
print("highway vals: ", gdf_sample['highway'].unique())
print("lanes vals: \n", gdf_sample['lanes'].value_counts(dropna=False))
gdf_sample.head()
pd.Series.value_counts()
In [277]:
# Basic road-type vs. number of lanes analysis
## First let's try the osm data from the website using osmnx
## Based on Khartoum spacenet dataset
k_bbox = [[32.4, 15.5],
[32.6, 15.7]]
k_center = (15.6331,32.5369) #flipped version of QGIS's coordinates
# k_center = (37.791427, -122.410018)
half = 500 #meter
k_graph = ox.graph_from_point(k_center,
distance=500,
distance_type='bbox',
network_type='all_private')
In [281]:
ox.plot_graph(k_graph)
Out[281]:
In [288]:
#nxgraph to geopandas DataFrame objects
k_gdf_n, k_gdf_e = ox.graph_to_gdfs(k_graph)
k_gdf_e['highway'].value_counts()
In [326]:
k_gdf_e.columns
Out[326]:
In [288]:
for name, g in k_gdf_e.groupby('highway'):
g.
Out[288]:
In [325]:
color_edge_by_type(k_graph)
Out[325]:
Geopandas DataFrame's CRS conversion¶
- gpd DataFrame object contains an attribute 'crs'
- this is set when the corresponding file is read (eg. in geojson, shapefile, etc the crs is specified).
- We can use ox.project_gdf(myGdf) to convert crs to UTM
- Or, we can use geopandas's DataFrame.to_crs()
In [211]:
original.crs
Out[211]:
In [214]:
ox.project_gdf(original).crs
Out[214]:
In [219]:
original.plot()
print(original.crs)
In [33]:
show_in_8bits([k_rgb_fnames[6]])
Out[33]:
In [34]:
# Let's get a vector data and the mask plotted on this image
gdf = gpd.read_file(k_vector_fnames[6])
In [36]:
gdf.head()
Out[36]:
In [39]:
gdf['type'] = gdf['road_type'].apply(lambda r: road2idx[
gdf['class'] = 'highway'
gdf['highway'] = 'highway'
In [69]:
gdf.apply(
Out[69]:
In [75]:
gdf = gdf.rename(columns={'geometry':'roads'}).set_geometry('roads')
In [78]:
gdf.columns
gdf.geometry.name
Out[78]:
In [77]:
roads = gdf['roads']
In [54]:
bound_series = geom.bounds
bound_series[:5]
Out[54]:
In [57]:
bound_series.iloc[:,0:2].min()
Out[57]:
In [58]:
bound_series.iloc[:,2:].max()
Out[58]:
In [60]:
geom.total_bounds
Out[60]:
In [79]:
gdf['centroid'] = roads.centroid
In [83]:
gdf.set_geometry('centroid') #returns a new obj
print(gdf.geometry.name)
gdf.plot(figsize=(20,20))
Out[83]: