Small Simplicity

Understanding Intelligence from Computational Perspective

Jan 02, 2020

OSMNX Basics

In [13]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
In [10]:
from __future__ import print_function 
from __future__ import division
# from __future__ import unicode_literals
# from future.utils import raise_with_traceback
In [109]:
import os,sys
import re
import pandas as pd
import geopandas as gpd

import numpy as np
import matplotlib.pyplot as plt

from pprint import pprint
from pathlib import Path
import pdb

# osmnx library
import networkx as nx
import osmnx as ox
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors
ox.config(use_cache=True, log_console=True)
ox.__version__


# outputting helpers
from output_helpers import * 

# preprocess helpers by CosmiQ
import apls, apls_tools, create_spacenet_masks, graphTools
In [ ]:
this_nb_path = os.getcwd()
print("this nb path: ", this_nb_path)
if this_nb_path not in sys.path:
    sys.path.insert(0, this_nb_path)
sys.path
In [77]:
rt2idx, idx2rt = rt2idx_and_idx2rt()
print(rt2idx)
{'unclassifed': 5, 'Residential': 4, 'primary': 1, 'cart': 6, 'motorway': 0, 'tertiary': 3, 'secondary': 2}

Load data path

Here is the structure of the spacenet dataset folder

In [34]:
print_tree(data_dir, max_nfiles=1)
/
AOI_5_Khartoum_Roads_Sample/
    summaryData/
        AOI_5_Khartoum_Roads_Sample.csv
    MUL/
        MUL_AOI_5_Khartoum_img74.tif
        ...9 more files
    RGB-PanSharpen/
        RGB-PanSharpen_AOI_5_Khartoum_img220.tif
        ...9 more files
    geojson/
        spacenetroads/
            spacenetroads_AOI_5_Khartoum_img74.geojson
            ...9 more files
    PAN/
        PAN_AOI_5_Khartoum_img157.tif
        ...9 more files
    MUL-PanSharpen/
        MUL-PanSharpen_AOI_5_Khartoum_img220.tif
        ...9 more files
AOI_4_Shanghai_Roads_Sample/
    summaryData/
        AOI_4_Shanghai_Roads_Sample.csv
    MUL/
        MUL_AOI_4_Shanghai_img1196.tif
        ...9 more files
    RGB-PanSharpen/
        RGB-PanSharpen_AOI_4_Shanghai_img1841.tif
        ...9 more files
    geojson/
        spacenetroads/
            spacenetroads_AOI_4_Shanghai_img1841.geojson
            ...9 more files
    PAN/
        PAN_AOI_4_Shanghai_img1916.tif
        ...9 more files
    MUL-PanSharpen/
        MUL-PanSharpen_AOI_4_Shanghai_img1700.tif
        ...9 more files
AOI_3_Paris_Roads_Sample/
    summaryData/
        AOI_3_Paris_Roads_Sample.csv
    MUL/
        MUL_AOI_3_Paris_img340.tif
        ...9 more files
    RGB-PanSharpen/
        RGB-PanSharpen_AOI_3_Paris_img84.tif
        ...9 more files
    geojson/
        spacenetroads/
            spacenetroads_AOI_3_Paris_img28.geojson
            ...9 more files
    PAN/
        PAN_AOI_3_Paris_img432.tif
        ...9 more files
    MUL-PanSharpen/
        MUL-PanSharpen_AOI_3_Paris_img84.tif
        ...9 more files
AOI_2_Vegas_Roads_Sample/
    summaryData/
        AOI_2_Vegas_Roads_Sample.csv
    MUL/
        MUL_AOI_2_Vegas_img408.tif
        ...9 more files
    RGB-PanSharpen/
        RGB-PanSharpen_AOI_2_Vegas_img1454.tif
        ...9 more files
    geojson/
        spacenetroads/
            spacenetroads_AOI_2_Vegas_img408.geojson
            ...9 more files
    PAN/
        PAN_AOI_2_Vegas_img1521.tif
        ...9 more files
    MUL-PanSharpen/
        MUL-PanSharpen_AOI_2_Vegas_img230.tif
        ...9 more files
In [35]:
def get_imgId(img_path):
    """
    Extract img ID from the path to the img file. 
    For example, if a path is /home/hayley/Data/Vector/spacenetroads_AOI_2_Vegas_img1323.geojson
    this function returns 1323
    
    Args:
    - img_path (str or pathlib.Path): str or path object to the file
    Returns:
    - int for the image ID
    """
    if isinstance(img_path, Path):
        img_path = str(img_path)
        
    fname = img_path.split('_')[-1]
    return int(re.findall(r'\d+', fname)[0])
In [36]:
vegas_root = Path("/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/")
vegas_rgb = vegas_root/"RGB-PanSharpen"
vegas_vector = vegas_root/"geojson/spacenetroads"

k_root = Path("/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_5_Khartoum_Roads_Sample/")
k_rgb = k_root/"RGB-PanSharpen"
k_vector = k_root/"geojson/spacenetroads"
In [37]:
vegas_vector_fnames =  [str(x) for x in vegas_vector.iterdir() if x.is_file() and x.suffix == '.geojson']
vegas_rgb_fnames = [str(x) for x in vegas_rgb.iterdir() if x.is_file() and x.suffix == '.tif']
# Sort them to have correct matching pairs
vegas_vector_fnames = sorted(vegas_vector_fnames, key=lambda x: get_imgId(x))
vegas_rgb_fnames = sorted(vegas_rgb_fnames, key=lambda x: get_imgId(x))

k_vector_fnames = [str(x) for x in k_vector.iterdir() if x.is_file() and x.suffix == '.geojson']
k_rgb_fnames = [str(x) for x in k_rgb.iterdir() if x.is_file() and x.suffix == '.tif']
k_vector_fnames = sorted(k_vector_fnames, key=lambda x: get_imgId(x))
k_rgb_fnames = sorted(k_rgb_fnames, key=lambda x: get_imgId(x))

print("number of vector files:", len(vegas_vector_fnames))
print("number of rgb files:", len(vegas_rgb_fnames))

print("number of vector files:", len(k_vector_fnames))
print("number of rgb files:", len(k_rgb_fnames))
number of vector files: 10
number of rgb files: 10
number of vector files: 10
number of rgb files: 10
In [38]:
# reload(apls_tools)
apls_tools.convert_to_8Bit(str(k_rgb_fnames[0]), './temp.tif')
In [39]:
import cv2
img = cv2.imread('temp.tif', -1)
img.dtype
Out[39]:
dtype('uint8')
In [40]:
img.shape
Out[40]:
(1300, 1300, 3)
In [49]:
plt.imshow(img)
Out[49]:
<matplotlib.image.AxesImage at 0x7f624e4f8e50>
In [42]:
img16 = cv2.imread(str(k_rgb_fnames[0]),-1)
img16.dtype == 'uint16'
Out[42]:
True
In [44]:
def show_in_8bits(fnames, axes=None, ncols=3, figsize=(20,20), verbose=False):
    """
    Show raster images in 8 bits. It silently performs a 16->8 bit conversion
    if an input is 16 bits by calling `convert_to_8Bit` in apls_tools.py.
    """
    if not isinstance(fnames, list):
        raise ValueError("input must be a list")
        
    from math import ceil
    nrows = max(1, int(ceil(len(fnames)/ncols)))
    if axes is None:
        f, axes = plt.subplots(nrows, ncols, figsize=figsize)
    else:
        f = axes.flatten()[0].figure  # needed for f.delaxes below
    axes = axes.flatten()
    
    for i,fname in enumerate(fnames):
        img = cv2.imread(fname,-1)

        if img.dtype == 'uint16':
            if verbose:
                print("{} is 16bits. We'll convert it to 8bits".format(fname))
            out_name = './temp_{}.tif'.format(i)
            apls_tools.convert_to_8Bit(fname, out_name)
            img = cv2.imread(out_name,-1)
        assert(img.dtype != 'uint16')

        axes[i].imshow(img)
        axes[i].set_title("_".join(fname.split('_')[-2:]))
    
    # Delete any empty axes
    for j in range(len(fnames),len(axes)): 
        f.delaxes(axes[j])

    return f,axes[:len(fnames)]
In [45]:
show_in_8bits(k_rgb_fnames, ncols=4);

Create road mask

In [78]:
f = gpd.read_file(vegas_vector_fnames[1])
In [82]:
print(vegas_vector_fnames[1].split('/')[-1])
print(len(f))
print(f['road_type'].unique())
spacenetroads_AOI_2_Vegas_img408.geojson
38
[u'5' u'3' u'6']
In [61]:
# f.columns
roadset = gpd.read_file(vegas_vector_fnames[1])
print(len(roadset))
roadset2 = roadset.iloc[:3, :]
38
In [66]:
roadset.to_file('~/big.geojson', driver='GeoJSON')
In [68]:
def gdf2geojson(gdf, out_name):
    gdf.to_file(out_name, driver='GeoJSON')
In [69]:
gdf2geojson(roadset.copy(), 'big2.geojson')
In [60]:
roadset2.to_file('~/temp.geojson', driver='GeoJSON')
def get_width(gdf,
             rt_col="road_type",
             ln_col="lane_number"):
    """
    Computes the width of a road (each row) based on the "road_type" and "lane_number" values
    Args:
    - gdf (geopandas.GeoDataFrame): must have "road_type" and "lane_number" columns
    - rt_col (str): column name for the road type
    - ln_col (str): column name for the number of lanes
    
    Returns:
    - widths (pandas.Series): same length as gdf; each row encodes the width of the corresponding row of gdf
    """
    # Assumed mapping from road_type code to a per-lane width in meters (illustrative values, not from SpaceNet)
    type2lane_width = {'1': 3.5, '2': 3.5, '3': 3.25, '4': 3.0, '5': 3.0, '6': 2.5}
    lane_width = gdf[rt_col].astype(str).map(type2lane_width).fillna(3.0)
    widths = lane_width * gdf[ln_col].astype(float)
    return widths

OSMNX library introduction

In [102]:
gpd_road = gpd.read_file(vegas_vector_fnames[1])
In [ ]:
city = ox.gdf_from_place("Berkeley, California")
ox.project_gdf(city)
In [ ]:
g = geo[0]
In [47]:
# get the boundary polygon for manhattan, save it as a shapefile, project it to UTM, and plot it
city = ox.gdf_from_place('Manhattan Island, New York, New York, USA')
ox.save_gdf_shapefile(city)
city = ox.project_gdf(city)
fig, ax = ox.plot_shape(city, figsize=(3,3))
In [48]:
# loc = (15.6030862, 32.5262226)
loc = (15.5190322, 32.494794) # Khartoum img8 center coordinate

g = ox.graph_from_point(loc, distance=200, distance_type='bbox', network_type='all_private')
g = ox.project_graph(g)
In [46]:
fig, ax = ox.plot_graph(g, node_size=10, node_color='g', fig_height=8)
In [84]:
# Get a networkx graph object from a given address within the distance
## The returned object's crs is lat-lon (i.e., unprojected)
g = ox.graph_from_address('424 w. pico blvd, Los Angeles, CA',
                           distance = 500, distance_type='network', 
                           network_type='walk')
In [86]:
## If we keep using crs=lat-lon:
gdf_nodes, gdf_edges = ox.graph_to_gdfs(g)
gdf_nodes.crs
#epsg:4326 is longlat
Out[86]:
{'init': 'epsg:4326'}
In [89]:
gdf_edges.highway.unique()
Out[89]:
array([u'residential', u'primary', u'secondary', u'service'], dtype=object)
In [94]:
g.graph['crs'] # original (unprojected) crs: epsg:4326, i.e., the lat-lon crs
Out[94]:
{'init': 'epsg:4326'}
In [92]:
g_projected = ox.project_graph(g) #project latlon to utm (default)
In [95]:
print(g_projected.graph['crs'])
{'units': 'm', 'ellps': 'GRS80', 'datum': 'NAD83', 'proj': 'utm', 'zone': 11}
In [98]:
ox.plot_graph(g_projected)
Out[98]:
(<Figure size 571.59x432 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7f6254399cd0>)
In [230]:
g2_projected = ox.project_graph(g2)
g2_projected['buffer'] = g2_projected.buffer(2)
fig, ax = ox.plot_graph(g2_projected, node_size=30)

AttributeErrorTraceback (most recent call last)
<ipython-input-230-4dfb34950f87> in <module>()
      1 g2_projected = ox.project_graph(g2)
----> 2 g2_projected['buffer'] = g2_projected.buffer(2)
      3 fig, ax = ox.plot_graph(g2_projected, node_size=30)

AttributeError: 'MultiDiGraph' object has no attribute 'buffer'
In [102]:
nc = ['r' if 'highway' in d else 'b' for n,d in g2.nodes(data=True)]
In [122]:
print(g2.edges(data=True)[:3])
[(1842523671, 123273663, {'lanes': u'2', 'name': u'5th Avenue', 'length': 71.94364640397427, 'oneway': False, 'highway': u'residential', 'osmid': 165524382}), (123017356, 123017328, {'lanes': u'4', 'name': u'West Pico Boulevard', 'length': 56.115353379720155, 'oneway': False, 'highway': u'primary', 'osmid': 398222698}), (123017356, 123017358, {'length': 115.33904865937451, 'oneway': False, 'name': u'South Windsor Boulevard', 'highway': u'residential', 'osmid': 13355759})]
In [120]:
# ox.plot_graph(g2_projected, node_size=30, node_color=nc, );
In [116]:
temp
In [132]:
gdf_n, gdf_e = gdf2
print("gdf_n coloumns:", sorted(list(gdf_n.columns)))
print("gdf_e coloumns:", sorted(list(gdf_e.columns)))
gdf_n[:5]
gdf_n columns: ['geometry', 'highway', 'osmid', 'x', 'y']
gdf_e columns: ['geometry', 'highway', 'key', 'lanes', 'length', 'name', 'oneway', 'osmid', 'u', 'v']
In [160]:
# gdf_n.set_index('osmid', inplace=True)
# gdf_e.set_index('osmid', inplace=True)

gdf_n[:5]
Out[160]:
highway x y geometry
osmid
21748281 traffic_signals -118.327 34.0478 POINT (-118.3267156 34.0478237)
60947745 traffic_signals -118.328 34.0447 POINT (-118.3284409 34.0446852)
122594255 NaN -118.325 34.0476 POINT (-118.324537 34.0476167)
122807243 NaN -118.327 34.0502 POINT (-118.3268412 34.0502142)
123017328 NaN -118.33 34.0481 POINT (-118.3295001 34.0480758)
In [162]:
gdf_e[:5]
Out[162]:
geometry highway key lanes length name oneway u v
osmid
165524382 LINESTRING (-118.3224853 34.0467876, -118.3224... residential 0 2 71.943646 5th Avenue False 1842523671 123273663
398222698 LINESTRING (-118.3301057 34.0481296, -118.3295... primary 0 4 56.115353 West Pico Boulevard False 123017356 123017328
13355759 LINESTRING (-118.3301057 34.0481296, -118.3305... residential 0 NaN 115.339049 South Windsor Boulevard False 123017356 123017358
398222698 LINESTRING (-118.3301057 34.0481296, -118.3308... primary 0 4 65.200256 West Pico Boulevard False 123017356 123741062
13401181 LINESTRING (-118.3305848 34.0471713, -118.3301... residential 0 NaN 136.564093 Victoria Park Drive False 123017358 123039316

Color network edges according to the road type

In [181]:
road_types = gdf_e.highway.unique()
print(road_types)
rtype2idx = {rtype:i for i,rtype in enumerate(road_types)}
print(rtype2idx)
colors = np.array(['r','b','g','k','y'])
print(colors)
[u'residential' u'primary' u'secondary' u'service']
{u'residential': 0, u'primary': 1, u'service': 3, u'secondary': 2}
['r' 'b' 'g' 'k' 'y']
In [193]:
# color edges by road type
# for name, group in gdf_e.groupby('highway'):
#     print(name,len(group))
ec = [rtype2idx[r] for r in gdf_e['highway']]
ec = colors[ec]
gdf_e['road_type'] = ec
In [194]:
print(ec[:10])
['r' 'b' 'r' 'b' 'r' 'r' 'b' 'b' 'g' 'r']
In [184]:
print(gdf_e['geometry'][0])
LINESTRING (-118.3224853 34.0467876, -118.3224879 34.0474346)
In [186]:
gdf_n[gdf_n.index == 1842523671]
Out[186]:
highway x y geometry
osmid
In [195]:
ax = gdf_e.plot(color=ec, figsize=(20,20))
gdf_n.plot(ax=ax, color='y')
Out[195]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30c450cf50>
In [323]:
def color_edge_by_type(G, ax=None, colors=None, figsize=(20,20)):
    """
    Args:
    - G (nx.graph or ox.graph) with nodes and edges
    Draw a graph network with nodes and edges with the edges colored by its road_type
    
    Returns:
    - ax (plt.Axes)
    """
    if ax is None:
        f,ax = plt.subplots(figsize=figsize)
        
    gdf_n, gdf_e = ox.graph_to_gdfs(G, nodes=True, edges=True)
    road_types = gdf_e.highway.unique()
    
    road2idx = {r:i for i,r in enumerate(road_types)}
    rindices = [road2idx[r] for r in gdf_e['highway']]
#     pdb.set_trace()
    gdf_e['road_type'] = rindices
    
    # colors
    if colors is None: 
        colors = ('b', 'g', 'r', 'c', 'm', 'y', 'k', 'w') #todo: linspace and then map to cmap
    colors = np.array(colors)
    
    # Create color<->road_type textbar
    used_colors = [road2idx[r] for r in road_types]
    text = ''
    for i,r in enumerate(road_types):
        text += "".join([colors[road2idx[r]], ": ", r])
        if i < len(road_types)-1:
            text += '\n'
    print(text)
    gdf_e.plot(ax=ax,color=colors[rindices])
    gdf_n.plot(ax=ax,color='k')

#     ax.annotate(text, xy=(0.8, 0.8), xycoords='axes fraction')
    # Better to use an AnchoredText box
    from matplotlib.offsetbox import AnchoredText
    
    textbox = AnchoredText(text, loc='upper right', prop=dict(size='x-large'),frameon=True)
    ax.add_artist(textbox)
    return ax

Another way to set the colors (better, since it is less hard-coded)

reference

...
colors = pl.cm.jet(np.linspace(0,1,n))
plt.plot(x,y,colors=colors)
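Applied to the edges above, that idea looks roughly like the sketch below. It assumes the gdf_e and rtype2idx objects from the earlier cells, and that geopandas accepts a per-row list of colors, as it did above.

import numpy as np
import matplotlib.cm as cm

# one RGBA color per road type, sampled evenly from a colormap
cmap_colors = cm.jet(np.linspace(0, 1, len(rtype2idx)))
ec = [cmap_colors[rtype2idx[r]] for r in gdf_e['highway']]
gdf_e.plot(color=ec, figsize=(10, 10))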
In [324]:
color_edge_by_type(g2);
b: residential
g: primary
r: secondary
c: service

Create a buffer around a graph vector

In [202]:
gdf_e_small = gdf_e[:10].copy()
In [242]:
original = ox.project_gdf(gdf_e_small)
buffed = original.buffer(5)

original.plot()
buffed.plot()
Out[242]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30bde937d0>

Let's try to create a buffer with different widths depending on the road_type
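A minimal sketch of that idea (not from the original notebook): project to UTM so that buffer distances are in meters, then buffer each row with a width looked up from its road type. The width values below are illustrative assumptions.

# illustrative per-type half-widths in meters (assumed values, not from SpaceNet)
type2width = {'residential': 3.0, 'primary': 6.0, 'secondary': 5.0, 'service': 2.0}

gdf_utm = ox.project_gdf(gdf_e_small)          # project so that buffer distances are in meters
widths = gdf_utm['highway'].map(type2width).fillna(3.0).values
gdf_utm['buffed'] = [geom.buffer(w) for geom, w in zip(gdf_utm.geometry, widths)]
gdf_utm.set_geometry('buffed').plot()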

In [244]:
gdf_e_small.head()
Out[244]:
geometry highway key lanes length name oneway u v road_type
osmid
165524382 LINESTRING (-118.3224853 34.0467876, -118.3224... residential 0 2 71.943646 5th Avenue False 1842523671 123273663 r
398222698 LINESTRING (-118.3301057 34.0481296, -118.3295... primary 0 4 56.115353 West Pico Boulevard False 123017356 123017328 b
13355759 LINESTRING (-118.3301057 34.0481296, -118.3305... residential 0 NaN 115.339049 South Windsor Boulevard False 123017356 123017358 r
398222698 LINESTRING (-118.3301057 34.0481296, -118.3308... primary 0 4 65.200256 West Pico Boulevard False 123017356 123741062 b
13401181 LINESTRING (-118.3305848 34.0471713, -118.3301... residential 0 NaN 136.564093 Victoria Park Drive False 123017358 123039316 r
In [256]:
gdf_sample = gdf_e_small.copy()
gdf_sample['geometry'] #Series object
Out[256]:
osmid
165524382                            LINESTRING (-118.3224853 34.0467876, -118.3224...
398222698                            LINESTRING (-118.3301057 34.0481296, -118.3295...
13355759                             LINESTRING (-118.3301057 34.0481296, -118.3305...
398222698                            LINESTRING (-118.3301057 34.0481296, -118.3308...
13401181                             LINESTRING (-118.3305848 34.0471713, -118.3301...
13355759                             LINESTRING (-118.3305848 34.0471713, -118.3301...
398199843                            LINESTRING (-118.3253792 34.047693, -118.32552...
398199843                            LINESTRING (-118.3253792 34.047693, -118.32453...
[398222656, 398222654, 403725847]    LINESTRING (-118.3273691 34.0443386, -118.3275...
165560843                            LINESTRING (-118.3273691 34.0443386, -118.3258...
Name: geometry, dtype: object
In [ ]:
gdf_sample['buffed'] = gdf_sample['geometry'].buffer(1)
# Instead, we want to apply a buffer (the input must be a DataFrame)
# with the width as a function of the road type (and the number of lanes)

#check unique values in the column `highway` and `lanes`
print("highway vals: ", gdf_sample['highway'].unique())
print("lanes vals: \n", gdf_sample['lanes'].value_counts(dropna=False))
gdf_sample.head()
pd.Series.value_counts()
In [277]:
# Basic road-type vs. number of lanes analysis
## First let's try the osm data from the website using osmnx
## Based on Khartoum spacenet dataset
k_bbox = [[32.4, 15.5],
            [32.6, 15.7]]
k_center = (15.6331,32.5369) #flipped version of QGIS's coordinates
# k_center = (37.791427, -122.410018)
half = 500 #meter
k_graph = ox.graph_from_point(k_center, 
                              distance=500, 
                              distance_type='bbox',
                              network_type='all_private')
In [281]:
ox.plot_graph(k_graph)
Out[281]:
(<Figure size 434.275x432 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7f30bf5281d0>)
In [288]:
#nxgraph to geopandas DataFrame objects
k_gdf_n, k_gdf_e = ox.graph_to_gdfs(k_graph)
k_gdf_e['highway'].value_counts()
Out[288]:
residential     620
tertiary        143
unclassified     54
primary          17
service           2
primary_link      2
Name: highway, dtype: int64
In [326]:
k_gdf_e.columns
Out[326]:
Index([u'geometry', u'highway', u'key', u'length', u'name', u'oneway',
       u'osmid', u'u', u'v'],
      dtype='object')
In [288]:
# Group the edges by road type (e.g., to inspect each group separately)
for name, group in k_gdf_e.groupby('highway'):
    print(name, len(group))
In [325]:
color_edge_by_type(k_graph)
b: residential
g: unclassified
r: tertiary
c: primary
m: primary_link
y: service
Out[325]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30be83b350>

Geopandas DataFrame's CRS conversion

  • A geopandas GeoDataFrame object carries a 'crs' attribute.
  • It is set when the corresponding file is read (e.g. a geojson or shapefile specifies its crs).
  • We can use ox.project_gdf(myGdf) to convert the crs to UTM.
  • Or, we can use geopandas's GeoDataFrame.to_crs(); see the sketch below.
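For the to_crs route, a minimal sketch (it reuses gdf_e from the cells above; epsg:32611, UTM zone 11N, is just an example target):

gdf_latlon = gdf_e.copy()                      # crs is {'init': 'epsg:4326'}, i.e. lat-lon
gdf_utm = gdf_latlon.to_crs(epsg=32611)        # UTM zone 11N; coordinates are now in meters
print(gdf_latlon.crs, '->', gdf_utm.crs)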

osmnx geopandas: crs conversion

In [211]:
original.crs
Out[211]:
{'init': 'epsg:4326'}
In [214]:
ox.project_gdf(original).crs
Out[214]:
{'datum': 'NAD83', 'ellps': 'GRS80', 'proj': 'utm', 'units': 'm', 'zone': 11}
In [219]:
original.plot()
print(original.crs)
{'units': 'm', 'ellps': 'GRS80', 'datum': 'NAD83', 'proj': 'utm', 'zone': 11}
In [33]:
show_in_8bits([k_rgb_fnames[6]])
Out[33]:
(<Figure size 1440x1440 with 1 Axes>,
 array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f30bdd4dd90>], dtype=object))
In [34]:
# Let's get a vector data and the mask plotted on this image
gdf = gpd.read_file(k_vector_fnames[6])
In [36]:
gdf.head()
Out[36]:
bridge_type heading lane_numbe lane_number one_way_type paved road_id road_type origarea origlen partialDec truncated geometry
0 2 0 2 2 2 2 13023 5 0 0.000820 1 0 LINESTRING (32.555128897 15.730077578, 32.5552...
1 2 0 2 2 2 2 11579 5 0 0.001056 1 0 LINESTRING (32.555994091 15.730774351, 32.5560...
2 2 0 2 2 2 2 6826 5 0 0.000768 1 0 LINESTRING (32.555235216 15.730890506, 32.5559...
3 2 0 2 2 2 2 8875 5 0 0.000596 1 0 LINESTRING (32.554647607 15.730988874, 32.5552...
4 2 0 1 1 2 2 4913 5 0 0.000459 1 0 LINESTRING (32.555994091 15.730774351, 32.5560...
In [39]:
gdf['type'] = gdf['road_type'].apply(lambda r: road2idx[r])
gdf['class'] = 'highway'
gdf['highway'] = 'highway'
In [69]:
gdf.apply(
Out[69]:
'geometry'
In [75]:
gdf = gdf.rename(columns={'geometry':'roads'}).set_geometry('roads')
In [78]:
gdf.columns
gdf.geometry.name
Out[78]:
'roads'
In [77]:
roads = gdf['roads']
In [54]:
bound_series = geom.bounds
bound_series[:5]
Out[54]:
minx miny maxx maxy
0 32.555129 15.730078 32.555235 15.730891
1 32.555994 15.730768 32.556038 15.730774
2 32.555235 15.730774 32.555994 15.730891
3 32.554648 15.730891 32.555235 15.730989
4 32.555994 15.730774 32.556038 15.730992
In [57]:
bound_series.iloc[:,0:2].min()
Out[57]:
minx    32.552528
miny    15.727921
dtype: float64
In [58]:
bound_series.iloc[:,2:].max()
Out[58]:
maxx    32.556038
maxy    15.731431
dtype: float64
In [60]:
geom.total_bounds
Out[60]:
array([ 32.5525284,  15.7279212,  32.5560384,  15.7314312])
In [79]:
gdf['centroid'] = roads.centroid
In [83]:
gdf.set_geometry('centroid') #returns a new obj
print(gdf.geometry.name)
gdf.plot(figsize=(20,20))
roads
Out[83]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30bd852190>

Nov 01, 2019

rasterio basics

Gdal/Rasterio Playground

Date: 11-16-2018 (Friday)

  1. Good gdal documentation on the outputs of gdalinfo and transform matrix

  2. Rasterio resources

Caution: it's not recommended to import osgeo.gdal and rasterio in the same kernel.

In [10]:
from pathlib import Path
import numpy as np
rgb_path = Path('/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/RGB-PanSharpen/RGB-PanSharpen_AOI_2_Vegas_img1454.tif')
vec_path = Path('/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/geojson/spacenetroads/spacenetroads_AOI_2_Vegas_img1454.geojson')
buffer_path = '/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/geojson/buffer/buffer_AOI_2_Vegas_img1454.geojson'

Gdal

In [11]:
from osgeo import gdal
In [12]:
ds = gdal.Open(str(rgb_path))
In [13]:
gt = ds.GetGeoTransform()
print(gt)
gt_np = np.array(gt).reshape(2,3)
print(gt_np)
(-115.1671176, 2.699999999993434e-06, 0.0, 36.1247876997, 0.0, -2.6999999999988997e-06)
[[ -1.15167118e+02   2.70000000e-06   0.00000000e+00]
 [  3.61247877e+01   0.00000000e+00  -2.70000000e-06]]
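The six geotransform coefficients define an affine map from pixel (column, row) to CRS coordinates: x = gt[0] + col*gt[1] + row*gt[2] and y = gt[3] + col*gt[4] + row*gt[5]. A quick sanity check against gdal.ApplyGeoTransform in the next cell:

def pixel_to_crs(gt, col, row):
    # GDAL convention: (origin_x, pixel_width, row_rotation, origin_y, col_rotation, pixel_height)
    x = gt[0] + col * gt[1] + row * gt[2]
    y = gt[3] + col * gt[4] + row * gt[5]
    return x, y

print(pixel_to_crs(gt, 0, 0))                              # upper-left corner
print(pixel_to_crs(gt, ds.RasterXSize, ds.RasterYSize))    # lower-right corner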
In [14]:
ds.RasterXSize
Out[14]:
1300
In [15]:
print("upper left in CRS: ", gdal.ApplyGeoTransform(gt, 0, 0))
print("lower left in CRS: ", gdal.ApplyGeoTransform(gt, 0, ds.RasterYSize-1))
print("lower right in CRS: ", gdal.ApplyGeoTransform(gt, ds.RasterXSize-1, ds.RasterYSize-1))
print("upper right in CRS: ", gdal.ApplyGeoTransform(gt, ds.RasterXSize-1,0))

print("lower right in CRS: ", gdal.ApplyGeoTransform(gt, ds.RasterXSize, ds.RasterYSize))
upper left in CRS:  [-115.1671176, 36.1247876997]
lower left in CRS:  [-115.1671176, 36.121280399700005]
lower right in CRS:  [-115.1636103, 36.121280399700005]
upper right in CRS:  [-115.1636103, 36.1247876997]
lower right in CRS:  [-115.1636076, 36.1212776997]
In [16]:
ds2 = gdal.Open('./data/osgeo-workshop/geomatrix.tif')
In [ ]:
!gdalinfo './data/osgeo-workshop/geomatrix.tif'
In [ ]:
!gdalinfo '/home/hayley/mercator.tif'
In [19]:
!gdal_translate -of GIF ./data/osgeo-workshop/wellington_west.png ./wellington_west.gif
Input file size is 1831, 1835
0...10...20...30...40...50...60...70...80...90...100 - done.
In [ ]:
!gdalinfo -noct ./data/osgeo-workshop/wellington_west.png
In [ ]:
!cat ./data/osgeo-workshop/wellington_west.wld
In [22]:
!gdal_translate -a_srs EPSG:2193 ./data/osgeo-workshop/wellington_west.png wellington_west.tif
Input file size is 1831, 1835
0...10...20...30...40...50...60...70...80...90...100 - done.
In [ ]:
!gdalinfo -noct wellington_west.tif
In [24]:
import cv2
import matplotlib.pyplot as plt
In [25]:
im = cv2.imread('wellington_west.tif')
plt.imshow(im)
Out[25]:
<matplotlib.image.AxesImage at 0x7fca879b6210>
In [26]:
im_png = cv2.imread('./data/osgeo-workshop/wellington_west.png')
plt.imshow(im_png)
Out[26]:
<matplotlib.image.AxesImage at 0x7fca844d60d0>
In [27]:
!gdal_translate -ot Byte -of PNG -scale $rgb_path ./8bits.png
Input file size is 1300, 1300
0...10...20...30...40...50...60...70...80...90...100 - done.
In [28]:
temp = cv2.imread('8bits.png')
plt.imshow(temp)
Out[28]:
<matplotlib.image.AxesImage at 0x7fca8444d3d0>

Vector data: ogr

In [29]:
from osgeo import ogr

ogrinfo

  • -ro: read-only
  • -so: summary-only
    • provides extra information such as the number of features, the spatial extent in (xmin, ymin) – (xmax, ymax) format, the coordinate system, and a list of attributes with their name, type and width/precision
  • -al: all-layers
In [ ]:
!ogrinfo -so -al $vec_path 

With the Python API, we can get similar information.

In [31]:
ds = ogr.Open(str(vec_path))
print("number of layers: ", ds.GetLayerCount())
number of layers:  1
In [32]:
lyr = ds.GetLayerByIndex(0) #here index is 0-based unlike gdal's band indexing (1-based)
print(lyr.GetLayerDefn())
<osgeo.ogr.FeatureDefn; proxy of <Swig Object of type 'OGRFeatureDefnShadow *' at 0x7fca844b0990> >
In [ ]:
for feat in lyr:
    print("Feature: ", feat.GetFID())
    for i in range(feat.GetFieldCount()):
        if feat.IsFieldSet(i):
            print('Field is ', feat.GetFieldDefnRef(i).GetName())
            print(str(feat.GetField(i)))
#     pdb.set_trace()
    geom = feat.GetGeometryRef()
    if geom is not None:
        print('Geometry: ', geom.ExportToWkt()[0:20]) # truncated geometry
        
In [ ]:
!ogrinfo -so -al $buffer_path

Vector to Raster: gdal_rasterize

documentation

  • the vector file and the raster must be in the same coordinate system
In [ ]:
!gdalinfo 8bits.png
In [36]:
!gdal_rasterize -burn 255  -l buffer_AOI_2_Vegas_img1454 -ts 1300 1300 -ot Byte $buffer_path ./buffer_raster.tif
0...10...20...30...40...50...60...70...80...90...100 - done.
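Note that with only -ts the output extent is taken from the vector layer, which may not line up exactly with the RGB raster. One way to pin the extent is -te xmin ymin xmax ymax, using the corner coordinates printed by ApplyGeoTransform earlier (a hedged variant of the same command):

!gdal_rasterize -burn 255 -l buffer_AOI_2_Vegas_img1454 -te -115.1671176 36.1212776997 -115.1636076 36.1247876997 -ts 1300 1300 -ot Byte $buffer_path ./buffer_raster_te.tif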
In [37]:
buff_raster = cv2.imread('./buffer_raster.tif',-1)
In [39]:
plt.imshow(buff_raster)
Out[39]:
<matplotlib.image.AxesImage at 0x7fca84440890>

IPython Widget Basics

TODO: Use a leaflet widget to interactively toggle these images for visual comparison: add three layers, one for the original map, one for the buffer raster, and one for the OSM basemap.

In [40]:
import ipywidgets as widgets
In [41]:
widgets.IntSlider(min=0, max=10)
In [42]:
from IPython.display import display
w = widgets.IntSlider(0,100)
display(w)

Using interact function of ipywidgets

In [50]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
  1. Basic interact: at the most basic level, interact autogenerates UI controls for function arguments, and then calls the function with those arguments when you manipulate the controls interactively. To use interact, we need to define a function that we want to explore. Let's take a look at this function, which has two arguments, x and y.
In [51]:
def f(x,y): return np.cos(x) + np.sin(y)
print(f(10,10))
-1.38309263997
In [52]:
interact(f,x=widgets.FloatSlider(min=-10, max=10, value=0),y=fixed(10))
Out[52]:
<function __main__.f>
In [53]:
interact(f, x=(-10.,10.,0.1), y=fixed(0))
Out[53]:
<function __main__.f>

Using interactive function to get the widget returned

In [54]:
from IPython.display import display
In [55]:
def f(a,b):
    display(a+b)
    return a+b
In [56]:
w = interactive(f, a=(-1., 5.,0.2), b=(-10., 10., 0.3))
In [57]:
display(w)
In [58]:
w.children
Out[58]:
(FloatSlider(value=2.0, description=u'a', max=5.0, min=-1.0, step=0.2),
 FloatSlider(value=-0.09999999999999964, description=u'b', max=10.0, min=-10.0, step=0.3),
 Output())
In [59]:
w.kwargs
Out[59]:
{'a': 2.0, 'b': -0.09999999999999964}
In [60]:
w.result
Out[60]:
1.9000000000000004

Disabling continuous updates for long-running functions

In [61]:
def slow_function(i):
    print(int(i))
    print(list(x for x in range(int(i)) if
               str(x)==str(x)[::-1] and 
               str(x**2)==str(x**2)[::-1]))
    return
In [62]:
%%time
slow_function(1e6)
1000000
[0, 1, 2, 3, 11, 22, 101, 111, 121, 202, 212, 1001, 1111, 2002, 10001, 10101, 10201, 11011, 11111, 11211, 20002, 20102, 100001, 101101, 110011, 111111, 200002]
CPU times: user 517 ms, sys: 181 ms, total: 699 ms
Wall time: 524 ms
In [63]:
interact(slow_function, i=(1e5,1e7,1e5))
Out[63]:
<function __main__.slow_function>

Notice this widget is continuously updated as we drag the slider. Instead, we would like slow_function to be called with the slider's value only when we release the mouse. The interact_manual function provides a variant of interact that restricts execution so it is only done on demand: a button is added to the interact controls that lets you trigger an execution.

In [64]:
interact_manual(slow_function, i=(1e5, 1e7, 1e5))
Out[64]:
<function __main__.slow_function>

Alternatively, we can input the FloatSlider object with continuous_update argument set to False:

In [ ]:
interact(slow_function, i=widgets.FloatSlider(min=1e5, max=1e7, step=1e5, continuous_update=False))
In [ ]:
interact(slow_function, i=widgets.FloatSlider(min=1e5, max=1e7, step=1e5, continuous_update=False));
In [68]:
def f(m,b):
    plt.figure(2)
    x = np.linspace(-10,10,num=1000)
    plt.plot(x, m*x + b)
    plt.ylim(-5,5)
    plt.show()
In [69]:
interactive_plot = interactive(f, m=(-2.,2.), b=(-10.,10.,0.5))
In [70]:
output = interactive_plot.children[-1]
In [72]:
type(output)
Out[72]:
ipywidgets.widgets.widget_output.Output
In [73]:
output.layout.height = '350px'
In [74]:
interactive_plot

More examples of widgets

In [82]:
widgets.IntProgress(min=0, max=100, step=1,
                    value=10,
                   description='Loading: ',
                   bar_style='info',
                   orientation='horizontal')
In [83]:
play = widgets.Play(min=0, max=100, step=1, value=10,
                    description='Press play',
                    disabled=False)
slider = widgets.IntSlider()
widgets.jslink((play,'value'), (slider, 'value'))
    
In [85]:
display(play, slider)
In [86]:
widgets.HBox([play, slider])
In [87]:
widgets.ColorPicker(
    concise=False,
    description='Pick a color',
    value='blue',
    disabled=False)

Output widgets and logging

In [89]:
import ipywidgets as widgets
import logging

class OutputWidgetHandler(logging.Handler):
    """ Custom logging handler sending logs to an output widget """

    def __init__(self, *args, **kwargs):
        super(OutputWidgetHandler, self).__init__(*args, **kwargs)
        layout = {
            'width': '100%',
            'height': '160px',
            'border': '1px solid black'
        }
        self.out = widgets.Output(layout=layout)

    def emit(self, record):
        """ Overload of logging.Handler method """
        formatted_record = self.format(record)
        new_output = {
            'name': 'stdout',
            'output_type': 'stream',
            'text': formatted_record+'\n'
        }
        self.out.outputs = (new_output, ) + self.out.outputs

    def show_logs(self):
        """ Show the logs """
        display(self.out)

    def clear_logs(self):
        """ Clear the current logs """
        self.out.clear_output()


logger = logging.getLogger(__name__)
handler = OutputWidgetHandler()
handler.setFormatter(logging.Formatter('%(asctime)s  - [%(levelname)s] %(message)s'))
logger.addHandler(handler)
logger.setLevel(logging.INFO)
In [90]:
handler.show_logs()
In [92]:
handler.clear_logs()
logger.info('Starting program')

try:
    logger.info('About to try something dangerous...')
    1.0/0.0
except Exception as e:
    logger.exception('An error occurred!')

Image Browser widget

In [93]:
from sklearn import datasets
In [94]:
digits = datasets.load_digits()
In [98]:
digits.images.shape
Out[98]:
(1797, 8, 8)
In [103]:
plt.imshow(digits.images[0,:,:],cmap='gray')
Out[103]:
<matplotlib.image.AxesImage at 0x7fca841b2310>
In [106]:
def browse_image(digits):
    n = len(digits.images)
    def view_image(i):
        plt.imshow(digits.images[i], cmap=plt.cm.gray_r, interpolation='nearest')
#         plt.title("Training: ", digits.target[i])
        plt.show()
    interact(view_image, i=(0,n-1))
    return
In [107]:
browse_image(digits)

Image Browser

In [220]:
from io import BytesIO
from IPython.display import Image
from ipywidgets import interact, interactive, fixed
import matplotlib as mpl
from skimage import data, filters, io, img_as_float
import pdb
In [140]:
arr = img_as_float(data.coffee())
print(arr.min(), arr.max())
0.0 1.0
In [229]:
def arr2img(arr):
    """ Display 2d or 3d numpy array as an image."""
    if arr.ndim == 2:
        format, cmap = 'png', mpl.cm.gray
    elif arr.ndim == 3:
        format, cmap = 'jpg', None
    else:
        raise ValueError("input array should be either 2 or 3 dim")
    
    #Don't let matplotlib autoscale the color range (to the input array's min and max) 
    vmin, vmax = (0, 255) if arr.dtype=='uint8' else (0., 1.)    
    
    with BytesIO() as buffer:
        mpl.image.imsave(buffer, arr, vmin=vmin, vmax=vmax, format=format, cmap=cmap)
        out = buffer.getvalue()
    
    return Image(out)
    
In [230]:
arr2img(data.coins())
Out[230]:

Now let's create a simple function to edit this image. For instance, we can toggle each channel or blur the image.

In [231]:
def edit_img(arr, sigma=0.1, R=1.0, G=1.0, B=1.0):
    """ Apply a blurring to the numpy array"""
    output = filters.gaussian(arr, sigma=sigma, multichannel=True)

    colors = [R, G, B]
    if arr.ndim == 2:
        output = G * output
    elif arr.ndim > 2:
        for c in range(output.shape[2]):
            output[:,:,c] = colors[c] * output[:,:,c]
    display(arr2img(output))
#     return arr2img(output)
    
In [232]:
edit_img(data.text(), sigma=0., G=0.5);

Let's use ipywidgets's interactive functionality to see the effect of these parameters

In [233]:
limits = (0., 1., 0.01)
interactive(edit_img, arr=fixed(arr), sigma=(0., 5., 0.5), R=limits, G=limits, B=limits)
In [235]:
def choose_img(name):
    global img
    arr = getattr(data, name)()
    return arr2img(arr)
In [236]:
interact(choose_img, name=sorted(set(data.__all__) - {'load', 'data_dir', 'lfw_subset', 'stereo_motorcycle'}));
In [215]:
temp = data.page(); 
print(temp.ndim)
print(temp.dtype)
2
uint8
In [218]:
plt.imshow(temp,cmap=mpl.cm.gray)
Out[218]:
<matplotlib.image.AxesImage at 0x7fca4eeebc50>
In [219]:
temp.min(), temp.max()
Out[219]:
(0, 255)
In [ ]:
def edit_image(image, sigma, R, G, B):
    # a minimal sketch (assumes an RGB float image): blur, then scale each channel
    new_image = filters.gaussian(image, sigma=sigma, multichannel=True) * np.array([R, G, B])
    return arr2img(new_image)

Now, let's point the image paths to the SpaceNet dataset directory.

Sample dataset path

In [313]:
DATA = Path("/home/hayley/Data_Spacenet/")

# Sample dataset
sample_dir = Path("/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/")
sample_root_dirs = [sample_dir/ city for city in ["AOI_2_Vegas_Roads_Sample",  
                                                  "AOI_3_Paris_Roads_Sample", 
                                                  "AOI_4_Shanghai_Roads_Sample", 
                                                  "AOI_5_Khartoum_Roads_Sample"]
                   ]
sample_rgb_dirs = [root/"RGB-PanSharpen" for root in sample_root_dirs]
sample_vec_dirs = [root/"geojons/spacenetroads" for root in sample_root_dirs]
sample_buff_vec_dirs = [root/"geojson/buffer" for root in sample_root_dirs]     
In [286]:
def get_short_name(fname):
    """Extract a short name from pathlib.Path object to fname"""
    if isinstance(fname, str):
        fname = Path(fname)
    return "_".join(fname.name.split("_")[1:])
In [278]:
sample_vegas_rgbs = [fname for fname in sample_rgb_dirs[0].iterdir() if fname.suffix == '.tif']
In [279]:
print(str(sample_vegas_rgbs[0]))
print(get_short_name(sample_vegas_rgbs[0]))
/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/RGB-PanSharpen/RGB-PanSharpen_AOI_2_Vegas_img1454.tif
AOI_2_Vegas_img1454.tif
In [250]:
import apls_tools
In [ ]:
apls_tools.convert_to_8Bit(str(sample_vegas_rgbs[0]), './temp_8bit.tif')  # output name here is just illustrative
In [251]:
import rasterio
In [268]:
gmin, gmax = float('inf'), -float('inf')
for i in range(len(sample_vegas_rgbs)):
    arr = rasterio.open(sample_vegas_rgbs[i]).read()
    cmin, cmax = arr.min(), arr.max()
    if cmin < gmin:
        gmin = cmin
    if cmax > gmax:
        gmax = cmax
print("global min, max: ", gmin, ", ", gmax)
global min, max:  0 ,  2047
In [264]:
ds = rasterio.open(str(sample_vegas_rgbs[0]))
ds.transform
Out[264]:
Affine(2.699999999993434e-06, 0.0, -115.1671176,
       0.0, -2.6999999999988997e-06, 36.1247876997)
In [267]:
data = ds.read()
print(data.shape)
print(data.min(), data.max())
(3, 1300, 1300)
0 1909
In [270]:
def get_global_min_max(fnames):
    """compute the min and max of the image arrays in the fname list"""
    gmin, gmax = float('inf'), -float('inf')
    for fname in fnames:
        arr = rasterio.open(fname).read()
        cmin, cmax = arr.min(), arr.max()
        if cmin < gmin:
            gmin = cmin
        if cmax > gmax:
            gmax = cmax
    print("global min, max: ", gmin, ", ", gmax)
    return gmin, gmax
In [271]:
get_global_min_max(sample_vegas_rgbs)
global min, max:  0 ,  2047
Out[271]:
(0, 2047)
In [ ]:
 
In [ ]:
parent = sample_vegas_rgbs
out_dir = 'RGB-PanSharpen-8bits'
i=0
for in_path in sample_vegas_rgbs:
    in_name = get_short_name(in_path)
    out_name = "_".join([out_dir,in_name])
    out_path = in_path.parent.parent /out_dir/ out_name
    print("outname: ", str(outname))
    apls_tools.convert_to_8Bit(str(in_path), str(out_path))
    
    if i>10: 
        break
    i+=1
In [301]:
def batch_convert_to_8Bit(in_rgb_dir, out_dir_name='RGB-PanSharpen-8bits', verbose=False):
    
    if isinstance(in_rgb_dir, str):
        in_rgb_dir = Path(in_rgb_dir)
        
    # set output directory
    out_rgb_dir = in_rgb_dir.parent / out_dir_name
    print("in: ", str(in_rgb_dir))
    print("out: ", str(out_rgb_dir))
    
    if not out_rgb_dir.exists():
        out_rgb_dir.mkdir()
            
    # convert each image to 8 bit
    for i,in_path in enumerate(in_rgb_dir.iterdir()):
        if in_path.suffix != '.tif': continue
        in_name = get_short_name(in_path)
        out_name = "_".join([out_dir_name,in_name])

        out_path = out_rgb_dir/ out_name
        apls_tools.convert_to_8Bit(str(in_path), str(out_path))
        
        if (verbose and i%100==0):
            print("i :", i)
            print("outname: ", str(out_name))

    
In [302]:
batch_convert_to_8Bit(sample_rgb_dirs[0],verbose=True)
in:  /home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/RGB-PanSharpen
out:  /home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/RGB-PanSharpen-8bits
i : 0
outname:  RGB-PanSharpen-8bits_AOI_2_Vegas_img1454.tif

Convert all sample rgb images to 8bits

In [ ]:
for sample_rgb_dir in sample_rgb_dirs:
    batch_convert_to_8Bit(sample_rgb_dir)

Now let's make sure this conversion makes sense. We can check with QGIS.
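Besides eyeballing in QGIS, a quick programmatic sanity check is possible (a sketch; it assumes the converted files sit in the RGB-PanSharpen-8bits folders created above):

import rasterio

out_dir = sample_rgb_dirs[0].parent / 'RGB-PanSharpen-8bits'
sample_8bit = next(f for f in out_dir.iterdir() if f.suffix == '.tif')
with rasterio.open(str(sample_8bit)) as ds8:
    print(ds8.dtypes, ds8.shape, ds8.count)    # expect ('uint8', 'uint8', 'uint8'), (1300, 1300) and 3 bands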

Convert all training rgb images to 8bits

In [314]:
vegas_root = Path("/home/hayley/Data_Spacenet/AOI_2_Vegas_Roads_Train/")
paris_root = Path("/home/hayley/Data_Spacenet/AOI_3_Paris_Roads_Train/")
shanghai_root = Path("/home/hayley/Data_Spacenet/AOI_4_Shanghai_Roads_Train/")
k_root = Path("/home/hayley/Data_Spacenet/AOI_5_Khartoum_Roads_Train/")

train_root_dirs = [vegas_root, paris_root, shanghai_root, k_root]
train_rgb_dirs = [root/"RGB-PanSharpen" for root in train_root_dirs]
train_vec_dirs = [root/"geojson/spacenetroads" for root in train_root_dirs]
train_buff_vec_dirs = [root/"geojson/buffer" for root in train_root_dirs]
In [ ]:
for train_rgb_dir in train_rgb_dirs:
    batch_convert_to_8Bit(train_rgb_dir, verbose=True)

Now, let's make sure the number of 8-bit files is the same as the number of original training RGB images.

In [307]:
from collections import Counter, defaultdict
In [ ]:
c = defaultdict(dict)
In [ ]:
original_counts = dict()
converted_counts = dict()
for i,train_rgb_dir in enumerate(train_rgb_dirs):
    train_rgb_8bits_dir = train_rgb_dir.parent/'RGB-PanSharpen-8bits'
    original_counts[i] = Counter(f.suffix for f in train_rgb_dir.iterdir())
    converted_counts[i] = Counter(f.suffix for f in train_rgb_8bits_dir.iterdir())
print("Original:")
print(original_counts)

print("8bits:")
print(converted_counts)
    
    

Perfect. As a last double check, let's look at a couple of them in QGIS to make sure the conversion was done right.

Okay, great. So now we have accomplished the 16-bit RGB -> 8-bit RGB conversion. This will be useful for visualizing the buffer raster results. For model training, however, we will probably use the 16-bit images since they carry more information.

Before moving on, I would like to check whether any of the raster images contain invalid data. First I check which values are considered invalid from the tif file's metadata:

In [374]:
sample_vegas_buff_vecs = sorted(list(f for f in sample_buff_vec_dirs[0].iterdir() if f.suffix == '.geojson'))
sample_vegas_8bit_rgbs = sorted(list(f for f in (sample_rgb_dirs[0].parent/'RGB-PanSharpen-8bits').iterdir() if f.suffix=='.tif'))
sample_vegas_16bit_rgbs = sorted(list(f for f in sample_rgb_dirs[0].iterdir() if f.suffix=='.tif'))
In [380]:
ds_16 = rasterio.open(sample_vegas_16bit_rgbs[0])
ds_8 = rasterio.open(sample_vegas_8bit_rgbs[0])
In [383]:
print("no data values: ")
print(ds_16.nodatavals)
print(ds_8.nodatavals)
no data values: 
(None, None, None)
(None, None, None)
In [384]:
ds_16.nodata

Great, so None indicates the invalid/missing value in the original RGB raster images.

Check if there is any invalid data in the 16bit and 8bit RGB images.

Now, let's see if any of the original 16bit RGB or processed 8bit RGB raster images contains None.

In [385]:
from collections import defaultdict
In [ ]:
%%time
nones = [] # list of filenames (pathlib Path object) that contains any invalid value
zeros = {} #key=filename, value=number of zeros
for train_rgb_dir in train_rgb_dirs:
    print("="*80)
    print(str(train_rgb_dir.parent.name))
    for i,f in enumerate(train_rgb_dir.iterdir()):
        if i%99 == 0:
            print(i)
        if f.suffix != '.tif': continue
        with rasterio.open(f, 'r') as ds:
            arr = ds.read()
            has_none = np.any(np.isnan(arr))
            num_zeros = np.sum([True if val<1e-6 else False for val in arr.ravel()])
#             pdb.set_trace()
            if has_none:
                nones.append(f)
            if num_zeros > 0:
                zeros[str(f.name)] = num_zeros
                
print("Number of files with any invalid value")
print(len(nones))

print("Number of files with any zero: ")
print(len(zeros))
        
In [390]:
[True if i<5 else False for i in range(10)]
Out[390]:
[True, True, True, True, True, False, False, False, False, False]

Awesome, so there is no invalid value in the training dataset. How about values of 0, then? I'm inspecting this in preparation for the later step of generating road-buffer masks from the buffer vector files; the loop above counts zeros at the same time as it checks for None values.

Reading buffer vector data using Fiona

reference

In [355]:
import fiona
from rasterio.mask import mask
from rasterio.plot import reshape_as_image, reshape_as_raster
In [357]:
ds = rasterio.open(sample_vegas_8bit_rgbs[0])
ds.crs
Out[357]:
CRS({'init': u'epsg:4326'})
In [356]:
arr = ds.read()
plt.imshow(reshape_as_image(arr))
Out[356]:
<matplotlib.image.AxesImage at 0x7fca4453a150>
In [335]:
with fiona.open(sample_vegas_buff_vecs[0]) as buff_file:
    geoms = [feature['geometry'] for feature in buff_file]
# TODO (rasterio): set the pixels outside the geometry to burn as -inf or None, etc.
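One hedged way to address the TODO: rasterio.mask.mask takes nodata and filled arguments that control what the pixels outside the geometries become, for example:

with rasterio.open(sample_vegas_8bit_rgbs[0]) as src:
    # pixels outside `geoms` are filled with the nodata value (0 here) instead of being left untouched
    out_img, out_transform = mask(src, geoms, crop=True, nodata=0, filled=True)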
In [359]:
with rasterio.open(sample_vegas_8bit_rgbs[0]) as src:
#     ones = np.array(
    out_img, out_transform = mask(src, geoms, crop=True, )
    out_img
    out_meta = src.meta.copy()
out_meta.update({"driver": "GTiff",
                 "height": out_img.shape[1],
                 "width": out_img.shape[2],
                 "transform": out_transform})

with rasterio.open("./temp.tif", "w", **out_meta) as dest:
    dest.write(out_img)
    
In [371]:
with rasterio.open(sample_vegas_8bit_rgbs[0]) as src:

    src_im = reshape_as_image(src.read())
    print(np.any(np.isnan(src_im)))
    print(np.sum(np.any(src_im<1e-6)))
False
1
In [ ]:
with rasterio.open('./temp.tif') as f:
    mask = reshape_as_image(f.read())
    print(mask.min(), mask.max())
    count = Counter(mask.ravel())
    print(count)
    pdb.set_trace()
    mask[mask != 0] = 255
    plt.imshow(mask)
In [ ]:
def generate_buffer_vecs(road_vec_dir):
    """
    Given a path to `road_vec_dir`, generate a buffer vector file for each road vec file in that directory
    based on the road_type and num_lanes
    Args:
    - road_vec_dir (pathlib.Path): path to a directory containing spacenet road vector files (.geojson)
    Returns:
    - buff_vec_dir (pathlib.Path): path to a new directory containing the processed buffer vector files
        It is generated under the `geojson` folder called `buffer`.  This `buffer` directory contains
        buffer vector files for each road vec in the input directory.
    """
    #to implement
    pass
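A rough sketch of what this could look like (not the final implementation; the per-type widths are assumed, illustrative values, and osmnx's project_gdf is used so that the buffering happens in meters):

def generate_buffer_vecs_sketch(road_vec_dir, widths=None, default_width=3.0):
    """Sketch: buffer each road vector by a per-road-type width (in meters) and save under geojson/buffer."""
    widths = widths or {'5': 3.0, '3': 5.0, '6': 2.0}      # illustrative widths keyed by road_type
    road_vec_dir = Path(road_vec_dir)
    buff_vec_dir = road_vec_dir.parent / 'buffer'
    if not buff_vec_dir.exists():
        buff_vec_dir.mkdir()

    for f in road_vec_dir.iterdir():
        if f.suffix != '.geojson':
            continue
        gdf = gpd.read_file(str(f))
        if len(gdf) == 0:
            continue
        gdf_utm = ox.project_gdf(gdf)                      # project so buffer distances are in meters
        w = gdf_utm['road_type'].astype(str).map(widths).fillna(default_width).values
        gdf_utm['geometry'] = [geom.buffer(wi) for geom, wi in zip(gdf_utm.geometry, w)]
        out = gdf_utm.to_crs(gdf.crs)                      # back to lat-lon to match the rasters
        out.to_file(str(buff_vec_dir / f.name.replace('spacenetroads', 'buffer')), driver='GeoJSON')
    return buff_vec_dir

Once the road_type/lane_number width rule is settled (cf. get_width above), the hard-coded widths dict would be replaced by that.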

Jan 10, 2019

Introduction to Planet API

Planet API Introduction

In [ ]:
import os, json, requests
from requests.auth import HTTPBasicAuth
from pprint import pprint

os.environ['PL_API_KEY']  = '7d8af35b6e944f33bb5e33ada32ab4a0'
PL_API_KEY = os.getenv('PL_API_KEY')
PL_AUTH = HTTPBasicAuth(PL_API_KEY, '')

# Setup Planet Data API base URL
BASE_URL = "https://api.planet.com/data/v1"

#Setup the session
session = requests.Session()
In [ ]:
roi_data = {
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "properties": {},
      "geometry": {
        "type": "Polygon",
        "coordinates": [
          [
            [
              -118.2967472076416,
              34.00578318672437
            ],
            [
              -118.22739601135252,
              34.00578318672437
            ],
            [
              -118.22739601135252,
              34.07846940942791
            ],
            [
              -118.2967472076416,
              34.07846940942791
            ],
            [
              -118.2967472076416,
              34.00578318672437
            ]
          ]
        ]
      }
    }
  ]
}
In [ ]:
roi_geom = roi_data['features'][0]['geometry']
In [ ]:
roi_geom
In [ ]:
# Filters to download data
In [ ]:
geom_filter = {
    'type': 'GeometryFilter',
    'field_name': 'geometry',
    'config': roi_geom
}

date_range_filter = {
    'type': 'DateRangeFilter',
    'field_name': 'acquired', #meaning, captured by satellite
    'config': {
        "gte": "2016-03-01T00:00:00.000Z",
        "lte": "2016-09-01T00:00:00.000Z"
    }
}

cloud_cover_filter = {
    'type':'RangeFilter',
    'field_name': 'cloud_cover',
    'config': {
        "lte":0.1
    }
}

combined_filter = {
    'type': 'AndFilter',
    'config': [geom_filter, date_range_filter, cloud_cover_filter]
}

Request data

In [ ]:
# PLANET_API_KEY = '7d8af35b6e944f33bb5e33ada32ab4a0'
ITEM_TYPE = 'PSScene4Band'
SEARCH_URL = 'https://api.planet.com/data/v1/quick-search'
ASSET_TYPE = 'analytic'

# api request object
search_request = {
    'interval': 'day',
    'item_types': [ITEM_TYPE],
    'filter': combined_filter
}

# send the POST request
search_result = requests.post(SEARCH_URL,
                              auth=PL_AUTH,
                              json=search_request)

# print(json.dumps(search_result.json(), indent=1))  
# 
In [ ]:
 
In [ ]:
print(search_result.json().keys())
pprint(search_result.json()['_links'])
In [ ]:
features = search_result.json()['features']
print('number of images retrieved: ', len(features))
scene_ids = [features[idx]['id'] for idx in range(len(features))]
scene_links = [f['_links'] for f in features]
pprint([(scene_id, scene_link) 
        for (scene_id, scene_link) in zip(scene_ids[:5], scene_links[:5])])
In [ ]:
id0 = scene_ids[0]
id0_url = f'https://api.planet.com/data/v1/item-types/{ITEM_TYPE}/items/{id0}/assets'
print('id0_url: ', id0_url)
In [ ]:
def get_asset_url(item_type,scene_id):
    return f'https://api.planet.com/data/v1/item-types/{item_type}/items/{scene_id}/assets'

print(get_asset_url(ITEM_TYPE, id0))
In [ ]:
# which assets are available for this image?
result = requests.get(
    id0_url,
    auth=PL_AUTH
).json()

pprint(result.keys())

Clip scenes to AOI

In [ ]:
#clip API request payload
CLIP_URL = 'https://api.planet.com/compute/ops/clips/v1'
clip_payload = {
    'aoi': roi_geom,
    'targets': [{'item_id': sid, 
                'item_type': ITEM_TYPE,
                'asset_type': ASSET_TYPE} for sid in scene_ids[:1]
               ]
}
pprint(clip_payload['targets'])
In [ ]:
# Request clip operation on target scenes
clip_response = requests.post(
    CLIP_URL,
    auth=PL_AUTH,
    json=clip_payload)
In [ ]:
result_url = clip_response
In [ ]:
check_state_request = requests.get(
    clip_response.json()['_links']['_self'],
    auth=PL_AUTH)
In [ ]:
if check_state_request.json()['state'] == 'succeeded':
    clip_download_url = check_state_request.json()['_links']['results'][0]
In [ ]:
clip_download_url
In [ ]:
from tqdm import tqdm  # needed here since the import cell comes later

clip_download_response = requests.get(clip_download_url, stream=True)
with open('./output/temp.zip', 'wb') as w:
    for data in tqdm(clip_download_response.iter_content()):
        w.write(data)
In [ ]:
# unzip
import zipfile  # needed here since the import cell comes later
zipped = zipfile.ZipFile('./output/temp.zip')
zipped.extractall('./output/temp')
In [ ]:
!ls output
!unzip output/temp.zip

Activation and Downloading

In [ ]:
def activate(scene_ids, item_links, 
             item_type=ITEM_TYPE, asset_type=ASSET_TYPE):
    # sketch: request activation of the chosen asset type for each scene
    for scene_id in scene_ids:
        head_asset_url = get_asset_url(item_type, scene_id)
        asset_response = requests.get(head_asset_url, auth=PL_AUTH).json()
        activation_link = asset_response[asset_type]['_links']['activate']
        requests.get(activation_link, auth=PL_AUTH)
    
    
In [ ]:
#is this image's analytic asset already activated?
print(result['analytic'].keys())
print(result['analytic']['status'])
In [ ]:
# let's activate this asset for download
# some useful links
links = result['analytic']['_links'];print(links)
self_link = links['_self']
activation_link = links['activate']
In [ ]:
# request the activation of this asset
activate_result = requests.get(
    activation_link,
    auth=PL_AUTH
)
In [ ]:
# check if the asset is activated
activation_status_result = requests.get(
    self_link,
    auth=PL_AUTH
)
In [ ]:
activation_status_result = activation_status_result.json()
In [ ]:
pprint(activation_status_result)
In [ ]:
# download link
dlink = activation_status_result['location']
print('download link: \n', dlink)
In [ ]:
dlink_response = requests.get(
        dlink, stream=True)
In [ ]:
import zipfile
from tqdm import tqdm
In [ ]:
# scene_id = '20160715_174333_0e0e'
scene_id = 'sample'
outname = f'./output/{scene_id}.zip'
with open(outname, 'wb') as w:
    for data in tqdm(dlink_response.iter_content()):
        w.write(data)
        
In [ ]:
dlink_result.json()

View downloaded image using rasterio

In [ ]:
import rasterio as rio
from rasterio.plot import reshape_as_image, reshape_as_raster

import matplotlib.pyplot as plt
%matplotlib inline

import pdb
In [ ]:
fname = './images/20160731_174506_0e30_3B_AnalyticMS.tif'
img = None
with rio.open(fname, 'r') as ds:
#     pdb.set_trace()
    b,g,r,n = ds.read()
    img = np.dstack([r,g,b])
#     img = reshape_as_image(ds.read())
plt.imshow(img)
In [ ]:
f,ax = plt.subplots(3,1,figsize=(20,20))
ax = ax.flatten()
for i,cimg in enumerate([r,g,b]):
    ax[i].hist(cimg.ravel(), bins=256)
    
    
In [ ]:
temp = np.dstack([b,g]); print(temp.shape)