初试Tensorflow

背景

在地图上渲染路况时,会针对不同的拥堵情况选择不同的颜色。一般来讲,道路拥堵情况分为三个状态:畅通,缓行,拥堵,分别用绿色,黄色,红色来渲染。
我们面临的问题是,已知道路属性以及通行速度,需要对路况状态进行分类。解决方案是依据第三方路况提供的路况状态以及抓取的高德路况状态来训练一个三分类模型。

特征处理

应用的特征如下

feature description
speed 路况速度
maxspeed 道路最大速度
highway_level 道路等级,共有17种可能,使用one-hot-encoding
lanes 车道数
oneway 是否是单向路,使用one-hot-encoding

路况状态使用 0-1-2 分别表示缓行-拥堵-畅通
处理好的特征保存为以**\t**分隔的文本文件,每行一个样本,最后一列代表路况状态。

模型训练

模型使用TensorFlow 提供的DNN分类器。代码如下

#-*- coding: utf-8 -*-
"""
File Name: traffic_status_classifier.py
Author: ce39906
mail: ce39906@163.com
Created Time: 2018-09-03 19:11:57
"""

import sys
import time
import numpy as np
import tensorflow as tf

# Names of the model input features. list_2_tf_dataset maps the i-th column
# of the training data to the i-th name here, so the order matters.
# A generator expression (rather than a list comprehension) keeps the loop
# variable out of the module namespace on Python 2.
FEATURES = (
    ["speed", "maxspeed"]
    + list("level_%d" % level for level in range(1, 18))
    + ["lanes", "oneway_0", "oneway_1"]
)

def usage():
    """Print the expected command-line invocation of this script."""
    # Single-argument print(...) emits the same text on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("python %s ${train_data_file}" % (sys.argv[0]))

def read_data(train_data_file):
    """Load a tab-separated sample file and split it into train/test parts.

    Every non-blank line is one sample: the leading columns are features and
    the last column is the label. Each value is parsed as a float and then
    truncated to an int. Samples are not shuffled; the first 80% of the rows
    (in file order) become the training set and the rest the test set.

    Args:
        train_data_file: path of the tab-separated text file to read.

    Returns:
        A tuple (train_x, train_y, test_x, test_y) where the *_x items are
        lists of feature rows and the *_y items are lists of labels.

    Raises:
        ValueError: if a non-blank line contains a non-numeric field.
    """
    xy_list = []
    with open(train_data_file, 'r') as f:
        for line in f:
            line = line.rstrip('\r\n')
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no sample; parsing them would fail on float('').
            if not line.strip():
                continue
            xy_list.append([int(float(x)) for x in line.split('\t')])

    # 80% as train data, 20% as test data
    split_at = int(len(xy_list) * 0.8)
    train_xy = xy_list[:split_at]
    test_xy = xy_list[split_at:]

    train_x = [row[:-1] for row in train_xy]
    train_y = [row[-1] for row in train_xy]

    test_x = [row[:-1] for row in test_xy]
    test_y = [row[-1] for row in test_xy]

    return train_x, train_y, test_x, test_y

def list_2_tf_dataset(train_x, train_y, test_x, test_y):
    """Turn row-oriented Python lists into NumPy-backed model inputs.

    Feature rows are converted into a dict that maps each name in FEATURES
    to the NumPy array holding that column; label lists become NumPy arrays.

    Args:
        train_x: list of training feature rows.
        train_y: list of training labels.
        test_x: list of test feature rows.
        test_y: list of test labels.

    Returns:
        A tuple (train_x_dataset, train_y_dataset, test_x_dataset,
        test_y_dataset).
    """
    def columns_by_name(rows):
        # Transposing the row matrix yields one array per column; column i
        # is stored under FEATURES[i].
        columns = list(np.array(rows).T)
        return dict((name, columns[idx]) for idx, name in enumerate(FEATURES))

    train_x_dataset = columns_by_name(train_x)
    test_x_dataset = columns_by_name(test_x)

    train_y_dataset = np.array(train_y)
    test_y_dataset = np.array(test_y)

    return train_x_dataset, train_y_dataset, test_x_dataset, test_y_dataset

def train_input_fn(features, labels, batch_size):
    """Build the input pipeline used for training.

    Args:
        features: feature data accepted by Dataset.from_tensor_slices.
        labels: labels matching the features.
        batch_size: number of examples per batch.

    Returns:
        A tf.data.Dataset that is shuffled with a buffer of 1000 examples,
        repeated without an explicit count, and batched.
    """
    examples = tf.data.Dataset.from_tensor_slices((features, labels))

    # Shuffle first, then repeat, then batch -- same order as a chained call.
    shuffled = examples.shuffle(1000)
    repeated = shuffled.repeat()
    return repeated.batch(batch_size)

def eval_input_fn(features, labels, batch_size):
    """Build the input pipeline used for evaluation or prediction.

    Args:
        features: feature data accepted by Dataset.from_tensor_slices.
        labels: labels matching the features, or None to feed features only.
        batch_size: number of examples per batch.

    Returns:
        A batched tf.data.Dataset (no shuffling, no repetition).
    """
    # Without labels the dataset yields features alone; otherwise pairs.
    inputs = features if labels is None else (features, labels)

    return tf.data.Dataset.from_tensor_slices(inputs).batch(batch_size)


def main():
    """Train, evaluate and export the traffic-status DNN classifier.

    Expects exactly one command-line argument: the path of the training data
    file. Prints the training time, the accuracy on the held-out test data
    and the directory returned by export_savedmodel.
    """
    if len(sys.argv) != 2:
        usage()
        # A usage error is a failure: exit non-zero so callers can detect it.
        sys.exit(1)

    batch_size = 100
    steps = 10000

    train_data_file = sys.argv[1]
    # adapt to tensorflow format
    train_x_list, train_y_list, test_x_list, test_y_list = read_data(train_data_file)
    train_x, train_y, test_x, test_y = \
        list_2_tf_dataset(train_x_list, train_y_list, test_x_list, test_y_list)

    # One numeric column per feature present in the training data.
    feature_columns = [tf.feature_column.numeric_column(key = key)
                       for key in train_x]

    start_time = time.time()
    classifier = tf.estimator.DNNClassifier(
        feature_columns = feature_columns,
        hidden_units = [10, 10],
        n_classes = 3,
        model_dir = './saved_model')

    # train the model
    classifier.train(
        input_fn = lambda:train_input_fn(train_x, train_y, batch_size),
        steps = steps)

    end_time = time.time()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Train DNN Classifier cost %fs.' % (end_time - start_time))

    # evaluate the model
    eval_result = classifier.evaluate(
        input_fn = lambda:eval_input_fn(test_x, test_y, batch_size))

    print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

    # Export the model. The parsing spec is derived from FEATURES so it can
    # never drift from the feature list used for training: every feature is
    # a scalar int64.
    feature_spec = dict((name, tf.FixedLenFeature([], tf.int64))
                        for name in FEATURES)

    def serving_input_receiver_fn():
        """Accept serialized tf.Example protos under the key 'inputs'."""
        serialized_tf_example = tf.placeholder(
            dtype = tf.string,
            shape = None,
            name = "input_example_tensor")

        receiver_tensors = {'inputs' : serialized_tf_example}
        # Parse the serialized examples into the per-feature tensors.
        features = tf.parse_example(serialized_tf_example, feature_spec)

        return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

    saved_model_dir = classifier.export_savedmodel(
        '.', serving_input_receiver_fn = serving_input_receiver_fn)

    print(saved_model_dir)

if __name__ == '__main__':
    main()

输出如下
pic

Python应用保存的模型

代码如下

#-*- coding: utf-8 -*-
"""
File Name: apply_saved_model.py
Author: ce39906
mail: ce39906@163.com
Created Time: 2018-09-04 18:48:37
"""
import sys
import tensorflow as tf
import numpy as np

def _int64_feature(value):
    """Wrap one integer in a tf.train.Feature backed by an Int64List."""
    return tf.train.Feature(int64_list = tf.train.Int64List(value = [value]))


def main():
    """Load an exported SavedModel and classify one hard-coded sample.

    Expects exactly one command-line argument: the SavedModel directory.
    Prints the class with the highest score for the sample.
    """
    if len(sys.argv) != 2:
        print("python %s ${saved_model_dir}" % sys.argv[0])
        sys.exit(1)

    saved_model_dir = sys.argv[1]

    # The predictor loads the SavedModel into a session it manages itself
    # (see the tf.contrib.predictor documentation), so a separate
    # tf.Session + tf.saved_model.loader.load would only load the model a
    # second time without being used.
    predictor = tf.contrib.predictor.from_saved_model(saved_model_dir)

    # Sample input: speed 9, maxspeed 90, highway level 3 (one-hot),
    # 3 lanes, oneway_1 set.
    sample = {'speed' : 9,
              'maxspeed' : 90,
              'lanes' : 3,
              'oneway_0' : 0,
              'oneway_1' : 1}
    for level in range(1, 18):
        sample['level_%d' % level] = 1 if level == 3 else 0

    features = tf.train.Features(
        feature = dict((name, _int64_feature(value))
                       for name, value in sample.items()))

    # The predictor is fed serialized tf.Example protos under 'inputs'.
    model_input = tf.train.Example(features = features)
    model_input = model_input.SerializeToString()
    output_dict = predictor({"inputs" : [model_input]})
    classes_list = output_dict['classes']
    scores_list = output_dict['scores']

    for scores, classes in zip(scores_list, classes_list):
        scores = scores.tolist()
        classes = classes.tolist()
        # Report the class whose score is the largest.
        max_idx = scores.index(max(scores))
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Predicted traffic status is %s" % classes[max_idx])

if __name__ == '__main__':
    main()

输出如下
pic

C++应用保存的模型

TODO