2012年9月17日 星期一

Advance Monthly Retail Trade Report

Link

http://www.census.gov/retail/



Sourcing Phase 1: Download


"""
Example:
    http://www.census.gov/econ/currentdata/export/csv?programCode=MARTS&timeSlotType=12&startYear=1992&endYear=2012&categoryCode=44X72&dataTypeCode=SM&geoLevelCode=US&adjusted=yes&errorData=no&internal=false

"U.S. Census Bureau"
"Source: Advance Monthly Sales for Retail and Food Services"
"44X72: Retail Trade and Food Services: U.S. Total"
"Seasonally Adjusted Sales - Monthly [Millions of Dollars]"
"""
import datetime
import os
import re
import sys

class MigrateWebToContent():

    def __init__(self):
        self.url_template = \
            '''http://www.census.gov/econ/currentdata/export/csv?programCode=MARTS&timeSlotType=12&startYear=%d&endYear=%d&categoryCode=44X72&dataTypeCode=SM&geoLevelCode=US&adjusted=yes&errorData=no&internal=false'''
        self.wget = os.path.abspath('../thirdparty/wget/wget.exe')
        assert os.path.isfile(self.wget)

    """
    query
        'startYear'
        'endYear'
    """
    def migrate(self, query, dest_dir):
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        url = self.__make_url(query)
        self.__wget(url, dest_dir)

    def __make_url(self, query):
        return self.url_template % (query['startYear'], query['endYear'])

    def __wget(self, url, dest_dir):
        dest_file = os.path.join(dest_dir, re.compile('https?://').sub('', url))
        dest_file_dir = os.path.dirname(dest_file)
        if not os.path.exists(dest_file_dir):
            os.makedirs(dest_file_dir)
     
        wget_cmdline = '''%s \"%s\" --waitretry=3 -P %s''' % (self.wget, url, dest_file_dir)
        os.system(wget_cmdline)

def main():
    q = {
        'startYear' : 1992,
        'endYear' : datetime.datetime.now().year,
    }
    m = MigrateWebToContent()
    m.migrate(q, '''./content/''')

if __name__ == '__main__':
    sys.exit(main())



Sourcing Phase 2: Rename

import logging
import os
import shutil
import sys

import logger

class MigrateContentToRenamed():

    def __init__(self):
        self.__logger = logging.getLogger()
        self.category_code = [
            '44X72', # Retail Trade and Food Services: U.S. Total
        ]
    
    def migrate_batch(self, src_dir, dest_dir):
        assert os.path.isdir(src_dir)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        for file in os.listdir(src_dir):
            args = self.__parse_args(file)
            category_code = args['categoryCode']
            if category_code not in self.category_code:
                self.__logger.error('''No such category_code. File => %s''' % file)
                continue
            adjusted_str = self.__adjusted_str(args['adjusted'])
            cvs_file = '''%s_%s_%s_%s.csv''' % (
                args['categoryCode'], args['startYear'], args['endYear'], adjusted_str
            )
            shutil.copy(os.path.join(src_dir, file), os.path.join(dest_dir, cvs_file))

    def __parse_args(self, args_line):
        args = {}
        for kvs in args_line.split('&'):
            kv = kvs.split('=')
            args[kv[0]] = kv[1]
        return args

    def __adjusted_str(self, adjusted):
        if adjusted == 'yes':
            return 'adjusted'
        else:
            return 'nonadjusted'


        
def main():
    logger.config_root(level=logging.INFO)
    m = MigrateContentToRenamed()
    m.migrate_batch('./content/www.census.gov/econ/currentdata/export',
                    './renamed/')
if __name__ == '__main__':
    sys.exit(main())



Sourcing Phase 3: Write to SQLite

"""
Schema:
drop table if exists AdvanceMonthlyRetailTradeReport;

create table if not exists AdvanceMonthlyRetailTradeReport 
(
    creation_dt datetime default current_timestamp,
    period datetime not null,
    number text,
    revision int default 0,
    unique (period, number, revision) on conflict ignore
);

CSV content:

"U.S. Census Bureau"
"Source: Advance Monthly Sales for Retail and Food Services"
"44X72: Retail Trade and Food Services: U.S. Total"
"Seasonally Adjusted Sales - Monthly [Millions of Dollars]"
"Period: 1992 to 2012"
"Data Extracted on: September 17, 2012 (9:52 pm)"

Period,Value
Jan-1992,164083
Feb-1992,164260
Mar-1992,163747
Apr-1992,164759
May-1992,165610
Jun-1992,166089
"""
import csv
import datetime
import logging
import os
import sqlite3
import sys

import logger

class MigrateRenamedToSqlite():

    def __init__(self):
        self.__logger = logging.getLogger()
        self.sql_insert = '''insert or ignore into
            AdvanceMonthlyRetailTradeReport(period, number) values('%s', '%s')
            '''

    def migrate_batch(self, src_dir, dest_db):
        assert os.path.isdir(src_dir)

        for file in os.listdir(src_dir):
            self.migrate(os.path.join(src_dir, file), dest_db)
            
    def migrate(self, src_csv, dest_db):
        self.__logger.debug('''%s => %s''' % (src_csv, dest_db))
        assert os.path.isfile(src_csv)
        assert os.path.isfile(dest_db)
        
        conn = sqlite3.connect(dest_db)
        cursor = conn.cursor()
        is_began = False
        csv_reader = csv.reader(open(src_csv, 'r'))
        for row in csv_reader:
            if not is_began:
                if len(row) is 2:
                    is_began = True
                    continue
            elif row[1] != 'NA':
                period = self.__period_str(row[0])
                number = row[1]
                sql_cmd = self.sql_insert % (period, number)
                cursor.execute(sql_cmd)

                self.__logger.debug('''(period, value): (%s, %s) => %s'''
                                    % (period, number, dest_db))
        conn.commit()
        cursor.close()
        conn.close()
    
    def __period_str(self, period):
        return datetime.datetime.strptime(period, '%b-%Y').strftime('%Y-%m-%d')

       

def main():
    logger.config_root(level=logging.DEBUG)
    m = MigrateRenamedToSqlite()
    m.migrate_batch('./renamed/', '../db/economicstotal.db')
if __name__ == '__main__':
    sys.exit(main())



Analyzing: Excel

Step 1: Open SQLite2009 Pro => Execute SQL command:

SELECT creation_dt
     , period
     , number
FROM [AdvanceMonthlyRetailTradeReport]
order by period


Step 2: In Result tab, right click => Export to Excel.

Step 3: Calculate YOY.

Step 4: Select period, number, yoy columns => Insert => Line Charts => Format YOY Data Series => Series Options => Plot Series On => Secondary Axis



沒有留言:

張貼留言