http://www.census.gov/retail/
Sourcing Phase 1: Download
"""
Example:
http://www.census.gov/econ/currentdata/export/csv?programCode=MARTS&timeSlotType=12&startYear=1992&endYear=2012&categoryCode=44X72&dataTypeCode=SM&geoLevelCode=US&adjusted=yes&errorData=no&internal=false
"U.S. Census Bureau"
"Source: Advance Monthly Sales for Retail and Food Services"
"44X72: Retail Trade and Food Services: U.S. Total"
"Seasonally Adjusted Sales - Monthly [Millions of Dollars]"
"""
import datetime
import os
import re
import sys
class MigrateWebToContent():
def __init__(self):
self.url_template = \
'''http://www.census.gov/econ/currentdata/export/csv?programCode=MARTS&timeSlotType=12&startYear=%d&endYear=%d&categoryCode=44X72&dataTypeCode=SM&geoLevelCode=US&adjusted=yes&errorData=no&internal=false'''
self.wget = os.path.abspath('../thirdparty/wget/wget.exe')
assert os.path.isfile(self.wget)
"""
query
'startYear'
'endYear'
"""
def migrate(self, query, dest_dir):
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
url = self.__make_url(query)
self.__wget(url, dest_dir)
def __make_url(self, query):
return self.url_template % (query['startYear'], query['endYear'])
def __wget(self, url, dest_dir):
dest_file = os.path.join(dest_dir, re.compile('https?://').sub('', url))
dest_file_dir = os.path.dirname(dest_file)
if not os.path.exists(dest_file_dir):
os.makedirs(dest_file_dir)
wget_cmdline = '''%s \"%s\" --waitretry=3 -P %s''' % (self.wget, url, dest_file_dir)
os.system(wget_cmdline)
def main():
q = {
'startYear' : 1992,
'endYear' : datetime.datetime.now().year,
}
m = MigrateWebToContent()
m.migrate(q, '''./content/''')
if __name__ == '__main__':
sys.exit(main())
Sourcing Phase 2: Rename
import logging
import os
import shutil
import sys
import logger
class MigrateContentToRenamed():
def __init__(self):
self.__logger = logging.getLogger()
self.category_code = [
'44X72', # Retail Trade and Food Services: U.S. Total
]
def migrate_batch(self, src_dir, dest_dir):
assert os.path.isdir(src_dir)
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
for file in os.listdir(src_dir):
args = self.__parse_args(file)
category_code = args['categoryCode']
if category_code not in self.category_code:
self.__logger.error('''No such category_code. File => %s''' % file)
continue
adjusted_str = self.__adjusted_str(args['adjusted'])
cvs_file = '''%s_%s_%s_%s.csv''' % (
args['categoryCode'], args['startYear'], args['endYear'], adjusted_str
)
shutil.copy(os.path.join(src_dir, file), os.path.join(dest_dir, cvs_file))
def __parse_args(self, args_line):
args = {}
for kvs in args_line.split('&'):
kv = kvs.split('=')
args[kv[0]] = kv[1]
return args
def __adjusted_str(self, adjusted):
if adjusted == 'yes':
return 'adjusted'
else:
return 'nonadjusted'
def main():
logger.config_root(level=logging.INFO)
m = MigrateContentToRenamed()
m.migrate_batch('./content/www.census.gov/econ/currentdata/export',
'./renamed/')
if __name__ == '__main__':
sys.exit(main())
Sourcing Phase 3: Write to SQLite
"""
Schema:
drop table if exists AdvanceMonthlyRetailTradeReport;
create table if not exists AdvanceMonthlyRetailTradeReport
(
creation_dt datetime default current_timestamp,
period datetime not null,
number text,
revision int default 0,
unique (period, number, revision) on conflict ignore
);
CSV content:
"U.S. Census Bureau"
"Source: Advance Monthly Sales for Retail and Food Services"
"44X72: Retail Trade and Food Services: U.S. Total"
"Seasonally Adjusted Sales - Monthly [Millions of Dollars]"
"Period: 1992 to 2012"
"Data Extracted on: September 17, 2012 (9:52 pm)"
Period,Value
Jan-1992,164083
Feb-1992,164260
Mar-1992,163747
Apr-1992,164759
May-1992,165610
Jun-1992,166089
"""
import csv
import datetime
import logging
import os
import sqlite3
import sys
import logger
class MigrateRenamedToSqlite():
def __init__(self):
self.__logger = logging.getLogger()
self.sql_insert = '''insert or ignore into
AdvanceMonthlyRetailTradeReport(period, number) values('%s', '%s')
'''
def migrate_batch(self, src_dir, dest_db):
assert os.path.isdir(src_dir)
for file in os.listdir(src_dir):
self.migrate(os.path.join(src_dir, file), dest_db)
def migrate(self, src_csv, dest_db):
self.__logger.debug('''%s => %s''' % (src_csv, dest_db))
assert os.path.isfile(src_csv)
assert os.path.isfile(dest_db)
conn = sqlite3.connect(dest_db)
cursor = conn.cursor()
is_began = False
csv_reader = csv.reader(open(src_csv, 'r'))
for row in csv_reader:
if not is_began:
if len(row) is 2:
is_began = True
continue
elif row[1] != 'NA':
period = self.__period_str(row[0])
number = row[1]
sql_cmd = self.sql_insert % (period, number)
cursor.execute(sql_cmd)
self.__logger.debug('''(period, value): (%s, %s) => %s'''
% (period, number, dest_db))
conn.commit()
cursor.close()
conn.close()
def __period_str(self, period):
return datetime.datetime.strptime(period, '%b-%Y').strftime('%Y-%m-%d')
def main():
logger.config_root(level=logging.DEBUG)
m = MigrateRenamedToSqlite()
m.migrate_batch('./renamed/', '../db/economicstotal.db')
if __name__ == '__main__':
sys.exit(main())
Analyzing: Excel
Step 1: Open SQLite2009 Pro => Execute SQL command:
SELECT creation_dt
, period
, number
FROM [AdvanceMonthlyRetailTradeReport]
order by period
Step 2: In Result tab, right click => Export to Excel.
Step 3: Calculate YOY.
Step 4: Select period, number, yoy columns => Insert => Line Charts => Format YOY Data Series => Series Options => Plot Series On => Secondary Axis
沒有留言:
張貼留言