2012年9月19日 星期三

US CPI



失控的 CPI,可以想像 QE3 推下去會更加失控。


數據沒經過季節調整。關於季節調整知識,可參考以下說明:

Because price data are used for different purposes by different groups, the Bureau of Labor Statistics publishes seasonally adjusted as well as unadjusted changes each month.

For analyzing general price trends in the economy, seasonally adjusted changes are usually preferred since they eliminate the effect of changes that normally occur at the same time and in about the same magnitude every year--such as price movements resulting from changing climatic conditions, production cycles, model changeovers, holidays, and sales. 

The unadjusted data are of primary interest to consumers concerned about the prices they actually pay. Unadjusted data also are used extensively for escalation purposes. Many collective bargaining contract agreements and pension plans, for example, tie compensation changes to the Consumer Price Index before adjustment for seasonal variation. ((經濟學家或許太單純,怎麼可以撇開氣候、產品週期等等不談呢?))

以上由 Python + SQLite + Excel 產出,以下略為敘述細節。


Sourcing Phase 1: Download


"""
<http://www.bls.gov/cpi/>
<ftp://ftp.bls.gov/pub/time.series/cu/>
"""
import datetime
import os
import re
import sys

class MigrateWebToContent():

    def __init__(self):
        self.url = 'ftp://ftp.bls.gov/pub/time.series/cu/cu.data.1.AllItems'
        self.wget = os.path.abspath('../thirdparty/wget/wget.exe')
        assert os.path.isfile(self.wget)

    def migrate(self, dest_dir):
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        self.__wget(self.url, dest_dir)

    def __wget(self, url, dest_dir):
        dest_file = os.path.join(dest_dir, re.compile('ftp://').sub('', url))
        dest_file_dir = os.path.dirname(dest_file)
        if not os.path.exists(dest_file_dir):
            os.makedirs(dest_file_dir)
       
        wget_cmdline = '''%s \"%s\" --waitretry=3 -P %s''' % (self.wget, url, dest_file_dir)
        os.system(wget_cmdline)

def main():
    m = MigrateWebToContent()
    m.migrate('./content/')

if __name__ == '__main__':
    sys.exit(main())


Sourcing Phase 2: Rename

import logging
import os
import shutil
import sys

import logger

class MigrateContentToRenamed():

    def __init__(self):
        self.__logger = logging.getLogger()
    
    def migrate_batch(self, src_dir, dest_dir):
        assert os.path.isdir(src_dir)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        for file in os.listdir(src_dir):
            shutil.copy(os.path.join(src_dir, file), os.path.join(dest_dir, file))

def main():
    logger.config_root(level=logging.INFO)
    m = MigrateContentToRenamed()
    m.migrate_batch('./content/ftp.bls.gov/pub/time.series/cu/',
                    './renamed/')
 
if __name__ == '__main__':
    sys.exit(main())


Sourcing Phase 3: Write to SQLite

"""
SQLite Schema:
drop table if exists CpiUs;

create table if not exists CpiUs 
(
    creation_dt datetime default current_timestamp,
    period datetime not null,
    number text,
    revision int default 0,
    unique (period, revision) on conflict ignore
);


U.S. All items, 1982-84=100 - CUUR0000SA0

Record example:
series_id year period value footnote_codes
CUUR0000SA0       2012 M06     229.478          
CUUR0000SA0       2012 M07     229.104          
CUUR0000SA0       2012 M08     230.379
"""
import logging
import os
import sqlite3
import sys

import logger

class MigrateRenamedToSqlite():

    def __init__(self):
        self.__logger = logging.getLogger()
        self.item_prefix = 'CUUR0000SA0'
        self.sql_insert = '''replace into
            CpiUs(period, number) values('%s', '%s')
            '''
        
    def migrate_batch(self, src_dir, dest_db):
        assert os.path.isdir(src_dir)
        for file in os.listdir(src_dir):
            src_txt = os.path.join(src_dir, file)
            self.migrate(src_txt, dest_db)

    def migrate(self, src_txt, dest_db):
        self.__logger.debug('''%s => %s''' % (src_txt, dest_db))
        records = self.__fetch_records(src_txt)
        self.__write_db(records, dest_db)

    def __fetch_records(self, src_txt):
        src_file_fd = open(src_txt, 'rb')
        content = src_file_fd.read()
        src_file_fd.close()

        records = []
        lines = content.decode('utf-8').split('\n')
        for line in lines:
            if line.startswith(self.item_prefix):
                tokens = line.split()
                assert len(tokens) == 4
                
                # M13 indicates annual averages => Ignore
                if tokens[2] == 'M13':
                    continue
                
                r = [ '''%s-%s-01''' % (tokens[1], tokens[2][1:]), tokens[3] ]
                records.append(r)
        return records     

    def __write_db(self, records, dest_db):
        conn = sqlite3.connect(dest_db)
        cursor = conn.cursor()
      
        for r in records:
            sql_cmd = self.sql_insert % (r[0], r[1])
            cursor.execute(sql_cmd)
            self.__logger.debug('''(period, number): (%s, %s) => %s'''
                                % (r[0], r[1], dest_db))
        
        conn.commit()
        cursor.close()
        conn.close()

def main():
    logger.config_root(level=logging.DEBUG)
    m = MigrateRenamedToSqlite()
    m.migrate_batch('./renamed', '../db/economicstotal.db')
 
if __name__ == '__main__':
    sys.exit(main())


Reporting: Excel

Easy. 

沒有留言:

張貼留言