2012年9月18日 星期二

Capacity Utilization: Manufacturing, Mining, and Utilities

Link
http://www.federalreserve.gov/releases/g17/caputl.htm



Sourcing Phase 1: Download

"""
Data from January 1986 to present
Capacity Utilization: Manufacturing, Mining, and Utilities
<http://www.federalreserve.gov/releases/g17/ipdisk/utl_sa.txt>

Data through 1985
Capacity Utilization: Manufacturing, Mining, and Utilities
<http://www.federalreserve.gov/releases/g17/iphist/utlhist_sa.txt>
"""
import datetime
import os
import re
import sys

class MigrateWebToContent():
    """Download the two capacity-utilization text files with ``wget``.

    Each URL is saved under the destination directory in a sub-tree that
    mirrors the URL with its ``http://`` / ``https://`` scheme removed.
    """

    def __init__(self):
        # Data from January 1986 to present.
        self.url_utl_sa = 'http://www.federalreserve.gov/releases/g17/ipdisk/utl_sa.txt'
        # Data through 1985.
        self.url_utlhist_sa = 'http://www.federalreserve.gov/releases/g17/iphist/utlhist_sa.txt'
        # The relative path is resolved against the current working directory,
        # so the script has to be started from the directory it expects.
        self.wget = os.path.abspath('../thirdparty/wget/wget.exe')
        assert os.path.isfile(self.wget), 'wget not found: %s' % self.wget

    def migrate(self, dest_dir):
        """Download both data files below ``dest_dir``.

        Args:
            dest_dir: Root directory for the downloads; created when missing.
        """
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        for url in (self.url_utl_sa, self.url_utlhist_sa):
            self.__wget(url, dest_dir)

    def __wget(self, url, dest_dir):
        """Run ``wget`` for ``url``, saving into the mirrored directory.

        Args:
            url: Address of the file to download.
            dest_dir: Root directory under which the URL path is mirrored.
        """
        # Imported here so the block does not depend on the file-level imports.
        import subprocess

        dest_file = os.path.join(dest_dir, re.sub('https?://', '', url))
        dest_file_dir = os.path.dirname(dest_file)
        if not os.path.exists(dest_file_dir):
            os.makedirs(dest_file_dir)

        # An argument list (no shell) keeps paths that contain spaces or shell
        # metacharacters intact; the former string-built command line split
        # them into several arguments.
        subprocess.call(
            [self.wget, url, '--waitretry=3', '-P', dest_file_dir])

def main():
    """Download the source files into ``./content/``.

    Returns None, so ``sys.exit(main())`` ends the process with status 0.
    """
    MigrateWebToContent().migrate('./content/')

if __name__ == '__main__':
    sys.exit(main())




Sourcing Phase 2: Rename

import logging
import os
import shutil
import sys

import logger

class MigrateContentToRenamed():
    """Copy downloaded files from a content directory into a flat directory."""

    def __init__(self):
        self.__logger = logging.getLogger()

    def migrate_batch(self, src_dir, dest_dir):
        """Copy every regular file directly inside ``src_dir`` to ``dest_dir``.

        File names are kept as they are. Entries that are not regular files
        (for example sub-directories) are skipped with a warning, because
        ``shutil.copy`` cannot copy them and would abort the whole batch.

        Args:
            src_dir: Existing directory holding the files to copy.
            dest_dir: Target directory; created when missing.
        """
        assert os.path.isdir(src_dir), 'not a directory: %s' % src_dir
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        for name in os.listdir(src_dir):
            src_path = os.path.join(src_dir, name)
            if not os.path.isfile(src_path):
                self.__logger.warning('skip non-file entry: %s', src_path)
                continue
            shutil.copy(src_path, os.path.join(dest_dir, name))

def main():
    """Copy both downloaded directories into ``./renamed/``.

    Logging is set up through the project-local ``logger`` module first
    (presumably it configures the root logger -- confirm in that module).
    Returns None, so ``sys.exit(main())`` ends the process with status 0.
    """
    logger.config_root(level=logging.INFO)
    migrator = MigrateContentToRenamed()
    source_dirs = (
        './content/www.federalreserve.gov/releases/g17/ipdisk',
        './content/www.federalreserve.gov/releases/g17/iphist',
    )
    for source_dir in source_dirs:
        migrator.migrate_batch(source_dir, './renamed/')

if __name__ == '__main__':
    sys.exit(main())




Sourcing Phase 3: Write to Sqlite

"""
Schema.sql:

drop table if exists CapacityUtilization;

create table if not exists CapacityUtilization
(
    creation_dt datetime default current_timestamp,
    period_date datetime not null,
    number text,
    revision int default 0,
    unique (period_date, number) on conflict ignore
);

偷夾字在註解。

驟秋,肚子頗不舒服,心頭又有點兒煩悶。大概想她又不想她。還好有《金瓶梅》可看,這就是該死的三十歲人生,三分之一的棺材,沒什麼特別好說的。

且引一段話語敘述台灣困局。

規制恢弘,彷彿那給孤園黃金鋪地;雕鏤精製,依希似祇洹舍白玉為階。高閣摩空,旃檀氣直接九霄雲表;層基亙地,大雄殿可容千眾禪僧。兩翼嵬峨,盡是琳宮紺宇;廊房潔淨,果然精勝洞天。那時鐘鼓宣揚,盡道是寰中佛國;只這緇流濟楚,卻也像塵界人天。那知歲久年深,一瞬地時移事異。莽和尚縱酒撒潑,首壞清規;獃道人懶惰貪眠,不行打掃。漸成寂寞,斷絕門徒。以致凄涼,罕稀瞻仰。兼以烏鼠穿蝕,那堪風雨漂搖?棟宇摧頹,一而二,二而三,支撐摩計,墻垣柵塌,日復日,年復年,振起無人。朱紅櫺槅,拾來煨酒煨茶;合抱梁檻,拿去換鹽換米。風吹羅漢金消盡,雨打彌陀化作塵。吁嗟乎金碧焜炫,一旦為灌莽榛荊。

以上。以下繼續,這些廢文夾在註腳,程式可是能動哩。

Record format:

<http://www.federalreserve.gov/releases/g17/download.htm>

The format for each line in the files consists of an industry code,
a year, and 12 months of data (to four decimal places) when available.
The data have various start dates; the earliest is 1919.
"""
import logging
import os
import sqlite3
import sys

import logger

class MigrateRenamedToSqlite():
    """Load capacity-utilization records from text files into SQLite.

    Only lines that start with ``item_prefix`` are imported. Each such line
    holds a code, a year and one value per month; every monthly value becomes
    one row of the ``CapacityUtilization`` table.
    """

    def __init__(self):
        self.__logger = logging.getLogger()
        # Quoted code that a line must start with to be imported.
        self.item_prefix = '''\"B50001\"'''
        # Values are bound as parameters (``?``) rather than formatted into
        # the statement, so text read from the data file can never alter
        # the SQL.
        self.sql_insert = '''replace into
            CapacityUtilization(period_date, number) values(?, ?)
            '''

    def migrate_batch(self, src_dir, dest_db):
        """Import every entry of ``src_dir`` into the database ``dest_db``.

        Args:
            src_dir: Existing directory holding the text files.
            dest_db: Path of the SQLite database file.
        """
        assert os.path.isdir(src_dir), 'not a directory: %s' % src_dir
        for name in os.listdir(src_dir):
            self.migrate(os.path.join(src_dir, name), dest_db)

    def migrate(self, src_txt, dest_db):
        """Import one text file into the database.

        Args:
            src_txt: Path of the text file to read.
            dest_db: Path of the SQLite database file.
        """
        self.__logger.debug('%s => %s', src_txt, dest_db)
        records = self.__fetch_records(src_txt)
        self.__write_db(records, dest_db)

    def __fetch_records(self, src_txt):
        """Parse ``src_txt`` into a list of ``[period_date, number]`` pairs.

        ``period_date`` is ``YYYY-MM-01``; the month is the 1-based position
        of the value after the year column. ``number`` stays the raw token.
        """
        with open(src_txt, 'rb') as src_file:
            content = src_file.read()

        records = []
        for line in content.decode('utf-8').split('\n'):
            if not line.startswith(self.item_prefix):
                continue
            tokens = line.split()
            assert len(tokens) > 1, 'line has no year column: %r' % line
            year = tokens[1]
            for month, value in enumerate(tokens[2:], start=1):
                records.append(['%s-%02d-01' % (year, month), value])
        return records

    def __write_db(self, records, dest_db):
        """Write ``records`` to ``dest_db`` and commit them in one transaction.

        The connection is closed even when a statement fails; in that case
        nothing is committed.
        """
        conn = sqlite3.connect(dest_db)
        try:
            cursor = conn.cursor()
            for period, number in records:
                cursor.execute(self.sql_insert, (period, number))
                self.__logger.debug('(period, number): (%s, %s) => %s',
                                    period, number, dest_db)
            conn.commit()
            cursor.close()
        finally:
            conn.close()

def main():
    """Import every file in ``./renamed`` into ``../db/economicstotal.db``.

    Logging is set up through the project-local ``logger`` module first
    (presumably it configures the root logger -- confirm in that module).
    Returns None, so ``sys.exit(main())`` ends the process with status 0.
    """
    logger.config_root(level=logging.DEBUG)
    MigrateRenamedToSqlite().migrate_batch('./renamed',
                                           '../db/economicstotal.db')

if __name__ == '__main__':
    sys.exit(main())

沒有留言:

張貼留言