Tag Archives: python

10年IMDB250高分排行电影下载链接提取(python)

此代码用于提取http://www.zreading.net/671.html网页中10年IMDB250高分排行电影所有下载链接,用于批量导入下载。

# -*- coding: utf-8 -*-
import re
import urllib
url='http://www.zreading.net/671.html'
s=urllib.urlopen(url).read()
ss=s.replace('\n','')
urls=re.findall('<a id="(\S*?) href="(\S*?)" target="_blank">',ss,re.I)
for i in urls:
	print i[1]

#urls=re.findall('<a href="(\S*?)" target="_blank">',ss,re.I)

Learning Python(4)

chap9 文件和输入输出

1.文件内建函数–open() and file()
file_object = open(file_name,access_mode=’r’,buffering=-1)
access_mode:’r’ ‘w’ ‘a’ ‘U’—‘r’ ‘U’模式打开的文件必须已经存在,’w’模式打开的文件若存在则首先清空,然后重建。
buffering可选参数只是访问文件所采用的缓冲方式。0-不缓冲,1-缓冲一行,>1-使用给定值作为缓冲区大小,不提供参数或给定负值代表使用系统默认缓冲机制。
file()工厂函数,和open()实现相同的功能,建议使用open()来读写文件,在说明处理文件对象时使用file(),eg.if instance(f,file)
ps.UNS(Universal NEWLINE Support,通用换行符支持),当使用’U’标志打开文件时,所有行分隔符,或行结束符通过python的输入方法(read*())返回时都会被替换为换行符\n。 Continue reading

Learning Python(3)

chap7 映像和集合类型

1. 映像类型基本操作

1)创建字典和赋值

>>> dict1={}
>>> dict2={'name':'earth','port':80}
>>> dict1,dict2
({}, {'name': 'earth', 'port': 80})

工厂方法dict()创建字典

>>> fdict=dict((['x',1],['y',2]))
>>> fdict
{'y': 2, 'x': 1}

内建方法fromkeys()创建一个“默认”字典,字典中元素具有相同的值(如果没给出,默认为None)

>>> ddict={}.fromkeys((‘x’,’y’),-1) >>> ddict {‘y’: -1, ‘x’: -1} >>> >>> edict={}.fromkeys((‘foo’,’bar’)) >>> edict {‘foo’: None, ‘bar’: None}

2)访问字典中的值

>>> for key in dict2.keys():
...     print 'key=%s,value=%s' %(key,dict2[key])
...
key=name,value=earth
key=port,value=80

Continue reading

Learning Python(1)

Chap1

1.  Python的爸爸:Guido van Rossum

2.  Python的特点:高级、面向对象、可升级、可扩展、可移植性、易学易读易维护、健壮性、高效的快速原型开发工具、内存管理器、解释型和字节编译(字节编译结果是生成一种近似机器语言的中间形式)。

语言分类的维度

一、 High vs. Low
二、 General vs. Targeted
基本指令集适用于广泛的应用,还是只针对特定用途 Continue reading

python-oracle9i单表访问SQL统计信息

#!/usr/bin/env python
# -*- encoding: utf8 -*-
# Author: tracylling@gmail.cn
# Created: 2011-8-16

import cx_Oracle
import os
import sys
import re
os.environ['NLS_LANG'] ='AMERICAN_AMERICA.ZHS16GBK'

#input format : python oracle9i_liling.py -t table_name -o owner >> file_path/file_name.txt
OWNER = sys.argv[4]
TABLE_NAME = sys.argv[2]

TABLE_NAME1 = TABLE_NAME.upper()
OWNER1 = OWNER.upper()

print("------------Welcome To Oracle 9i Statistics Tools----------------")
print("")

#--login--#
def db_connect():
    try:
        conn = cx_Oracle.connect('tp/tp@ocn_test')
        return conn
    except cx_Oracle.DatabaseError,exc:
        error,=exc.args
        print "db_connect error"
        print >> sys.stderr, "Oracle-Error-Code:", error.code
        print >> sys.stderr, "Oracle-Error-Message:", error.message


#--table information--#
def table_info(cursor):
    try:
        cursor.execute("""
           select owner,segment_name,bytes / 1024 / 1024
             From dba_segments
            where segment_name = :table_name
              and segment_type = 'TABLE'
              and owner = :owner
           """,table_name = TABLE_NAME1,owner = OWNER1)
        for column_1,column_2,column_3 in cursor.fetchall():
            print "*********************Table Info*********************"
            print "Version      Table Name      Owner Name      Size(MB)"
            print "-"*52
            print "%s %12s %16s %12d" %(conn.version,TABLE_NAME1,OWNER1,column_3)
        
    except cx_Oracle.DatabaseError, exc:
        error, = exc.args
        print "table_info error"
        print >> sys.stderr, "Oracle-Error-Code:", error.code
        print >> sys.stderr, "Oracle-Error-Message:", error.message

#--index infomation--#
def index_info(cursor):
    try:
        print ""
        print "***********Index Info***********"
        print "INDEX_NAME             SIZE(MB)"
        print "-"*30
        cursor.execute("""
            select owner,segment_name, bytes / 1024 /1024
                From dba_segments
            where segment_name IN
                (SELECT INDEX_NAME FROM DBA_INDEXES 
                WHERE TABLE_NAME = :table_name)
                and segment_type = 'INDEX'
                and owner = :owner
            """,table_name = TABLE_NAME1,owner = OWNER1)
        for column_1,column_2,column_3 in cursor.fetchall():
                print "%-25s %d" % (column_2,column_3)
    except cx_Oracle.DatabaseError, exc:
            error, = exc.args
            print "index_info error"
            print >> sys.stderr, "Oracle-Error-Code:", error.code
            print >> sys.stderr, "Oracle-Error-Message:", error.message

#--SQL--#
def sql_join(cursor):
    dict = {}        #存放SQL连接并归一化后结果
    dict_table = {}  #存放全词匹配表名的SQL语句
    count = {}       #存放归一化语句条数,做除数
    match_join='(\s*:[A-Za-z0-9]+\s*,?\s*)+'
    match_table = '[^A-Za-z0-9_]' + TABLE_NAME + '[^A-Za-z0-9_]'
    rx = re.compile(match_table,re.IGNORECASE)
    
    print ""
    print "***********SQL EXECUTIONS Info***********"
    
    try:
        #print "start"
        cursor.execute("""
            select distinct hash_value
                From stats$sqltext
                """)
        hash_val = cursor.fetchall()          
        
        for i in hash_val:
            cursor.execute("""
                select sql_text
                from  stats$sqltext
                where hash_value = :hash_value
                order by piece
            """,i)
            sql_piece = cursor.fetchall()
            sql_text = ''
            for j in sql_piece :
                sql_text = sql_text + "%s" %(j)
            sql_text = sql_text.replace('\n\r',' ')
            sql_text = " ".join(sql_text.split())  #SQL语句拼接并去除多余的空格
            if rx.search(sql_text) is not None:    #匹配表名,存储进映射KV关系  k是hash_val,v是对应完整的SQLTEXT
                dict[i] = sql_text.upper()
                
        for key in dict.keys():
            dict[key] = re.sub(match_join,' :1 ',dict[key])  #归一化
            #print dict[key]
            cursor.execute("""SELECT hash_value,case
            when max_exec > min_exec and min_exec > 0 then
            trunc((max_exec - min_exec) / decode(trunc(max_time)-trunc(min_time),0,1,trunc(max_time)-trunc(min_time)))
            when max_exec = min_exec and min_exec > 0 then max_exec
            else
            0
            end as exec_daily,
            trunc(max_time)-trunc(min_time) diff_days,
            to_char(min_time, 'yyyy-mm-dd') as begin_snap,
            to_char(max_time, 'yyyy-mm-dd') as end_snap fROM 
            (select A.*,
            row_number() OVER(partition by hash_value order by max_time desc) rn
            from (select s.hash_value,
               min(t.snap_time) over(partition by hash_value order by s.snap_id) as min_time,
               max(t.snap_time) over(partition by hash_value order by s.snap_id) as max_time,
               min(s.executions) over(partition by hash_value order by s.snap_id) as min_exec,
               max(s.executions) over(partition by hash_value order by s.snap_id) as max_exec
            from stats$sql_summary s, stats$snapshot t
            where s.snap_id = t.snap_id and s.Hash_value = :arg) a)B WHERE B.RN =1
            """,key)
            executions = cursor.fetchall()            
            #print executions
            for line in executions:
                sql_hash_val = line[0]
                exec_daily = line[1]
                snap_time = line[4]
                if exec_daily is not None:                    
                    if dict_table.has_key(dict[key]):
                        dict_table[dict[key]] = dict_table[dict[key]] + exec_daily
                        count[dict[key]] = count[dict[key]] + 1
                    else :
                        dict_table[dict[key]] = exec_daily
                        count[dict[key]] = 1
                #else:
                    #exec_daily = 0
        print ""
        
        for keys in dict_table:
            dict_table[keys] = dict_table[keys]/count[keys]
            #print count[keys]
            #print dict_table[keys]
        
        for i in  sorted(dict_table.items(),key=lambda e:e[1],reverse=True):
            print "** SQL EXECUTIONS: %d **" %i[1] 
            print "%s" %i[0] 
            print "" 

    except cx_Oracle.DatabaseError, exc:
            error, = exc.args
            print "sql_join error"
            print >> sys.stderr, "Oracle-Error-Code:", error.code
            print >> sys.stderr, "Oracle-Error-Message:", error.message

#--main--#
def main(cursor):
        db_table_info = table_info(cursor)
        db_index_info = index_info(cursor)
        db_sql_join = sql_join(cursor)

if __name__ == '__main__':
    conn = db_connect()
    cursor = conn.cursor()
    main(cursor)

Python&&cx_Oracle

Python版本:python2.7.2
Instantclient版本:Version 10.2.0.4
cx_Oracle版本:cx_Oracle-5.0.1
下载软件及工程目录:/home/oracle/LL/python/software(/workstation)

1. 查看系统版本选择合适版本
uname -a
Linux inc-dba-ccbu-36-18 2.6.9-67.ELsmp #1 SMP Wed Nov 7 13:56:44 EST 2007 x86_64 x86_64 x86_64 GNU/Linux

2. 下载安装python2.7.2 – 稳定版
# wget http://www.python.org/ftp/python/2.7.1/Python-2.7.1.tar.bz2
# tar -jxvf Python-2.7.1.tar.bz2
# cd Python-2.7.1
# ./configure (默认安装在/usr/local/lib/python2.7,–prefix可指定)
# make && make install

Continue reading

Python连接MySQL操作

Python很强大啊,不用多说的……这里简单介绍一下其中的一个模块就是DB-API,也就是提供数据库应用编程接口的一个东西。
参考资料:Writing MySQL Scripts with Python DB-API
DB-API两层架构:顶层为所有的数据库引擎提供一个抽象的接口,底层包含特定数据库引擎的驱动等用于处理特定引擎云云。所以,在写Python脚本之前需要先装这个特定的驱动,比如MySQL,就需要安装MySQLdb模块。

第一步:MySQLdb Installation
详见(ubuntu10.04) MySQL-python-1.2.3

Continue reading