2727#
2828# Run with --help for command line details
2929#
30- q_version = "1.6.0notreleasedyet "
30+ q_version = "1.6.0 "
3131
3232__all__ = [ 'QTextAsData' ]
3333
@@ -50,8 +50,6 @@ import uuid
5050import cStringIO
5151import math
5252
53- csv .field_size_limit (sys .maxsize )
54-
5553DEBUG = False
5654
5755def get_stdout_encoding (encoding_override = None ):
@@ -210,6 +208,14 @@ class CouldNotConvertStringToNumericValueException(Exception):
210208def __str (self ):
211209return repr (self .msg )
212210
class ColumnMaxLengthLimitExceededException(Exception):
    """Raised when an input field is longer than the csv field size limit.

    The human-readable description is kept in ``msg`` (callers read
    ``e.msg`` directly) and is also forwarded to ``Exception`` so that
    ``e.args`` is populated.
    """

    def __init__(self, msg):
        # Forward to the base class so e.args is populated; without this,
        # args stays empty on Python 2.
        super(ColumnMaxLengthLimitExceededException, self).__init__(msg)
        self.msg = msg

    # NOTE: this must be the dunder ``__str__``. A method spelled ``__str``
    # is name-mangled to ``_ColumnMaxLengthLimitExceededException__str`` and
    # is never invoked by str().
    def __str__(self):
        return repr(self.msg)
213219class CouldNotParseInputException (Exception ):
214220
215221def __init__ (self , msg ):
@@ -636,6 +642,11 @@ def encoded_csv_reader(encoding, f, dialect, **kwargs):
636642raise CouldNotConvertStringToNumericValueException (e .message )
637643else :
638644raise CouldNotParseInputException (str (e ))
645+ except Exception ,e :
646+ if str (e ).startswith ("field larger than field limit" ):
647+ raise ColumnMaxLengthLimitExceededException (str (e ))
648+ else :
649+ raise
639650
640651def normalized_filename (filename ):
641652if filename == '-' :
@@ -671,9 +682,13 @@ class MaterializedFileState(object):
671682except Exception ,e :
672683raise Exception ('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str (e ))
673684csv_reader = encoded_csv_reader (self .encoding , self .f , dialect = self .dialect )
674- for col_vals in csv_reader :
675- self .lines_read += 1
676- yield col_vals
685+ try :
686+ for col_vals in csv_reader :
687+ self .lines_read += 1
688+ yield col_vals
689+ except ColumnMaxLengthLimitExceededException ,e :
690+ msg = "Column length is larger than the maximum. Offending file is '%s' - Line is %s, counting from 1 (encoding %s). The line number is the raw line number of the file, ignoring whether there's a header or not" % (self .filename ,self .lines_read + 1 ,self .encoding )
691+ raise ColumnMaxLengthLimitExceededException (msg )
677692
678693def close (self ):
679694if self .f != sys .stdin :
@@ -1079,7 +1094,8 @@ class QInputParams(object):
10791094expected_column_count = None ,keep_leading_whitespace_in_values = False ,
10801095disable_double_double_quoting = False ,disable_escaped_double_quoting = False ,
10811096disable_column_type_detection = False ,
1082- input_quoting_mode = 'minimal' ,stdin_file = None ,stdin_filename = '-' ):
1097+ input_quoting_mode = 'minimal' ,stdin_file = None ,stdin_filename = '-' ,
1098+ max_column_length_limit = 131072 ):
10831099self .skip_header = skip_header
10841100self .delimiter = delimiter
10851101self .input_encoding = input_encoding
@@ -1091,6 +1107,7 @@ class QInputParams(object):
10911107self .disable_escaped_double_quoting = disable_escaped_double_quoting
10921108self .input_quoting_mode = input_quoting_mode
10931109self .disable_column_type_detection = disable_column_type_detection
1110+ self .max_column_length_limit = max_column_length_limit
10941111
10951112def merged_with (self ,input_params ):
10961113params = QInputParams (** self .__dict__ )
@@ -1113,7 +1130,6 @@ class QTextAsData(object):
11131130# Create DB object
11141131self .db = Sqlite3DB ()
11151132
1116-
11171133input_quoting_modes = {'minimal' : csv .QUOTE_MINIMAL ,
11181134'all' : csv .QUOTE_ALL ,
11191135# nonnumeric is not supported for input quoting modes, since we determine the data types
@@ -1149,6 +1165,8 @@ class QTextAsData(object):
11491165dialect_id = self .get_dialect_id (filename )
11501166csv .register_dialect (dialect_id , ** q_dialect )
11511167
1168+ csv .field_size_limit (input_params .max_column_length_limit )
1169+
11521170# Create a line splitter
11531171line_splitter = LineSplitter (input_params .delimiter , input_params .expected_column_count )
11541172
@@ -1261,6 +1279,8 @@ class QTextAsData(object):
12611279error = QError (e ,"Could not convert string to a numeric value. Did you use `-w nonnumeric` with unquoted string values? Error: %s" % e .msg ,58 )
12621280except CouldNotParseInputException ,e :
12631281error = QError (e ,"Could not parse the input. Please make sure to set the proper -w input-wrapping parameter for your input, and that you use the proper input encoding (-e). Error: %s" % e .msg ,59 )
1282+ except ColumnMaxLengthLimitExceededException ,e :
1283+ error = QError (e ,e .msg ,31 )
12641284except KeyboardInterrupt ,e :
12651285warnings .append (QWarning (e ,"Interrupted" ))
12661286except Exception , e :
@@ -1530,6 +1550,8 @@ def run_standalone():
15301550#-----------------------------------------------
15311551parser .add_option ("-v" , "--version" , dest = "version" , default = False , action = "store_true" ,
15321552help = "Print version" )
1553+ parser .add_option ("-V" , "--verbose" , dest = "verbose" , default = False , action = "store_true" ,
1554+ help = "Print debug info in case of problems" )
15331555#-----------------------------------------------
15341556input_data_option_group = OptionGroup (parser ,"Input Data Options" )
15351557input_data_option_group .add_option ("-H" , "--skip-header" , dest = "skip_header" , default = default_skip_header , action = "store_true" ,
@@ -1558,6 +1580,8 @@ def run_standalone():
15581580help = "Don't detect column types - All columns will be treated as text columns" )
15591581input_data_option_group .add_option ("-w" ,"--input-quoting-mode" ,dest = "input_quoting_mode" ,default = "minimal" ,
15601582help = "Input quoting mode. Possible values are all, minimal and none. Note the slightly misleading parameter name, and see the matching -W parameter for output quoting." )
1583+ input_data_option_group .add_option ("-M" ,"--max-column-length-limit" ,dest = "max_column_length_limit" ,default = 131072 ,
1584+ help = "Sets the maximum column length." )
15611585parser .add_option_group (input_data_option_group )
15621586#-----------------------------------------------
15631587output_data_option_group = OptionGroup (parser ,"Output Options" )
@@ -1681,6 +1705,14 @@ def run_standalone():
16811705# (since no input delimiter means any whitespace)
16821706options .output_delimiter = " "
16831707
1708+ try :
1709+ max_column_length_limit = int (options .max_column_length_limit )
1710+ if max_column_length_limit < 1 :
1711+ raise Exception ()
1712+ except :
1713+ print >> sys .stderr , "Max column length limit must be a positive integer (%s)" % max_column_length_limit
1714+ sys .exit (31 )
1715+
16841716default_input_params = QInputParams (skip_header = options .skip_header ,
16851717delimiter = options .delimiter ,
16861718input_encoding = options .encoding ,
@@ -1691,7 +1723,8 @@ def run_standalone():
16911723disable_double_double_quoting = options .disable_double_double_quoting ,
16921724disable_escaped_double_quoting = options .disable_escaped_double_quoting ,
16931725input_quoting_mode = options .input_quoting_mode ,
1694- disable_column_type_detection = options .disable_column_type_detection )
1726+ disable_column_type_detection = options .disable_column_type_detection ,
1727+ max_column_length_limit = max_column_length_limit )
16951728q_engine = QTextAsData (default_input_params = default_input_params )
16961729
16971730output_params = QOutputParams (
@@ -1700,7 +1733,7 @@ def run_standalone():
17001733output_quoting_mode = options .output_quoting_mode ,
17011734formatting = options .formatting ,
17021735output_header = options .output_header )
1703- q_output_printer = QOutputPrinter (output_params ,show_tracebacks = DEBUG )
1736+ q_output_printer = QOutputPrinter (output_params ,show_tracebacks = options . verbose )
17041737
17051738for query_str in query_strs :
17061739if options .analyze_only :
0 commit comments