#!/usr/bin/python

####################################################################################################
#
#                                           EXTERNAL LIBRARIES
#
####################################################################################################

from   lgclust      import *
import networkx     as     nx
import os
import sys
import time


####################################################################################################
#
#                                           FUNCTIONS
#
####################################################################################################

def print_Help() :
	"""Write the command-line usage message to standard output.

	The message describes the input file format, the arguments that have a
	default value and the optional output files. Nothing is returned and the
	stream is not flushed here.
	"""
	hash_rule = '###############################################################################'
	dash_rule = '-------------------------------------------------------------------------------'
	
	# One entry per displayed line; an empty entry is a blank line.
	# Trailing blanks inside some entries are part of the historical layout.
	help_lines = (
		'',
		hash_rule,
		'[ Goal ] :',
		'',
		'  This script applies the centered ternary similarity modification graph',
		'  method. ',
		dash_rule,
		'[ Usage ] Needed arguments :',
		'',
		'   -i < file name > : input file containing items pair similarities.',
		'                      each line describe a mapping between two items.',
		'                      The format is:',
		'',
		'                      it_1   ini_1   end_1   it_2   ini_2   end_2 ',
		'',
		'                      where parts of it_1 from position ini_1 to position end_1',
		'                      are mapped to parts of it_2 from position ini_2 to position',
		'                      end_2.',
		'',
		'                      it_1   <str> the label of the first item.',
		'                      ini_1  <int> the position index where mapping starts',
		'                                   for item 1.',
		'                      end_1  <int> the position index where mapping ends',
		'                                   for item 1.',
		# The label column used to read "ir_2", which did not match the
		# "it_2" of the format line above.
		'                      it_2   <str> the label of the second item.',
		'                      ini_2  <int> the position index where mapping starts',
		'                                   for item 2.',
		'                      end_2  <int> the position index where mapping ends',
		'                                   for item 2.',
		'',
		'                      [ini_1;end_1] and [ini_2;end_2] define two blocks that should',
		'                      have the same length.',
		'                      If two items share more than one block',
		'                      then each pair of block are given on a different line',
		'                      with the same item labels.',
		'',
		dash_rule,
		'[ Usage ] Arguments with default values:',
		'',
		'   -b               : force to computes the L(G) graph.',
		'   -g < file_name > : outputs file of kept item pair similarities.',
		'                      ie the initial G graph.',
		'   -s < float>      : threshold of centered ternary similarity which lies',
		'                      in the range 0-1 excluded.',
		'',
		dash_rule,
		'[ Usage ] Options :',
		'',
		'   -v               : verbose mode.',
		'   -h               : this help.',
		'   -c < file name > : output file for the clusters description.',
		'   -a < file name > : output file for the modified G graph.',
		'   -l < file name > : output file for the initial L(G) graph.',
		'   -m < file name > : output file for the modified L(G) graph.',
		'',
		hash_rule,
		'',
	)
	sys.stdout.write( '\n'.join( help_lines ) + '\n' )



def _parent_dir_is_writable( path ) :
	"""Return True when the directory that would hold `path` is writable.

	A bare file name has an empty directory part; it is checked against the
	current directory instead of being rejected.
	"""
	return os.access( os.path.dirname( path ) or os.curdir , os.W_OK )


def main():
	"""Command-line entry point.

	Steps, in order:
	  1. read the options from sys.argv with readArg and validate the paths
	     and the threshold (the process exits with status 1 on any error,
	     and with status 0 after printing the help for -h);
	  2. read the input file and build the graph G with one node per item and
	     one edge per pair of distinct items that share at least one line;
	  3. remove from a copy of G the edges flagged by the centered ternary
	     similarity test, either directly on G (default) or through the
	     line-graph L(G) when -b is given;
	  4. write the requested graphs and, with -c, the connected components
	     of the modified graph together with the number of removed links
	     between them.

	An option left unset is recognised by its value not being a string.
	"""
	####################################################################################################
	#
	#                                           PARAMETERS
	#
	####################################################################################################
	
	if bool( readArg(sys.argv, '-h', 1 , False,   False ) ) :
		print_Help()
		sys.exit(0)
	
	# The input file is mandatory: the value 1 is what readArg gives back here
	# when -i is missing.
	if readArg(sys.argv, '-i', 2 , False,  'none') == 1 :
		print_Help()
		sys.exit(1)
	
	verbosity      =  bool( readArg(sys.argv, '-v', 1 ,     False,   False ) )
	Do_LG          =  bool( readArg(sys.argv, '-b', 1 ,     False,   False ) )
	#---------------------------------------
	Input_File     =        readArg(sys.argv, '-i', 2 , verbosity,  'none' )
	# test if the file exists and is readable
	if isinstance( Input_File , str ) and not os.access( Input_File , os.R_OK ) :
		sys.stderr.write('\n[Error]: -i argument invalid path: %s\n'%Input_File)
		sys.exit(1)
	
	#---------------------------------------
	G_File         =        readArg(sys.argv, '-g', 2 , verbosity,  'none' )
	# test if the path is writable
	if isinstance( G_File , str ) and not _parent_dir_is_writable( G_File ) :
		sys.stderr.write('\n[Error]: -g argument invalid path: %s\n'%G_File)
		sys.exit(1)
	#---------------------------------------
	# NOTE(review): the default is built from the *directory* of the input file
	# ('a/b/in.txt' gives 'a/b.g_mod'); Input_File + '.g_mod' may have been
	# intended. Kept as is so that existing runs write to the same place.
	G_Mod_File     =        readArg(sys.argv, '-a', 2 , verbosity,  os.path.dirname( Input_File )+'.g_mod' )
	# test if the path is writable
	if isinstance( G_Mod_File , str ) and not _parent_dir_is_writable( G_Mod_File ) :
		sys.stderr.write('\n[Error]: -a argument invalid path: %s\n'%G_Mod_File)
		sys.exit(1)
	#---------------------------------------
	LG_File        =        readArg(sys.argv, '-l', 2 , verbosity,  'none' )
	if isinstance( LG_File , str ) :
		# test if the path is writable
		if not _parent_dir_is_writable( LG_File ) :
			sys.stderr.write('\n[Error]: -l argument  invalid path: %s\n'%LG_File)
			sys.exit(1)
		if not Do_LG and verbosity :
			sys.stdout.write('\n[Warning]: by default, without -b option, the L(G) graph is not computed. -l argument ignored and no file saved to : %s\n'%LG_File)
	
	#---------------------------------------
	LG_Mod_File    =        readArg(sys.argv, '-m', 2 , verbosity,  'none' )
	if isinstance( LG_Mod_File , str ) :
		# test if the path is writable
		if not _parent_dir_is_writable( LG_Mod_File ) :
			sys.stderr.write('\n[Error]: -m argument invalid path: %s\n'%LG_Mod_File)
			sys.exit(1)
		if not Do_LG and verbosity :
			sys.stdout.write('\n[Warning]: by default, without -b option, the L(G) graph is not computed. -m argument ignored and no file saved to : %s\n'%LG_Mod_File)			
	#---------------------------------------
	Clust_File     =        readArg(sys.argv, '-c', 2 , verbosity,  'none' )
	# test if the path is writable
	if isinstance( Clust_File , str ) and not _parent_dir_is_writable( Clust_File ) :
		sys.stderr.write('\n[Error]: -c argument invalid path: %s\n'%Clust_File)
		sys.exit(1)
	#---------------------------------------
	Sim_Threshold  = float( readArg(sys.argv, '-s', 2 , verbosity,     0.5 ) ) 
	# accepted values: 0 < threshold <= 1
	# NOTE(review): the help text says both bounds are excluded, yet 1 is accepted here.
	if not ( 0. < Sim_Threshold <= 1. ) :
		sys.stderr.write('\n[Error]: -s argument value should lies in the range 0-1\n')
		sys.exit(1)
	
	
	####################################################################################################
	#
	#                                           PRINTINGS
	#
	####################################################################################################
	
	
	#---------------------------------------------------------------------------------------------------
	# Summary of the run parameters
	#---------------------------------------------------------------------------------------------------
	if verbosity :
		sys.stdout.write('\n')
		sys.stdout.write('[ Job start   ] %s\n'%time.ctime())
		sys.stdout.write('[ Input file  ] %s\n'%Input_File)
		sys.stdout.write('[ Threshold   ] %f\n'%Sim_Threshold)
		if Do_LG :
			sys.stdout.write('[ Method      ] L(G) based computation (speed).\n')
		else :
			sys.stdout.write('[ Method      ] G only based computation (memory).\n')
		
		sys.stdout.write('[ Output file ] \n')
		for ( label , out_path ) in (
			( '\t[ Graph G             ] ' , G_File      ) ,
			( '\t[ Graph L(G)          ] ' , LG_File     ) ,
			( '\t[ Graph L(G) modified ] ' , LG_Mod_File ) ,
			( '\t[ Graph G modified    ] ' , G_Mod_File  ) ,
			( '\t[ Cluster description ] ' , Clust_File  ) ,
		) :
			sys.stdout.write( label )
			if isinstance( out_path , str ) :
				sys.stdout.write('%s\n'%out_path)
			else :
				sys.stdout.write('not saved\n')
		
		sys.stdout.flush()
	
	####################################################################################################
	#
	#                                           INITIALIZATIONS
	#
	####################################################################################################
	
	G               = nx.Graph() # G = (V,E)   : the graph of similarities between pairs of items
	                             #               with V the items and E the ensemble of items sharing
	                             #               a pair similarity
	
	LG              = nx.Graph() # L(G) = (E,F): line-graph of G: the graph of G adjacencies
	                             #               with E the ensemble of items sharing a pair similarity
	                             #               and F the ensemble of adjacencies in G
	
	LG_sub_marked   = nx.Graph() # Subgraph of L(G) over edges which fail the centered
	                             # ternary similarity test
	
	Marked_LG_nodes = {}         # Dict of LG nodes which have at least one incident marked edge
	                             #               key:   the node
	                             #               value: the number of incident marked edges
	
	Marked_G_edges  = {}         # Dict of G edges which have at least one adjacent edge with which the
	                             # centered ternary similarity test fails
	                             #               key:   the edge
	                             #               value: the number of adjacent failing edges
	
	Align_dict      = {}         # Dict of correspondence maps of pairs of items
	                             #               key:   the pair of items
	                             #               value: an aligned_items object
	
	NAME_to_ID_dict = {}         # Dict of translation from the user nomenclature of items to the
	                             # internal identification number
	                             #               key:   an item string name
	                             #               value: an integer
	
	ID_to_NAME_dict = {}         # Dict of translation from the internal identification number of
	                             # items to the user nomenclature
	                             #               key:   an integer
	                             #               value: an item string name
	
	####################################################################################################
	#
	#                                           MAIN
	#
	####################################################################################################
	
	
	#---------------------------------------------------------------------------------------------------
	# Read and format the data
	#---------------------------------------------------------------------------------------------------
	if verbosity :
		sys.stdout.write('\n[  Start ] reading input file %s\n'%Input_File)
		sys.stdout.flush()
	
	id_nb = 0
	with open(Input_File,'r') as fid :
		# li_nb is the real 1-based line number, so the error below points at the faulty line
		for ( li_nb , line ) in enumerate( fid , 1 ) :
			li = line.split()
			if not li :
				# a blank line is skipped; it no longer ends the reading
				continue
			
			if len( li ) != 6 :
				sys.stderr.write('\n[Error]: Problem reading line %d of input file %s.\n'%(li_nb , Input_File))
				sys.exit(1)
			
			( p1 , p1_min , p1_max , p2 , p2_min , p2_max ) = li
			
			if p1 == p2 :
				# a mapping of an item onto itself defines no edge in G
				continue
			
			for name in ( p1 , p2 ) :                       # convert user names into internal IDs
				if name not in NAME_to_ID_dict :
					NAME_to_ID_dict[  name ] = id_nb
					ID_to_NAME_dict[ id_nb ] = name
					id_nb += 1
			
			id1    = NAME_to_ID_dict[ p1 ]
			id2    = NAME_to_ID_dict[ p2 ]
			range1 = [ int( p1_min ) , int( p1_max ) ]
			range2 = [ int( p2_min ) , int( p2_max ) ]
			
			if id1 < id2 :                                  # sort the items and their corresponding ranges
				( it1 , bl1 , it2 , bl2 ) = ( id1 , range1 , id2 , range2 )
			else :
				( it1 , bl1 , it2 , bl2 ) = ( id2 , range2 , id1 , range1 )
			
			pair = ( it1 , it2 )
			
			if pair not in Align_dict :                     # Define an edge in G for each pair of items
				Align_dict[ pair ] = aligned_items(pair[0],pair[1])
				G.add_edge( pair[0] , pair[1] )
			
			Align_dict[ pair ].add_blocks(bl1,bl2)          # Store/Update the correspondence map between two items
	
	if verbosity :
		sys.stdout.write('[    End ] reading input file %s\n'%Input_File)
		sys.stdout.flush()
	
	#---------------------------------------------------------------------------------------------------
	# Write the initial graph G
	#---------------------------------------------------------------------------------------------------
	if isinstance( G_File , str ) :
		write_graph( G_File , G , 1 , ID_to_NAME_dict , verbosity )
	
	if verbosity :
		sys.stdout.write('\n')
		sys.stdout.write('[      G ] G inital graph:\n')
		sys.stdout.write('[      G ] Number of nodes                : %d\n'%G.number_of_nodes())
		sys.stdout.write('[      G ] Number of edges                : %d\n'%G.number_of_edges())
		sys.stdout.write('[      G ] Mean degree                    : %f\n'%mean(G.degree()))
		sys.stdout.write('[      G ] Number of connected components : %d\n'%nx.number_connected_components(G))
		sys.stdout.flush()
	
	
	if not Do_LG :
		#---------------------------------------------------------------------------------------------------
		# Mark G edges which have adjacent edges failing the centered ternary similarity test
		#---------------------------------------------------------------------------------------------------
		if verbosity :
			sys.stdout.write('\n[  Start ] marking edges failing at the centered ternary similarity test\n')
			sys.stdout.flush()
			prog = ProgressBar( 0 , G.number_of_edges(), 70 , mode='fixed', char='-')
		
		for g_edge in G.edges():
			mark_nb = number_of_fails_at_ternary_similarity_test( Align_dict , G , g_edge , Sim_Threshold )
			if mark_nb > 0 :
				Marked_G_edges[ g_edge ] = mark_nb
			
			if verbosity:
				prog.increment_amount()
				sys.stderr.write( '%s\r'%(prog) )
				sys.stderr.flush()
		
		if verbosity :
			sys.stdout.write('\n[    End ] marking edges failing at the centered ternary similarity test\n')
			sys.stdout.flush()
		#---------------------------------------------------------------------------------------------------
		# Remove G edges which fail the centered ternary similarity test
		#---------------------------------------------------------------------------------------------------
		if verbosity :
			sys.stdout.write('\n[  Start ] removing edges failing at the centered ternary similarity test\n')
			total  = len(Marked_G_edges)
			if total>0:
				prog   = ProgressBar( 0 , total , 70 , mode='fixed', char='-')
		
		G_Mod  =               G.copy()
		while Marked_G_edges :
			# The most marked G edge is the one removed; on a tie the first one met
			# while iterating over the dict wins.
			rem_g_e = max( Marked_G_edges , key=Marked_G_edges.get )
			# Each marked neighbor loses one mark, and leaves the dict with its last one
			for g_e in neighbors_edges( G , rem_g_e ):
				if g_e in Marked_G_edges :
					if Marked_G_edges[ g_e ] == 1 :
						del Marked_G_edges[ g_e ]
						if verbosity :
							prog.increment_amount()
					else:
						Marked_G_edges[ g_e ] -= 1
			G_Mod.remove_edge(rem_g_e[0],rem_g_e[1])
			del Marked_G_edges[ rem_g_e ]
			if verbosity :
				# progress-bar
				prog.increment_amount()
				sys.stderr.write( '%s\r'%(prog) )
				sys.stderr.flush()
		
		if verbosity :
			# end the progress-bar line (a plain newline: nothing to format)
			sys.stderr.write( '\n' ); sys.stderr.flush()
			sys.stdout.write('[    End ] edges failing at the centered ternary similarity test\n')
			sys.stdout.flush()
		
	else :
		#---------------------------------------------------------------------------------------------------
		# Create the line-graph: the graph of G adjacencies
		#---------------------------------------------------------------------------------------------------
		if verbosity :
			sys.stdout.write('\n[  Start ] building adjacency list\n')
			sys.stdout.flush()
			prog = ProgressBar( 0 , G.number_of_edges(), 70 , mode='fixed', char='-')
		
		for lg_node in G.edges(): # for each G edge search the adjacent edges in G
			# NOTE(review): the neighbors of one end include the other end, so
			# lg_node also gets linked to its own sorted pair - confirm this is wanted.
			for g_end in ( lg_node[0] , lg_node[1] ) :
				for g_node in G.neighbors( g_end ) :
					# for each adjacent edge in G add an edge in L(G); the adjacent
					# edge is named by its sorted pair of end nodes
					LG.add_edge( lg_node , tuple( sorted( [ g_end , g_node ] ) ) )
			
			if verbosity :
				# progress-bar
				prog.increment_amount()
				sys.stderr.write( '%s\r'%(prog) )
				sys.stderr.flush()
		
		if verbosity :
			sys.stderr.write( '\n' ) ; sys.stderr.flush()
			sys.stdout.write('[    End ] building adjacency list\n')
			sys.stdout.flush()
		
		#---------------------------------------------------------------------------------------------------
		# Write the initial graph L(G)
		#---------------------------------------------------------------------------------------------------
		if isinstance( LG_File , str ) :
			write_graph( LG_File , LG , 2 , ID_to_NAME_dict , verbosity )
		
		#---------------------------------------------------------------------------------------------------
		# Print stats
		#---------------------------------------------------------------------------------------------------
		if verbosity :
			sys.stdout.write('\n')
			sys.stdout.write('[     LG ] L(G) inital graph:\n')
			sys.stdout.write('[     LG ] Number of nodes                : %d\n'%LG.number_of_nodes())
			sys.stdout.write('[     LG ] Number of edges                : %d\n'%LG.number_of_edges())
			sys.stdout.write('[     LG ] Mean degree                    : %f\n'%mean(LG.degree()))
			sys.stdout.flush()
		
		#---------------------------------------------------------------------------------------------------
		# Mark line-graph nodes which have incident edges failing the centered ternary similarity test
		#---------------------------------------------------------------------------------------------------
		if verbosity :
			sys.stdout.write('\n[  Start ] marking edges failing at the centered ternary similarity test\n')
			sys.stdout.flush()
			prog = ProgressBar( 0 , LG.number_of_edges(), 70 , mode='fixed', char='-')
		
		for edge in LG.edges():
			( lg_n1 , lg_n2 ) = edge
			ali_n1 = Align_dict[ lg_n1 ]
			ali_n2 = Align_dict[ lg_n2 ]
			# for each edge in L(G) - ie each adjacency in G - test the centered ternary similarity
			if not test_ternary_similarity( ali_n1 , ali_n2 , Sim_Threshold ) :
				# each L(G) node is marked as many times as it is involved in an edge failing the test
				Marked_LG_nodes[ lg_n1 ] = Marked_LG_nodes.get( lg_n1 , 0 ) + 1
				Marked_LG_nodes[ lg_n2 ] = Marked_LG_nodes.get( lg_n2 , 0 ) + 1
				# Create/Update the sub-graph of L(G) containing all the edges failing the test
				LG_sub_marked.add_edge(lg_n1,lg_n2)
			if verbosity :
				# progress-bar
				prog.increment_amount()
				sys.stderr.write( '%s\r'%(prog) )
				sys.stderr.flush()
		
		if verbosity :
			sys.stderr.write( '\n' ); sys.stderr.flush()
			sys.stdout.write('[    End ] marking edges failing at the centered ternary similarity test\n')
			sys.stdout.flush()
		#---------------------------------------------------------------------------------------------------
		# Print stats
		#---------------------------------------------------------------------------------------------------
		if verbosity :
			sys.stdout.write('\n')
			sys.stdout.write('[ Marked ] Centered ternary similarity:\n')
			sys.stdout.write('[ Marked ] Number of marked L(G) nodes    : %d\n'%LG_sub_marked.number_of_nodes())
			sys.stdout.write('[ Marked ] Number of marked L(G) edges    : %d\n'%LG_sub_marked.number_of_edges())
			sys.stdout.write('[ Marked ] Mean degree of marked edges    : %f\n'%mean(LG_sub_marked.degree()))
			sys.stdout.flush()
		
		
		#---------------------------------------------------------------------------------------------------
		# Remove G edges which fail the centered ternary similarity test
		#---------------------------------------------------------------------------------------------------
		if verbosity :
			sys.stdout.write('\n[  Start ] removing L(G) nodes incident to edges failing at the centered ternary similarity test\n')
		
		# The modified L(G) is only maintained when it has to be written out
		Keep_LG_Mod = isinstance( LG_Mod_File , str )
		
		G_Mod  =               G.copy()
		if Keep_LG_Mod :
			LG_Mod =              LG.copy()
		
		if verbosity :
			total  = len(Marked_LG_nodes)
			prog = ProgressBar( 0 , total , 70 , mode='fixed', char='-')
		while Marked_LG_nodes :
			# The most marked L(G) node - ie G edge - is the one removed; on a tie the
			# first one met while iterating over the dict wins.
			rem_lg_n = max( Marked_LG_nodes , key=Marked_LG_nodes.get )
			# Each neighbor in the marked sub-graph loses one mark, and leaves the dict with its last one
			for lg_n in LG_sub_marked.neighbors(rem_lg_n):
				if Marked_LG_nodes[ lg_n ] == 1 :
					del Marked_LG_nodes[ lg_n ]
					if verbosity :
						prog.increment_amount()
				else:
					Marked_LG_nodes[ lg_n ] -= 1
				# same condition as the one under which LG_Mod was created
				if Keep_LG_Mod :
					LG_Mod.remove_edge(rem_lg_n,lg_n)
			LG_sub_marked.remove_node(rem_lg_n)
			G_Mod.remove_edge(rem_lg_n[0],rem_lg_n[1])
			del Marked_LG_nodes[ rem_lg_n ]
			if verbosity :
				# progress-bar
				prog.increment_amount()
				sys.stderr.write( '%s\r'%(prog) )
				sys.stderr.flush()
		
		if verbosity :
			sys.stderr.write( '\n' ); sys.stderr.flush()
			sys.stdout.write('[    End ] removing nodes incident to edges failing at the centered ternary similarity test\n')
			sys.stdout.flush()
		
		#---------------------------------------------------------------------------------------------------
		# Write the modified graph LG_Mod
		#---------------------------------------------------------------------------------------------------
		if Keep_LG_Mod :
			write_graph( LG_Mod_File , LG_Mod , 2 , ID_to_NAME_dict , verbosity )
		
	#---------------------------------------------------------------------------------------------------
	# Write the modified graph G_Mod
	#---------------------------------------------------------------------------------------------------
	if isinstance( G_Mod_File , str ) :
		write_graph( G_Mod_File , G_Mod , 1 , ID_to_NAME_dict , verbosity )
	
	
	#---------------------------------------------------------------------------------------------------
	# Print stats
	#---------------------------------------------------------------------------------------------------
	if verbosity :
		sys.stdout.write('\n')
		sys.stdout.write('[  G_Mod ] G modified graph:\n')
		sys.stdout.write('[  G_Mod ] Number of nodes                : %d\n'%G_Mod.number_of_nodes())
		sys.stdout.write('[  G_Mod ] Number of edges                : %d\n'%G_Mod.number_of_edges())
		sys.stdout.write('[  G_Mod ] Mean degree                    : %f\n'%mean(G_Mod.degree()))
		sys.stdout.write('[  G_Mod ] Number of connected components : %d\n'%nx.number_connected_components(G_Mod))
		sys.stdout.flush()
	
	
	#---------------------------------------------------------------------------------------------------
	# Make clusters of connected components and link the clusters by weighted edges, where the weight
	# stands for the number of removed G edges between them
	#---------------------------------------------------------------------------------------------------
	if isinstance( Clust_File , str ) :
		Clusters_To_ID = {}      # cluster number -> its members, as given by networkx
		IDs_To_Cluster = {}      # item internal ID -> cluster number
		Clusters_Links = {}      # sorted pair of cluster numbers -> number of times a link was seen
		cl_nb          = 0
		with open(Clust_File,'w') as fod :
			for component in nx.connected_components(G_Mod):    # write the clusters composition
				Clusters_To_ID[cl_nb] = component
				fod.write( 'cluster_%d '%(cl_nb) )
				for item_id in component :
					fod.write('%s '%ID_to_NAME_dict[item_id])
					IDs_To_Cluster[item_id] = cl_nb
				fod.write('\n')
				if verbosity :
					sys.stdout.write( 'cluster_%d : %d sequences\n'%(cl_nb,len(component)) )
				cl_nb += 1
			
			for (cluster_id,cluster) in Clusters_To_ID.items(): # count the clusters links
				members = set( cluster )                        # constant-time membership tests
				g       = G.subgraph(cluster)
				cluster_and_neighbors = [ list( G.neighbors(item_id) ) for item_id in cluster ]
				cluster_and_neighbors.append(cluster)
				cluster_and_neighbors = flatten_uniq(cluster_and_neighbors)
				for e in G.subgraph( cluster_and_neighbors ).edges() :
					# a border edge of G touches the cluster without lying inside it
					if g.has_edge(e[0],e[1]) or not ( ( e[0] in members ) or ( e[1] in members ) ):
						continue
					
					(n1,n2) = e
					if n2 in members :
						n_out = n1
					else:
						n_out = n2
					
					cl_link = tuple( sorted( [ cluster_id , IDs_To_Cluster[n_out] ] ) )
					Clusters_Links[ cl_link ] = Clusters_Links.get( cl_link , 0 ) + 1
			
			fod.write('[ Cluster connectivity] ------------------------------------------------\n')
			for ( link , weight ) in Clusters_Links.items():
				# a border edge is met once from the cluster at each of its two ends,
				# hence the halving
				link_line = 'cluster_%d\tcluster_%d\t%d\n'%(link[0],link[1],weight//2)
				fod.write( link_line )
				if verbosity :
					sys.stdout.write( link_line )
	
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
        main()
	
