| /* | 
 |  * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> | 
 |  * All rights reserved. | 
 |  * | 
 |  * This program is free software; you can redistribute it and/or modify | 
 |  * it under the terms of the GNU General Public License as published by | 
 |  * the Free Software Foundation; either version 2 of the License, or | 
 |  * (at your option) any later version. | 
 |  * | 
 |  * This program is distributed in the hope that it will be useful, | 
 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
 |  * GNU General Public License for more details. | 
 |  */ | 
 |  | 
 | #ifndef __DST_H | 
 | #define __DST_H | 
 |  | 
 | #include <linux/types.h> | 
 | #include <linux/connector.h> | 
 |  | 
 | #define DST_NAMELEN		32 | 
 | #define DST_NAME		"dst" | 
 |  | 
 | enum { | 
 | 	/* Remove node with given id from storage */ | 
 | 	DST_DEL_NODE	= 0, | 
 | 	/* Add remote node with given id to the storage */ | 
 | 	DST_ADD_REMOTE, | 
 | 	/* Add local node with given id to the storage to be exported and used by remote peers */ | 
 | 	DST_ADD_EXPORT, | 
 | 	/* Crypto initialization command (hash/cipher used to protect the connection) */ | 
 | 	DST_CRYPTO, | 
 | 	/* Security attributes for given connection (permissions for example) */ | 
 | 	DST_SECURITY, | 
 | 	/* Register given node in the block layer subsystem */ | 
 | 	DST_START, | 
 | 	DST_CMD_MAX | 
 | }; | 
 |  | 
 | struct dst_ctl | 
 | { | 
 | 	/* Storage name */ | 
 | 	char			name[DST_NAMELEN]; | 
 | 	/* Command flags */ | 
 | 	__u32			flags; | 
 | 	/* Command itself (see above) */ | 
 | 	__u32			cmd; | 
 | 	/* Maximum number of pages per single request in this device */ | 
 | 	__u32			max_pages; | 
 | 	/* Stale/error transaction scanning timeout in milliseconds */ | 
 | 	__u32			trans_scan_timeout; | 
 | 	/* Maximum number of retry sends before completing transaction as broken */ | 
 | 	__u32			trans_max_retries; | 
 | 	/* Storage size */ | 
 | 	__u64			size; | 
 | }; | 
 |  | 
 | /* Reply command carries completion status */ | 
 | struct dst_ctl_ack | 
 | { | 
 | 	struct cn_msg		msg; | 
 | 	int			error; | 
 | 	int			unused[3]; | 
 | }; | 
 |  | 
 | /* | 
 |  * Unfortunaltely socket address structure is not exported to userspace | 
 |  * and is redefined there. | 
 |  */ | 
 | #define SADDR_MAX_DATA	128 | 
 |  | 
 | struct saddr { | 
 | 	/* address family, AF_xxx	*/ | 
 | 	unsigned short		sa_family; | 
 | 	/* 14 bytes of protocol address	*/ | 
 | 	char			sa_data[SADDR_MAX_DATA]; | 
 | 	/* Number of bytes used in sa_data */ | 
 | 	unsigned short		sa_data_len; | 
 | }; | 
 |  | 
 | /* Address structure */ | 
 | struct dst_network_ctl | 
 | { | 
 | 	/* Socket type: datagram, stream...*/ | 
 | 	unsigned int		type; | 
 | 	/* Let me guess, is it a Jupiter diameter? */ | 
 | 	unsigned int		proto; | 
 | 	/* Peer's address */ | 
 | 	struct saddr		addr; | 
 | }; | 
 |  | 
 | struct dst_crypto_ctl | 
 | { | 
 | 	/* Cipher and hash names */ | 
 | 	char			cipher_algo[DST_NAMELEN]; | 
 | 	char			hash_algo[DST_NAMELEN]; | 
 |  | 
 | 	/* Key sizes. Can be zero for digest for example */ | 
 | 	unsigned int		cipher_keysize, hash_keysize; | 
 | 	/* Alignment. Calculated by the DST itself. */ | 
 | 	unsigned int		crypto_attached_size; | 
 | 	/* Number of threads to perform crypto operations */ | 
 | 	int			thread_num; | 
 | }; | 
 |  | 
 | /* Export security attributes have this bits checked in when client connects */ | 
 | #define DST_PERM_READ		(1<<0) | 
 | #define DST_PERM_WRITE		(1<<1) | 
 |  | 
 | /* | 
 |  * Right now it is simple model, where each remote address | 
 |  * is assigned to set of permissions it is allowed to perform. | 
 |  * In real world block device does not know anything but | 
 |  * reading and writing, so it should be more than enough. | 
 |  */ | 
 | struct dst_secure_user | 
 | { | 
 | 	unsigned int		permissions; | 
 | 	struct saddr		addr; | 
 | }; | 
 |  | 
 | /* | 
 |  * Export control command: device to export and network address to accept | 
 |  * clients to work with given device | 
 |  */ | 
 | struct dst_export_ctl | 
 | { | 
 | 	char			device[DST_NAMELEN]; | 
 | 	struct dst_network_ctl	ctl; | 
 | }; | 
 |  | 
 | enum { | 
 | 	DST_CFG	= 1, 		/* Request remote configuration */ | 
 | 	DST_IO,			/* IO command */ | 
 | 	DST_IO_RESPONSE,	/* IO response */ | 
 | 	DST_PING,		/* Keepalive message */ | 
 | 	DST_NCMD_MAX, | 
 | }; | 
 |  | 
 | struct dst_cmd | 
 | { | 
 | 	/* Network command itself, see above */ | 
 | 	__u32			cmd; | 
 | 	/* | 
 | 	 * Size of the attached data | 
 | 	 * (in most cases, for READ command it means how many bytes were requested) | 
 | 	 */ | 
 | 	__u32			size; | 
 | 	/* Crypto size: number of attached bytes with digest/hmac */ | 
 | 	__u32			csize; | 
 | 	/* Here we can carry secret data */ | 
 | 	__u32			reserved; | 
 | 	/* Read/write bits, see how they are encoded in bio structure */ | 
 | 	__u64			rw; | 
 | 	/* BIO flags */ | 
 | 	__u64			flags; | 
 | 	/* Unique command id (like transaction ID) */ | 
 | 	__u64			id; | 
 | 	/* Sector to start IO from */ | 
 | 	__u64			sector; | 
 | 	/* Hash data is placed after this header */ | 
 | 	__u8			hash[0]; | 
 | }; | 
 |  | 
 | /* | 
 |  * Convert command to/from network byte order. | 
 |  * We do not use hton*() functions, since there is | 
 |  * no 64-bit implementation. | 
 |  */ | 
 | static inline void dst_convert_cmd(struct dst_cmd *c) | 
 | { | 
 | 	c->cmd = __cpu_to_be32(c->cmd); | 
 | 	c->csize = __cpu_to_be32(c->csize); | 
 | 	c->size = __cpu_to_be32(c->size); | 
 | 	c->sector = __cpu_to_be64(c->sector); | 
 | 	c->id = __cpu_to_be64(c->id); | 
 | 	c->flags = __cpu_to_be64(c->flags); | 
 | 	c->rw = __cpu_to_be64(c->rw); | 
 | } | 
 |  | 
 | /* Transaction id */ | 
 | typedef __u64 dst_gen_t; | 
 |  | 
 | #ifdef __KERNEL__ | 
 |  | 
 | #include <linux/blkdev.h> | 
 | #include <linux/bio.h> | 
 | #include <linux/device.h> | 
 | #include <linux/mempool.h> | 
 | #include <linux/net.h> | 
 | #include <linux/poll.h> | 
 | #include <linux/rbtree.h> | 
 |  | 
 | #ifdef CONFIG_DST_DEBUG | 
 | #define dprintk(f, a...) printk(KERN_NOTICE f, ##a) | 
 | #else | 
 | static inline void __attribute__ ((format (printf, 1, 2))) | 
 | 	dprintk(const char *fmt, ...) {} | 
 | #endif | 
 |  | 
 | struct dst_node; | 
 |  | 
 | struct dst_trans | 
 | { | 
 | 	/* DST node we are working with */ | 
 | 	struct dst_node		*n; | 
 |  | 
 | 	/* Entry inside transaction tree */ | 
 | 	struct rb_node		trans_entry; | 
 |  | 
 | 	/* Merlin kills this transaction when this memory cell equals zero */ | 
 | 	atomic_t		refcnt; | 
 |  | 
 | 	/* How this transaction should be processed by crypto engine */ | 
 | 	short			enc; | 
 | 	/* How many times this transaction was resent */ | 
 | 	short			retries; | 
 | 	/* Completion status */ | 
 | 	int			error; | 
 |  | 
 | 	/* When did we send it to the remote peer */ | 
 | 	long			send_time; | 
 |  | 
 | 	/* My name is... | 
 | 	 * Well, computers does not speak, they have unique id instead */ | 
 | 	dst_gen_t		gen; | 
 |  | 
 | 	/* Block IO we are working with */ | 
 | 	struct bio		*bio; | 
 |  | 
 | 	/* Network command for above block IO request */ | 
 | 	struct dst_cmd		cmd; | 
 | }; | 
 |  | 
 | struct dst_crypto_engine | 
 | { | 
 | 	/* What should we do with all block requests */ | 
 | 	struct crypto_hash	*hash; | 
 | 	struct crypto_ablkcipher	*cipher; | 
 |  | 
 | 	/* Pool of pages used to encrypt data into before sending */ | 
 | 	int			page_num; | 
 | 	struct page		**pages; | 
 |  | 
 | 	/* What to do with current request */ | 
 | 	int			enc; | 
 | 	/* Who we are and where do we go */ | 
 | 	struct scatterlist	*src, *dst; | 
 |  | 
 | 	/* Maximum timeout waiting for encryption to be completed */ | 
 | 	long			timeout; | 
 | 	/* IV is a 64-bit sequential counter */ | 
 | 	u64			iv; | 
 |  | 
 | 	/* Secret data */ | 
 | 	void			*private; | 
 |  | 
 | 	/* Cached temporary data lives here */ | 
 | 	int			size; | 
 | 	void			*data; | 
 | }; | 
 |  | 
 | struct dst_state | 
 | { | 
 | 	/* The main state protection */ | 
 | 	struct mutex		state_lock; | 
 |  | 
 | 	/* Polling machinery for sockets */ | 
 | 	wait_queue_t 		wait; | 
 | 	wait_queue_head_t 	*whead; | 
 | 	/* Most of events are being waited here */ | 
 | 	wait_queue_head_t 	thread_wait; | 
 |  | 
 | 	/* Who owns this? */ | 
 | 	struct dst_node		*node; | 
 |  | 
 | 	/* Network address for this state */ | 
 | 	struct dst_network_ctl	ctl; | 
 |  | 
 | 	/* Permissions to work with: read-only or rw connection */ | 
 | 	u32			permissions; | 
 |  | 
 | 	/* Called when we need to clean private data */ | 
 | 	void			(* cleanup)(struct dst_state *st); | 
 |  | 
 | 	/* Used by the server: BIO completion queues BIOs here */ | 
 | 	struct list_head	request_list; | 
 | 	spinlock_t		request_lock; | 
 |  | 
 | 	/* Guess what? No, it is not number of planets */ | 
 | 	atomic_t		refcnt; | 
 |  | 
 | 	/* This flags is set when connection should be dropped */ | 
 | 	int			need_exit; | 
 |  | 
 | 	/* | 
 | 	 * Socket to work with. Second pointer is used for | 
 | 	 * lockless check if socket was changed before performing | 
 | 	 * next action (like working with cached polling result) | 
 | 	 */ | 
 | 	struct socket		*socket, *read_socket; | 
 |  | 
 | 	/* Cached preallocated data */ | 
 | 	void			*data; | 
 | 	unsigned int		size; | 
 |  | 
 | 	/* Currently processed command */ | 
 | 	struct dst_cmd		cmd; | 
 | }; | 
 |  | 
 | struct dst_info | 
 | { | 
 | 	/* Device size */ | 
 | 	u64			size; | 
 |  | 
 | 	/* Local device name for export devices */ | 
 | 	char			local[DST_NAMELEN]; | 
 |  | 
 | 	/* Network setup */ | 
 | 	struct dst_network_ctl	net; | 
 |  | 
 | 	/* Sysfs bits use this */ | 
 | 	struct device		device; | 
 | }; | 
 |  | 
 | struct dst_node | 
 | { | 
 | 	struct list_head	node_entry; | 
 |  | 
 | 	/* Hi, my name is stored here */ | 
 | 	char			name[DST_NAMELEN]; | 
 | 	/* My cache name is stored here */ | 
 | 	char			cache_name[DST_NAMELEN]; | 
 |  | 
 | 	/* Block device attached to given node. | 
 | 	 * Only valid for exporting nodes */ | 
 | 	struct block_device 	*bdev; | 
 | 	/* Network state machine for given peer */ | 
 | 	struct dst_state	*state; | 
 |  | 
 | 	/* Block IO machinery */ | 
 | 	struct request_queue	*queue; | 
 | 	struct gendisk		*disk; | 
 |  | 
 | 	/* Number of threads in processing pool */ | 
 | 	int			thread_num; | 
 | 	/* Maximum number of pages in single IO */ | 
 | 	int			max_pages; | 
 |  | 
 | 	/* I'm that big in bytes */ | 
 | 	loff_t			size; | 
 |  | 
 | 	/* Exported to userspace node information */ | 
 | 	struct dst_info		*info; | 
 |  | 
 | 	/* | 
 | 	 * Security attribute list. | 
 | 	 * Used only by exporting node currently. | 
 | 	 */ | 
 | 	struct list_head	security_list; | 
 | 	struct mutex		security_lock; | 
 |  | 
 | 	/* | 
 | 	 * When this unerflows below zero, university collapses. | 
 | 	 * But this will not happen, since node will be freed, | 
 | 	 * when reference counter reaches zero. | 
 | 	 */ | 
 | 	atomic_t		refcnt; | 
 |  | 
 | 	/* How precisely should I be started? */ | 
 | 	int 			(*start)(struct dst_node *); | 
 |  | 
 | 	/* Crypto capabilities */ | 
 | 	struct dst_crypto_ctl	crypto; | 
 | 	u8			*hash_key; | 
 | 	u8			*cipher_key; | 
 |  | 
 | 	/* Pool of processing thread */ | 
 | 	struct thread_pool	*pool; | 
 |  | 
 | 	/* Transaction IDs live here */ | 
 | 	atomic_long_t		gen; | 
 |  | 
 | 	/* | 
 | 	 * How frequently and how many times transaction | 
 | 	 * tree should be scanned to drop stale objects. | 
 | 	 */ | 
 | 	long			trans_scan_timeout; | 
 | 	int			trans_max_retries; | 
 |  | 
 | 	/* Small gnomes live here */ | 
 | 	struct rb_root		trans_root; | 
 | 	struct mutex		trans_lock; | 
 |  | 
 | 	/* | 
 | 	 * Transaction cache/memory pool. | 
 | 	 * It is big enough to contain not only transaction | 
 | 	 * itself, but additional crypto data (digest/hmac). | 
 | 	 */ | 
 | 	struct kmem_cache	*trans_cache; | 
 | 	mempool_t		*trans_pool; | 
 |  | 
 | 	/* This entity scans transaction tree */ | 
 | 	struct delayed_work 	trans_work; | 
 |  | 
 | 	wait_queue_head_t	wait; | 
 | }; | 
 |  | 
 | /* Kernel representation of the security attribute */ | 
 | struct dst_secure | 
 | { | 
 | 	struct list_head	sec_entry; | 
 | 	struct dst_secure_user	sec; | 
 | }; | 
 |  | 
 | int dst_process_bio(struct dst_node *n, struct bio *bio); | 
 |  | 
 | int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); | 
 | int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); | 
 |  | 
 | static inline struct dst_state *dst_state_get(struct dst_state *st) | 
 | { | 
 | 	BUG_ON(atomic_read(&st->refcnt) == 0); | 
 | 	atomic_inc(&st->refcnt); | 
 | 	return st; | 
 | } | 
 |  | 
 | void dst_state_put(struct dst_state *st); | 
 |  | 
 | struct dst_state *dst_state_alloc(struct dst_node *n); | 
 | int dst_state_socket_create(struct dst_state *st); | 
 | void dst_state_socket_release(struct dst_state *st); | 
 |  | 
 | void dst_state_exit_connected(struct dst_state *st); | 
 |  | 
 | int dst_state_schedule_receiver(struct dst_state *st); | 
 |  | 
 | void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str); | 
 |  | 
 | static inline void dst_state_lock(struct dst_state *st) | 
 | { | 
 | 	mutex_lock(&st->state_lock); | 
 | } | 
 |  | 
 | static inline void dst_state_unlock(struct dst_state *st) | 
 | { | 
 | 	mutex_unlock(&st->state_lock); | 
 | } | 
 |  | 
 | void dst_poll_exit(struct dst_state *st); | 
 | int dst_poll_init(struct dst_state *st); | 
 |  | 
 | static inline unsigned int dst_state_poll(struct dst_state *st) | 
 | { | 
 | 	unsigned int revents = POLLHUP | POLLERR; | 
 |  | 
 | 	dst_state_lock(st); | 
 | 	if (st->socket) | 
 | 		revents = st->socket->ops->poll(NULL, st->socket, NULL); | 
 | 	dst_state_unlock(st); | 
 |  | 
 | 	return revents; | 
 | } | 
 |  | 
 | static inline int dst_thread_setup(void *private, void *data) | 
 | { | 
 | 	return 0; | 
 | } | 
 |  | 
 | void dst_node_put(struct dst_node *n); | 
 |  | 
 | static inline struct dst_node *dst_node_get(struct dst_node *n) | 
 | { | 
 | 	atomic_inc(&n->refcnt); | 
 | 	return n; | 
 | } | 
 |  | 
 | int dst_data_recv(struct dst_state *st, void *data, unsigned int size); | 
 | int dst_recv_cdata(struct dst_state *st, void *cdata); | 
 | int dst_data_send_header(struct socket *sock, | 
 | 		void *data, unsigned int size, int more); | 
 |  | 
 | int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); | 
 |  | 
 | int dst_process_io(struct dst_state *st); | 
 | int dst_export_crypto(struct dst_node *n, struct bio *bio); | 
 | int dst_export_send_bio(struct bio *bio); | 
 | int dst_start_export(struct dst_node *n); | 
 |  | 
 | int __init dst_export_init(void); | 
 | void dst_export_exit(void); | 
 |  | 
 | /* Private structure for export block IO requests */ | 
 | struct dst_export_priv | 
 | { | 
 | 	struct list_head		request_entry; | 
 | 	struct dst_state		*state; | 
 | 	struct bio			*bio; | 
 | 	struct dst_cmd			cmd; | 
 | }; | 
 |  | 
 | static inline void dst_trans_get(struct dst_trans *t) | 
 | { | 
 | 	atomic_inc(&t->refcnt); | 
 | } | 
 |  | 
 | struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); | 
 | int dst_trans_remove(struct dst_trans *t); | 
 | int dst_trans_remove_nolock(struct dst_trans *t); | 
 | void dst_trans_put(struct dst_trans *t); | 
 |  | 
 | /* | 
 |  * Convert bio into network command. | 
 |  */ | 
 | static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, | 
 | 		u32 command, u64 id) | 
 | { | 
 | 	cmd->cmd = command; | 
 | 	cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; | 
 | 	cmd->rw = bio->bi_rw; | 
 | 	cmd->size = bio->bi_size; | 
 | 	cmd->csize = 0; | 
 | 	cmd->id = id; | 
 | 	cmd->sector = bio->bi_sector; | 
 | }; | 
 |  | 
 | int dst_trans_send(struct dst_trans *t); | 
 | int dst_trans_crypto(struct dst_trans *t); | 
 |  | 
 | int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); | 
 | void dst_node_crypto_exit(struct dst_node *n); | 
 |  | 
 | static inline int dst_need_crypto(struct dst_node *n) | 
 | { | 
 | 	struct dst_crypto_ctl *c = &n->crypto; | 
 | 	/* | 
 | 	 * Logical OR is appropriate here, but boolean one produces | 
 | 	 * more optimal code, so it is used instead. | 
 | 	 */ | 
 | 	return (c->hash_algo[0] | c->cipher_algo[0]); | 
 | } | 
 |  | 
 | int dst_node_trans_init(struct dst_node *n, unsigned int size); | 
 | void dst_node_trans_exit(struct dst_node *n); | 
 |  | 
 | /* | 
 |  * Pool of threads. | 
 |  * Ready list contains threads currently free to be used, | 
 |  * active one contains threads with some work scheduled for them. | 
 |  * Caller can wait in given queue when thread is ready. | 
 |  */ | 
 | struct thread_pool | 
 | { | 
 | 	int			thread_num; | 
 | 	struct mutex		thread_lock; | 
 | 	struct list_head	ready_list, active_list; | 
 |  | 
 | 	wait_queue_head_t	wait; | 
 | }; | 
 |  | 
 | void thread_pool_del_worker(struct thread_pool *p); | 
 | void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); | 
 | int thread_pool_add_worker(struct thread_pool *p, | 
 | 		char *name, | 
 | 		unsigned int id, | 
 | 		void *(* init)(void *data), | 
 | 		void (* cleanup)(void *data), | 
 | 		void *data); | 
 |  | 
 | void thread_pool_destroy(struct thread_pool *p); | 
 | struct thread_pool *thread_pool_create(int num, char *name, | 
 | 		void *(* init)(void *data), | 
 | 		void (* cleanup)(void *data), | 
 | 		void *data); | 
 |  | 
 | int thread_pool_schedule(struct thread_pool *p, | 
 | 		int (* setup)(void *stored_private, void *setup_data), | 
 | 		int (* action)(void *stored_private, void *setup_data), | 
 | 		void *setup_data, long timeout); | 
 | int thread_pool_schedule_private(struct thread_pool *p, | 
 | 		int (* setup)(void *private, void *data), | 
 | 		int (* action)(void *private, void *data), | 
 | 		void *data, long timeout, void *id); | 
 |  | 
 | #endif /* __KERNEL__ */ | 
 | #endif /* __DST_H */ |